@vibeframe/cli 0.27.0 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/LICENSE +21 -0
  2. package/dist/agent/adapters/index.d.ts +1 -0
  3. package/dist/agent/adapters/index.d.ts.map +1 -1
  4. package/dist/agent/adapters/index.js +5 -0
  5. package/dist/agent/adapters/index.js.map +1 -1
  6. package/dist/agent/adapters/openrouter.d.ts +16 -0
  7. package/dist/agent/adapters/openrouter.d.ts.map +1 -0
  8. package/dist/agent/adapters/openrouter.js +100 -0
  9. package/dist/agent/adapters/openrouter.js.map +1 -0
  10. package/dist/agent/types.d.ts +1 -1
  11. package/dist/agent/types.d.ts.map +1 -1
  12. package/dist/commands/agent.d.ts.map +1 -1
  13. package/dist/commands/agent.js +3 -1
  14. package/dist/commands/agent.js.map +1 -1
  15. package/dist/commands/ai-edit-cli.d.ts.map +1 -1
  16. package/dist/commands/ai-edit-cli.js +18 -0
  17. package/dist/commands/ai-edit-cli.js.map +1 -1
  18. package/dist/commands/generate.js +14 -0
  19. package/dist/commands/generate.js.map +1 -1
  20. package/dist/commands/schema.d.ts +1 -0
  21. package/dist/commands/schema.d.ts.map +1 -1
  22. package/dist/commands/schema.js +122 -21
  23. package/dist/commands/schema.js.map +1 -1
  24. package/dist/commands/setup.js +5 -2
  25. package/dist/commands/setup.js.map +1 -1
  26. package/dist/config/schema.d.ts +2 -1
  27. package/dist/config/schema.d.ts.map +1 -1
  28. package/dist/config/schema.js +2 -0
  29. package/dist/config/schema.js.map +1 -1
  30. package/dist/index.js +0 -0
  31. package/package.json +16 -12
  32. package/.turbo/turbo-build.log +0 -4
  33. package/.turbo/turbo-lint.log +0 -21
  34. package/.turbo/turbo-test.log +0 -689
  35. package/src/agent/adapters/claude.ts +0 -143
  36. package/src/agent/adapters/gemini.ts +0 -159
  37. package/src/agent/adapters/index.ts +0 -61
  38. package/src/agent/adapters/ollama.ts +0 -231
  39. package/src/agent/adapters/openai.ts +0 -116
  40. package/src/agent/adapters/xai.ts +0 -119
  41. package/src/agent/index.ts +0 -251
  42. package/src/agent/memory/index.ts +0 -151
  43. package/src/agent/prompts/system.ts +0 -106
  44. package/src/agent/tools/ai-editing.ts +0 -845
  45. package/src/agent/tools/ai-generation.ts +0 -1073
  46. package/src/agent/tools/ai-pipeline.ts +0 -1055
  47. package/src/agent/tools/ai.ts +0 -21
  48. package/src/agent/tools/batch.ts +0 -429
  49. package/src/agent/tools/e2e.test.ts +0 -545
  50. package/src/agent/tools/export.ts +0 -184
  51. package/src/agent/tools/filesystem.ts +0 -237
  52. package/src/agent/tools/index.ts +0 -150
  53. package/src/agent/tools/integration.test.ts +0 -775
  54. package/src/agent/tools/media.ts +0 -697
  55. package/src/agent/tools/project.ts +0 -313
  56. package/src/agent/tools/timeline.ts +0 -951
  57. package/src/agent/types.ts +0 -68
  58. package/src/commands/agent.ts +0 -340
  59. package/src/commands/ai-analyze.ts +0 -429
  60. package/src/commands/ai-animated-caption.ts +0 -390
  61. package/src/commands/ai-audio.ts +0 -941
  62. package/src/commands/ai-broll.ts +0 -490
  63. package/src/commands/ai-edit-cli.ts +0 -658
  64. package/src/commands/ai-edit.ts +0 -1542
  65. package/src/commands/ai-fill-gaps.ts +0 -566
  66. package/src/commands/ai-helpers.ts +0 -65
  67. package/src/commands/ai-highlights.ts +0 -1303
  68. package/src/commands/ai-image.ts +0 -761
  69. package/src/commands/ai-motion.ts +0 -347
  70. package/src/commands/ai-narrate.ts +0 -451
  71. package/src/commands/ai-review.ts +0 -309
  72. package/src/commands/ai-script-pipeline-cli.ts +0 -1710
  73. package/src/commands/ai-script-pipeline.ts +0 -1365
  74. package/src/commands/ai-suggest-edit.ts +0 -264
  75. package/src/commands/ai-video-fx.ts +0 -445
  76. package/src/commands/ai-video.ts +0 -915
  77. package/src/commands/ai-viral.ts +0 -595
  78. package/src/commands/ai-visual-fx.ts +0 -601
  79. package/src/commands/ai.test.ts +0 -627
  80. package/src/commands/ai.ts +0 -307
  81. package/src/commands/analyze.ts +0 -282
  82. package/src/commands/audio.ts +0 -644
  83. package/src/commands/batch.test.ts +0 -279
  84. package/src/commands/batch.ts +0 -440
  85. package/src/commands/detect.ts +0 -329
  86. package/src/commands/doctor.ts +0 -237
  87. package/src/commands/edit-cmd.ts +0 -1014
  88. package/src/commands/export.ts +0 -918
  89. package/src/commands/generate.ts +0 -2146
  90. package/src/commands/media.ts +0 -177
  91. package/src/commands/output.ts +0 -142
  92. package/src/commands/pipeline.ts +0 -398
  93. package/src/commands/project.test.ts +0 -127
  94. package/src/commands/project.ts +0 -149
  95. package/src/commands/sanitize.ts +0 -60
  96. package/src/commands/schema.ts +0 -130
  97. package/src/commands/setup.ts +0 -509
  98. package/src/commands/timeline.test.ts +0 -499
  99. package/src/commands/timeline.ts +0 -529
  100. package/src/commands/validate.ts +0 -77
  101. package/src/config/config.test.ts +0 -197
  102. package/src/config/index.ts +0 -125
  103. package/src/config/schema.ts +0 -82
  104. package/src/engine/index.ts +0 -2
  105. package/src/engine/project.test.ts +0 -702
  106. package/src/engine/project.ts +0 -439
  107. package/src/index.ts +0 -146
  108. package/src/utils/api-key.test.ts +0 -41
  109. package/src/utils/api-key.ts +0 -247
  110. package/src/utils/audio.ts +0 -83
  111. package/src/utils/exec-safe.ts +0 -75
  112. package/src/utils/first-run.ts +0 -52
  113. package/src/utils/provider-resolver.ts +0 -56
  114. package/src/utils/remotion.ts +0 -951
  115. package/src/utils/subtitle.test.ts +0 -227
  116. package/src/utils/subtitle.ts +0 -169
  117. package/src/utils/tty.ts +0 -196
  118. package/tsconfig.json +0 -20
@@ -1,1542 +0,0 @@
1
- /**
2
- * @module ai-edit
3
- *
4
- * Video/audio editing execute functions and supporting types.
5
- *
6
- * CLI commands: silence-cut, jump-cut, caption, noise-reduce, fade,
7
- * translate-srt, text-overlay
8
- *
9
- * Execute functions (also used by agent tools via ai.ts re-exports):
10
- * executeSilenceCut, executeJumpCut, executeCaption, executeNoiseReduce,
11
- * executeFade, executeTranslateSrt, applyTextOverlays, executeTextOverlay
12
- *
13
- * CLI command registrations live in ai-edit-cli.ts (registerEditCommands).
14
- * Extracted from ai.ts as part of modularisation.
15
- *
16
- * @dependencies FFmpeg, Whisper (OpenAI), Gemini (Google), Claude/OpenAI (translation)
17
- */
18
-
19
- import { resolve, dirname, basename, extname, join } from 'node:path';
20
- import { readFile, writeFile, mkdir } from 'node:fs/promises';
21
- import { existsSync } from 'node:fs';
22
- import {
23
- GeminiProvider,
24
- WhisperProvider,
25
- } from '@vibeframe/ai-providers';
26
- import { getApiKey } from '../utils/api-key.js';
27
- import { getVideoDuration } from '../utils/audio.js';
28
- import { formatSRT, parseSRT } from '../utils/subtitle.js';
29
- import { execSafe, commandExists } from '../utils/exec-safe.js';
30
-
31
-
32
- // ── Exported types and execute functions ────────────────────────────────────
33
-
34
- // ============================================================================
35
- // Silence Cut
36
- // ============================================================================
37
-
38
- /** A detected silent segment within a media file. */
39
- export interface SilencePeriod {
40
- /** Start time in seconds */
41
- start: number;
42
- /** End time in seconds */
43
- end: number;
44
- /** Duration of the silent period in seconds */
45
- duration: number;
46
- }
47
-
48
- /** Options for {@link executeSilenceCut}. */
49
- export interface SilenceCutOptions {
50
- /** Path to the input video file */
51
- videoPath: string;
52
- /** Path for the output video (silent segments removed) */
53
- outputPath: string;
54
- /** FFmpeg noise threshold in dB (default: -30) */
55
- noiseThreshold?: number;
56
- /** Minimum silence duration in seconds to detect (default: 0.5) */
57
- minDuration?: number;
58
- /** Padding in seconds kept around cuts (default: 0.1) */
59
- padding?: number;
60
- /** If true, only analyze without producing output video */
61
- analyzeOnly?: boolean;
62
- /** Use Gemini multimodal analysis instead of FFmpeg silencedetect */
63
- useGemini?: boolean;
64
- /** Gemini model shorthand: "flash", "flash-2.5", "pro" */
65
- model?: string;
66
- /** Use low-resolution mode for Gemini (longer videos) */
67
- lowRes?: boolean;
68
- /** Override API key (Google for Gemini mode) */
69
- apiKey?: string;
70
- }
71
-
72
- /** Result from {@link executeSilenceCut}. */
73
- export interface SilenceCutResult {
74
- /** Whether the operation succeeded */
75
- success: boolean;
76
- /** Path to the output video (undefined in analyze-only mode) */
77
- outputPath?: string;
78
- /** Total duration of the source video in seconds */
79
- totalDuration?: number;
80
- /** Detected silent periods */
81
- silentPeriods?: SilencePeriod[];
82
- /** Total silent duration removed in seconds */
83
- silentDuration?: number;
84
- /** Detection method used */
85
- method?: "ffmpeg" | "gemini";
86
- /** Error message on failure */
87
- error?: string;
88
- }
89
-
90
- /**
91
- * Detect silent periods in a media file using FFmpeg silencedetect
92
- */
93
- async function detectSilencePeriods(
94
- videoPath: string,
95
- noiseThreshold: number,
96
- minDuration: number,
97
- ): Promise<{ periods: SilencePeriod[]; totalDuration: number }> {
98
- // Get total duration
99
- const totalDuration = await getVideoDuration(videoPath);
100
-
101
- // Run silence detection
102
- const { stdout, stderr } = await execSafe("ffmpeg", [
103
- "-i", videoPath,
104
- "-af", `silencedetect=noise=${noiseThreshold}dB:d=${minDuration}`,
105
- "-f", "null", "-",
106
- ], { maxBuffer: 50 * 1024 * 1024 }).catch((err) => {
107
- // ffmpeg writes filter output to stderr and exits non-zero with -f null
108
- if (err.stdout !== undefined || err.stderr !== undefined) {
109
- return { stdout: err.stdout || "", stderr: err.stderr || "" };
110
- }
111
- throw err;
112
- });
113
- const silenceOutput = stdout + stderr;
114
-
115
- const periods: SilencePeriod[] = [];
116
- const startRegex = /silence_start: (\d+\.?\d*)/g;
117
- const endRegex = /silence_end: (\d+\.?\d*) \| silence_duration: (\d+\.?\d*)/g;
118
-
119
- const starts: number[] = [];
120
- let match;
121
- while ((match = startRegex.exec(silenceOutput)) !== null) {
122
- starts.push(parseFloat(match[1]));
123
- }
124
-
125
- let i = 0;
126
- while ((match = endRegex.exec(silenceOutput)) !== null) {
127
- const end = parseFloat(match[1]);
128
- const duration = parseFloat(match[2]);
129
- const start = i < starts.length ? starts[i] : end - duration;
130
- periods.push({ start, end, duration });
131
- i++;
132
- }
133
-
134
- return { periods, totalDuration };
135
- }
136
-
137
- /**
138
- * Detect silent/dead segments using Gemini Video Understanding (multimodal analysis)
139
- */
140
- async function detectSilencePeriodsWithGemini(
141
- videoPath: string,
142
- minDuration: number,
143
- options: { model?: string; lowRes?: boolean; apiKey?: string },
144
- ): Promise<{ periods: SilencePeriod[]; totalDuration: number }> {
145
- const totalDuration = await getVideoDuration(videoPath);
146
-
147
- const geminiApiKey = options.apiKey || await getApiKey("GOOGLE_API_KEY", "Google");
148
- if (!geminiApiKey) {
149
- throw new Error("Google API key required for Gemini Video Understanding. Run 'vibe setup' or set GOOGLE_API_KEY in .env");
150
- }
151
-
152
- const gemini = new GeminiProvider();
153
- await gemini.initialize({ apiKey: geminiApiKey });
154
-
155
- const videoBuffer = await readFile(videoPath);
156
-
157
- // Map model shorthand to full model ID
158
- const modelMap: Record<string, string> = {
159
- flash: "gemini-3-flash-preview",
160
- "flash-2.5": "gemini-2.5-flash",
161
- pro: "gemini-2.5-pro",
162
- };
163
- const modelId = options.model ? (modelMap[options.model] || modelMap.flash) : undefined;
164
-
165
- const prompt = `Analyze this video and identify all silent or dead segments where there is NO meaningful content.
166
-
167
- Detect these as silent/dead segments:
168
- - Complete silence (no audio at all)
169
- - Dead air / ambient noise with no speech or meaningful sound
170
- - Long pauses between speakers or topics (${minDuration}+ seconds)
171
- - Technical silence (e.g., blank screen with no audio)
172
- - Sections with only background noise and no intentional content
173
-
174
- Do NOT mark these as silent (keep them):
175
- - Intentional dramatic pauses (short, part of storytelling)
176
- - Music-only sections (background music, intros, outros)
177
- - Natural breathing pauses within sentences (under ${minDuration} seconds)
178
- - Applause, laughter, or audience reactions
179
- - Sound effects or ambient audio that is part of the content
180
-
181
- Only include segments that are at least ${minDuration} seconds long.
182
- The video total duration is ${totalDuration.toFixed(1)} seconds.
183
-
184
- IMPORTANT: Respond ONLY with valid JSON in this exact format:
185
- {
186
- "silentSegments": [
187
- {
188
- "start": 5.2,
189
- "end": 8.7,
190
- "reason": "Dead air between speakers"
191
- }
192
- ]
193
- }
194
-
195
- If there are no silent segments, return: { "silentSegments": [] }`;
196
-
197
- const result = await gemini.analyzeVideo(videoBuffer, prompt, {
198
- fps: 1,
199
- lowResolution: options.lowRes,
200
- ...(modelId ? { model: modelId as "gemini-3-flash-preview" | "gemini-2.5-flash" | "gemini-2.5-pro" } : {}),
201
- });
202
-
203
- if (!result.success || !result.response) {
204
- throw new Error(`Gemini analysis failed: ${result.error || "No response"}`);
205
- }
206
-
207
- // Parse JSON from Gemini response
208
- let jsonStr = result.response;
209
- const jsonMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
210
- if (jsonMatch) jsonStr = jsonMatch[1];
211
- const objectMatch = jsonStr.match(/\{[\s\S]*"silentSegments"[\s\S]*\}/);
212
- if (objectMatch) jsonStr = objectMatch[0];
213
-
214
- const parsed = JSON.parse(jsonStr);
215
-
216
- const periods: SilencePeriod[] = [];
217
- if (parsed.silentSegments && Array.isArray(parsed.silentSegments)) {
218
- for (const seg of parsed.silentSegments) {
219
- const rawStart = Number(seg.start);
220
- const rawEnd = Number(seg.end);
221
- if (isNaN(rawStart) || isNaN(rawEnd)) continue;
222
- // Clamp to video duration, then validate
223
- const start = Math.max(0, rawStart);
224
- const end = Math.min(rawEnd, totalDuration);
225
- const duration = end - start;
226
- if (duration >= minDuration) {
227
- periods.push({ start, end, duration });
228
- }
229
- }
230
- }
231
-
232
- // Sort by start time
233
- periods.sort((a, b) => a.start - b.start);
234
-
235
- return { periods, totalDuration };
236
- }
237
-
238
- /**
239
- * Remove silent segments from a video using FFmpeg or Gemini detection.
240
- *
241
- * Detects silence via FFmpeg silencedetect (default) or Gemini multimodal
242
- * analysis, then trims and concatenates the non-silent segments.
243
- *
244
- * @param options - Silence cut configuration
245
- * @returns Result with output path and detected silent periods
246
- */
247
- export async function executeSilenceCut(options: SilenceCutOptions): Promise<SilenceCutResult> {
248
- const {
249
- videoPath,
250
- outputPath,
251
- noiseThreshold = -30,
252
- minDuration = 0.5,
253
- padding = 0.1,
254
- analyzeOnly = false,
255
- useGemini = false,
256
- } = options;
257
-
258
- if (!existsSync(videoPath)) {
259
- return { success: false, error: `Video not found: ${videoPath}` };
260
- }
261
-
262
- if (!commandExists("ffmpeg")) {
263
- return { success: false, error: "FFmpeg not found. Please install FFmpeg." };
264
- }
265
-
266
- const method = useGemini ? "gemini" : "ffmpeg";
267
-
268
- try {
269
- const { periods, totalDuration } = useGemini
270
- ? await detectSilencePeriodsWithGemini(videoPath, minDuration, {
271
- model: options.model,
272
- lowRes: options.lowRes,
273
- apiKey: options.apiKey,
274
- })
275
- : await detectSilencePeriods(videoPath, noiseThreshold, minDuration);
276
- const silentDuration = periods.reduce((sum, p) => sum + p.duration, 0);
277
-
278
- if (analyzeOnly || periods.length === 0) {
279
- return {
280
- success: true,
281
- totalDuration,
282
- silentPeriods: periods,
283
- silentDuration,
284
- method,
285
- };
286
- }
287
-
288
- // Compute non-silent segments with padding
289
- const segments: { start: number; end: number }[] = [];
290
- let cursor = 0;
291
-
292
- for (const period of periods) {
293
- const segEnd = Math.min(period.start + padding, totalDuration);
294
- if (segEnd > cursor) {
295
- segments.push({ start: Math.max(0, cursor - padding), end: segEnd });
296
- }
297
- cursor = period.end;
298
- }
299
- // Add final segment after last silence
300
- if (cursor < totalDuration) {
301
- segments.push({ start: Math.max(0, cursor - padding), end: totalDuration });
302
- }
303
-
304
- if (segments.length === 0) {
305
- return { success: false, error: "No non-silent segments found" };
306
- }
307
-
308
- // Build filter_complex with trim+concat per segment.
309
- // aselect is broken on FFmpeg 8.x (audio duration unchanged), so we use
310
- // atrim/trim per segment and concat them all.
311
- const vParts: string[] = [];
312
- const aParts: string[] = [];
313
- const concatInputs: string[] = [];
314
-
315
- for (let i = 0; i < segments.length; i++) {
316
- const s = segments[i].start.toFixed(4);
317
- const e = segments[i].end.toFixed(4);
318
- vParts.push(`[0:v]trim=${s}:${e},setpts=PTS-STARTPTS[v${i}]`);
319
- aParts.push(`[0:a]atrim=${s}:${e},asetpts=PTS-STARTPTS[a${i}]`);
320
- concatInputs.push(`[v${i}][a${i}]`);
321
- }
322
-
323
- const filterComplex = [
324
- ...vParts,
325
- ...aParts,
326
- `${concatInputs.join("")}concat=n=${segments.length}:v=1:a=1[outv][outa]`,
327
- ].join(";");
328
-
329
- await execSafe("ffmpeg", [
330
- "-i", videoPath,
331
- "-filter_complex", filterComplex,
332
- "-map", "[outv]", "-map", "[outa]",
333
- "-c:v", "libx264", "-preset", "fast", "-crf", "18",
334
- "-c:a", "aac", "-b:a", "192k",
335
- outputPath, "-y",
336
- ], { timeout: 600000, maxBuffer: 50 * 1024 * 1024 });
337
-
338
- return {
339
- success: true,
340
- outputPath,
341
- totalDuration,
342
- silentPeriods: periods,
343
- silentDuration,
344
- method,
345
- };
346
- } catch (error) {
347
- return {
348
- success: false,
349
- error: `Silence cut failed: ${error instanceof Error ? error.message : String(error)}`,
350
- };
351
- }
352
- }
353
-
354
- // ============================================================================
355
- // Jump Cut (Filler Word Removal)
356
- // ============================================================================
357
-
358
- /** A detected filler word with its time range. */
359
- export interface FillerWord {
360
- /** The filler word or merged phrase */
361
- word: string;
362
- /** Start time in seconds */
363
- start: number;
364
- /** End time in seconds */
365
- end: number;
366
- }
367
-
368
- /** Options for {@link executeJumpCut}. */
369
- export interface JumpCutOptions {
370
- /** Path to the input video file */
371
- videoPath: string;
372
- /** Path for the output video (filler words removed) */
373
- outputPath: string;
374
- /** Custom filler words to detect (default: {@link DEFAULT_FILLER_WORDS}) */
375
- fillers?: string[];
376
- /** Padding in seconds around filler cuts (default: 0.05) */
377
- padding?: number;
378
- /** Language code for Whisper transcription */
379
- language?: string;
380
- /** If true, only analyze without producing output video */
381
- analyzeOnly?: boolean;
382
- /** Override OpenAI API key */
383
- apiKey?: string;
384
- }
385
-
386
- /** Result from {@link executeJumpCut}. */
387
- export interface JumpCutResult {
388
- /** Whether the operation succeeded */
389
- success: boolean;
390
- /** Path to the output video (undefined in analyze-only mode) */
391
- outputPath?: string;
392
- /** Total duration of the source video in seconds */
393
- totalDuration?: number;
394
- /** Number of filler word occurrences detected */
395
- fillerCount?: number;
396
- /** Total duration of filler words in seconds */
397
- fillerDuration?: number;
398
- /** Detected filler word ranges */
399
- fillers?: FillerWord[];
400
- /** Error message on failure */
401
- error?: string;
402
- }
403
-
404
- /** Default set of filler words detected by jump-cut. */
405
- export const DEFAULT_FILLER_WORDS = [
406
- "um", "uh", "uh-huh", "hmm", "like", "you know", "so",
407
- "basically", "literally", "right", "okay", "well", "i mean", "actually",
408
- ];
409
-
410
- /**
411
- * Transcribe audio with word-level timestamps using Whisper API directly.
412
- * Uses timestamp_granularities[]=word for filler detection.
413
- */
414
- export async function transcribeWithWords(
415
- audioPath: string,
416
- apiKey: string,
417
- language?: string,
418
- ): Promise<{ words: { word: string; start: number; end: number }[]; text: string }> {
419
- const audioBuffer = await readFile(audioPath);
420
- const audioBlob = new Blob([audioBuffer]);
421
-
422
- const formData = new FormData();
423
- formData.append("file", audioBlob, "audio.wav");
424
- formData.append("model", "whisper-1");
425
- formData.append("response_format", "verbose_json");
426
- formData.append("timestamp_granularities[]", "word");
427
-
428
- if (language) {
429
- formData.append("language", language);
430
- }
431
-
432
- const response = await fetch("https://api.openai.com/v1/audio/transcriptions", {
433
- method: "POST",
434
- headers: {
435
- Authorization: `Bearer ${apiKey}`,
436
- },
437
- body: formData,
438
- });
439
-
440
- if (!response.ok) {
441
- const error = await response.text();
442
- throw new Error(`Whisper transcription failed: ${error}`);
443
- }
444
-
445
- const data = await response.json() as {
446
- text: string;
447
- words?: Array<{ word: string; start: number; end: number }>;
448
- };
449
-
450
- return {
451
- words: data.words || [],
452
- text: data.text,
453
- };
454
- }
455
-
456
- /**
457
- * Detect filler word ranges and merge adjacent ones within padding distance.
458
- *
459
- * @param words - Word-level transcript with timestamps
460
- * @param fillers - List of filler words/phrases to match
461
- * @param padding - Maximum gap in seconds to merge adjacent fillers
462
- * @returns Merged filler word ranges sorted by start time
463
- */
464
- export function detectFillerRanges(
465
- words: { word: string; start: number; end: number }[],
466
- fillers: string[],
467
- padding: number,
468
- ): FillerWord[] {
469
- const fillerSet = new Set(fillers.map((f) => f.toLowerCase().trim()));
470
-
471
- // Find individual filler words
472
- const matches: FillerWord[] = [];
473
- for (const w of words) {
474
- const cleaned = w.word.toLowerCase().replace(/[^a-z\s-]/g, "").trim();
475
- if (fillerSet.has(cleaned)) {
476
- matches.push({ word: w.word, start: w.start, end: w.end });
477
- }
478
- }
479
-
480
- if (matches.length === 0) return [];
481
-
482
- // Merge adjacent filler ranges (within padding distance)
483
- const merged: FillerWord[] = [{ ...matches[0] }];
484
- for (let i = 1; i < matches.length; i++) {
485
- const last = merged[merged.length - 1];
486
- if (matches[i].start - last.end <= padding * 2) {
487
- last.end = matches[i].end;
488
- last.word += ` ${matches[i].word}`;
489
- } else {
490
- merged.push({ ...matches[i] });
491
- }
492
- }
493
-
494
- return merged;
495
- }
496
-
497
- /**
498
- * Remove filler words from a video using Whisper word-level timestamps + FFmpeg concat.
499
- *
500
- * Pipeline: extract audio -> Whisper transcription (word-level) -> detect fillers ->
501
- * invert to keep-segments -> FFmpeg stream-copy concat.
502
- *
503
- * @param options - Jump cut configuration
504
- * @returns Result with output path and detected fillers
505
- */
506
- export async function executeJumpCut(options: JumpCutOptions): Promise<JumpCutResult> {
507
- const {
508
- videoPath,
509
- outputPath,
510
- fillers = DEFAULT_FILLER_WORDS,
511
- padding = 0.05,
512
- language,
513
- analyzeOnly = false,
514
- apiKey,
515
- } = options;
516
-
517
- if (!existsSync(videoPath)) {
518
- return { success: false, error: `Video not found: ${videoPath}` };
519
- }
520
-
521
- if (!commandExists("ffmpeg")) {
522
- return { success: false, error: "FFmpeg not found. Please install FFmpeg." };
523
- }
524
-
525
- const openaiKey = apiKey || process.env.OPENAI_API_KEY;
526
- if (!openaiKey) {
527
- return { success: false, error: "OpenAI API key required for Whisper transcription. Run 'vibe setup' or set OPENAI_API_KEY in .env" };
528
- }
529
-
530
- try {
531
- const tmpDir = `/tmp/vibe_jumpcut_${Date.now()}`;
532
- await mkdir(tmpDir, { recursive: true });
533
- const audioPath = join(tmpDir, "audio.wav");
534
-
535
- try {
536
- // Step 1: Extract audio
537
- await execSafe("ffmpeg", [
538
- "-i", videoPath, "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", audioPath, "-y",
539
- ], { timeout: 300000, maxBuffer: 50 * 1024 * 1024 });
540
-
541
- // Step 2: Transcribe with word-level timestamps
542
- const { words } = await transcribeWithWords(audioPath, openaiKey, language);
543
-
544
- if (words.length === 0) {
545
- return { success: false, error: "No words detected in audio" };
546
- }
547
-
548
- // Step 3: Detect filler ranges
549
- const fillerRanges = detectFillerRanges(words, fillers, padding);
550
- const totalDuration = await getVideoDuration(videoPath);
551
- const fillerDuration = fillerRanges.reduce((sum, f) => sum + (f.end - f.start), 0);
552
-
553
- if (analyzeOnly || fillerRanges.length === 0) {
554
- return {
555
- success: true,
556
- totalDuration,
557
- fillerCount: fillerRanges.length,
558
- fillerDuration,
559
- fillers: fillerRanges,
560
- };
561
- }
562
-
563
- // Step 4: Compute keep-segments (invert filler ranges)
564
- const segments: { start: number; end: number }[] = [];
565
- let cursor = 0;
566
-
567
- for (const filler of fillerRanges) {
568
- const segStart = Math.max(0, cursor);
569
- const segEnd = Math.max(segStart, filler.start - padding);
570
- if (segEnd > segStart) {
571
- segments.push({ start: segStart, end: segEnd });
572
- }
573
- cursor = filler.end + padding;
574
- }
575
- // Add final segment after last filler
576
- if (cursor < totalDuration) {
577
- segments.push({ start: cursor, end: totalDuration });
578
- }
579
-
580
- if (segments.length === 0) {
581
- return { success: false, error: "No non-filler segments found" };
582
- }
583
-
584
- // Step 5: Extract segments and concat with FFmpeg (stream copy)
585
- const segmentPaths: string[] = [];
586
- for (let i = 0; i < segments.length; i++) {
587
- const seg = segments[i];
588
- const segPath = join(tmpDir, `seg-${i.toString().padStart(4, "0")}.ts`);
589
- const duration = seg.end - seg.start;
590
- await execSafe("ffmpeg", [
591
- "-i", videoPath, "-ss", String(seg.start), "-t", String(duration),
592
- "-c", "copy", "-avoid_negative_ts", "make_zero", segPath, "-y",
593
- ], { timeout: 300000, maxBuffer: 50 * 1024 * 1024 });
594
- segmentPaths.push(segPath);
595
- }
596
-
597
- // Create concat list
598
- const concatList = segmentPaths.map((p) => `file '${p}'`).join("\n");
599
- const listPath = join(tmpDir, "concat.txt");
600
- await writeFile(listPath, concatList);
601
-
602
- // Concat segments
603
- await execSafe("ffmpeg", [
604
- "-f", "concat", "-safe", "0", "-i", listPath, "-c", "copy", outputPath, "-y",
605
- ], { timeout: 300000, maxBuffer: 50 * 1024 * 1024 });
606
-
607
- return {
608
- success: true,
609
- outputPath,
610
- totalDuration,
611
- fillerCount: fillerRanges.length,
612
- fillerDuration,
613
- fillers: fillerRanges,
614
- };
615
- } finally {
616
- // Cleanup temp files
617
- try {
618
- const { rm } = await import("node:fs/promises");
619
- await rm(tmpDir, { recursive: true, force: true });
620
- } catch {
621
- // Ignore cleanup errors
622
- }
623
- }
624
- } catch (error) {
625
- return {
626
- success: false,
627
- error: `Jump cut failed: ${error instanceof Error ? error.message : String(error)}`,
628
- };
629
- }
630
- }
631
-
632
- // ============================================================================
633
- // Caption
634
- // ============================================================================
635
-
636
- /** Visual style preset for burned-in captions. */
637
- export type CaptionStyle = "minimal" | "bold" | "outline" | "karaoke";
638
-
639
- /** Options for {@link executeCaption}. */
640
- export interface CaptionOptions {
641
- /** Path to the input video file */
642
- videoPath: string;
643
- /** Path for the output video with burned-in captions */
644
- outputPath: string;
645
- /** Caption visual style preset (default: "bold") */
646
- style?: CaptionStyle;
647
- /** Font size override (auto-calculated from video height if omitted) */
648
- fontSize?: number;
649
- /** Font color name (default: "white") */
650
- fontColor?: string;
651
- /** Language code for Whisper transcription */
652
- language?: string;
653
- /** Vertical position of captions (default: "bottom") */
654
- position?: "top" | "center" | "bottom";
655
- /** Override OpenAI API key */
656
- apiKey?: string;
657
- }
658
-
659
- /** Result from {@link executeCaption}. */
660
- export interface CaptionResult {
661
- /** Whether the operation succeeded */
662
- success: boolean;
663
- /** Path to the output video with captions */
664
- outputPath?: string;
665
- /** Path to the generated SRT file */
666
- srtPath?: string;
667
- /** Number of transcript segments */
668
- segmentCount?: number;
669
- /** Error message on failure */
670
- error?: string;
671
- }
672
-
673
- /**
674
- * Get ASS force_style string for caption preset
675
- */
676
- function getCaptionForceStyle(
677
- style: CaptionStyle,
678
- fontSize: number,
679
- fontColor: string,
680
- position: "top" | "center" | "bottom",
681
- ): string {
682
- // ASS alignment: 1-3 bottom, 4-6 middle, 7-9 top (left/center/right)
683
- const alignment = position === "top" ? 8 : position === "center" ? 5 : 2;
684
- const marginV = position === "center" ? 0 : 30;
685
-
686
- switch (style) {
687
- case "minimal":
688
- return `FontSize=${fontSize},FontName=Arial,PrimaryColour=&H00FFFFFF,OutlineColour=&H80000000,Outline=1,Shadow=0,Alignment=${alignment},MarginV=${marginV}`;
689
- case "bold":
690
- return `FontSize=${fontSize},FontName=Arial,Bold=1,PrimaryColour=&H00${fontColor === "yellow" ? "00FFFF" : "FFFFFF"},OutlineColour=&H00000000,Outline=3,Shadow=1,Alignment=${alignment},MarginV=${marginV}`;
691
- case "outline":
692
- return `FontSize=${fontSize},FontName=Arial,Bold=1,PrimaryColour=&H00FFFFFF,OutlineColour=&H000000FF,Outline=4,Shadow=0,Alignment=${alignment},MarginV=${marginV}`;
693
- case "karaoke":
694
- return `FontSize=${fontSize},FontName=Arial,Bold=1,PrimaryColour=&H0000FFFF,OutlineColour=&H00000000,Outline=2,Shadow=1,Alignment=${alignment},MarginV=${marginV}`;
695
- default:
696
- return `FontSize=${fontSize},FontName=Arial,Bold=1,PrimaryColour=&H00FFFFFF,OutlineColour=&H00000000,Outline=3,Shadow=1,Alignment=${alignment},MarginV=${marginV}`;
697
- }
698
- }
699
-
700
/**
 * Transcribe video audio and burn styled captions using Whisper + FFmpeg.
 *
 * Pipeline: extract audio -> Whisper transcription -> generate SRT ->
 * burn captions via FFmpeg subtitles filter (or Remotion fallback when the
 * FFmpeg build lacks libass).
 *
 * Requires an OpenAI API key (options.apiKey or OPENAI_API_KEY). Temp files
 * live under /tmp and are removed in a finally block regardless of outcome.
 * On success (and on Remotion-fallback failure) an .srt file is written next
 * to the output video so the user keeps the transcript.
 * NOTE(review): the /tmp prefix is POSIX-only — confirm Windows is out of scope.
 *
 * @param options - Caption configuration
 * @returns Result with output video path and SRT path; never throws —
 *   failures are reported via `{ success: false, error }`
 */
export async function executeCaption(options: CaptionOptions): Promise<CaptionResult> {
  const {
    videoPath,
    outputPath,
    style = "bold",
    fontSize: customFontSize,
    fontColor = "white",
    language,
    position = "bottom",
    apiKey,
  } = options;

  if (!existsSync(videoPath)) {
    return { success: false, error: `Video not found: ${videoPath}` };
  }

  if (!commandExists("ffmpeg")) {
    return { success: false, error: "FFmpeg not found. Please install FFmpeg." };
  }

  // Explicit key takes precedence over the environment variable.
  const openaiKey = apiKey || process.env.OPENAI_API_KEY;
  if (!openaiKey) {
    return { success: false, error: "OpenAI API key required for Whisper transcription. Run 'vibe setup' or set OPENAI_API_KEY in .env" };
  }

  try {
    // Step 1: Extract audio from video (16 kHz mono PCM — Whisper's preferred input)
    const tmpDir = `/tmp/vibe_caption_${Date.now()}`;
    await mkdir(tmpDir, { recursive: true });
    const audioPath = join(tmpDir, "audio.wav");
    const srtPath = join(tmpDir, "captions.srt");

    try {
      await execSafe("ffmpeg", [
        "-i", videoPath, "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", audioPath, "-y",
      ], { timeout: 300000, maxBuffer: 50 * 1024 * 1024 });

      // Step 2: Transcribe with Whisper
      const whisper = new WhisperProvider();
      await whisper.initialize({ apiKey: openaiKey });

      const audioBuffer = await readFile(audioPath);
      const audioBlob = new Blob([audioBuffer]);
      const transcriptResult = await whisper.transcribe(audioBlob, language);

      if (transcriptResult.status === "failed" || !transcriptResult.segments || transcriptResult.segments.length === 0) {
        return { success: false, error: `Transcription failed: ${transcriptResult.error || "No segments detected"}` };
      }

      // Step 3: Generate SRT
      const srtContent = formatSRT(transcriptResult.segments);
      await writeFile(srtPath, srtContent);

      // Step 4: Get video resolution for auto font size (~1/18 of frame height)
      const { width, height } = await getVideoResolution(videoPath);
      const fontSize = customFontSize || Math.round(height / 18);

      // Step 5: Check FFmpeg subtitle filter support
      let hasSubtitles = false;
      try {
        const { stdout: filterList } = await execSafe("ffmpeg", ["-filters"], { maxBuffer: 10 * 1024 * 1024 });
        hasSubtitles = filterList.includes("subtitles");
      } catch {
        // If filter check fails, continue and let FFmpeg error naturally
      }

      // Step 6: Burn captions
      if (hasSubtitles) {
        // Fast path: FFmpeg subtitles filter (requires libass)
        const forceStyle = getCaptionForceStyle(style, fontSize, fontColor, position);
        // Filter-graph syntax: forward slashes and escaped colons in the path.
        const escapedSrtPath = srtPath.replace(/\\/g, "/").replace(/:/g, "\\:");
        await execSafe("ffmpeg", [
          "-i", videoPath, "-vf", `subtitles=${escapedSrtPath}:force_style='${forceStyle}'`,
          "-c:a", "copy", outputPath, "-y",
        ], { timeout: 600000, maxBuffer: 50 * 1024 * 1024 });
      } else {
        // Remotion fallback: embed video + captions in a single Remotion composition
        console.log("FFmpeg missing subtitles filter (libass) — using Remotion fallback...");
        const { generateCaptionComponent, renderWithEmbeddedVideo, ensureRemotionInstalled } = await import("../utils/remotion.js");

        const remotionErr = await ensureRemotionInstalled();
        if (remotionErr) {
          // Save SRT so the user still gets something
          const outputDir = dirname(outputPath);
          const outputSrtPath = join(outputDir, basename(outputPath, extname(outputPath)) + ".srt");
          await writeFile(outputSrtPath, srtContent);
          return { success: false, error: `${remotionErr}\nSRT saved to: ${outputSrtPath}` };
        }

        // Fixed 30 fps render; frame count derived from the source duration.
        const videoDuration = await getVideoDuration(videoPath);
        const fps = 30;
        const durationInFrames = Math.ceil(videoDuration * fps);
        const videoFileName = "source_video.mp4";

        const { code, name } = generateCaptionComponent({
          segments: transcriptResult.segments.map((s) => ({
            start: s.startTime,
            end: s.endTime,
            text: s.text,
          })),
          style,
          fontSize,
          fontColor,
          position,
          width,
          height,
          videoFileName,
        });

        const renderResult = await renderWithEmbeddedVideo({
          componentCode: code,
          componentName: name,
          width,
          height,
          fps,
          durationInFrames,
          videoPath,
          videoFileName,
          outputPath,
        });

        if (!renderResult.success) {
          // Render failed — still hand the user the SRT they paid for.
          const outputDir = dirname(outputPath);
          const outputSrtPath = join(outputDir, basename(outputPath, extname(outputPath)) + ".srt");
          await writeFile(outputSrtPath, srtContent);
          return { success: false, error: `${renderResult.error}\nSRT saved to: ${outputSrtPath}` };
        }
      }

      // Copy SRT to output directory for user reference
      const outputDir = dirname(outputPath);
      const outputSrtPath = join(outputDir, basename(outputPath, extname(outputPath)) + ".srt");
      await writeFile(outputSrtPath, srtContent);

      return {
        success: true,
        outputPath,
        srtPath: outputSrtPath,
        segmentCount: transcriptResult.segments.length,
      };
    } finally {
      // Cleanup temp files (runs on every exit path, including early returns)
      try {
        const { rm } = await import("node:fs/promises");
        await rm(tmpDir, { recursive: true, force: true });
      } catch {
        // Ignore cleanup errors
      }
    }
  } catch (error) {
    return {
      success: false,
      error: `Caption failed: ${error instanceof Error ? error.message : String(error)}`,
    };
  }
}
865
-
866
- // ============================================================================
867
- // Noise Reduce
868
- // ============================================================================
869
-
870
/** Options for {@link executeNoiseReduce}. */
export interface NoiseReduceOptions {
  /** Path to the input audio or video file */
  inputPath: string;
  /** Path for the noise-reduced output file */
  outputPath: string;
  /** Reduction strength preset (default: "medium"; "high" also band-limits to 80 Hz–12 kHz) */
  strength?: "low" | "medium" | "high";
  /** Custom noise floor in dB passed to FFmpeg's afftdn filter (overrides the strength preset) */
  noiseFloor?: number;
}
881
-
882
/** Result from {@link executeNoiseReduce}. */
export interface NoiseReduceResult {
  /** Whether the operation succeeded */
  success: boolean;
  /** Path to the noise-reduced output file (set on success) */
  outputPath?: string;
  /** Duration of the input file in seconds, as probed before processing */
  inputDuration?: number;
  /** Error message on failure */
  error?: string;
}
893
-
894
- /**
895
- * Reduce audio noise in a video or audio file using FFmpeg afftdn filter.
896
- *
897
- * Supports three strength presets (low/medium/high) with optional highpass/lowpass
898
- * for the "high" setting. Video streams are copied without re-encoding.
899
- *
900
- * @param options - Noise reduction configuration
901
- * @returns Result with output path and input duration
902
- */
903
- export async function executeNoiseReduce(options: NoiseReduceOptions): Promise<NoiseReduceResult> {
904
- const {
905
- inputPath,
906
- outputPath,
907
- strength = "medium",
908
- noiseFloor,
909
- } = options;
910
-
911
- if (!existsSync(inputPath)) {
912
- return { success: false, error: `File not found: ${inputPath}` };
913
- }
914
-
915
- if (!commandExists("ffmpeg")) {
916
- return { success: false, error: "FFmpeg not found. Please install FFmpeg." };
917
- }
918
-
919
- try {
920
- const inputDuration = await getVideoDuration(inputPath);
921
-
922
- // Map strength to noise floor dB value
923
- const nf = noiseFloor ?? (strength === "low" ? -20 : strength === "high" ? -35 : -25);
924
-
925
- // Build audio filter
926
- let audioFilter = `afftdn=nf=${nf}`;
927
- if (strength === "high") {
928
- audioFilter = `${audioFilter},highpass=f=80,lowpass=f=12000`;
929
- }
930
-
931
- // Check if input has video stream
932
- let hasVideo = false;
933
- try {
934
- const { stdout } = await execSafe("ffprobe", [
935
- "-v", "error", "-select_streams", "v", "-show_entries", "stream=codec_type", "-of", "csv=p=0", inputPath,
936
- ], { maxBuffer: 10 * 1024 * 1024 });
937
- hasVideo = stdout.trim().includes("video");
938
- } catch {
939
- // No video stream
940
- }
941
-
942
- const args = ["-i", inputPath, "-af", audioFilter];
943
- if (hasVideo) args.push("-c:v", "copy");
944
- args.push(outputPath, "-y");
945
- await execSafe("ffmpeg", args, { timeout: 600000, maxBuffer: 50 * 1024 * 1024 });
946
-
947
- return {
948
- success: true,
949
- outputPath,
950
- inputDuration,
951
- };
952
- } catch (error) {
953
- return {
954
- success: false,
955
- error: `Noise reduction failed: ${error instanceof Error ? error.message : String(error)}`,
956
- };
957
- }
958
- }
959
-
960
- // ============================================================================
961
- // Fade
962
- // ============================================================================
963
-
964
/** Options for {@link executeFade}. */
export interface FadeOptions {
  /** Path to the input video file */
  videoPath: string;
  /** Path for the output video with fade effects */
  outputPath: string;
  /** Fade-in duration in seconds (default: 1; 0 disables fade-in) */
  fadeIn?: number;
  /** Fade-out duration in seconds (default: 1; 0 disables fade-out) */
  fadeOut?: number;
  /** Apply fade to audio only (video stream copied). Setting both audioOnly and videoOnly yields a plain stream copy. */
  audioOnly?: boolean;
  /** Apply fade to video only (audio stream copied) */
  videoOnly?: boolean;
}
979
-
980
/** Result from {@link executeFade}. */
export interface FadeResult {
  /** Whether the operation succeeded */
  success: boolean;
  /** Path to the output video (set on success) */
  outputPath?: string;
  /** Total duration of the source video in seconds */
  totalDuration?: number;
  /** Whether fade-in was applied (fadeIn > 0; reported even in audio-/video-only mode) */
  fadeInApplied?: boolean;
  /** Whether fade-out was applied (fadeOut > 0; reported even in audio-/video-only mode) */
  fadeOutApplied?: boolean;
  /** Error message on failure */
  error?: string;
}
995
-
996
- /**
997
- * Apply fade-in and/or fade-out effects to video and/or audio using FFmpeg.
998
- *
999
- * @param options - Fade configuration
1000
- * @returns Result with output path and which fades were applied
1001
- */
1002
- export async function executeFade(options: FadeOptions): Promise<FadeResult> {
1003
- const {
1004
- videoPath,
1005
- outputPath,
1006
- fadeIn = 1,
1007
- fadeOut = 1,
1008
- audioOnly = false,
1009
- videoOnly = false,
1010
- } = options;
1011
-
1012
- if (!existsSync(videoPath)) {
1013
- return { success: false, error: `Video not found: ${videoPath}` };
1014
- }
1015
-
1016
- if (!commandExists("ffmpeg")) {
1017
- return { success: false, error: "FFmpeg not found. Please install FFmpeg." };
1018
- }
1019
-
1020
- try {
1021
- const totalDuration = await getVideoDuration(videoPath);
1022
-
1023
- const videoFilters: string[] = [];
1024
- const audioFilters: string[] = [];
1025
-
1026
- // Video fade filters
1027
- if (!audioOnly) {
1028
- if (fadeIn > 0) {
1029
- videoFilters.push(`fade=t=in:st=0:d=${fadeIn}`);
1030
- }
1031
- if (fadeOut > 0) {
1032
- const fadeOutStart = Math.max(0, totalDuration - fadeOut);
1033
- videoFilters.push(`fade=t=out:st=${fadeOutStart}:d=${fadeOut}`);
1034
- }
1035
- }
1036
-
1037
- // Audio fade filters
1038
- if (!videoOnly) {
1039
- if (fadeIn > 0) {
1040
- audioFilters.push(`afade=t=in:st=0:d=${fadeIn}`);
1041
- }
1042
- if (fadeOut > 0) {
1043
- const fadeOutStart = Math.max(0, totalDuration - fadeOut);
1044
- audioFilters.push(`afade=t=out:st=${fadeOutStart}:d=${fadeOut}`);
1045
- }
1046
- }
1047
-
1048
- // Build FFmpeg command
1049
- const ffmpegArgs: string[] = ["-i", videoPath];
1050
-
1051
- if (videoFilters.length > 0) {
1052
- ffmpegArgs.push("-vf", videoFilters.join(","));
1053
- } else if (audioOnly) {
1054
- ffmpegArgs.push("-c:v", "copy");
1055
- }
1056
-
1057
- if (audioFilters.length > 0) {
1058
- ffmpegArgs.push("-af", audioFilters.join(","));
1059
- } else if (videoOnly) {
1060
- ffmpegArgs.push("-c:a", "copy");
1061
- }
1062
-
1063
- ffmpegArgs.push(outputPath, "-y");
1064
-
1065
- await execSafe("ffmpeg", ffmpegArgs, { timeout: 600000, maxBuffer: 50 * 1024 * 1024 });
1066
-
1067
- return {
1068
- success: true,
1069
- outputPath,
1070
- totalDuration,
1071
- fadeInApplied: fadeIn > 0,
1072
- fadeOutApplied: fadeOut > 0,
1073
- };
1074
- } catch (error) {
1075
- return {
1076
- success: false,
1077
- error: `Fade failed: ${error instanceof Error ? error.message : String(error)}`,
1078
- };
1079
- }
1080
- }
1081
- // ============================================================================
1082
- // Translate SRT
1083
- // ============================================================================
1084
-
1085
/** Options for {@link executeTranslateSrt}. */
export interface TranslateSrtOptions {
  /** Path to the source SRT subtitle file */
  srtPath: string;
  /** Path for the translated SRT output */
  outputPath: string;
  /** Target language name in plain English (e.g. "Korean", "Spanish") — interpolated into the LLM prompt */
  targetLanguage: string;
  /** LLM provider for translation (default: "claude") */
  provider?: "claude" | "openai";
  /** Source language hint included in the prompt (model auto-detects if omitted) */
  sourceLanguage?: string;
  /** Override API key for the chosen provider (falls back to ANTHROPIC_API_KEY / OPENAI_API_KEY env vars) */
  apiKey?: string;
}
1100
-
1101
/** Result from {@link executeTranslateSrt}. */
export interface TranslateSrtResult {
  /** Whether the operation succeeded */
  success: boolean;
  /** Path to the translated SRT file (set on success) */
  outputPath?: string;
  /** Number of subtitle segments translated */
  segmentCount?: number;
  /** Source language as specified by the caller, or "auto" when omitted */
  sourceLanguage?: string;
  /** Target language used for translation */
  targetLanguage?: string;
  /** Error message on failure */
  error?: string;
}
1116
-
1117
/**
 * Translate an SRT subtitle file to a target language using Claude or OpenAI.
 *
 * Segments are batched (30 at a time) for efficient API usage. Original
 * timestamps are preserved; only the text content is translated. Each batch
 * line is sent with an `[N]` index prefix and the response is matched back
 * by line position.
 *
 * NOTE(review): response lines are matched positionally (translatedLines[j]),
 * not by their [N] prefix — if the model inserts blank lines or reorders,
 * texts can be mis-assigned. Consider parsing the [N] prefix instead.
 * NOTE(review): model IDs ("gpt-5-mini", "claude-sonnet-4-6-20250514") are
 * hard-coded — verify they are current before release.
 *
 * @param options - Translation configuration
 * @returns Result with output path and segment count; failures are reported
 *   via `{ success: false, error }` rather than thrown
 */
export async function executeTranslateSrt(options: TranslateSrtOptions): Promise<TranslateSrtResult> {
  const {
    srtPath,
    outputPath,
    targetLanguage,
    provider = "claude",
    sourceLanguage,
    apiKey,
  } = options;

  if (!existsSync(srtPath)) {
    return { success: false, error: `SRT file not found: ${srtPath}` };
  }

  try {
    const srtContent = await readFile(srtPath, "utf-8");
    const segments = parseSRT(srtContent);

    if (segments.length === 0) {
      return { success: false, error: "No subtitle segments found in SRT file" };
    }

    // Batch translate segments (~30 at a time)
    const batchSize = 30;
    const translatedSegments: { startTime: number; endTime: number; text: string }[] = [];

    for (let i = 0; i < segments.length; i += batchSize) {
      const batch = segments.slice(i, i + batchSize);
      // Index prefix lets the model keep lines aligned with the request.
      const textsToTranslate = batch.map((s, idx) => `[${idx}] ${s.text}`).join("\n");

      const translatePrompt =
        `Translate the following subtitle texts to ${targetLanguage}.` +
        (sourceLanguage ? ` The source language is ${sourceLanguage}.` : "") +
        ` Return ONLY the translated texts, one per line, preserving the [N] prefix format exactly. ` +
        `Do not add explanations.\n\n${textsToTranslate}`;

      let translatedText: string;

      if (provider === "openai") {
        // API key check is inside the loop; it only ever fails on the first batch.
        const openaiKey = apiKey || process.env.OPENAI_API_KEY;
        if (!openaiKey) {
          return { success: false, error: "OpenAI API key required for translation. Run 'vibe setup' or set OPENAI_API_KEY in .env" };
        }
        const response = await fetch("https://api.openai.com/v1/chat/completions", {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            Authorization: `Bearer ${openaiKey}`,
          },
          body: JSON.stringify({
            model: "gpt-5-mini",
            messages: [{ role: "user", content: translatePrompt }],
            temperature: 0.3,
          }),
        });
        if (!response.ok) {
          return { success: false, error: `OpenAI API error: ${response.status} ${response.statusText}` };
        }
        const data = await response.json() as { choices: Array<{ message: { content: string } }> };
        translatedText = data.choices[0]?.message?.content || "";
      } else {
        const claudeKey = apiKey || process.env.ANTHROPIC_API_KEY;
        if (!claudeKey) {
          return { success: false, error: "Anthropic API key required for translation. Run 'vibe setup' or set ANTHROPIC_API_KEY in .env" };
        }
        const response = await fetch("https://api.anthropic.com/v1/messages", {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            "x-api-key": claudeKey,
            "anthropic-version": "2023-06-01",
          },
          body: JSON.stringify({
            model: "claude-sonnet-4-6-20250514",
            max_tokens: 4096,
            messages: [{ role: "user", content: translatePrompt }],
          }),
        });
        if (!response.ok) {
          return { success: false, error: `Claude API error: ${response.status} ${response.statusText}` };
        }
        const data = await response.json() as { content: Array<{ type: string; text: string }> };
        translatedText = data.content?.find((c) => c.type === "text")?.text || "";
      }

      // Parse translated lines (positional match against the request batch)
      const translatedLines = translatedText.trim().split("\n");
      for (let j = 0; j < batch.length; j++) {
        const seg = batch[j];
        // Try to match [N] prefix
        const line = translatedLines[j];
        let text: string;
        if (line) {
          text = line.replace(/^\[\d+\]\s*/, "").trim();
        } else {
          // Fallback: use original text if translation is missing
          text = seg.text;
        }
        translatedSegments.push({
          startTime: seg.startTime,
          endTime: seg.endTime,
          text,
        });
      }
    }

    // Format as SRT and write
    const translatedSrt = formatSRT(translatedSegments);
    await writeFile(outputPath, translatedSrt);

    return {
      success: true,
      outputPath,
      segmentCount: translatedSegments.length,
      sourceLanguage: sourceLanguage || "auto",
      targetLanguage,
    };
  } catch (error) {
    return {
      success: false,
      error: `Translation failed: ${error instanceof Error ? error.message : String(error)}`,
    };
  }
}
1250
-
1251
- // ============================================================================
1252
- // Text Overlay
1253
- // ============================================================================
1254
-
1255
/** Visual style preset for text overlays (placement, size, box, and border vary per preset). */
export type TextOverlayStyle = "lower-third" | "center-bold" | "subtitle" | "minimal";
1257
-
1258
/** Options for {@link applyTextOverlays} and {@link executeTextOverlay}. */
export interface TextOverlayOptions {
  /** Path to the input video file (resolved against the current working directory) */
  videoPath: string;
  /** Array of text lines to overlay; each line becomes its own drawtext filter */
  texts: string[];
  /** Path for the output video (resolved against the current working directory) */
  outputPath: string;
  /** Text overlay style preset (default: "lower-third") */
  style?: TextOverlayStyle;
  /** Font size override (auto-calculated as height/20 if omitted) */
  fontSize?: number;
  /** Font color name (default: "white"; ignored by the "minimal" preset, which uses white@0.85) */
  fontColor?: string;
  /** Fade in/out duration for text in seconds (default: 0.3) */
  fadeDuration?: number;
  /** Start time for text display in seconds (default: 0) */
  startTime?: number;
  /** End time for text display in seconds (default: video duration) */
  endTime?: number;
}
1279
-
1280
/** Result from {@link applyTextOverlays} and {@link executeTextOverlay}. */
export interface TextOverlayResult {
  /** Whether the operation succeeded */
  success: boolean;
  /** Absolute path to the output video (set on success) */
  outputPath?: string;
  /** Error message on failure */
  error?: string;
}
1289
-
1290
- /**
1291
- * Detect system font path for FFmpeg drawtext
1292
- */
1293
- function detectSystemFont(): string | null {
1294
- const platform = process.platform;
1295
- if (platform === "darwin") {
1296
- const candidates = [
1297
- "/System/Library/Fonts/Helvetica.ttc",
1298
- "/System/Library/Fonts/HelveticaNeue.ttc",
1299
- "/Library/Fonts/Arial.ttf",
1300
- ];
1301
- for (const f of candidates) {
1302
- if (existsSync(f)) return f;
1303
- }
1304
- } else if (platform === "linux") {
1305
- const candidates = [
1306
- "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
1307
- "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
1308
- "/usr/share/fonts/TTF/DejaVuSans-Bold.ttf",
1309
- ];
1310
- for (const f of candidates) {
1311
- if (existsSync(f)) return f;
1312
- }
1313
- } else if (platform === "win32") {
1314
- const candidates = [
1315
- "C:\\Windows\\Fonts\\arial.ttf",
1316
- "C:\\Windows\\Fonts\\segoeui.ttf",
1317
- ];
1318
- for (const f of candidates) {
1319
- if (existsSync(f)) return f;
1320
- }
1321
- }
1322
- return null;
1323
- }
1324
-
1325
- /**
1326
- * Get video resolution via ffprobe
1327
- */
1328
- async function getVideoResolution(videoPath: string): Promise<{ width: number; height: number }> {
1329
- const { stdout } = await execSafe("ffprobe", [
1330
- "-v", "error", "-select_streams", "v:0", "-show_entries", "stream=width,height", "-of", "csv=p=0", videoPath,
1331
- ]);
1332
- const [w, h] = stdout.trim().split(",").map(Number);
1333
- return { width: w || 1920, height: h || 1080 };
1334
- }
1335
-
1336
- /**
1337
- * Escape text for FFmpeg drawtext filter
1338
- */
1339
- function escapeDrawtext(text: string): string {
1340
- return text
1341
- .replace(/\\/g, "\\\\\\\\")
1342
- .replace(/'/g, "'\\\\\\''")
1343
- .replace(/:/g, "\\\\:")
1344
- .replace(/%/g, "\\\\%");
1345
- }
1346
-
1347
/**
 * Apply text overlays to a video using FFmpeg's drawtext filter.
 *
 * Supports multiple text lines (one drawtext filter each) with configurable
 * style preset, position, font, and fade-in/out. Auto-detects system fonts
 * across macOS, Linux, and Windows; when no font file is found, drawtext is
 * invoked without `fontfile=` and relies on FFmpeg's own font lookup.
 * Audio is stream-copied; only the video stream is re-encoded.
 *
 * @param options - Text overlay configuration
 * @returns Result with absolute output path; failures are reported via
 *   `{ success: false, error }` rather than thrown
 */
export async function applyTextOverlays(options: TextOverlayOptions): Promise<TextOverlayResult> {
  const {
    videoPath,
    texts,
    outputPath,
    style = "lower-third",
    fontSize: customFontSize,
    fontColor = "white",
    fadeDuration = 0.3,
    startTime = 0,
  } = options;

  if (!texts || texts.length === 0) {
    return { success: false, error: "No texts provided" };
  }

  const absVideoPath = resolve(process.cwd(), videoPath);
  const absOutputPath = resolve(process.cwd(), outputPath);

  // Check video exists
  if (!existsSync(absVideoPath)) {
    return { success: false, error: `Video not found: ${absVideoPath}` };
  }

  // Check FFmpeg
  if (!commandExists("ffmpeg")) {
    return { success: false, error: "FFmpeg not found. Please install FFmpeg." };
  }

  // Check drawtext filter availability (requires an FFmpeg built with libfreetype)
  try {
    const { stdout } = await execSafe("ffmpeg", ["-filters"]);
    if (!stdout.includes("drawtext")) {
      const platform = process.platform;
      let hint = "";
      if (platform === "darwin") {
        hint = "\n\nFix: brew uninstall ffmpeg && brew install ffmpeg\n(The default homebrew formula includes libfreetype)";
      } else if (platform === "linux") {
        hint = "\n\nFix: sudo apt install ffmpeg (Ubuntu/Debian)\n or rebuild FFmpeg with --enable-libfreetype";
      }
      return {
        success: false,
        error: `FFmpeg 'drawtext' filter not available. Your FFmpeg was built without libfreetype.${hint}`,
      };
    }
  } catch {
    // If filter check fails, continue and let FFmpeg error naturally
  }

  // Get video resolution for scaling (base size ~1/20 of frame height)
  const { width, height } = await getVideoResolution(absVideoPath);
  const baseFontSize = customFontSize || Math.round(height / 20);

  // Get video duration for endTime default
  const videoDuration = await getVideoDuration(absVideoPath);
  const endTime = options.endTime ?? videoDuration;

  // Detect font; empty string omits the fontfile= option entirely
  const fontPath = detectSystemFont();
  const fontFile = fontPath ? `fontfile=${fontPath}:` : "";

  // Build drawtext filters based on style
  const filters: string[] = [];

  for (let i = 0; i < texts.length; i++) {
    const escaped = escapeDrawtext(texts[i]);
    let x: string;
    let y: string;
    let fs: number;
    let fc: string = fontColor;
    let boxEnabled = 0;
    let boxColor = "black@0.5";
    let borderW = 0;

    // Each preset sets position expressions (FFmpeg expression syntax),
    // font scale, and optional box/border; `i` stacks multiple lines.
    switch (style) {
      case "center-bold":
        x = "(w-text_w)/2";
        y = `(h-text_h)/2+${i * Math.round(baseFontSize * 1.4)}`;
        fs = Math.round(baseFontSize * 1.5);
        borderW = 3;
        break;
      case "subtitle":
        x = "(w-text_w)/2";
        y = `h-${Math.round(height * 0.12)}+${i * Math.round(baseFontSize * 1.3)}`;
        fs = baseFontSize;
        boxEnabled = 1;
        boxColor = "black@0.6";
        break;
      case "minimal":
        x = `${Math.round(width * 0.05)}`;
        y = `${Math.round(height * 0.05)}+${i * Math.round(baseFontSize * 1.3)}`;
        fs = Math.round(baseFontSize * 0.8);
        fc = "white@0.85";
        break;
      case "lower-third":
      default:
        x = `${Math.round(width * 0.05)}`;
        y = `h-${Math.round(height * 0.18)}+${i * Math.round(baseFontSize * 1.3)}`;
        fs = i === 0 ? Math.round(baseFontSize * 1.2) : baseFontSize;
        boxEnabled = 1;
        boxColor = "black@0.5";
        break;
    }

    // Build alpha expression for fade in/out (\\, escapes commas for the filter parser)
    // NOTE(review): the fadeOut expression contains a stray space in
    // "( ${endTime}-t)" — confirm FFmpeg's expression parser tolerates it.
    const fadeIn = `if(lt(t-${startTime}\\,${fadeDuration})\\,(t-${startTime})/${fadeDuration}\\,1)`;
    const fadeOut = `if(gt(t\\,${endTime - fadeDuration})\\,( ${endTime}-t)/${fadeDuration}\\,1)`;
    const alpha = `min(${fadeIn}\\,${fadeOut})`;

    let filter = `drawtext=${fontFile}text='${escaped}':fontsize=${fs}:fontcolor=${fc}:x=${x}:y=${y}:borderw=${borderW}:enable='between(t\\,${startTime}\\,${endTime})'`;
    filter += `:alpha='${alpha}'`;
    if (boxEnabled) {
      filter += `:box=1:boxcolor=${boxColor}:boxborderw=8`;
    }

    filters.push(filter);
  }

  const filterChain = filters.join(",");
  try {
    await execSafe("ffmpeg", [
      "-i", absVideoPath, "-vf", filterChain, "-c:a", "copy", absOutputPath, "-y",
    ], { timeout: 600000, maxBuffer: 50 * 1024 * 1024 });
    return { success: true, outputPath: absOutputPath };
  } catch (error) {
    return {
      success: false,
      error: `FFmpeg failed: ${error instanceof Error ? error.message : String(error)}`,
    };
  }
}
1487
-
1488
- /**
1489
- * Execute text overlay for CLI/Agent usage. Delegates to {@link applyTextOverlays}.
1490
- *
1491
- * @param options - Text overlay configuration
1492
- * @returns Result with absolute output path
1493
- */
1494
- export async function executeTextOverlay(options: TextOverlayOptions): Promise<TextOverlayResult> {
1495
- return applyTextOverlays(options);
1496
- }
1497
-
1498
- // ============================================================================
1499
- // Video Review (Gemini)
1500
- // ============================================================================
1501
-
1502
/** A single auto-fixable issue identified during video review. */
export interface AutoFix {
  /** Category of the fix */
  type: "color_grade" | "text_overlay_adjust" | "speed_adjust" | "crop";
  /** Human-readable description of the issue */
  description: string;
  /** FFmpeg filter string that would apply the fix, when one exists for this fix type */
  ffmpegFilter?: string;
}
1511
-
1512
/** Scored review for a single quality category. */
export interface VideoReviewCategory {
  /** Quality score from 1 (worst) to 10 (best) */
  score: number;
  /** List of identified issues, empty when none were found */
  issues: string[];
  /** Whether the issues can be auto-fixed */
  fixable: boolean;
  /** Suggested FFmpeg filter for fixing (populated for the color category) */
  suggestedFilter?: string;
  /** Improvement suggestions (populated for the text readability category) */
  suggestions?: string[];
}
1525
-
1526
/** Complete AI video review feedback from Gemini analysis. */
export interface VideoReviewFeedback {
  /** Overall quality score from 1 (worst) to 10 (best) */
  overallScore: number;
  /** Per-category quality assessments */
  categories: {
    pacing: VideoReviewCategory;
    color: VideoReviewCategory;
    textReadability: VideoReviewCategory;
    audioVisualSync: VideoReviewCategory;
    composition: VideoReviewCategory;
  };
  /** List of auto-fixable issues with FFmpeg filter suggestions */
  autoFixable: AutoFix[];
  /** General improvement recommendations */
  recommendations: string[];
}