@vibeframe/cli 0.27.0 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/agent/adapters/index.d.ts +1 -0
- package/dist/agent/adapters/index.d.ts.map +1 -1
- package/dist/agent/adapters/index.js +5 -0
- package/dist/agent/adapters/index.js.map +1 -1
- package/dist/agent/adapters/openrouter.d.ts +16 -0
- package/dist/agent/adapters/openrouter.d.ts.map +1 -0
- package/dist/agent/adapters/openrouter.js +100 -0
- package/dist/agent/adapters/openrouter.js.map +1 -0
- package/dist/agent/types.d.ts +1 -1
- package/dist/agent/types.d.ts.map +1 -1
- package/dist/commands/agent.d.ts.map +1 -1
- package/dist/commands/agent.js +3 -1
- package/dist/commands/agent.js.map +1 -1
- package/dist/commands/setup.js +5 -2
- package/dist/commands/setup.js.map +1 -1
- package/dist/config/schema.d.ts +2 -1
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +2 -0
- package/dist/config/schema.js.map +1 -1
- package/dist/index.js +0 -0
- package/package.json +16 -12
- package/.turbo/turbo-build.log +0 -4
- package/.turbo/turbo-lint.log +0 -21
- package/.turbo/turbo-test.log +0 -689
- package/src/agent/adapters/claude.ts +0 -143
- package/src/agent/adapters/gemini.ts +0 -159
- package/src/agent/adapters/index.ts +0 -61
- package/src/agent/adapters/ollama.ts +0 -231
- package/src/agent/adapters/openai.ts +0 -116
- package/src/agent/adapters/xai.ts +0 -119
- package/src/agent/index.ts +0 -251
- package/src/agent/memory/index.ts +0 -151
- package/src/agent/prompts/system.ts +0 -106
- package/src/agent/tools/ai-editing.ts +0 -845
- package/src/agent/tools/ai-generation.ts +0 -1073
- package/src/agent/tools/ai-pipeline.ts +0 -1055
- package/src/agent/tools/ai.ts +0 -21
- package/src/agent/tools/batch.ts +0 -429
- package/src/agent/tools/e2e.test.ts +0 -545
- package/src/agent/tools/export.ts +0 -184
- package/src/agent/tools/filesystem.ts +0 -237
- package/src/agent/tools/index.ts +0 -150
- package/src/agent/tools/integration.test.ts +0 -775
- package/src/agent/tools/media.ts +0 -697
- package/src/agent/tools/project.ts +0 -313
- package/src/agent/tools/timeline.ts +0 -951
- package/src/agent/types.ts +0 -68
- package/src/commands/agent.ts +0 -340
- package/src/commands/ai-analyze.ts +0 -429
- package/src/commands/ai-animated-caption.ts +0 -390
- package/src/commands/ai-audio.ts +0 -941
- package/src/commands/ai-broll.ts +0 -490
- package/src/commands/ai-edit-cli.ts +0 -658
- package/src/commands/ai-edit.ts +0 -1542
- package/src/commands/ai-fill-gaps.ts +0 -566
- package/src/commands/ai-helpers.ts +0 -65
- package/src/commands/ai-highlights.ts +0 -1303
- package/src/commands/ai-image.ts +0 -761
- package/src/commands/ai-motion.ts +0 -347
- package/src/commands/ai-narrate.ts +0 -451
- package/src/commands/ai-review.ts +0 -309
- package/src/commands/ai-script-pipeline-cli.ts +0 -1710
- package/src/commands/ai-script-pipeline.ts +0 -1365
- package/src/commands/ai-suggest-edit.ts +0 -264
- package/src/commands/ai-video-fx.ts +0 -445
- package/src/commands/ai-video.ts +0 -915
- package/src/commands/ai-viral.ts +0 -595
- package/src/commands/ai-visual-fx.ts +0 -601
- package/src/commands/ai.test.ts +0 -627
- package/src/commands/ai.ts +0 -307
- package/src/commands/analyze.ts +0 -282
- package/src/commands/audio.ts +0 -644
- package/src/commands/batch.test.ts +0 -279
- package/src/commands/batch.ts +0 -440
- package/src/commands/detect.ts +0 -329
- package/src/commands/doctor.ts +0 -237
- package/src/commands/edit-cmd.ts +0 -1014
- package/src/commands/export.ts +0 -918
- package/src/commands/generate.ts +0 -2146
- package/src/commands/media.ts +0 -177
- package/src/commands/output.ts +0 -142
- package/src/commands/pipeline.ts +0 -398
- package/src/commands/project.test.ts +0 -127
- package/src/commands/project.ts +0 -149
- package/src/commands/sanitize.ts +0 -60
- package/src/commands/schema.ts +0 -130
- package/src/commands/setup.ts +0 -509
- package/src/commands/timeline.test.ts +0 -499
- package/src/commands/timeline.ts +0 -529
- package/src/commands/validate.ts +0 -77
- package/src/config/config.test.ts +0 -197
- package/src/config/index.ts +0 -125
- package/src/config/schema.ts +0 -82
- package/src/engine/index.ts +0 -2
- package/src/engine/project.test.ts +0 -702
- package/src/engine/project.ts +0 -439
- package/src/index.ts +0 -146
- package/src/utils/api-key.test.ts +0 -41
- package/src/utils/api-key.ts +0 -247
- package/src/utils/audio.ts +0 -83
- package/src/utils/exec-safe.ts +0 -75
- package/src/utils/first-run.ts +0 -52
- package/src/utils/provider-resolver.ts +0 -56
- package/src/utils/remotion.ts +0 -951
- package/src/utils/subtitle.test.ts +0 -227
- package/src/utils/subtitle.ts +0 -169
- package/src/utils/tty.ts +0 -196
- package/tsconfig.json +0 -20
package/src/commands/ai-edit.ts
DELETED
|
@@ -1,1542 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @module ai-edit
|
|
3
|
-
*
|
|
4
|
-
* Video/audio editing execute functions and supporting types.
|
|
5
|
-
*
|
|
6
|
-
* CLI commands: silence-cut, jump-cut, caption, noise-reduce, fade,
|
|
7
|
-
* translate-srt, text-overlay
|
|
8
|
-
*
|
|
9
|
-
* Execute functions (also used by agent tools via ai.ts re-exports):
|
|
10
|
-
* executeSilenceCut, executeJumpCut, executeCaption, executeNoiseReduce,
|
|
11
|
-
* executeFade, executeTranslateSrt, applyTextOverlays, executeTextOverlay
|
|
12
|
-
*
|
|
13
|
-
* CLI command registrations live in ai-edit-cli.ts (registerEditCommands).
|
|
14
|
-
* Extracted from ai.ts as part of modularisation.
|
|
15
|
-
*
|
|
16
|
-
* @dependencies FFmpeg, Whisper (OpenAI), Gemini (Google), Claude/OpenAI (translation)
|
|
17
|
-
*/
|
|
18
|
-
|
|
19
|
-
import { resolve, dirname, basename, extname, join } from 'node:path';
|
|
20
|
-
import { readFile, writeFile, mkdir } from 'node:fs/promises';
|
|
21
|
-
import { existsSync } from 'node:fs';
|
|
22
|
-
import {
|
|
23
|
-
GeminiProvider,
|
|
24
|
-
WhisperProvider,
|
|
25
|
-
} from '@vibeframe/ai-providers';
|
|
26
|
-
import { getApiKey } from '../utils/api-key.js';
|
|
27
|
-
import { getVideoDuration } from '../utils/audio.js';
|
|
28
|
-
import { formatSRT, parseSRT } from '../utils/subtitle.js';
|
|
29
|
-
import { execSafe, commandExists } from '../utils/exec-safe.js';
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
// ── Exported types and execute functions ────────────────────────────────────
|
|
33
|
-
|
|
34
|
-
// ============================================================================
|
|
35
|
-
// Silence Cut
|
|
36
|
-
// ============================================================================
|
|
37
|
-
|
|
38
|
-
/** A detected silent segment within a media file. */
export interface SilencePeriod {
  /** Start time in seconds */
  start: number;
  /** End time in seconds */
  end: number;
  /** Duration of the silent period in seconds (always end - start) */
  duration: number;
}
|
|
47
|
-
|
|
48
|
-
/** Options for {@link executeSilenceCut}. */
export interface SilenceCutOptions {
  /** Path to the input video file */
  videoPath: string;
  /** Path for the output video (silent segments removed) */
  outputPath: string;
  /** FFmpeg noise threshold in dB (default: -30; more negative = stricter silence) */
  noiseThreshold?: number;
  /** Minimum silence duration in seconds to detect (default: 0.5) */
  minDuration?: number;
  /** Padding in seconds kept around cuts (default: 0.1) */
  padding?: number;
  /** If true, only analyze without producing output video */
  analyzeOnly?: boolean;
  /** Use Gemini multimodal analysis instead of FFmpeg silencedetect (requires a Google API key) */
  useGemini?: boolean;
  /** Gemini model shorthand: "flash", "flash-2.5", "pro" (only used with useGemini) */
  model?: string;
  /** Use low-resolution mode for Gemini (longer videos) */
  lowRes?: boolean;
  /** Override API key (Google for Gemini mode) */
  apiKey?: string;
}
|
|
71
|
-
|
|
72
|
-
/** Result from {@link executeSilenceCut}. */
export interface SilenceCutResult {
  /** Whether the operation succeeded */
  success: boolean;
  /** Path to the output video (undefined in analyze-only mode or when nothing was cut) */
  outputPath?: string;
  /** Total duration of the source video in seconds */
  totalDuration?: number;
  /** Detected silent periods */
  silentPeriods?: SilencePeriod[];
  /** Total silent duration removed in seconds */
  silentDuration?: number;
  /** Detection method used */
  method?: "ffmpeg" | "gemini";
  /** Error message on failure */
  error?: string;
}
|
|
89
|
-
|
|
90
|
-
/**
|
|
91
|
-
* Detect silent periods in a media file using FFmpeg silencedetect
|
|
92
|
-
*/
|
|
93
|
-
async function detectSilencePeriods(
|
|
94
|
-
videoPath: string,
|
|
95
|
-
noiseThreshold: number,
|
|
96
|
-
minDuration: number,
|
|
97
|
-
): Promise<{ periods: SilencePeriod[]; totalDuration: number }> {
|
|
98
|
-
// Get total duration
|
|
99
|
-
const totalDuration = await getVideoDuration(videoPath);
|
|
100
|
-
|
|
101
|
-
// Run silence detection
|
|
102
|
-
const { stdout, stderr } = await execSafe("ffmpeg", [
|
|
103
|
-
"-i", videoPath,
|
|
104
|
-
"-af", `silencedetect=noise=${noiseThreshold}dB:d=${minDuration}`,
|
|
105
|
-
"-f", "null", "-",
|
|
106
|
-
], { maxBuffer: 50 * 1024 * 1024 }).catch((err) => {
|
|
107
|
-
// ffmpeg writes filter output to stderr and exits non-zero with -f null
|
|
108
|
-
if (err.stdout !== undefined || err.stderr !== undefined) {
|
|
109
|
-
return { stdout: err.stdout || "", stderr: err.stderr || "" };
|
|
110
|
-
}
|
|
111
|
-
throw err;
|
|
112
|
-
});
|
|
113
|
-
const silenceOutput = stdout + stderr;
|
|
114
|
-
|
|
115
|
-
const periods: SilencePeriod[] = [];
|
|
116
|
-
const startRegex = /silence_start: (\d+\.?\d*)/g;
|
|
117
|
-
const endRegex = /silence_end: (\d+\.?\d*) \| silence_duration: (\d+\.?\d*)/g;
|
|
118
|
-
|
|
119
|
-
const starts: number[] = [];
|
|
120
|
-
let match;
|
|
121
|
-
while ((match = startRegex.exec(silenceOutput)) !== null) {
|
|
122
|
-
starts.push(parseFloat(match[1]));
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
let i = 0;
|
|
126
|
-
while ((match = endRegex.exec(silenceOutput)) !== null) {
|
|
127
|
-
const end = parseFloat(match[1]);
|
|
128
|
-
const duration = parseFloat(match[2]);
|
|
129
|
-
const start = i < starts.length ? starts[i] : end - duration;
|
|
130
|
-
periods.push({ start, end, duration });
|
|
131
|
-
i++;
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
return { periods, totalDuration };
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
/**
 * Detect silent/dead segments using Gemini Video Understanding (multimodal analysis).
 *
 * Uploads the whole video to Gemini with a JSON-only prompt, then parses,
 * clamps, and filters the returned segment list.
 *
 * @param videoPath - Path to the video file (read fully into memory)
 * @param minDuration - Minimum segment length in seconds to report
 * @param options - Model shorthand ("flash" | "flash-2.5" | "pro"),
 *   low-resolution flag, and optional Google API key override
 * @returns Detected periods sorted by start time plus total video duration
 * @throws If no Google API key is available, the Gemini call fails, or the
 *   response contains no parseable JSON (callers catch and surface this as
 *   a failed operation)
 */
async function detectSilencePeriodsWithGemini(
  videoPath: string,
  minDuration: number,
  options: { model?: string; lowRes?: boolean; apiKey?: string },
): Promise<{ periods: SilencePeriod[]; totalDuration: number }> {
  const totalDuration = await getVideoDuration(videoPath);

  const geminiApiKey = options.apiKey || await getApiKey("GOOGLE_API_KEY", "Google");
  if (!geminiApiKey) {
    throw new Error("Google API key required for Gemini Video Understanding. Run 'vibe setup' or set GOOGLE_API_KEY in .env");
  }

  const gemini = new GeminiProvider();
  await gemini.initialize({ apiKey: geminiApiKey });

  // NOTE(review): reads the entire video into memory; confirm this stays
  // within GeminiProvider's upload size limits for long videos.
  const videoBuffer = await readFile(videoPath);

  // Map model shorthand to full model ID
  const modelMap: Record<string, string> = {
    flash: "gemini-3-flash-preview",
    "flash-2.5": "gemini-2.5-flash",
    pro: "gemini-2.5-pro",
  };
  // Unknown shorthands fall back to "flash"; no shorthand at all leaves
  // modelId undefined so the provider's default model is used.
  const modelId = options.model ? (modelMap[options.model] || modelMap.flash) : undefined;

  const prompt = `Analyze this video and identify all silent or dead segments where there is NO meaningful content.

Detect these as silent/dead segments:
- Complete silence (no audio at all)
- Dead air / ambient noise with no speech or meaningful sound
- Long pauses between speakers or topics (${minDuration}+ seconds)
- Technical silence (e.g., blank screen with no audio)
- Sections with only background noise and no intentional content

Do NOT mark these as silent (keep them):
- Intentional dramatic pauses (short, part of storytelling)
- Music-only sections (background music, intros, outros)
- Natural breathing pauses within sentences (under ${minDuration} seconds)
- Applause, laughter, or audience reactions
- Sound effects or ambient audio that is part of the content

Only include segments that are at least ${minDuration} seconds long.
The video total duration is ${totalDuration.toFixed(1)} seconds.

IMPORTANT: Respond ONLY with valid JSON in this exact format:
{
  "silentSegments": [
    {
      "start": 5.2,
      "end": 8.7,
      "reason": "Dead air between speakers"
    }
  ]
}

If there are no silent segments, return: { "silentSegments": [] }`;

  const result = await gemini.analyzeVideo(videoBuffer, prompt, {
    fps: 1,
    lowResolution: options.lowRes,
    ...(modelId ? { model: modelId as "gemini-3-flash-preview" | "gemini-2.5-flash" | "gemini-2.5-pro" } : {}),
  });

  if (!result.success || !result.response) {
    throw new Error(`Gemini analysis failed: ${result.error || "No response"}`);
  }

  // Parse JSON from Gemini response: prefer a fenced code block if present,
  // then narrow to the first object literal mentioning "silentSegments".
  let jsonStr = result.response;
  const jsonMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (jsonMatch) jsonStr = jsonMatch[1];
  const objectMatch = jsonStr.match(/\{[\s\S]*"silentSegments"[\s\S]*\}/);
  if (objectMatch) jsonStr = objectMatch[0];

  // Throws SyntaxError on malformed model output; propagated to the caller.
  const parsed = JSON.parse(jsonStr);

  const periods: SilencePeriod[] = [];
  if (parsed.silentSegments && Array.isArray(parsed.silentSegments)) {
    for (const seg of parsed.silentSegments) {
      const rawStart = Number(seg.start);
      const rawEnd = Number(seg.end);
      if (isNaN(rawStart) || isNaN(rawEnd)) continue;
      // Clamp to video duration, then validate
      const start = Math.max(0, rawStart);
      const end = Math.min(rawEnd, totalDuration);
      const duration = end - start;
      if (duration >= minDuration) {
        periods.push({ start, end, duration });
      }
    }
  }

  // Sort by start time
  periods.sort((a, b) => a.start - b.start);

  return { periods, totalDuration };
}
|
|
237
|
-
|
|
238
|
-
/**
 * Remove silent segments from a video using FFmpeg or Gemini detection.
 *
 * Detects silence via FFmpeg silencedetect (default) or Gemini multimodal
 * analysis, then trims and concatenates the non-silent segments.
 *
 * Failures are reported via `success: false` / `error` on the result
 * rather than by throwing.
 *
 * @param options - Silence cut configuration
 * @returns Result with output path and detected silent periods
 */
export async function executeSilenceCut(options: SilenceCutOptions): Promise<SilenceCutResult> {
  const {
    videoPath,
    outputPath,
    noiseThreshold = -30,
    minDuration = 0.5,
    padding = 0.1,
    analyzeOnly = false,
    useGemini = false,
  } = options;

  if (!existsSync(videoPath)) {
    return { success: false, error: `Video not found: ${videoPath}` };
  }

  // FFmpeg is needed even in Gemini mode (for the final cut/encode pass).
  if (!commandExists("ffmpeg")) {
    return { success: false, error: "FFmpeg not found. Please install FFmpeg." };
  }

  const method = useGemini ? "gemini" : "ffmpeg";

  try {
    const { periods, totalDuration } = useGemini
      ? await detectSilencePeriodsWithGemini(videoPath, minDuration, {
          model: options.model,
          lowRes: options.lowRes,
          apiKey: options.apiKey,
        })
      : await detectSilencePeriods(videoPath, noiseThreshold, minDuration);
    const silentDuration = periods.reduce((sum, p) => sum + p.duration, 0);

    // Analyze-only mode (or nothing to cut): report findings, no output file.
    if (analyzeOnly || periods.length === 0) {
      return {
        success: true,
        totalDuration,
        silentPeriods: periods,
        silentDuration,
        method,
      };
    }

    // Compute non-silent segments with padding. Each kept segment extends
    // `padding` seconds into the surrounding silence on both sides so cuts
    // don't clip speech onsets.
    const segments: { start: number; end: number }[] = [];
    let cursor = 0;

    for (const period of periods) {
      const segEnd = Math.min(period.start + padding, totalDuration);
      if (segEnd > cursor) {
        segments.push({ start: Math.max(0, cursor - padding), end: segEnd });
      }
      cursor = period.end;
    }
    // Add final segment after last silence
    if (cursor < totalDuration) {
      segments.push({ start: Math.max(0, cursor - padding), end: totalDuration });
    }

    if (segments.length === 0) {
      return { success: false, error: "No non-silent segments found" };
    }

    // Build filter_complex with trim+concat per segment.
    // aselect is broken on FFmpeg 8.x (audio duration unchanged), so we use
    // atrim/trim per segment and concat them all.
    const vParts: string[] = [];
    const aParts: string[] = [];
    const concatInputs: string[] = [];

    for (let i = 0; i < segments.length; i++) {
      // 4 decimal places keeps sub-millisecond precision in the filter args.
      const s = segments[i].start.toFixed(4);
      const e = segments[i].end.toFixed(4);
      vParts.push(`[0:v]trim=${s}:${e},setpts=PTS-STARTPTS[v${i}]`);
      aParts.push(`[0:a]atrim=${s}:${e},asetpts=PTS-STARTPTS[a${i}]`);
      concatInputs.push(`[v${i}][a${i}]`);
    }

    const filterComplex = [
      ...vParts,
      ...aParts,
      `${concatInputs.join("")}concat=n=${segments.length}:v=1:a=1[outv][outa]`,
    ].join(";");

    // Filtered output can't be stream-copied, so re-encode (x264 CRF 18 +
    // AAC 192k). 10-minute timeout for the encode.
    await execSafe("ffmpeg", [
      "-i", videoPath,
      "-filter_complex", filterComplex,
      "-map", "[outv]", "-map", "[outa]",
      "-c:v", "libx264", "-preset", "fast", "-crf", "18",
      "-c:a", "aac", "-b:a", "192k",
      outputPath, "-y",
    ], { timeout: 600000, maxBuffer: 50 * 1024 * 1024 });

    return {
      success: true,
      outputPath,
      totalDuration,
      silentPeriods: periods,
      silentDuration,
      method,
    };
  } catch (error) {
    return {
      success: false,
      error: `Silence cut failed: ${error instanceof Error ? error.message : String(error)}`,
    };
  }
}
|
|
353
|
-
|
|
354
|
-
// ============================================================================
|
|
355
|
-
// Jump Cut (Filler Word Removal)
|
|
356
|
-
// ============================================================================
|
|
357
|
-
|
|
358
|
-
/** A detected filler word with its time range. */
export interface FillerWord {
  /** The filler word or merged phrase (adjacent fillers are joined with spaces) */
  word: string;
  /** Start time in seconds */
  start: number;
  /** End time in seconds */
  end: number;
}
|
|
367
|
-
|
|
368
|
-
/** Options for {@link executeJumpCut}. */
export interface JumpCutOptions {
  /** Path to the input video file */
  videoPath: string;
  /** Path for the output video (filler words removed) */
  outputPath: string;
  /** Custom filler words to detect (default: {@link DEFAULT_FILLER_WORDS}) */
  fillers?: string[];
  /** Padding in seconds around filler cuts (default: 0.05) */
  padding?: number;
  /** Language code for Whisper transcription (auto-detected if omitted) */
  language?: string;
  /** If true, only analyze without producing output video */
  analyzeOnly?: boolean;
  /** Override OpenAI API key (falls back to OPENAI_API_KEY env var) */
  apiKey?: string;
}
|
|
385
|
-
|
|
386
|
-
/** Result from {@link executeJumpCut}. */
export interface JumpCutResult {
  /** Whether the operation succeeded */
  success: boolean;
  /** Path to the output video (undefined in analyze-only mode or when no fillers were found) */
  outputPath?: string;
  /** Total duration of the source video in seconds */
  totalDuration?: number;
  /** Number of filler word occurrences detected (after merging adjacent ones) */
  fillerCount?: number;
  /** Total duration of filler words in seconds */
  fillerDuration?: number;
  /** Detected filler word ranges */
  fillers?: FillerWord[];
  /** Error message on failure */
  error?: string;
}
|
|
403
|
-
|
|
404
|
-
/**
 * Default set of filler words detected by jump-cut.
 *
 * NOTE(review): several entries ("like", "so", "right", "okay", "well",
 * "actually") are common content words and may match meaningful speech —
 * callers wanting conservative cuts should pass a narrower list via
 * JumpCutOptions.fillers. Multi-word entries ("you know", "i mean") need
 * phrase-aware matching to take effect.
 */
export const DEFAULT_FILLER_WORDS = [
  "um", "uh", "uh-huh", "hmm", "like", "you know", "so",
  "basically", "literally", "right", "okay", "well", "i mean", "actually",
];
|
|
409
|
-
|
|
410
|
-
/**
|
|
411
|
-
* Transcribe audio with word-level timestamps using Whisper API directly.
|
|
412
|
-
* Uses timestamp_granularities[]=word for filler detection.
|
|
413
|
-
*/
|
|
414
|
-
export async function transcribeWithWords(
|
|
415
|
-
audioPath: string,
|
|
416
|
-
apiKey: string,
|
|
417
|
-
language?: string,
|
|
418
|
-
): Promise<{ words: { word: string; start: number; end: number }[]; text: string }> {
|
|
419
|
-
const audioBuffer = await readFile(audioPath);
|
|
420
|
-
const audioBlob = new Blob([audioBuffer]);
|
|
421
|
-
|
|
422
|
-
const formData = new FormData();
|
|
423
|
-
formData.append("file", audioBlob, "audio.wav");
|
|
424
|
-
formData.append("model", "whisper-1");
|
|
425
|
-
formData.append("response_format", "verbose_json");
|
|
426
|
-
formData.append("timestamp_granularities[]", "word");
|
|
427
|
-
|
|
428
|
-
if (language) {
|
|
429
|
-
formData.append("language", language);
|
|
430
|
-
}
|
|
431
|
-
|
|
432
|
-
const response = await fetch("https://api.openai.com/v1/audio/transcriptions", {
|
|
433
|
-
method: "POST",
|
|
434
|
-
headers: {
|
|
435
|
-
Authorization: `Bearer ${apiKey}`,
|
|
436
|
-
},
|
|
437
|
-
body: formData,
|
|
438
|
-
});
|
|
439
|
-
|
|
440
|
-
if (!response.ok) {
|
|
441
|
-
const error = await response.text();
|
|
442
|
-
throw new Error(`Whisper transcription failed: ${error}`);
|
|
443
|
-
}
|
|
444
|
-
|
|
445
|
-
const data = await response.json() as {
|
|
446
|
-
text: string;
|
|
447
|
-
words?: Array<{ word: string; start: number; end: number }>;
|
|
448
|
-
};
|
|
449
|
-
|
|
450
|
-
return {
|
|
451
|
-
words: data.words || [],
|
|
452
|
-
text: data.text,
|
|
453
|
-
};
|
|
454
|
-
}
|
|
455
|
-
|
|
456
|
-
/**
|
|
457
|
-
* Detect filler word ranges and merge adjacent ones within padding distance.
|
|
458
|
-
*
|
|
459
|
-
* @param words - Word-level transcript with timestamps
|
|
460
|
-
* @param fillers - List of filler words/phrases to match
|
|
461
|
-
* @param padding - Maximum gap in seconds to merge adjacent fillers
|
|
462
|
-
* @returns Merged filler word ranges sorted by start time
|
|
463
|
-
*/
|
|
464
|
-
export function detectFillerRanges(
|
|
465
|
-
words: { word: string; start: number; end: number }[],
|
|
466
|
-
fillers: string[],
|
|
467
|
-
padding: number,
|
|
468
|
-
): FillerWord[] {
|
|
469
|
-
const fillerSet = new Set(fillers.map((f) => f.toLowerCase().trim()));
|
|
470
|
-
|
|
471
|
-
// Find individual filler words
|
|
472
|
-
const matches: FillerWord[] = [];
|
|
473
|
-
for (const w of words) {
|
|
474
|
-
const cleaned = w.word.toLowerCase().replace(/[^a-z\s-]/g, "").trim();
|
|
475
|
-
if (fillerSet.has(cleaned)) {
|
|
476
|
-
matches.push({ word: w.word, start: w.start, end: w.end });
|
|
477
|
-
}
|
|
478
|
-
}
|
|
479
|
-
|
|
480
|
-
if (matches.length === 0) return [];
|
|
481
|
-
|
|
482
|
-
// Merge adjacent filler ranges (within padding distance)
|
|
483
|
-
const merged: FillerWord[] = [{ ...matches[0] }];
|
|
484
|
-
for (let i = 1; i < matches.length; i++) {
|
|
485
|
-
const last = merged[merged.length - 1];
|
|
486
|
-
if (matches[i].start - last.end <= padding * 2) {
|
|
487
|
-
last.end = matches[i].end;
|
|
488
|
-
last.word += ` ${matches[i].word}`;
|
|
489
|
-
} else {
|
|
490
|
-
merged.push({ ...matches[i] });
|
|
491
|
-
}
|
|
492
|
-
}
|
|
493
|
-
|
|
494
|
-
return merged;
|
|
495
|
-
}
|
|
496
|
-
|
|
497
|
-
/**
|
|
498
|
-
* Remove filler words from a video using Whisper word-level timestamps + FFmpeg concat.
|
|
499
|
-
*
|
|
500
|
-
* Pipeline: extract audio -> Whisper transcription (word-level) -> detect fillers ->
|
|
501
|
-
* invert to keep-segments -> FFmpeg stream-copy concat.
|
|
502
|
-
*
|
|
503
|
-
* @param options - Jump cut configuration
|
|
504
|
-
* @returns Result with output path and detected fillers
|
|
505
|
-
*/
|
|
506
|
-
export async function executeJumpCut(options: JumpCutOptions): Promise<JumpCutResult> {
|
|
507
|
-
const {
|
|
508
|
-
videoPath,
|
|
509
|
-
outputPath,
|
|
510
|
-
fillers = DEFAULT_FILLER_WORDS,
|
|
511
|
-
padding = 0.05,
|
|
512
|
-
language,
|
|
513
|
-
analyzeOnly = false,
|
|
514
|
-
apiKey,
|
|
515
|
-
} = options;
|
|
516
|
-
|
|
517
|
-
if (!existsSync(videoPath)) {
|
|
518
|
-
return { success: false, error: `Video not found: ${videoPath}` };
|
|
519
|
-
}
|
|
520
|
-
|
|
521
|
-
if (!commandExists("ffmpeg")) {
|
|
522
|
-
return { success: false, error: "FFmpeg not found. Please install FFmpeg." };
|
|
523
|
-
}
|
|
524
|
-
|
|
525
|
-
const openaiKey = apiKey || process.env.OPENAI_API_KEY;
|
|
526
|
-
if (!openaiKey) {
|
|
527
|
-
return { success: false, error: "OpenAI API key required for Whisper transcription. Run 'vibe setup' or set OPENAI_API_KEY in .env" };
|
|
528
|
-
}
|
|
529
|
-
|
|
530
|
-
try {
|
|
531
|
-
const tmpDir = `/tmp/vibe_jumpcut_${Date.now()}`;
|
|
532
|
-
await mkdir(tmpDir, { recursive: true });
|
|
533
|
-
const audioPath = join(tmpDir, "audio.wav");
|
|
534
|
-
|
|
535
|
-
try {
|
|
536
|
-
// Step 1: Extract audio
|
|
537
|
-
await execSafe("ffmpeg", [
|
|
538
|
-
"-i", videoPath, "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", audioPath, "-y",
|
|
539
|
-
], { timeout: 300000, maxBuffer: 50 * 1024 * 1024 });
|
|
540
|
-
|
|
541
|
-
// Step 2: Transcribe with word-level timestamps
|
|
542
|
-
const { words } = await transcribeWithWords(audioPath, openaiKey, language);
|
|
543
|
-
|
|
544
|
-
if (words.length === 0) {
|
|
545
|
-
return { success: false, error: "No words detected in audio" };
|
|
546
|
-
}
|
|
547
|
-
|
|
548
|
-
// Step 3: Detect filler ranges
|
|
549
|
-
const fillerRanges = detectFillerRanges(words, fillers, padding);
|
|
550
|
-
const totalDuration = await getVideoDuration(videoPath);
|
|
551
|
-
const fillerDuration = fillerRanges.reduce((sum, f) => sum + (f.end - f.start), 0);
|
|
552
|
-
|
|
553
|
-
if (analyzeOnly || fillerRanges.length === 0) {
|
|
554
|
-
return {
|
|
555
|
-
success: true,
|
|
556
|
-
totalDuration,
|
|
557
|
-
fillerCount: fillerRanges.length,
|
|
558
|
-
fillerDuration,
|
|
559
|
-
fillers: fillerRanges,
|
|
560
|
-
};
|
|
561
|
-
}
|
|
562
|
-
|
|
563
|
-
// Step 4: Compute keep-segments (invert filler ranges)
|
|
564
|
-
const segments: { start: number; end: number }[] = [];
|
|
565
|
-
let cursor = 0;
|
|
566
|
-
|
|
567
|
-
for (const filler of fillerRanges) {
|
|
568
|
-
const segStart = Math.max(0, cursor);
|
|
569
|
-
const segEnd = Math.max(segStart, filler.start - padding);
|
|
570
|
-
if (segEnd > segStart) {
|
|
571
|
-
segments.push({ start: segStart, end: segEnd });
|
|
572
|
-
}
|
|
573
|
-
cursor = filler.end + padding;
|
|
574
|
-
}
|
|
575
|
-
// Add final segment after last filler
|
|
576
|
-
if (cursor < totalDuration) {
|
|
577
|
-
segments.push({ start: cursor, end: totalDuration });
|
|
578
|
-
}
|
|
579
|
-
|
|
580
|
-
if (segments.length === 0) {
|
|
581
|
-
return { success: false, error: "No non-filler segments found" };
|
|
582
|
-
}
|
|
583
|
-
|
|
584
|
-
// Step 5: Extract segments and concat with FFmpeg (stream copy)
|
|
585
|
-
const segmentPaths: string[] = [];
|
|
586
|
-
for (let i = 0; i < segments.length; i++) {
|
|
587
|
-
const seg = segments[i];
|
|
588
|
-
const segPath = join(tmpDir, `seg-${i.toString().padStart(4, "0")}.ts`);
|
|
589
|
-
const duration = seg.end - seg.start;
|
|
590
|
-
await execSafe("ffmpeg", [
|
|
591
|
-
"-i", videoPath, "-ss", String(seg.start), "-t", String(duration),
|
|
592
|
-
"-c", "copy", "-avoid_negative_ts", "make_zero", segPath, "-y",
|
|
593
|
-
], { timeout: 300000, maxBuffer: 50 * 1024 * 1024 });
|
|
594
|
-
segmentPaths.push(segPath);
|
|
595
|
-
}
|
|
596
|
-
|
|
597
|
-
// Create concat list
|
|
598
|
-
const concatList = segmentPaths.map((p) => `file '${p}'`).join("\n");
|
|
599
|
-
const listPath = join(tmpDir, "concat.txt");
|
|
600
|
-
await writeFile(listPath, concatList);
|
|
601
|
-
|
|
602
|
-
// Concat segments
|
|
603
|
-
await execSafe("ffmpeg", [
|
|
604
|
-
"-f", "concat", "-safe", "0", "-i", listPath, "-c", "copy", outputPath, "-y",
|
|
605
|
-
], { timeout: 300000, maxBuffer: 50 * 1024 * 1024 });
|
|
606
|
-
|
|
607
|
-
return {
|
|
608
|
-
success: true,
|
|
609
|
-
outputPath,
|
|
610
|
-
totalDuration,
|
|
611
|
-
fillerCount: fillerRanges.length,
|
|
612
|
-
fillerDuration,
|
|
613
|
-
fillers: fillerRanges,
|
|
614
|
-
};
|
|
615
|
-
} finally {
|
|
616
|
-
// Cleanup temp files
|
|
617
|
-
try {
|
|
618
|
-
const { rm } = await import("node:fs/promises");
|
|
619
|
-
await rm(tmpDir, { recursive: true, force: true });
|
|
620
|
-
} catch {
|
|
621
|
-
// Ignore cleanup errors
|
|
622
|
-
}
|
|
623
|
-
}
|
|
624
|
-
} catch (error) {
|
|
625
|
-
return {
|
|
626
|
-
success: false,
|
|
627
|
-
error: `Jump cut failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
628
|
-
};
|
|
629
|
-
}
|
|
630
|
-
}
|
|
631
|
-
|
|
632
|
-
// ============================================================================
|
|
633
|
-
// Caption
|
|
634
|
-
// ============================================================================
|
|
635
|
-
|
|
636
|
-
/** Visual style preset for burned-in captions (mapped to an ASS force_style string). */
export type CaptionStyle = "minimal" | "bold" | "outline" | "karaoke";
|
|
638
|
-
|
|
639
|
-
/** Options for {@link executeCaption}. */
export interface CaptionOptions {
  /** Path to the input video file */
  videoPath: string;
  /** Path for the output video with burned-in captions */
  outputPath: string;
  /** Caption visual style preset (default: "bold") */
  style?: CaptionStyle;
  /** Font size override (auto-calculated from video height if omitted) */
  fontSize?: number;
  /** Font color name (default: "white") */
  fontColor?: string;
  /** Language code for Whisper transcription (auto-detected if omitted) */
  language?: string;
  /** Vertical position of captions (default: "bottom") */
  position?: "top" | "center" | "bottom";
  /** Override OpenAI API key */
  apiKey?: string;
}
|
|
658
|
-
|
|
659
|
-
/** Result from {@link executeCaption}. */
export interface CaptionResult {
  /** Whether the operation succeeded */
  success: boolean;
  /** Path to the output video with captions */
  outputPath?: string;
  /** Path to the generated SRT file (written next to the output video) */
  srtPath?: string;
  /** Number of transcript segments produced by Whisper */
  segmentCount?: number;
  /** Error message on failure */
  error?: string;
}
|
|
672
|
-
|
|
673
|
-
/**
|
|
674
|
-
* Get ASS force_style string for caption preset
|
|
675
|
-
*/
|
|
676
|
-
function getCaptionForceStyle(
|
|
677
|
-
style: CaptionStyle,
|
|
678
|
-
fontSize: number,
|
|
679
|
-
fontColor: string,
|
|
680
|
-
position: "top" | "center" | "bottom",
|
|
681
|
-
): string {
|
|
682
|
-
// ASS alignment: 1-3 bottom, 4-6 middle, 7-9 top (left/center/right)
|
|
683
|
-
const alignment = position === "top" ? 8 : position === "center" ? 5 : 2;
|
|
684
|
-
const marginV = position === "center" ? 0 : 30;
|
|
685
|
-
|
|
686
|
-
switch (style) {
|
|
687
|
-
case "minimal":
|
|
688
|
-
return `FontSize=${fontSize},FontName=Arial,PrimaryColour=&H00FFFFFF,OutlineColour=&H80000000,Outline=1,Shadow=0,Alignment=${alignment},MarginV=${marginV}`;
|
|
689
|
-
case "bold":
|
|
690
|
-
return `FontSize=${fontSize},FontName=Arial,Bold=1,PrimaryColour=&H00${fontColor === "yellow" ? "00FFFF" : "FFFFFF"},OutlineColour=&H00000000,Outline=3,Shadow=1,Alignment=${alignment},MarginV=${marginV}`;
|
|
691
|
-
case "outline":
|
|
692
|
-
return `FontSize=${fontSize},FontName=Arial,Bold=1,PrimaryColour=&H00FFFFFF,OutlineColour=&H000000FF,Outline=4,Shadow=0,Alignment=${alignment},MarginV=${marginV}`;
|
|
693
|
-
case "karaoke":
|
|
694
|
-
return `FontSize=${fontSize},FontName=Arial,Bold=1,PrimaryColour=&H0000FFFF,OutlineColour=&H00000000,Outline=2,Shadow=1,Alignment=${alignment},MarginV=${marginV}`;
|
|
695
|
-
default:
|
|
696
|
-
return `FontSize=${fontSize},FontName=Arial,Bold=1,PrimaryColour=&H00FFFFFF,OutlineColour=&H00000000,Outline=3,Shadow=1,Alignment=${alignment},MarginV=${marginV}`;
|
|
697
|
-
}
|
|
698
|
-
}
|
|
699
|
-
|
|
700
|
-
/**
 * Transcribe video audio and burn styled captions using Whisper + FFmpeg.
 *
 * Pipeline: extract mono 16 kHz WAV -> Whisper transcription -> generate SRT ->
 * burn captions via the FFmpeg `subtitles` filter, or fall back to a Remotion
 * render when FFmpeg lacks libass. On either fallback failure the SRT is still
 * written next to the output so the user keeps the transcript. A copy of the
 * SRT is always written alongside a successful output video.
 *
 * @param options - Caption configuration
 * @returns Result with output video path, SRT path, and segment count
 */
export async function executeCaption(options: CaptionOptions): Promise<CaptionResult> {
  const {
    videoPath,
    outputPath,
    style = "bold",
    fontSize: customFontSize,
    fontColor = "white",
    language,
    position = "bottom",
    apiKey,
  } = options;

  // Preflight: input file, ffmpeg binary, and a Whisper-capable API key.
  if (!existsSync(videoPath)) {
    return { success: false, error: `Video not found: ${videoPath}` };
  }

  if (!commandExists("ffmpeg")) {
    return { success: false, error: "FFmpeg not found. Please install FFmpeg." };
  }

  const openaiKey = apiKey || process.env.OPENAI_API_KEY;
  if (!openaiKey) {
    return { success: false, error: "OpenAI API key required for Whisper transcription. Run 'vibe setup' or set OPENAI_API_KEY in .env" };
  }

  try {
    // Step 1: Extract audio from video.
    // NOTE(review): hard-coded /tmp is POSIX-only — Windows would need os.tmpdir().
    const tmpDir = `/tmp/vibe_caption_${Date.now()}`;
    await mkdir(tmpDir, { recursive: true });
    const audioPath = join(tmpDir, "audio.wav");
    const srtPath = join(tmpDir, "captions.srt");

    // Inner try pairs with the `finally` below: every early return from here
    // on still removes tmpDir.
    try {
      // 16 kHz mono PCM is the input format Whisper expects.
      await execSafe("ffmpeg", [
        "-i", videoPath, "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", audioPath, "-y",
      ], { timeout: 300000, maxBuffer: 50 * 1024 * 1024 });

      // Step 2: Transcribe with Whisper
      const whisper = new WhisperProvider();
      await whisper.initialize({ apiKey: openaiKey });

      const audioBuffer = await readFile(audioPath);
      const audioBlob = new Blob([audioBuffer]);
      const transcriptResult = await whisper.transcribe(audioBlob, language);

      if (transcriptResult.status === "failed" || !transcriptResult.segments || transcriptResult.segments.length === 0) {
        return { success: false, error: `Transcription failed: ${transcriptResult.error || "No segments detected"}` };
      }

      // Step 3: Generate SRT
      const srtContent = formatSRT(transcriptResult.segments);
      await writeFile(srtPath, srtContent);

      // Step 4: Get video resolution for auto font size (height/18 heuristic).
      const { width, height } = await getVideoResolution(videoPath);
      const fontSize = customFontSize || Math.round(height / 18);

      // Step 5: Check FFmpeg subtitle filter support (requires libass build).
      let hasSubtitles = false;
      try {
        const { stdout: filterList } = await execSafe("ffmpeg", ["-filters"], { maxBuffer: 10 * 1024 * 1024 });
        hasSubtitles = filterList.includes("subtitles");
      } catch {
        // If filter check fails, continue and let FFmpeg error naturally
      }

      // Step 6: Burn captions
      if (hasSubtitles) {
        // Fast path: FFmpeg subtitles filter (requires libass).
        const forceStyle = getCaptionForceStyle(style, fontSize, fontColor, position);
        // Escape the SRT path for the filtergraph parser (colons are option
        // separators; backslashes are normalized to forward slashes).
        const escapedSrtPath = srtPath.replace(/\\/g, "/").replace(/:/g, "\\:");
        await execSafe("ffmpeg", [
          "-i", videoPath, "-vf", `subtitles=${escapedSrtPath}:force_style='${forceStyle}'`,
          "-c:a", "copy", outputPath, "-y",
        ], { timeout: 600000, maxBuffer: 50 * 1024 * 1024 });
      } else {
        // Remotion fallback: embed video + captions in a single Remotion composition
        console.log("FFmpeg missing subtitles filter (libass) — using Remotion fallback...");
        const { generateCaptionComponent, renderWithEmbeddedVideo, ensureRemotionInstalled } = await import("../utils/remotion.js");

        const remotionErr = await ensureRemotionInstalled();
        if (remotionErr) {
          // Save SRT so the user still gets something
          const outputDir = dirname(outputPath);
          const outputSrtPath = join(outputDir, basename(outputPath, extname(outputPath)) + ".srt");
          await writeFile(outputSrtPath, srtContent);
          return { success: false, error: `${remotionErr}\nSRT saved to: ${outputSrtPath}` };
        }

        const videoDuration = await getVideoDuration(videoPath);
        const fps = 30;
        const durationInFrames = Math.ceil(videoDuration * fps);
        const videoFileName = "source_video.mp4";

        // Build a caption overlay component from the Whisper segments.
        const { code, name } = generateCaptionComponent({
          segments: transcriptResult.segments.map((s) => ({
            start: s.startTime,
            end: s.endTime,
            text: s.text,
          })),
          style,
          fontSize,
          fontColor,
          position,
          width,
          height,
          videoFileName,
        });

        const renderResult = await renderWithEmbeddedVideo({
          componentCode: code,
          componentName: name,
          width,
          height,
          fps,
          durationInFrames,
          videoPath,
          videoFileName,
          outputPath,
        });

        if (!renderResult.success) {
          // Render failed: still hand the user the SRT next to the intended output.
          const outputDir = dirname(outputPath);
          const outputSrtPath = join(outputDir, basename(outputPath, extname(outputPath)) + ".srt");
          await writeFile(outputSrtPath, srtContent);
          return { success: false, error: `${renderResult.error}\nSRT saved to: ${outputSrtPath}` };
        }
      }

      // Copy SRT to output directory for user reference
      const outputDir = dirname(outputPath);
      const outputSrtPath = join(outputDir, basename(outputPath, extname(outputPath)) + ".srt");
      await writeFile(outputSrtPath, srtContent);

      return {
        success: true,
        outputPath,
        srtPath: outputSrtPath,
        segmentCount: transcriptResult.segments.length,
      };
    } finally {
      // Cleanup temp files (runs on success and on every early return above).
      try {
        const { rm } = await import("node:fs/promises");
        await rm(tmpDir, { recursive: true, force: true });
      } catch {
        // Ignore cleanup errors
      }
    }
  } catch (error) {
    return {
      success: false,
      error: `Caption failed: ${error instanceof Error ? error.message : String(error)}`,
    };
  }
}
|
|
865
|
-
|
|
866
|
-
// ============================================================================
// Noise Reduce
// ============================================================================

/** Options for {@link executeNoiseReduce}. */
export interface NoiseReduceOptions {
  /** Path to the input audio or video file */
  inputPath: string;
  /** Path for the noise-reduced output file */
  outputPath: string;
  /** Reduction strength preset (default: "medium"; low/medium/high map to -20/-25/-35 dB) */
  strength?: "low" | "medium" | "high";
  /** Custom noise floor in dB (overrides the strength preset's floor) */
  noiseFloor?: number;
}
|
|
881
|
-
|
|
882
|
-
/** Result from {@link executeNoiseReduce}. */
export interface NoiseReduceResult {
  /** Whether the operation succeeded */
  success: boolean;
  /** Path to the noise-reduced output file */
  outputPath?: string;
  /** Duration of the input file in seconds */
  inputDuration?: number;
  /** Error message on failure */
  error?: string;
}
|
|
893
|
-
|
|
894
|
-
/**
|
|
895
|
-
* Reduce audio noise in a video or audio file using FFmpeg afftdn filter.
|
|
896
|
-
*
|
|
897
|
-
* Supports three strength presets (low/medium/high) with optional highpass/lowpass
|
|
898
|
-
* for the "high" setting. Video streams are copied without re-encoding.
|
|
899
|
-
*
|
|
900
|
-
* @param options - Noise reduction configuration
|
|
901
|
-
* @returns Result with output path and input duration
|
|
902
|
-
*/
|
|
903
|
-
export async function executeNoiseReduce(options: NoiseReduceOptions): Promise<NoiseReduceResult> {
|
|
904
|
-
const {
|
|
905
|
-
inputPath,
|
|
906
|
-
outputPath,
|
|
907
|
-
strength = "medium",
|
|
908
|
-
noiseFloor,
|
|
909
|
-
} = options;
|
|
910
|
-
|
|
911
|
-
if (!existsSync(inputPath)) {
|
|
912
|
-
return { success: false, error: `File not found: ${inputPath}` };
|
|
913
|
-
}
|
|
914
|
-
|
|
915
|
-
if (!commandExists("ffmpeg")) {
|
|
916
|
-
return { success: false, error: "FFmpeg not found. Please install FFmpeg." };
|
|
917
|
-
}
|
|
918
|
-
|
|
919
|
-
try {
|
|
920
|
-
const inputDuration = await getVideoDuration(inputPath);
|
|
921
|
-
|
|
922
|
-
// Map strength to noise floor dB value
|
|
923
|
-
const nf = noiseFloor ?? (strength === "low" ? -20 : strength === "high" ? -35 : -25);
|
|
924
|
-
|
|
925
|
-
// Build audio filter
|
|
926
|
-
let audioFilter = `afftdn=nf=${nf}`;
|
|
927
|
-
if (strength === "high") {
|
|
928
|
-
audioFilter = `${audioFilter},highpass=f=80,lowpass=f=12000`;
|
|
929
|
-
}
|
|
930
|
-
|
|
931
|
-
// Check if input has video stream
|
|
932
|
-
let hasVideo = false;
|
|
933
|
-
try {
|
|
934
|
-
const { stdout } = await execSafe("ffprobe", [
|
|
935
|
-
"-v", "error", "-select_streams", "v", "-show_entries", "stream=codec_type", "-of", "csv=p=0", inputPath,
|
|
936
|
-
], { maxBuffer: 10 * 1024 * 1024 });
|
|
937
|
-
hasVideo = stdout.trim().includes("video");
|
|
938
|
-
} catch {
|
|
939
|
-
// No video stream
|
|
940
|
-
}
|
|
941
|
-
|
|
942
|
-
const args = ["-i", inputPath, "-af", audioFilter];
|
|
943
|
-
if (hasVideo) args.push("-c:v", "copy");
|
|
944
|
-
args.push(outputPath, "-y");
|
|
945
|
-
await execSafe("ffmpeg", args, { timeout: 600000, maxBuffer: 50 * 1024 * 1024 });
|
|
946
|
-
|
|
947
|
-
return {
|
|
948
|
-
success: true,
|
|
949
|
-
outputPath,
|
|
950
|
-
inputDuration,
|
|
951
|
-
};
|
|
952
|
-
} catch (error) {
|
|
953
|
-
return {
|
|
954
|
-
success: false,
|
|
955
|
-
error: `Noise reduction failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
956
|
-
};
|
|
957
|
-
}
|
|
958
|
-
}
|
|
959
|
-
|
|
960
|
-
// ============================================================================
// Fade
// ============================================================================

/** Options for {@link executeFade}. */
export interface FadeOptions {
  /** Path to the input video file */
  videoPath: string;
  /** Path for the output video with fade effects */
  outputPath: string;
  /** Fade-in duration in seconds (default: 1; 0 disables fade-in) */
  fadeIn?: number;
  /** Fade-out duration in seconds (default: 1; 0 disables fade-out) */
  fadeOut?: number;
  /** Apply fade to audio only (video stream is copied) */
  audioOnly?: boolean;
  /** Apply fade to video only (audio stream is copied) */
  videoOnly?: boolean;
}
|
|
979
|
-
|
|
980
|
-
/** Result from {@link executeFade}. */
export interface FadeResult {
  /** Whether the operation succeeded */
  success: boolean;
  /** Path to the output video */
  outputPath?: string;
  /** Total duration of the source video in seconds */
  totalDuration?: number;
  /** Whether fade-in was applied */
  fadeInApplied?: boolean;
  /** Whether fade-out was applied */
  fadeOutApplied?: boolean;
  /** Error message on failure */
  error?: string;
}
|
|
995
|
-
|
|
996
|
-
/**
|
|
997
|
-
* Apply fade-in and/or fade-out effects to video and/or audio using FFmpeg.
|
|
998
|
-
*
|
|
999
|
-
* @param options - Fade configuration
|
|
1000
|
-
* @returns Result with output path and which fades were applied
|
|
1001
|
-
*/
|
|
1002
|
-
export async function executeFade(options: FadeOptions): Promise<FadeResult> {
|
|
1003
|
-
const {
|
|
1004
|
-
videoPath,
|
|
1005
|
-
outputPath,
|
|
1006
|
-
fadeIn = 1,
|
|
1007
|
-
fadeOut = 1,
|
|
1008
|
-
audioOnly = false,
|
|
1009
|
-
videoOnly = false,
|
|
1010
|
-
} = options;
|
|
1011
|
-
|
|
1012
|
-
if (!existsSync(videoPath)) {
|
|
1013
|
-
return { success: false, error: `Video not found: ${videoPath}` };
|
|
1014
|
-
}
|
|
1015
|
-
|
|
1016
|
-
if (!commandExists("ffmpeg")) {
|
|
1017
|
-
return { success: false, error: "FFmpeg not found. Please install FFmpeg." };
|
|
1018
|
-
}
|
|
1019
|
-
|
|
1020
|
-
try {
|
|
1021
|
-
const totalDuration = await getVideoDuration(videoPath);
|
|
1022
|
-
|
|
1023
|
-
const videoFilters: string[] = [];
|
|
1024
|
-
const audioFilters: string[] = [];
|
|
1025
|
-
|
|
1026
|
-
// Video fade filters
|
|
1027
|
-
if (!audioOnly) {
|
|
1028
|
-
if (fadeIn > 0) {
|
|
1029
|
-
videoFilters.push(`fade=t=in:st=0:d=${fadeIn}`);
|
|
1030
|
-
}
|
|
1031
|
-
if (fadeOut > 0) {
|
|
1032
|
-
const fadeOutStart = Math.max(0, totalDuration - fadeOut);
|
|
1033
|
-
videoFilters.push(`fade=t=out:st=${fadeOutStart}:d=${fadeOut}`);
|
|
1034
|
-
}
|
|
1035
|
-
}
|
|
1036
|
-
|
|
1037
|
-
// Audio fade filters
|
|
1038
|
-
if (!videoOnly) {
|
|
1039
|
-
if (fadeIn > 0) {
|
|
1040
|
-
audioFilters.push(`afade=t=in:st=0:d=${fadeIn}`);
|
|
1041
|
-
}
|
|
1042
|
-
if (fadeOut > 0) {
|
|
1043
|
-
const fadeOutStart = Math.max(0, totalDuration - fadeOut);
|
|
1044
|
-
audioFilters.push(`afade=t=out:st=${fadeOutStart}:d=${fadeOut}`);
|
|
1045
|
-
}
|
|
1046
|
-
}
|
|
1047
|
-
|
|
1048
|
-
// Build FFmpeg command
|
|
1049
|
-
const ffmpegArgs: string[] = ["-i", videoPath];
|
|
1050
|
-
|
|
1051
|
-
if (videoFilters.length > 0) {
|
|
1052
|
-
ffmpegArgs.push("-vf", videoFilters.join(","));
|
|
1053
|
-
} else if (audioOnly) {
|
|
1054
|
-
ffmpegArgs.push("-c:v", "copy");
|
|
1055
|
-
}
|
|
1056
|
-
|
|
1057
|
-
if (audioFilters.length > 0) {
|
|
1058
|
-
ffmpegArgs.push("-af", audioFilters.join(","));
|
|
1059
|
-
} else if (videoOnly) {
|
|
1060
|
-
ffmpegArgs.push("-c:a", "copy");
|
|
1061
|
-
}
|
|
1062
|
-
|
|
1063
|
-
ffmpegArgs.push(outputPath, "-y");
|
|
1064
|
-
|
|
1065
|
-
await execSafe("ffmpeg", ffmpegArgs, { timeout: 600000, maxBuffer: 50 * 1024 * 1024 });
|
|
1066
|
-
|
|
1067
|
-
return {
|
|
1068
|
-
success: true,
|
|
1069
|
-
outputPath,
|
|
1070
|
-
totalDuration,
|
|
1071
|
-
fadeInApplied: fadeIn > 0,
|
|
1072
|
-
fadeOutApplied: fadeOut > 0,
|
|
1073
|
-
};
|
|
1074
|
-
} catch (error) {
|
|
1075
|
-
return {
|
|
1076
|
-
success: false,
|
|
1077
|
-
error: `Fade failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
1078
|
-
};
|
|
1079
|
-
}
|
|
1080
|
-
}
|
|
1081
|
-
// ============================================================================
// Translate SRT
// ============================================================================

/** Options for {@link executeTranslateSrt}. */
export interface TranslateSrtOptions {
  /** Path to the source SRT subtitle file */
  srtPath: string;
  /** Path for the translated SRT output */
  outputPath: string;
  /** Target language name (e.g. "Korean", "Spanish") */
  targetLanguage: string;
  /** LLM provider for translation (default: "claude") */
  provider?: "claude" | "openai";
  /** Source language hint (auto-detected by the model if omitted) */
  sourceLanguage?: string;
  /** Override API key for the chosen provider (falls back to ANTHROPIC_API_KEY / OPENAI_API_KEY) */
  apiKey?: string;
}
|
|
1100
|
-
|
|
1101
|
-
/** Result from {@link executeTranslateSrt}. */
export interface TranslateSrtResult {
  /** Whether the operation succeeded */
  success: boolean;
  /** Path to the translated SRT file */
  outputPath?: string;
  /** Number of subtitle segments translated */
  segmentCount?: number;
  /** Specified source language, or "auto" when none was given */
  sourceLanguage?: string;
  /** Target language used for translation */
  targetLanguage?: string;
  /** Error message on failure */
  error?: string;
}
|
|
1116
|
-
|
|
1117
|
-
/**
 * Translate an SRT subtitle file to a target language using Claude or OpenAI.
 *
 * Segments are batched (~30 at a time) for efficient API usage. Timestamps
 * are preserved; only text content is translated. Missing translated lines
 * fall back to the original text for that segment.
 *
 * @param options - Translation configuration
 * @returns Result with output path and segment count
 */
export async function executeTranslateSrt(options: TranslateSrtOptions): Promise<TranslateSrtResult> {
  const {
    srtPath,
    outputPath,
    targetLanguage,
    provider = "claude",
    sourceLanguage,
    apiKey,
  } = options;

  if (!existsSync(srtPath)) {
    return { success: false, error: `SRT file not found: ${srtPath}` };
  }

  try {
    const srtContent = await readFile(srtPath, "utf-8");
    const segments = parseSRT(srtContent);

    if (segments.length === 0) {
      return { success: false, error: "No subtitle segments found in SRT file" };
    }

    // Batch translate segments (~30 at a time)
    const batchSize = 30;
    const translatedSegments: { startTime: number; endTime: number; text: string }[] = [];

    for (let i = 0; i < segments.length; i += batchSize) {
      const batch = segments.slice(i, i + batchSize);
      // Each line is tagged with its batch-local index so the response can be
      // matched back positionally.
      const textsToTranslate = batch.map((s, idx) => `[${idx}] ${s.text}`).join("\n");

      const translatePrompt =
        `Translate the following subtitle texts to ${targetLanguage}.` +
        (sourceLanguage ? ` The source language is ${sourceLanguage}.` : "") +
        ` Return ONLY the translated texts, one per line, preserving the [N] prefix format exactly. ` +
        `Do not add explanations.\n\n${textsToTranslate}`;

      let translatedText: string;

      if (provider === "openai") {
        // NOTE(review): the key check runs per batch; behavior is unchanged but
        // it could be hoisted above the loop.
        const openaiKey = apiKey || process.env.OPENAI_API_KEY;
        if (!openaiKey) {
          return { success: false, error: "OpenAI API key required for translation. Run 'vibe setup' or set OPENAI_API_KEY in .env" };
        }
        const response = await fetch("https://api.openai.com/v1/chat/completions", {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            Authorization: `Bearer ${openaiKey}`,
          },
          body: JSON.stringify({
            model: "gpt-5-mini",
            messages: [{ role: "user", content: translatePrompt }],
            temperature: 0.3,
          }),
        });
        if (!response.ok) {
          return { success: false, error: `OpenAI API error: ${response.status} ${response.statusText}` };
        }
        const data = await response.json() as { choices: Array<{ message: { content: string } }> };
        translatedText = data.choices[0]?.message?.content || "";
      } else {
        const claudeKey = apiKey || process.env.ANTHROPIC_API_KEY;
        if (!claudeKey) {
          return { success: false, error: "Anthropic API key required for translation. Run 'vibe setup' or set ANTHROPIC_API_KEY in .env" };
        }
        const response = await fetch("https://api.anthropic.com/v1/messages", {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            "x-api-key": claudeKey,
            "anthropic-version": "2023-06-01",
          },
          body: JSON.stringify({
            model: "claude-sonnet-4-6-20250514",
            max_tokens: 4096,
            messages: [{ role: "user", content: translatePrompt }],
          }),
        });
        if (!response.ok) {
          return { success: false, error: `Claude API error: ${response.status} ${response.statusText}` };
        }
        const data = await response.json() as { content: Array<{ type: string; text: string }> };
        translatedText = data.content?.find((c) => c.type === "text")?.text || "";
      }

      // Parse translated lines.
      // NOTE(review): matching is positional (line j -> segment j) — a model
      // response with multi-line translations or dropped lines would shift
      // every subsequent segment; verify the [N] prefixes match in practice.
      const translatedLines = translatedText.trim().split("\n");
      for (let j = 0; j < batch.length; j++) {
        const seg = batch[j];
        // Try to match [N] prefix
        const line = translatedLines[j];
        let text: string;
        if (line) {
          text = line.replace(/^\[\d+\]\s*/, "").trim();
        } else {
          // Fallback: use original text if translation is missing
          text = seg.text;
        }
        translatedSegments.push({
          startTime: seg.startTime,
          endTime: seg.endTime,
          text,
        });
      }
    }

    // Format as SRT and write
    const translatedSrt = formatSRT(translatedSegments);
    await writeFile(outputPath, translatedSrt);

    return {
      success: true,
      outputPath,
      segmentCount: translatedSegments.length,
      sourceLanguage: sourceLanguage || "auto",
      targetLanguage,
    };
  } catch (error) {
    return {
      success: false,
      error: `Translation failed: ${error instanceof Error ? error.message : String(error)}`,
    };
  }
}
|
|
1250
|
-
|
|
1251
|
-
// ============================================================================
// Text Overlay
// ============================================================================

/** Visual style preset for text overlays (placement + weight of drawtext output). */
export type TextOverlayStyle = "lower-third" | "center-bold" | "subtitle" | "minimal";
|
|
1257
|
-
|
|
1258
|
-
/** Options for {@link applyTextOverlays} and {@link executeTextOverlay}. */
export interface TextOverlayOptions {
  /** Path to the input video file */
  videoPath: string;
  /** Array of text lines to overlay */
  texts: string[];
  /** Path for the output video */
  outputPath: string;
  /** Text overlay style preset (default: "lower-third") */
  style?: TextOverlayStyle;
  /** Font size override (auto-calculated from video height if omitted) */
  fontSize?: number;
  /** Font color name (default: "white") */
  fontColor?: string;
  /** Fade in/out duration for text in seconds (default: 0.3) */
  fadeDuration?: number;
  /** Start time for text display in seconds (default: 0) */
  startTime?: number;
  /** End time for text display in seconds (default: video duration) */
  endTime?: number;
}
|
|
1279
|
-
|
|
1280
|
-
/** Result from {@link applyTextOverlays} and {@link executeTextOverlay}. */
export interface TextOverlayResult {
  /** Whether the operation succeeded */
  success: boolean;
  /** Absolute path to the output video */
  outputPath?: string;
  /** Error message on failure */
  error?: string;
}
|
|
1289
|
-
|
|
1290
|
-
/**
|
|
1291
|
-
* Detect system font path for FFmpeg drawtext
|
|
1292
|
-
*/
|
|
1293
|
-
function detectSystemFont(): string | null {
|
|
1294
|
-
const platform = process.platform;
|
|
1295
|
-
if (platform === "darwin") {
|
|
1296
|
-
const candidates = [
|
|
1297
|
-
"/System/Library/Fonts/Helvetica.ttc",
|
|
1298
|
-
"/System/Library/Fonts/HelveticaNeue.ttc",
|
|
1299
|
-
"/Library/Fonts/Arial.ttf",
|
|
1300
|
-
];
|
|
1301
|
-
for (const f of candidates) {
|
|
1302
|
-
if (existsSync(f)) return f;
|
|
1303
|
-
}
|
|
1304
|
-
} else if (platform === "linux") {
|
|
1305
|
-
const candidates = [
|
|
1306
|
-
"/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
|
|
1307
|
-
"/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
|
|
1308
|
-
"/usr/share/fonts/TTF/DejaVuSans-Bold.ttf",
|
|
1309
|
-
];
|
|
1310
|
-
for (const f of candidates) {
|
|
1311
|
-
if (existsSync(f)) return f;
|
|
1312
|
-
}
|
|
1313
|
-
} else if (platform === "win32") {
|
|
1314
|
-
const candidates = [
|
|
1315
|
-
"C:\\Windows\\Fonts\\arial.ttf",
|
|
1316
|
-
"C:\\Windows\\Fonts\\segoeui.ttf",
|
|
1317
|
-
];
|
|
1318
|
-
for (const f of candidates) {
|
|
1319
|
-
if (existsSync(f)) return f;
|
|
1320
|
-
}
|
|
1321
|
-
}
|
|
1322
|
-
return null;
|
|
1323
|
-
}
|
|
1324
|
-
|
|
1325
|
-
/**
|
|
1326
|
-
* Get video resolution via ffprobe
|
|
1327
|
-
*/
|
|
1328
|
-
async function getVideoResolution(videoPath: string): Promise<{ width: number; height: number }> {
|
|
1329
|
-
const { stdout } = await execSafe("ffprobe", [
|
|
1330
|
-
"-v", "error", "-select_streams", "v:0", "-show_entries", "stream=width,height", "-of", "csv=p=0", videoPath,
|
|
1331
|
-
]);
|
|
1332
|
-
const [w, h] = stdout.trim().split(",").map(Number);
|
|
1333
|
-
return { width: w || 1920, height: h || 1080 };
|
|
1334
|
-
}
|
|
1335
|
-
|
|
1336
|
-
/**
|
|
1337
|
-
* Escape text for FFmpeg drawtext filter
|
|
1338
|
-
*/
|
|
1339
|
-
function escapeDrawtext(text: string): string {
|
|
1340
|
-
return text
|
|
1341
|
-
.replace(/\\/g, "\\\\\\\\")
|
|
1342
|
-
.replace(/'/g, "'\\\\\\''")
|
|
1343
|
-
.replace(/:/g, "\\\\:")
|
|
1344
|
-
.replace(/%/g, "\\\\%");
|
|
1345
|
-
}
|
|
1346
|
-
|
|
1347
|
-
/**
|
|
1348
|
-
* Apply text overlays to a video using FFmpeg drawtext filter.
|
|
1349
|
-
*
|
|
1350
|
-
* Supports multiple text lines with configurable style, position, font,
|
|
1351
|
-
* and fade-in/out. Auto-detects system fonts across macOS, Linux, and Windows.
|
|
1352
|
-
*
|
|
1353
|
-
* @param options - Text overlay configuration
|
|
1354
|
-
* @returns Result with absolute output path
|
|
1355
|
-
*/
|
|
1356
|
-
export async function applyTextOverlays(options: TextOverlayOptions): Promise<TextOverlayResult> {
|
|
1357
|
-
const {
|
|
1358
|
-
videoPath,
|
|
1359
|
-
texts,
|
|
1360
|
-
outputPath,
|
|
1361
|
-
style = "lower-third",
|
|
1362
|
-
fontSize: customFontSize,
|
|
1363
|
-
fontColor = "white",
|
|
1364
|
-
fadeDuration = 0.3,
|
|
1365
|
-
startTime = 0,
|
|
1366
|
-
} = options;
|
|
1367
|
-
|
|
1368
|
-
if (!texts || texts.length === 0) {
|
|
1369
|
-
return { success: false, error: "No texts provided" };
|
|
1370
|
-
}
|
|
1371
|
-
|
|
1372
|
-
const absVideoPath = resolve(process.cwd(), videoPath);
|
|
1373
|
-
const absOutputPath = resolve(process.cwd(), outputPath);
|
|
1374
|
-
|
|
1375
|
-
// Check video exists
|
|
1376
|
-
if (!existsSync(absVideoPath)) {
|
|
1377
|
-
return { success: false, error: `Video not found: ${absVideoPath}` };
|
|
1378
|
-
}
|
|
1379
|
-
|
|
1380
|
-
// Check FFmpeg
|
|
1381
|
-
if (!commandExists("ffmpeg")) {
|
|
1382
|
-
return { success: false, error: "FFmpeg not found. Please install FFmpeg." };
|
|
1383
|
-
}
|
|
1384
|
-
|
|
1385
|
-
// Check drawtext filter availability
|
|
1386
|
-
try {
|
|
1387
|
-
const { stdout } = await execSafe("ffmpeg", ["-filters"]);
|
|
1388
|
-
if (!stdout.includes("drawtext")) {
|
|
1389
|
-
const platform = process.platform;
|
|
1390
|
-
let hint = "";
|
|
1391
|
-
if (platform === "darwin") {
|
|
1392
|
-
hint = "\n\nFix: brew uninstall ffmpeg && brew install ffmpeg\n(The default homebrew formula includes libfreetype)";
|
|
1393
|
-
} else if (platform === "linux") {
|
|
1394
|
-
hint = "\n\nFix: sudo apt install ffmpeg (Ubuntu/Debian)\n or rebuild FFmpeg with --enable-libfreetype";
|
|
1395
|
-
}
|
|
1396
|
-
return {
|
|
1397
|
-
success: false,
|
|
1398
|
-
error: `FFmpeg 'drawtext' filter not available. Your FFmpeg was built without libfreetype.${hint}`,
|
|
1399
|
-
};
|
|
1400
|
-
}
|
|
1401
|
-
} catch {
|
|
1402
|
-
// If filter check fails, continue and let FFmpeg error naturally
|
|
1403
|
-
}
|
|
1404
|
-
|
|
1405
|
-
// Get video resolution for scaling
|
|
1406
|
-
const { width, height } = await getVideoResolution(absVideoPath);
|
|
1407
|
-
const baseFontSize = customFontSize || Math.round(height / 20);
|
|
1408
|
-
|
|
1409
|
-
// Get video duration for endTime default
|
|
1410
|
-
const videoDuration = await getVideoDuration(absVideoPath);
|
|
1411
|
-
const endTime = options.endTime ?? videoDuration;
|
|
1412
|
-
|
|
1413
|
-
// Detect font
|
|
1414
|
-
const fontPath = detectSystemFont();
|
|
1415
|
-
const fontFile = fontPath ? `fontfile=${fontPath}:` : "";
|
|
1416
|
-
|
|
1417
|
-
// Build drawtext filters based on style
|
|
1418
|
-
const filters: string[] = [];
|
|
1419
|
-
|
|
1420
|
-
for (let i = 0; i < texts.length; i++) {
|
|
1421
|
-
const escaped = escapeDrawtext(texts[i]);
|
|
1422
|
-
let x: string;
|
|
1423
|
-
let y: string;
|
|
1424
|
-
let fs: number;
|
|
1425
|
-
let fc: string = fontColor;
|
|
1426
|
-
let boxEnabled = 0;
|
|
1427
|
-
let boxColor = "black@0.5";
|
|
1428
|
-
let borderW = 0;
|
|
1429
|
-
|
|
1430
|
-
switch (style) {
|
|
1431
|
-
case "center-bold":
|
|
1432
|
-
x = "(w-text_w)/2";
|
|
1433
|
-
y = `(h-text_h)/2+${i * Math.round(baseFontSize * 1.4)}`;
|
|
1434
|
-
fs = Math.round(baseFontSize * 1.5);
|
|
1435
|
-
borderW = 3;
|
|
1436
|
-
break;
|
|
1437
|
-
case "subtitle":
|
|
1438
|
-
x = "(w-text_w)/2";
|
|
1439
|
-
y = `h-${Math.round(height * 0.12)}+${i * Math.round(baseFontSize * 1.3)}`;
|
|
1440
|
-
fs = baseFontSize;
|
|
1441
|
-
boxEnabled = 1;
|
|
1442
|
-
boxColor = "black@0.6";
|
|
1443
|
-
break;
|
|
1444
|
-
case "minimal":
|
|
1445
|
-
x = `${Math.round(width * 0.05)}`;
|
|
1446
|
-
y = `${Math.round(height * 0.05)}+${i * Math.round(baseFontSize * 1.3)}`;
|
|
1447
|
-
fs = Math.round(baseFontSize * 0.8);
|
|
1448
|
-
fc = "white@0.85";
|
|
1449
|
-
break;
|
|
1450
|
-
case "lower-third":
|
|
1451
|
-
default:
|
|
1452
|
-
x = `${Math.round(width * 0.05)}`;
|
|
1453
|
-
y = `h-${Math.round(height * 0.18)}+${i * Math.round(baseFontSize * 1.3)}`;
|
|
1454
|
-
fs = i === 0 ? Math.round(baseFontSize * 1.2) : baseFontSize;
|
|
1455
|
-
boxEnabled = 1;
|
|
1456
|
-
boxColor = "black@0.5";
|
|
1457
|
-
break;
|
|
1458
|
-
}
|
|
1459
|
-
|
|
1460
|
-
// Build alpha expression for fade in/out
|
|
1461
|
-
const fadeIn = `if(lt(t-${startTime}\\,${fadeDuration})\\,(t-${startTime})/${fadeDuration}\\,1)`;
|
|
1462
|
-
const fadeOut = `if(gt(t\\,${endTime - fadeDuration})\\,( ${endTime}-t)/${fadeDuration}\\,1)`;
|
|
1463
|
-
const alpha = `min(${fadeIn}\\,${fadeOut})`;
|
|
1464
|
-
|
|
1465
|
-
let filter = `drawtext=${fontFile}text='${escaped}':fontsize=${fs}:fontcolor=${fc}:x=${x}:y=${y}:borderw=${borderW}:enable='between(t\\,${startTime}\\,${endTime})'`;
|
|
1466
|
-
filter += `:alpha='${alpha}'`;
|
|
1467
|
-
if (boxEnabled) {
|
|
1468
|
-
filter += `:box=1:boxcolor=${boxColor}:boxborderw=8`;
|
|
1469
|
-
}
|
|
1470
|
-
|
|
1471
|
-
filters.push(filter);
|
|
1472
|
-
}
|
|
1473
|
-
|
|
1474
|
-
const filterChain = filters.join(",");
|
|
1475
|
-
try {
|
|
1476
|
-
await execSafe("ffmpeg", [
|
|
1477
|
-
"-i", absVideoPath, "-vf", filterChain, "-c:a", "copy", absOutputPath, "-y",
|
|
1478
|
-
], { timeout: 600000, maxBuffer: 50 * 1024 * 1024 });
|
|
1479
|
-
return { success: true, outputPath: absOutputPath };
|
|
1480
|
-
} catch (error) {
|
|
1481
|
-
return {
|
|
1482
|
-
success: false,
|
|
1483
|
-
error: `FFmpeg failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
1484
|
-
};
|
|
1485
|
-
}
|
|
1486
|
-
}
|
|
1487
|
-
|
|
1488
|
-
/**
|
|
1489
|
-
* Execute text overlay for CLI/Agent usage. Delegates to {@link applyTextOverlays}.
|
|
1490
|
-
*
|
|
1491
|
-
* @param options - Text overlay configuration
|
|
1492
|
-
* @returns Result with absolute output path
|
|
1493
|
-
*/
|
|
1494
|
-
export async function executeTextOverlay(options: TextOverlayOptions): Promise<TextOverlayResult> {
|
|
1495
|
-
return applyTextOverlays(options);
|
|
1496
|
-
}
|
|
1497
|
-
|
|
1498
|
-
// ============================================================================
|
|
1499
|
-
// Video Review (Gemini)
|
|
1500
|
-
// ============================================================================
|
|
1501
|
-
|
|
1502
|
-
/** A single auto-fixable issue identified during video review. */
export interface AutoFix {
  /** Category of the fix: color grading, text-overlay adjustment, speed change, or crop */
  type: "color_grade" | "text_overlay_adjust" | "speed_adjust" | "crop";
  /** Human-readable description of the issue */
  description: string;
  /** FFmpeg filter string to apply the fix (omitted when no automatic filter applies) */
  ffmpegFilter?: string;
}
|
|
1511
|
-
|
|
1512
|
-
/** Scored review for a single quality category. */
export interface VideoReviewCategory {
  /** Quality score from 1-10 (higher is better) */
  score: number;
  /** List of identified issues; empty when the category passed cleanly */
  issues: string[];
  /** Whether the issues can be auto-fixed */
  fixable: boolean;
  /** Suggested FFmpeg filter for fixing (populated for the color category) */
  suggestedFilter?: string;
  /** Improvement suggestions (populated for the text readability category) */
  suggestions?: string[];
}
|
|
1525
|
-
|
|
1526
|
-
/** Complete AI video review feedback from Gemini analysis. */
export interface VideoReviewFeedback {
  /** Overall quality score from 1-10 (higher is better) */
  overallScore: number;
  /** Per-category quality assessments, one entry per reviewed dimension */
  categories: {
    pacing: VideoReviewCategory;
    color: VideoReviewCategory;
    textReadability: VideoReviewCategory;
    audioVisualSync: VideoReviewCategory;
    composition: VideoReviewCategory;
  };
  /** List of auto-fixable issues with FFmpeg filter suggestions */
  autoFixable: AutoFix[];
  /** General improvement recommendations */
  recommendations: string[];
}
|