@lightcone-ai/daemon 0.15.46 → 0.15.48
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -9,6 +9,53 @@ const DEFAULT_WIDTH = 1080;
|
|
|
9
9
|
const DEFAULT_HEIGHT = 1920;
|
|
10
10
|
const DEFAULT_FPS = 30;
|
|
11
11
|
const TRANSITION_DURATION = 0.5;
|
|
12
|
+
const SUBTITLE_FONT = 'PingFang SC,Microsoft YaHei,Arial';
|
|
13
|
+
const SUBTITLE_FONT_SIZE = 72;
|
|
14
|
+
const SUBTITLE_MARGIN_V = 120;
|
|
15
|
+
|
|
16
|
+
/**
 * Format a millisecond offset as an ASS timestamp (`H:MM:SS.cc`).
 *
 * The value is rounded to the nearest centisecond. Negative, missing, or
 * non-numeric offsets are clamped to zero so the caller never emits a
 * `NaN:NaN:NaN.NaN` timestamp into the subtitle file.
 *
 * @param {number} ms - Offset from the start of the timeline, in milliseconds.
 * @returns {string} Timestamp with unpadded hours and two-digit minutes,
 *   seconds and centiseconds.
 */
function msToAssTimestamp(ms) {
  const numericMs = Number(ms);
  // Anything that is not a finite number (undefined, NaN, Infinity) maps to 0.
  const clampedMs = Number.isFinite(numericMs) ? Math.max(0, numericMs) : 0;

  const totalCs = Math.round(clampedMs / 10);
  const cs = totalCs % 100;
  const totalSec = Math.floor(totalCs / 100);
  const sec = totalSec % 60;
  const min = Math.floor(totalSec / 60) % 60;
  const hr = Math.floor(totalSec / 3600);

  const pad2 = (value) => String(value).padStart(2, '0');
  return `${hr}:${pad2(min)}:${pad2(sec)}.${pad2(cs)}`;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Hard-wrap subtitle text into fixed-width lines joined by the ASS line
 * break marker (a literal backslash followed by `N`).
 *
 * Width is counted in Unicode code points, so surrogate pairs (emoji, rare
 * CJK) are never split in the middle.
 *
 * @param {unknown} text - Subtitle text; null/undefined become an empty string.
 * @param {number} [maxChars=14] - Maximum code points per line. Values below 1
 *   are raised to 1 (a zero or negative step would never advance the loop);
 *   a value that is not a number leaves the text unwrapped.
 * @returns {string} The text with `\N` inserted between lines.
 */
function wrapSubtitleText(text, maxChars = 14) {
  const chars = Array.from(String(text ?? ''));

  const requested = Math.floor(Number(maxChars));
  // NaN cannot express a width: fall back to "no wrapping" rather than
  // silently dropping the text. Anything below 1 is clamped so the loop
  // below always makes progress.
  const limit = Number.isNaN(requested) ? chars.length : Math.max(1, requested);

  if (chars.length <= limit) return chars.join('');

  const lines = [];
  for (let i = 0; i < chars.length; i += limit) {
    lines.push(chars.slice(i, i + limit).join(''));
  }
  return lines.join('\\N');
}
|
|
35
|
+
|
|
36
|
+
/**
 * Build the full text of an ASS (Advanced SubStation Alpha) subtitle script.
 *
 * The script has a single `Default` style (bottom-centre alignment, bold,
 * outlined) and one `Dialogue` event per subtitle entry.
 *
 * @param {Array<{text: string, start_ms: number, end_ms: number}>} subtitles
 *   Entries on the final timeline, times in milliseconds.
 * @param {{playResX?: number, playResY?: number}} [options] - Script
 *   resolution; defaults to the module's default width and height.
 * @returns {string} ASS file content terminated by a newline.
 */
function buildAssContent(subtitles, { playResX = DEFAULT_WIDTH, playResY = DEFAULT_HEIGHT } = {}) {
  // 60 matches MarginL + MarginR (30 + 30) in the style line below; dividing
  // by the font size presumably assumes roughly square (CJK full-width) glyphs.
  // Clamp to at least one character per line so a very narrow canvas cannot
  // produce a zero/negative wrap width.
  const fitted = Math.floor((playResX - 60) / SUBTITLE_FONT_SIZE);
  const maxChars = fitted >= 1 ? fitted : 1;

  // Style lines are comma-separated and Fontname is a single family name.
  // A comma-separated fallback list would shift every following field, so
  // only the first family is written.
  const fontName = String(SUBTITLE_FONT).split(',')[0].trim();

  const header = [
    '[Script Info]',
    'ScriptType: v4.00+',
    `PlayResX: ${playResX}`,
    `PlayResY: ${playResY}`,
    'WrapStyle: 2',
    '',
    '[V4+ Styles]',
    'Format: Name, Fontname, Fontsize, PrimaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding',
    `Style: Default,${fontName},${SUBTITLE_FONT_SIZE},&H00FFFFFF,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,4,0,2,30,30,${SUBTITLE_MARGIN_V},1`,
    '',
    '[Events]',
    'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
  ].join('\n');

  const events = subtitles.map(({ text, start_ms, end_ms }) => {
    const wrapped = wrapSubtitleText(text, maxChars);
    // Text is the last Dialogue field, so commas are legal there and must be
    // left as-is: wrapping one in `{...}` makes it an override block, which
    // is not rendered. Only raw newlines need converting to the ASS break.
    const safe = wrapped.replace(/\r?\n/g, '\\N');
    return `Dialogue: 0,${msToAssTimestamp(start_ms)},${msToAssTimestamp(end_ms)},Default,,0,0,0,,${safe}`;
  });

  return `${header}\n${events.join('\n')}\n`;
}
|
|
12
59
|
|
|
13
60
|
async function fileExists(p) {
|
|
14
61
|
try { await access(p, fsConstants.R_OK); return true; } catch { return false; }
|
|
@@ -245,7 +292,8 @@ export async function composeVideoV2({
|
|
|
245
292
|
finalClip = await silentClip({ videoPath: visualClip.path, duration: visualClip.duration, tmpDir });
|
|
246
293
|
}
|
|
247
294
|
|
|
248
|
-
|
|
295
|
+
const subtitleText = typeof seg.subtitle_text === 'string' ? seg.subtitle_text.trim() : '';
|
|
296
|
+
readyClips.push({ path: finalClip, duration: visualClip.duration, transition, subtitleText });
|
|
249
297
|
}
|
|
250
298
|
|
|
251
299
|
const outroClips = [];
|
|
@@ -256,6 +304,20 @@ export async function composeVideoV2({
|
|
|
256
304
|
}
|
|
257
305
|
}
|
|
258
306
|
|
|
307
|
+
// Build subtitle entries with cumulative timeline timestamps
|
|
308
|
+
let cursorMs = 0;
|
|
309
|
+
const subtitleEntries = [];
|
|
310
|
+
for (const clip of readyClips) {
|
|
311
|
+
if (clip.subtitleText) {
|
|
312
|
+
subtitleEntries.push({
|
|
313
|
+
text: clip.subtitleText,
|
|
314
|
+
start_ms: cursorMs,
|
|
315
|
+
end_ms: cursorMs + Math.round(clip.duration * 1000),
|
|
316
|
+
});
|
|
317
|
+
}
|
|
318
|
+
cursorMs += Math.round(clip.duration * 1000);
|
|
319
|
+
}
|
|
320
|
+
|
|
259
321
|
const allClips = [];
|
|
260
322
|
let accumulated = readyClips[0].path;
|
|
261
323
|
for (let i = 1; i < readyClips.length; i++) {
|
|
@@ -271,10 +333,29 @@ export async function composeVideoV2({
|
|
|
271
333
|
|
|
272
334
|
const finalSequence = [...allClips, ...outroClips];
|
|
273
335
|
|
|
336
|
+
// Compose without subtitles first (subtitles are burned in a separate pass)
|
|
337
|
+
const preSubPath = subtitleEntries.length > 0
|
|
338
|
+
? path.join(tmpDir, `pre-sub-${randomUUID().slice(0, 8)}.mp4`)
|
|
339
|
+
: outPath;
|
|
340
|
+
|
|
274
341
|
if (finalSequence.length === 1) {
|
|
275
|
-
await runFfmpeg(['-i', finalSequence[0], '-c', 'copy', '-movflags', '+faststart',
|
|
342
|
+
await runFfmpeg(['-i', finalSequence[0], '-c', 'copy', '-movflags', '+faststart', preSubPath], 'ffmpeg copy');
|
|
276
343
|
} else {
|
|
277
|
-
await concatWithCuts({ clips: finalSequence, outputPath:
|
|
344
|
+
await concatWithCuts({ clips: finalSequence, outputPath: preSubPath });
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
// Burn subtitles into final output
|
|
348
|
+
if (subtitleEntries.length > 0) {
|
|
349
|
+
const assPath = path.join(tmpDir, `subs-${randomUUID().slice(0, 8)}.ass`);
|
|
350
|
+
await writeFile(assPath, buildAssContent(subtitleEntries, { playResX: width, playResY: height }));
|
|
351
|
+
const escapedAssPath = assPath.replace(/\\/g, '/').replace(/:/g, '\\:').replace(/'/g, "\\'");
|
|
352
|
+
await runFfmpeg([
|
|
353
|
+
'-i', preSubPath,
|
|
354
|
+
'-vf', `subtitles='${escapedAssPath}'`,
|
|
355
|
+
'-c:a', 'copy',
|
|
356
|
+
'-movflags', '+faststart',
|
|
357
|
+
outPath,
|
|
358
|
+
], 'ffmpeg burn-subtitles');
|
|
278
359
|
}
|
|
279
360
|
|
|
280
361
|
const totalDuration = await probeDurationSec(outPath);
|
package/src/chat-bridge.js
CHANGED
|
@@ -2,17 +2,9 @@
|
|
|
2
2
|
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
3
3
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
4
4
|
import { z } from 'zod';
|
|
5
|
-
import { createReadStream, existsSync, mkdirSync, readFileSync,
|
|
5
|
+
import { createReadStream, existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
|
|
6
6
|
import { createHash, randomUUID } from 'crypto';
|
|
7
7
|
import path, { extname } from 'path';
|
|
8
|
-
import { homedir } from 'os';
|
|
9
|
-
import {
|
|
10
|
-
buildAssContent,
|
|
11
|
-
concatVideos,
|
|
12
|
-
muxAudioToVideo,
|
|
13
|
-
probeDurationMs,
|
|
14
|
-
transcodeForPlatform,
|
|
15
|
-
} from './_vendor/video/composer/index.js';
|
|
16
8
|
import { recordUrlNarration } from './_vendor/video/recorder/index.js';
|
|
17
9
|
import {
|
|
18
10
|
VIDEO_EXT,
|
|
@@ -24,6 +16,7 @@ import { runSubmitToLibraryTool } from './submit-to-library-tool.js';
|
|
|
24
16
|
import { runRenderTextToImageTool } from './tools/render-text-to-image.js';
|
|
25
17
|
import { runRenderHtmlToImageTool } from './tools/render-html-to-image.js';
|
|
26
18
|
import { runSynthesisTtsTool } from './tools/synthesize-tts.js';
|
|
19
|
+
import { runPlanVideoSegmentsTool } from './tools/plan-video-segments.js';
|
|
27
20
|
import { runComposeVideoV2Tool } from './tools/compose-video-v2.js';
|
|
28
21
|
import { runTakePageScreenshotTool } from './tools/take-page-screenshot.js';
|
|
29
22
|
import { runGetLibraryFileTool } from './tools/get-library-file.js';
|
|
@@ -75,18 +68,13 @@ function redactTokenPrefix(token) {
|
|
|
75
68
|
// Current active workspaceId for memory isolation (defaults to spawn-time WORKSPACE_ID)
|
|
76
69
|
let currentWorkspaceId = WORKSPACE_ID;
|
|
77
70
|
|
|
78
|
-
const VOICEOVER_LOCAL_DIR = path.join(WORKSPACE_DIR, 'artifacts', 'audio');
|
|
79
|
-
const VIDEO_COMPOSE_LOCAL_DIR = path.join(WORKSPACE_DIR, 'artifacts', 'video');
|
|
80
|
-
const DEFAULT_OUTRO_PATH = path.join(homedir(), '.lightcone', 'assets', 'outros', 'default.mp4');
|
|
81
71
|
// Temporary: block legacy video pipeline tools for a specific editor_in_chief agent.
|
|
82
72
|
// Set via env so this doesn't need a code change when workspace/agent IDs rotate.
|
|
83
73
|
// Remove entirely once the new atomic tool framework is stable and the legacy pipeline retires.
|
|
84
74
|
const CVMAX_WORKSPACE_ID = process.env.BLOCKED_EDITOR_WORKSPACE_ID ?? '';
|
|
85
75
|
const CVMAX_EDITOR_IN_CHIEF_AGENT_ID = process.env.BLOCKED_EDITOR_AGENT_ID ?? '';
|
|
86
76
|
const CVMAX_EDITOR_BLOCKED_VIDEO_TOOLS = new Set([
|
|
87
|
-
'generate_voiceover',
|
|
88
77
|
'record_url_narration',
|
|
89
|
-
'compose_video',
|
|
90
78
|
'submit_to_library',
|
|
91
79
|
]);
|
|
92
80
|
|
|
@@ -132,27 +120,6 @@ function cvmaxEditorVideoToolError(toolName) {
|
|
|
132
120
|
};
|
|
133
121
|
}
|
|
134
122
|
|
|
135
|
-
function normalizeVoiceFormat(value) {
|
|
136
|
-
const normalized = String(value ?? '').trim().toLowerCase();
|
|
137
|
-
if (!normalized) return 'mp3';
|
|
138
|
-
if (['mp3', 'wav', 'flac'].includes(normalized)) return normalized;
|
|
139
|
-
return 'mp3';
|
|
140
|
-
}
|
|
141
|
-
|
|
142
|
-
function inferAudioExtension(url, format = 'mp3') {
|
|
143
|
-
const normalizedFormat = normalizeVoiceFormat(format);
|
|
144
|
-
if (typeof url === 'string' && url.trim()) {
|
|
145
|
-
try {
|
|
146
|
-
const pathname = new URL(url).pathname;
|
|
147
|
-
const ext = extname(pathname).toLowerCase();
|
|
148
|
-
if (ext && ['.mp3', '.wav', '.flac'].includes(ext)) return ext;
|
|
149
|
-
} catch {
|
|
150
|
-
// noop
|
|
151
|
-
}
|
|
152
|
-
}
|
|
153
|
-
return `.${normalizedFormat}`;
|
|
154
|
-
}
|
|
155
|
-
|
|
156
123
|
function isInsideDir(filePath, dir) {
|
|
157
124
|
const rel = path.relative(dir, filePath);
|
|
158
125
|
return rel === '' || (!!rel && !rel.startsWith('..') && !path.isAbsolute(rel));
|
|
@@ -168,58 +135,6 @@ function resolveLocalWorkspaceFile(filePath) {
|
|
|
168
135
|
throw new Error(`Local file must be inside the agent workspace or workspace shared artifacts/notes/tmp directories. Got: ${filePath}`);
|
|
169
136
|
}
|
|
170
137
|
|
|
171
|
-
function normalizeComposePath(filePath, label) {
|
|
172
|
-
const normalized = String(filePath ?? '').trim();
|
|
173
|
-
if (!normalized) throw new Error(`${label} is required.`);
|
|
174
|
-
return path.resolve(WORKSPACE_DIR, normalized);
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
function normalizeComposeTarget(value) {
|
|
178
|
-
const normalized = String(value ?? '').trim().toLowerCase();
|
|
179
|
-
if (!normalized) return 'short_video_cn';
|
|
180
|
-
if (['short_video_cn', 'douyin', 'xhs'].includes(normalized)) return normalized;
|
|
181
|
-
throw new Error(`Unsupported compose target: ${value}`);
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
function normalizeComposeAudioSegments(audioSegments) {
|
|
185
|
-
if (!Array.isArray(audioSegments) || audioSegments.length === 0) {
|
|
186
|
-
throw new Error('audio_segments must be a non-empty array.');
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
return audioSegments.map((segment, index) => {
|
|
190
|
-
if (!segment || typeof segment !== 'object' || Array.isArray(segment)) {
|
|
191
|
-
throw new Error(`audio_segments[${index}] must be an object.`);
|
|
192
|
-
}
|
|
193
|
-
const audioPath = normalizeComposePath(
|
|
194
|
-
segment.audio_path ?? segment.audioPath,
|
|
195
|
-
`audio_segments[${index}].audio_path`
|
|
196
|
-
);
|
|
197
|
-
const startMsRaw = segment.start_ms ?? segment.startMs;
|
|
198
|
-
const startMs = startMsRaw == null ? null : Number(startMsRaw);
|
|
199
|
-
if (startMsRaw != null && (!Number.isFinite(startMs) || startMs < 0)) {
|
|
200
|
-
throw new Error(`audio_segments[${index}].start_ms must be a non-negative number.`);
|
|
201
|
-
}
|
|
202
|
-
const phase = String(segment.phase ?? segment.phase_id ?? segment.phaseId ?? '').trim();
|
|
203
|
-
if (startMs == null && !phase) {
|
|
204
|
-
throw new Error(`audio_segments[${index}] requires start_ms, or provide phase with events_log.`);
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
const normalized = { audio_path: audioPath };
|
|
208
|
-
if (startMs != null) normalized.start_ms = Math.floor(startMs);
|
|
209
|
-
if (phase) normalized.phase = phase;
|
|
210
|
-
return {
|
|
211
|
-
...normalized,
|
|
212
|
-
};
|
|
213
|
-
});
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
function cleanupLocalFiles(paths = []) {
|
|
217
|
-
for (const filePath of paths) {
|
|
218
|
-
if (!filePath) continue;
|
|
219
|
-
try { rmSync(filePath, { force: true }); } catch { /* noop */ }
|
|
220
|
-
}
|
|
221
|
-
}
|
|
222
|
-
|
|
223
138
|
const DEFAULT_TOOL_CLASSIFICATION = {
|
|
224
139
|
check_messages: 'local',
|
|
225
140
|
list_memory: 'local',
|
|
@@ -247,9 +162,7 @@ const DEFAULT_TOOL_CLASSIFICATION = {
|
|
|
247
162
|
update_goal_field: 'mandatory',
|
|
248
163
|
supersede_goal_field: 'mandatory',
|
|
249
164
|
request_credential_auth: 'mandatory',
|
|
250
|
-
generate_voiceover: 'mandatory',
|
|
251
165
|
record_url_narration: 'mandatory',
|
|
252
|
-
compose_video: 'mandatory',
|
|
253
166
|
submit_to_library: 'mandatory',
|
|
254
167
|
register_data_source: 'mandatory',
|
|
255
168
|
bind_workspace_scenario: 'mandatory',
|
|
@@ -385,7 +298,6 @@ function inferToolForApi(method, apiPath, body) {
|
|
|
385
298
|
if (method === 'POST' && cleanPath === '/goal-fields/update') return 'update_goal_field';
|
|
386
299
|
if (method === 'POST' && cleanPath === '/goal-fields/supersede') return 'supersede_goal_field';
|
|
387
300
|
if (method === 'POST' && cleanPath === '/credential-auth/request') return 'request_credential_auth';
|
|
388
|
-
if (method === 'POST' && cleanPath === '/tts/voiceover') return 'generate_voiceover';
|
|
389
301
|
if (method === 'POST' && cleanPath === '/content-library/submit') return 'submit_to_library';
|
|
390
302
|
if (method === 'POST' && cleanPath === '/api/data-sources') return 'register_data_source';
|
|
391
303
|
if (method === 'POST' && cleanPath === '/orchestrate/decision') return 'write_governance_decision';
|
|
@@ -1446,24 +1358,44 @@ server.tool('synthesize_tts',
|
|
|
1446
1358
|
async (args) => runSynthesisTtsTool({ ...args, currentWorkspaceId, api })
|
|
1447
1359
|
);
|
|
1448
1360
|
|
|
1361
|
+
// ── plan_video_segments ────────────────────────────────────────────────────────
|
|
1362
|
+
// Schema for one segment to plan: the narration text plus its visual source.
const planVideoSegmentSchema = z.object({
  text: z.string().describe('Narration text for this segment. TTS will be generated from this.'),
  visual_kind: z.enum(['image', 'video', 'gif', 'carousel']).describe('Type of visual.'),
  visual_path: z.string().optional().describe('Absolute path to a single image, video, or gif file.'),
  visual_paths: z.array(z.string()).optional().describe('For carousel: array of image paths, one per card.'),
  transition: z.enum(['cut', 'fade', 'crossfade']).optional().describe('Transition to next segment. Default cut.'),
  presentation: z
    .object({
      style: z.enum(['static', 'scroll']).optional(),
    })
    .optional()
    .describe('Partial presentation hints (style only). duration/per_card_duration are computed from TTS.'),
});

// Top-level input accepted by the plan_video_segments tool.
const planVideoSegmentsInput = {
  segments: z.array(planVideoSegmentSchema).describe('Segments to plan. Each must have narration text and visual info.'),
  voice_id: z.string().optional().describe('TTS voice ID. Omit to use workspace default.'),
  workspace_id: z.string().optional().describe('Target workspace. Defaults to current workspace context.'),
};

// Registers the tool; the handler forwards the validated arguments together
// with the active workspace id and the API client to the tool implementation.
server.tool(
  'plan_video_segments',
  'Universal audio-video sync planning step. For each segment, call TTS to get the real audio duration, then compute the visual duration with a safety buffer. Returns a planned segments array ready to pass directly to compose_video_v2 (with audio_path, presentation.duration/per_card_duration, and subtitle_text pre-filled). Always call this before compose_video_v2 when you have narration text.',
  planVideoSegmentsInput,
  async (args) => {
    const toolInput = { ...args, currentWorkspaceId, api };
    return runPlanVideoSegmentsTool(toolInput);
  }
);
|
|
1380
|
+
|
|
1449
1381
|
// ── compose_video_v2 ───────────────────────────────────────────────────────────
|
|
1450
1382
|
server.tool('compose_video_v2',
|
|
1451
|
-
'Compose a video from a list of segments using ffmpeg. Each segment has a visual source (image/scroll/carousel/video/gif) and optional
|
|
1383
|
+
'Compose a video from a list of segments using ffmpeg. Each segment has a visual source (image/scroll/carousel/video/gif), optional audio, and optional subtitle text. Subtitles are burned into the video by default when subtitle_text is provided. Segments are concatenated in order; outro clips are appended at the end. Returns a local mp4 path.\n\nTypical flow: plan_video_segments → compose_video_v2 (segments output fed directly in).',
|
|
1452
1384
|
{
|
|
1453
1385
|
segments: z.array(z.object({
|
|
1454
1386
|
visual_path: z.string().optional().describe('Absolute path to a single image, video, or gif file.'),
|
|
1455
1387
|
visual_paths: z.array(z.string()).optional().describe('For carousel: array of image paths, one per card.'),
|
|
1456
|
-
visual_kind: z.enum(['image', 'video', 'gif', 'carousel']).describe('Type of visual.
|
|
1388
|
+
visual_kind: z.enum(['image', 'video', 'gif', 'carousel']).describe('Type of visual.'),
|
|
1457
1389
|
presentation: z.object({
|
|
1458
|
-
style: z.enum(['static', 'scroll']).optional().describe('For image: static (default) or scroll (pan upward).
|
|
1459
|
-
duration: z.number().optional().describe('Segment duration in seconds. Required for image/scroll.
|
|
1390
|
+
style: z.enum(['static', 'scroll']).optional().describe('For image: static (default) or scroll (pan upward).'),
|
|
1391
|
+
duration: z.number().optional().describe('Segment duration in seconds. Required for image/scroll.'),
|
|
1460
1392
|
per_card_duration: z.number().optional().describe('Seconds per card for carousel.'),
|
|
1461
1393
|
}).optional(),
|
|
1462
|
-
audio_path: z.string().nullable().optional().describe('Absolute path to
|
|
1394
|
+
audio_path: z.string().nullable().optional().describe('Absolute path to audio (mp3). null or omit for silence.'),
|
|
1395
|
+
subtitle_text: z.string().optional().describe('Narration text to burn as subtitle for this segment. Displayed for the full segment duration.'),
|
|
1463
1396
|
transition: z.enum(['cut', 'fade', 'crossfade']).optional().describe('Transition to next segment. Default cut.'),
|
|
1464
1397
|
})).describe('Ordered list of video segments.'),
|
|
1465
1398
|
outro_paths: z.array(z.string()).optional().describe('Absolute paths to outro video clips appended after all segments.'),
|
|
1466
|
-
format: z.string().optional().describe('Aspect ratio. Default "9:16".'),
|
|
1467
1399
|
resolution: z.string().optional().describe('Output resolution WxH. Default "1080x1920".'),
|
|
1468
1400
|
output_path: z.string().optional().describe('Absolute output path for the mp4. Auto-generated if omitted.'),
|
|
1469
1401
|
},
|
|
@@ -1495,89 +1427,9 @@ server.tool('get_library_file',
|
|
|
1495
1427
|
async (args) => runGetLibraryFileTool({ ...args, currentWorkspaceId, api, SERVER_URL, MACHINE_API_KEY, workspaceDir: WORKSPACE_DIR })
|
|
1496
1428
|
);
|
|
1497
1429
|
|
|
1498
|
-
// ── generate_voiceover ─────────────────────────────────────────────────────────
|
|
1499
|
-
server.tool('generate_voiceover',
|
|
1500
|
-
'Generate a TTS voiceover using an authorized tts_provider credential and return a local audio file path.',
|
|
1501
|
-
{
|
|
1502
|
-
workspace_id: z.string().optional().describe('Target workspace id. Defaults to current workspace context.'),
|
|
1503
|
-
text: z.string().describe('Text content to synthesize.'),
|
|
1504
|
-
voice_preset: z.string().optional().describe('Platform-neutral voice preset id, e.g. "warm_female_zh_01".'),
|
|
1505
|
-
speed: z.number().optional().describe('Speech speed. Typical range is 0.5 to 2.0.'),
|
|
1506
|
-
format: z.enum(['mp3', 'wav', 'flac']).optional().describe('Audio format. Defaults to mp3.'),
|
|
1507
|
-
credential_id: z.string().optional().describe('Optional explicit credential id. If omitted, uses latest granted tts_provider credential.'),
|
|
1508
|
-
},
|
|
1509
|
-
async ({ workspace_id, text, voice_preset, speed, format, credential_id }) => {
|
|
1510
|
-
if (isBlockedCvmaxEditorVideoTool('generate_voiceover')) {
|
|
1511
|
-
return cvmaxEditorVideoToolError('generate_voiceover');
|
|
1512
|
-
}
|
|
1513
|
-
const targetWorkspaceId = (workspace_id ?? currentWorkspaceId ?? WORKSPACE_ID ?? '').trim();
|
|
1514
|
-
if (!targetWorkspaceId) {
|
|
1515
|
-
return { isError: true, content: [{ type: 'text', text: 'workspace_id is required (no current workspace context).' }] };
|
|
1516
|
-
}
|
|
1517
|
-
|
|
1518
|
-
const normalizedText = String(text ?? '').trim();
|
|
1519
|
-
if (!normalizedText) {
|
|
1520
|
-
return { isError: true, content: [{ type: 'text', text: 'text is required for generate_voiceover.' }] };
|
|
1521
|
-
}
|
|
1522
|
-
|
|
1523
|
-
const normalizedSpeed = speed == null ? 1 : Number(speed);
|
|
1524
|
-
if (!Number.isFinite(normalizedSpeed)) {
|
|
1525
|
-
return { isError: true, content: [{ type: 'text', text: 'speed must be numeric.' }] };
|
|
1526
|
-
}
|
|
1527
|
-
|
|
1528
|
-
const normalizedFormat = normalizeVoiceFormat(format);
|
|
1529
|
-
const payload = {
|
|
1530
|
-
workspace_id: targetWorkspaceId,
|
|
1531
|
-
text: normalizedText,
|
|
1532
|
-
speed: normalizedSpeed,
|
|
1533
|
-
format: normalizedFormat,
|
|
1534
|
-
};
|
|
1535
|
-
if (voice_preset) payload.voice_preset = String(voice_preset).trim();
|
|
1536
|
-
if (credential_id) payload.credential_id = String(credential_id).trim();
|
|
1537
|
-
|
|
1538
|
-
const data = await api('POST', '/tts/voiceover', payload);
|
|
1539
|
-
const remoteAudioUrl = String(data.audio_url ?? '').trim();
|
|
1540
|
-
if (!remoteAudioUrl) {
|
|
1541
|
-
return { isError: true, content: [{ type: 'text', text: 'Voiceover API did not return audio_url.' }] };
|
|
1542
|
-
}
|
|
1543
|
-
|
|
1544
|
-
const downloadRes = await fetch(remoteAudioUrl, {
|
|
1545
|
-
method: 'GET',
|
|
1546
|
-
headers: { 'Authorization': `Bearer ${MACHINE_API_KEY}` },
|
|
1547
|
-
});
|
|
1548
|
-
if (!downloadRes.ok) {
|
|
1549
|
-
return {
|
|
1550
|
-
isError: true,
|
|
1551
|
-
content: [{ type: 'text', text: `Failed to download synthesized audio (${downloadRes.status})` }],
|
|
1552
|
-
};
|
|
1553
|
-
}
|
|
1554
|
-
|
|
1555
|
-
const fileBuffer = Buffer.from(await downloadRes.arrayBuffer());
|
|
1556
|
-
mkdirSync(VOICEOVER_LOCAL_DIR, { recursive: true });
|
|
1557
|
-
const audioExt = inferAudioExtension(remoteAudioUrl, data.format ?? normalizedFormat);
|
|
1558
|
-
const localFileName = `voiceover-${Date.now()}-${randomUUID().slice(0, 8)}${audioExt}`;
|
|
1559
|
-
const localAudioPath = path.join(VOICEOVER_LOCAL_DIR, localFileName);
|
|
1560
|
-
writeFileSync(localAudioPath, fileBuffer);
|
|
1561
|
-
|
|
1562
|
-
return {
|
|
1563
|
-
content: [{
|
|
1564
|
-
type: 'text',
|
|
1565
|
-
text:
|
|
1566
|
-
`Voiceover generated.\n` +
|
|
1567
|
-
`workspace_id=${data.workspace_id ?? targetWorkspaceId}\n` +
|
|
1568
|
-
`local_audio_path=${localAudioPath}\n` +
|
|
1569
|
-
`duration_ms=${data.duration_ms ?? 'unknown'}\n` +
|
|
1570
|
-
`sample_rate=${data.sample_rate ?? 'unknown'}\n` +
|
|
1571
|
-
`format=${data.format ?? normalizedFormat}\n` +
|
|
1572
|
-
`size=${formatBytes(fileBuffer.length)}`,
|
|
1573
|
-
}],
|
|
1574
|
-
};
|
|
1575
|
-
}
|
|
1576
|
-
);
|
|
1577
|
-
|
|
1578
1430
|
// ── record_url_narration ────────────────────────────────────────────────────────
|
|
1579
1431
|
server.tool('record_url_narration',
|
|
1580
|
-
'Record a silent video of a URL by orchestrating Xvfb + Chromium + ffmpeg, driven by a video plan. Outputs a silent mp4
|
|
1432
|
+
'Record a silent video of a URL by orchestrating Xvfb + Chromium + ffmpeg, driven by a video plan. Outputs a silent mp4 that can be passed to compose_video_v2 as a video-kind segment with an audio_path for narration.\n\nUse this as the canonical recording step for URL-narration videos. Falls back: if the page needs interactions outside the visual_action vocabulary (clicks, waits, OCR loops), use Monitor (Bash) with custom Playwright instead.\n\nRuntime requirements: this tool only works on a Linux daemon machine with Xvfb + ffmpeg (x11grab) + Chromium installed. macOS / Windows daemons will fail at startup.',
|
|
1581
1433
|
{
|
|
1582
1434
|
url: z.string().describe('Page URL to record'),
|
|
1583
1435
|
plan: z.record(z.any()).describe('Must be the full output from detail_sections (not plan_video). detail_sections output includes detail_sections_version, sections[], audio metadata, and dwell_ms per phase.'),
|
|
@@ -1604,126 +1456,6 @@ server.tool('record_url_narration',
|
|
|
1604
1456
|
}
|
|
1605
1457
|
);
|
|
1606
1458
|
|
|
1607
|
-
// ── compose_video ───────────────────────────────────────────────────────────────
|
|
1608
|
-
server.tool('compose_video',
|
|
1609
|
-
'Compose a final short video by muxing audio onto a base video, optionally burning subtitles, concatenating an outro, and transcoding to platform spec.',
|
|
1610
|
-
{
|
|
1611
|
-
video_path: z.string().describe('Base silent video path. Relative paths resolve from the current workspace.'),
|
|
1612
|
-
audio_segments: z.array(z.object({
|
|
1613
|
-
audio_path: z.string().describe('Audio file path for one narration segment.'),
|
|
1614
|
-
start_ms: z.union([z.number(), z.string()]).optional().describe('Segment start offset in milliseconds.'),
|
|
1615
|
-
phase: z.string().optional().describe('Optional phase id. Used with events_log to derive start time.'),
|
|
1616
|
-
})).describe('Ordered or unordered narration audio segments.'),
|
|
1617
|
-
events_log: z.array(z.any()).optional().describe('Optional recorder event log. Used to resolve segment start time by phase.'),
|
|
1618
|
-
subtitles: z.array(z.object({
|
|
1619
|
-
text: z.string().describe('Subtitle text for this segment (the narration sentence).'),
|
|
1620
|
-
start_ms: z.number().describe('Subtitle start time in milliseconds.'),
|
|
1621
|
-
end_ms: z.number().describe('Subtitle end time in milliseconds.'),
|
|
1622
|
-
})).optional().describe('Subtitle segments to burn into the video. Pass each phase sentence text (from detail_sections sections[].sentence) with cumulative start/end time derived from dwell_ms. Omit to produce no subtitles.'),
|
|
1623
|
-
outro_path: z.string().optional().describe('Optional outro mp4 path. If omitted, uses ~/.lightcone/assets/outros/default.mp4 when present.'),
|
|
1624
|
-
target: z.enum(['short_video_cn', 'douyin', 'xhs']).optional().describe('Transcode target profile. Defaults to short_video_cn.'),
|
|
1625
|
-
},
|
|
1626
|
-
async ({ video_path, audio_segments, events_log, subtitles, outro_path, target }) => {
|
|
1627
|
-
if (isBlockedCvmaxEditorVideoTool('compose_video')) {
|
|
1628
|
-
return cvmaxEditorVideoToolError('compose_video');
|
|
1629
|
-
}
|
|
1630
|
-
const composeInput = { video_path, audio_segments, events_log, subtitles, outro_path, target };
|
|
1631
|
-
try {
|
|
1632
|
-
const result = await runMandatoryLocalTool({
|
|
1633
|
-
toolName: 'compose_video',
|
|
1634
|
-
toolInput: composeInput,
|
|
1635
|
-
executor: async (checkedInput) => {
|
|
1636
|
-
const videoPath = normalizeComposePath(checkedInput.video_path, 'video_path');
|
|
1637
|
-
const audioSegments = normalizeComposeAudioSegments(checkedInput.audio_segments);
|
|
1638
|
-
const eventsLog = Array.isArray(checkedInput.events_log) ? checkedInput.events_log : [];
|
|
1639
|
-
const targetProfile = normalizeComposeTarget(checkedInput.target);
|
|
1640
|
-
const requestedOutroPath = String(checkedInput.outro_path ?? '').trim();
|
|
1641
|
-
|
|
1642
|
-
let resolvedOutroPath = null;
|
|
1643
|
-
if (requestedOutroPath) {
|
|
1644
|
-
resolvedOutroPath = path.resolve(WORKSPACE_DIR, requestedOutroPath);
|
|
1645
|
-
if (!existsSync(resolvedOutroPath)) {
|
|
1646
|
-
throw new Error(`outro_path not found: ${resolvedOutroPath}`);
|
|
1647
|
-
}
|
|
1648
|
-
} else if (existsSync(DEFAULT_OUTRO_PATH)) {
|
|
1649
|
-
resolvedOutroPath = DEFAULT_OUTRO_PATH;
|
|
1650
|
-
}
|
|
1651
|
-
|
|
1652
|
-
mkdirSync(VIDEO_COMPOSE_LOCAL_DIR, { recursive: true });
|
|
1653
|
-
const runId = `${Date.now()}-${randomUUID().slice(0, 8)}`;
|
|
1654
|
-
const muxedPath = path.join(VIDEO_COMPOSE_LOCAL_DIR, `compose-${runId}.muxed.mp4`);
|
|
1655
|
-
const concatPath = path.join(VIDEO_COMPOSE_LOCAL_DIR, `compose-${runId}.concat.mp4`);
|
|
1656
|
-
const finalPath = path.join(VIDEO_COMPOSE_LOCAL_DIR, `compose-${runId}.final.mp4`);
|
|
1657
|
-
const assPath = path.join(VIDEO_COMPOSE_LOCAL_DIR, `compose-${runId}.ass`);
|
|
1658
|
-
const intermediates = [muxedPath, concatPath, assPath];
|
|
1659
|
-
|
|
1660
|
-
const subtitleSegments = Array.isArray(checkedInput.subtitles) ? checkedInput.subtitles : [];
|
|
1661
|
-
let subtitlesAssPath = null;
|
|
1662
|
-
if (subtitleSegments.length > 0) {
|
|
1663
|
-
const assContent = buildAssContent(subtitleSegments);
|
|
1664
|
-
writeFileSync(assPath, assContent, 'utf8');
|
|
1665
|
-
subtitlesAssPath = assPath;
|
|
1666
|
-
}
|
|
1667
|
-
|
|
1668
|
-
try {
|
|
1669
|
-
await muxAudioToVideo({
|
|
1670
|
-
video_path: videoPath,
|
|
1671
|
-
audio_segments: audioSegments,
|
|
1672
|
-
events_log: eventsLog,
|
|
1673
|
-
output: muxedPath,
|
|
1674
|
-
});
|
|
1675
|
-
|
|
1676
|
-
let composedPath = muxedPath;
|
|
1677
|
-
if (resolvedOutroPath) {
|
|
1678
|
-
await concatVideos({
|
|
1679
|
-
inputs: [muxedPath, resolvedOutroPath],
|
|
1680
|
-
output: concatPath,
|
|
1681
|
-
});
|
|
1682
|
-
composedPath = concatPath;
|
|
1683
|
-
}
|
|
1684
|
-
|
|
1685
|
-
await transcodeForPlatform({
|
|
1686
|
-
input: composedPath,
|
|
1687
|
-
output: finalPath,
|
|
1688
|
-
target: targetProfile,
|
|
1689
|
-
subtitlesAssPath,
|
|
1690
|
-
});
|
|
1691
|
-
|
|
1692
|
-
const durationMs = await probeDurationMs(finalPath);
|
|
1693
|
-
cleanupLocalFiles(intermediates);
|
|
1694
|
-
return {
|
|
1695
|
-
finalVideoPath: finalPath,
|
|
1696
|
-
durationMs,
|
|
1697
|
-
outroPath: resolvedOutroPath,
|
|
1698
|
-
target: targetProfile,
|
|
1699
|
-
subtitles: subtitleSegments.length > 0,
|
|
1700
|
-
};
|
|
1701
|
-
} catch (error) {
|
|
1702
|
-
cleanupLocalFiles([...intermediates, finalPath]);
|
|
1703
|
-
throw error;
|
|
1704
|
-
}
|
|
1705
|
-
},
|
|
1706
|
-
});
|
|
1707
|
-
|
|
1708
|
-
const outroText = result.outroPath ? result.outroPath : 'skipped';
|
|
1709
|
-
return {
|
|
1710
|
-
content: [{
|
|
1711
|
-
type: 'text',
|
|
1712
|
-
text:
|
|
1713
|
-
`Video composed.\n` +
|
|
1714
|
-
`final_video_path=${result.finalVideoPath}\n` +
|
|
1715
|
-
`duration_ms=${result.durationMs}\n` +
|
|
1716
|
-
`target=${result.target}\n` +
|
|
1717
|
-
`subtitles=${result.subtitles ? 'burned' : 'none'}\n` +
|
|
1718
|
-
`outro=${outroText}`,
|
|
1719
|
-
}],
|
|
1720
|
-
};
|
|
1721
|
-
} catch (error) {
|
|
1722
|
-
return { isError: true, content: [{ type: 'text', text: `Error: ${error.message}` }] };
|
|
1723
|
-
}
|
|
1724
|
-
}
|
|
1725
|
-
);
|
|
1726
|
-
|
|
1727
1459
|
// ── submit_to_library ──────────────────────────────────────────────────────────
|
|
1728
1460
|
server.tool('submit_to_library',
|
|
1729
1461
|
'把已生成的视频成片归档进内容库(content_video_draft entry)。调用前 mp4 必须已经通过 write_workspace_file 落到 workspace 的 artifacts/ 路径。归档后内容库会出现一张新卡片,含视频预览 + 元数据 + 后续支持发布/回采链路。',
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
import { mkdirSync, writeFileSync } from 'fs';
|
|
2
|
+
import { randomUUID } from 'crypto';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import os from 'os';
|
|
5
|
+
|
|
6
|
+
/**
 * Wrap a plain string in a tool result object with one text content part.
 *
 * @param {string} text - Message to return to the caller.
 * @returns {{content: Array<{type: string, text: string}>}} Success result.
 */
function toolText(text) {
  const textPart = { type: 'text', text };
  return { content: [textPart] };
}
|
|
9
|
+
|
|
10
|
+
/**
 * Wrap a plain string in a failed tool-result envelope (`isError: true`).
 *
 * @param {string} text - Error message to return.
 * @returns {{isError: boolean, content: Array<{type: string, text: string}>}} Error envelope.
 */
function toolError(text) {
  const textItem = { type: 'text', text };
  return { isError: true, content: [textItem] };
}
|
|
13
|
+
|
|
14
|
+
/**
 * Pick a file extension for a downloaded audio file based on its URL.
 *
 * The query string and the fragment are both removed before the extension is
 * read, so `a.wav?sig=1` and `a.wav#t=3` resolve to `.wav`.
 *
 * @param {string|null|undefined} url - Remote audio URL.
 * @returns {string} One of `.mp3`, `.wav`, `.flac`, `.aac`, `.ogg`; `.mp3` when unknown or missing.
 */
function inferAudioExt(url) {
  const knownExtensions = ['.mp3', '.wav', '.flac', '.aac', '.ogg'];
  // Cut at the first `?` or `#`, whichever comes first.
  const withoutQueryOrFragment = String(url ?? '').split(/[?#]/, 1)[0];
  const ext = path.extname(withoutQueryOrFragment).toLowerCase();
  return knownExtensions.includes(ext) ? ext : '.mp3';
}
|
|
19
|
+
|
|
20
|
+
/**
 * Synthesize speech for one segment and save the audio to a local temp file.
 *
 * Posts the text to `/tts/voiceover` through the injected `api` function,
 * downloads the returned `audio_url`, and writes it under
 * `<os.tmpdir()>/lightcone-tts/`.
 *
 * @param {string} text - Text to synthesize.
 * @param {object} options
 * @param {string} options.workspace_id - Workspace the request is made for.
 * @param {string} [options.voice_id] - Optional voice preset; trimmed before sending.
 * @param {Function} options.api - Called as `api(method, path, payload)`; expected to resolve to the response body.
 * @returns {Promise<{audio_path: string, audio_duration_ms: number}>} Local file path and the
 *   duration reported by the API (0 when missing, non-numeric, or negative).
 * @throws {Error} When the API returns no `audio_url` or the download fails.
 */
async function synthesizeSegmentTts(text, { workspace_id, voice_id, api }) {
  const payload = { workspace_id, text, speed: 1, format: 'mp3' };
  if (voice_id) payload.voice_preset = String(voice_id).trim();

  const data = await api('POST', '/tts/voiceover', payload);
  // `data?.` so an empty/null response produces the explicit error below instead of a TypeError.
  const remoteAudioUrl = String(data?.audio_url ?? '').trim();
  if (!remoteAudioUrl) throw new Error('TTS API did not return audio_url');

  const downloadRes = await fetch(remoteAudioUrl);
  if (!downloadRes.ok) throw new Error(`Failed to download audio (${downloadRes.status})`);

  const fileBuffer = Buffer.from(await downloadRes.arrayBuffer());
  const outDir = path.join(os.tmpdir(), 'lightcone-tts');
  mkdirSync(outDir, { recursive: true });
  const ext = inferAudioExt(remoteAudioUrl);
  // Timestamp plus a random suffix keeps concurrent syntheses from colliding.
  const outPath = path.join(outDir, `tts-${Date.now()}-${randomUUID().slice(0, 8)}${ext}`);
  writeFileSync(outPath, fileBuffer);

  // Never hand NaN or a negative duration to the duration planner.
  const reportedMs = Number(data.duration_ms ?? 0);
  const durationMs = Number.isFinite(reportedMs) && reportedMs > 0 ? reportedMs : 0;
  return { audio_path: outPath, audio_duration_ms: durationMs };
}
|
|
41
|
+
|
|
42
|
+
// Segment length in seconds: audio length plus a buffer (0.5s by default),
// rounded up to the next half-second step.
function planDurationSec(audioDurationMs, bufferSec = 0.5) {
  const halfSecondSteps = Math.ceil((audioDurationMs / 1000 + bufferSec) * 2);
  return halfSecondSteps / 2;
}
|
|
47
|
+
|
|
48
|
+
// Number of carousel cards in a segment; never less than 1 so it is always safe to divide by.
function countCarouselCards(seg) {
  return Array.isArray(seg.visual_paths) ? Math.max(1, seg.visual_paths.length) : 1;
}

/**
 * Plan per-segment durations for a video by synthesizing narration for each
 * segment and sizing the segment to its audio.
 *
 * For every segment with non-empty `text`, TTS is requested (one segment at a
 * time). If TTS fails, a warning is recorded and a 3000 ms estimate is used.
 * Carousel segments get a `per_card_duration` (at least 2s); all other kinds
 * get a `duration` (4s when there is no audio). Values already present in the
 * segment's own `presentation` override the planned ones.
 *
 * @param {object} params
 * @param {object[]} params.segments - Segments to plan; each may carry `text`, `visual_kind`, `visual_paths`, `presentation`.
 * @param {string} [params.workspace_id] - Explicit workspace id.
 * @param {string} [params.voice_id] - Optional voice preset passed to TTS.
 * @param {string} [params.currentWorkspaceId] - Fallback workspace id.
 * @param {Function} params.api - API caller passed through to TTS.
 * @returns {Promise<object>} Tool result: JSON text with `segments`, `total_segments`,
 *   `total_duration_ms` and optional `warnings`, or an error envelope for invalid input.
 */
export async function runPlanVideoSegmentsTool({ segments, workspace_id, voice_id, currentWorkspaceId, api }) {
  if (!Array.isArray(segments) || segments.length === 0) {
    return toolError('segments must be a non-empty array.');
  }

  const targetWorkspaceId = String(workspace_id ?? currentWorkspaceId ?? '').trim();
  if (!targetWorkspaceId) {
    return toolError('workspace_id is required (no current workspace context).');
  }

  // Validate every segment up front so no TTS call is made for a request that will be rejected.
  const badIndex = segments.findIndex((seg) => seg === null || typeof seg !== 'object' || Array.isArray(seg));
  if (badIndex !== -1) {
    return toolError(`segments[${badIndex}] must be an object.`);
  }

  const planned = [];
  const errors = [];

  for (let i = 0; i < segments.length; i++) {
    const seg = segments[i];
    const text = String(seg.text ?? '').trim();
    const kind = String(seg.visual_kind ?? 'image');

    let audioResult = null;
    if (text) {
      try {
        audioResult = await synthesizeSegmentTts(text, { workspace_id: targetWorkspaceId, voice_id, api });
      } catch (err) {
        errors.push(`segments[${i}]: TTS failed — ${err.message}`);
        audioResult = { audio_path: null, audio_duration_ms: 3000 }; // fallback estimate
      }
    }

    const audioDurationMs = audioResult?.audio_duration_ms ?? 0;
    let presentation;

    if (kind === 'carousel') {
      // An empty visual_paths array counts as one card; dividing by zero would yield Infinity/NaN.
      const numCards = countCarouselCards(seg);
      const totalDuration = audioDurationMs > 0 ? planDurationSec(audioDurationMs) : numCards * 4;
      const perCard = Math.max(2, Math.ceil((totalDuration / numCards) * 2) / 2);
      presentation = { per_card_duration: perCard };
    } else {
      // image, scroll, video, gif — scroll gets a longer trailing buffer.
      const duration = audioDurationMs > 0 ? planDurationSec(audioDurationMs, kind === 'scroll' ? 1.0 : 0.5) : 4;
      presentation = { duration, ...(kind === 'scroll' ? { style: 'scroll' } : {}) };
    }

    const plannedSeg = {
      ...seg,
      ...(audioResult?.audio_path ? { audio_path: audioResult.audio_path } : {}),
      ...(text ? { subtitle_text: text } : {}),
      // Caller-supplied presentation values win over the planned ones.
      presentation: { ...presentation, ...(seg.presentation ?? {}) },
    };
    if (audioResult?.audio_duration_ms) {
      plannedSeg.audio_duration_ms = audioResult.audio_duration_ms;
    }
    planned.push(plannedSeg);
  }

  const result = {
    segments: planned,
    total_segments: planned.length,
    total_duration_ms: planned.reduce((sum, s) => {
      const seconds = s.presentation?.per_card_duration
        ? s.presentation.per_card_duration * countCarouselCards(s)
        : (s.presentation?.duration ?? 4);
      return sum + Math.round(seconds * 1000);
    }, 0),
    ...(errors.length > 0 ? { warnings: errors } : {}),
  };

  return toolText(JSON.stringify(result, null, 2));
}
|
|
@@ -1,440 +0,0 @@
|
|
|
1
|
-
import { spawn } from 'node:child_process';
|
|
2
|
-
import path from 'node:path';
|
|
3
|
-
import { access, mkdir, mkdtemp, rm, stat, writeFile } from 'node:fs/promises';
|
|
4
|
-
import { constants as fsConstants } from 'node:fs';
|
|
5
|
-
import os from 'node:os';
|
|
6
|
-
|
|
7
|
-
const SUBTITLE_FONT = 'WenQuanYi Micro Hei';
|
|
8
|
-
const SUBTITLE_FONT_SIZE = 72;
|
|
9
|
-
const SUBTITLE_MARGIN_V = 80;
|
|
10
|
-
|
|
11
|
-
const MAX_STDERR_LENGTH = 4000;
|
|
12
|
-
|
|
13
|
-
// Encoding presets keyed by target name. Both the table and each preset are
// frozen: Object.freeze is shallow, so the nested preset needs its own freeze.
const TRANSCODE_TARGETS = Object.freeze({
  short_video_cn: Object.freeze({
    width: 1080,
    height: 1920,
    fps: 30,
    videoCodec: 'libx264',
    profile: 'baseline',
    pixelFormat: 'yuv420p',
    crf: 23,
    preset: 'veryfast',
    level: '4.0',
    audioCodec: 'aac',
    audioBitrate: '128k',
    audioSampleRate: 48000,
    audioChannels: 2,
  }),
});
|
|
30
|
-
|
|
31
|
-
/**
 * Return the trimmed string, or an empty string for any non-string value.
 *
 * @param {*} value - Candidate value.
 * @returns {string} Trimmed text, or `''`.
 */
function normalizeText(value) {
  return typeof value === 'string' ? value.trim() : '';
}
|
|
35
|
-
|
|
36
|
-
/**
 * Turn a required path argument into an absolute path.
 *
 * @param {*} value - Raw path; anything that normalizes to empty text is rejected.
 * @param {string} label - Argument name used in the error message.
 * @returns {string} Absolute path from `path.resolve`.
 * @throws {Error} `<label> required` when the value is empty.
 */
function normalizePath(value, label) {
  const trimmed = normalizeText(value);
  if (trimmed === '') {
    throw new Error(`${label} required`);
  }
  return path.resolve(trimmed);
}
|
|
41
|
-
|
|
42
|
-
/**
 * Verify that a path exists, is readable, and is a regular file.
 *
 * @param {string} filePath - Path to check.
 * @param {string} label - Name used in error messages.
 * @returns {Promise<void>}
 * @throws {Error} When the path is missing or unreadable, or is not a file.
 */
async function ensureReadableFile(filePath, label) {
  try {
    await access(filePath, fsConstants.R_OK);
  } catch {
    throw new Error(`${label} not found or unreadable: ${filePath}`);
  }

  const info = await stat(filePath);
  if (info.isFile()) return;
  throw new Error(`${label} is not a file: ${filePath}`);
}
|
|
52
|
-
|
|
53
|
-
/**
 * Create the directory that will contain `filePath`, including missing ancestors.
 *
 * @param {string} filePath - Path of the file about to be written.
 * @returns {Promise<void>}
 */
async function ensureParentDir(filePath) {
  const parentDir = path.dirname(filePath);
  await mkdir(parentDir, { recursive: true });
}
|
|
56
|
-
|
|
57
|
-
/**
 * Trim captured stderr and keep only its last MAX_STDERR_LENGTH characters.
 *
 * @param {*} stderr - Captured stderr; null/undefined is treated as empty.
 * @returns {string} Trimmed tail of the text, or `''`.
 */
function sanitizeStderr(stderr) {
  const trimmed = String(stderr ?? '').trim();
  const overflow = trimmed.length - MAX_STDERR_LENGTH;
  // Keep the tail of the output when it is too long.
  return overflow > 0 ? trimmed.slice(overflow) : trimmed;
}
|
|
63
|
-
|
|
64
|
-
/**
 * Build an Error whose message is `prefix`, followed by the underlying error
 * message and the sanitized stderr when either is non-empty.
 *
 * @param {string} prefix - Leading description of what failed.
 * @param {Error|null|undefined} error - Underlying error, if any.
 * @param {string} [stderr=''] - Captured stderr of the failed process.
 * @returns {Error} Error with message `prefix` or `prefix: detail | detail`.
 */
function toolError(prefix, error, stderr = '') {
  const details = [normalizeText(error?.message), sanitizeStderr(stderr)].filter(Boolean);
  if (details.length === 0) {
    return new Error(`${prefix}`);
  }
  return new Error(`${prefix}: ${details.join(' | ')}`);
}
|
|
73
|
-
|
|
74
|
-
/**
 * Run a child process and collect its stdout and stderr as text.
 *
 * @param {string} binary - Executable to spawn.
 * @param {string[]} args - Command-line arguments.
 * @param {object} [options]
 * @param {string} [options.name=binary] - Label used in error messages.
 * @returns {Promise<{stdout: string, stderr: string}>} Resolves when the process exits with code 0.
 *   Rejects with an Error (including stderr) when spawning fails, the exit code is non-zero,
 *   or the process is terminated by a signal.
 */
function runProcess(binary, args, { name = binary } = {}) {
  return new Promise((resolve, reject) => {
    const child = spawn(binary, args, { stdio: ['ignore', 'pipe', 'pipe'] });
    let stdout = '';
    let stderr = '';

    // Decode at the stream level: converting each Buffer chunk on its own corrupts
    // multi-byte UTF-8 characters (e.g. CJK file names) that straddle a chunk boundary.
    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');

    child.stdout.on('data', (chunk) => {
      stdout += chunk;
    });
    child.stderr.on('data', (chunk) => {
      stderr += chunk;
    });

    child.on('error', (error) => {
      reject(toolError(`${name} failed`, error, stderr));
    });

    child.on('close', (code, signal) => {
      if (code === 0) {
        resolve({ stdout, stderr });
        return;
      }
      // `code` is null when the process was killed by a signal; report the signal instead.
      const outcome = code === null && signal
        ? `was terminated by signal ${signal}`
        : `exited with code ${code}`;
      reject(toolError(`${name} ${outcome}`, null, stderr));
    });
  });
}
|
|
100
|
-
|
|
101
|
-
/**
 * Coerce a start offset to a whole number of milliseconds.
 *
 * @param {*} value - Candidate offset; converted with `Number()`.
 * @returns {number|null} Floored offset, or `null` when the value is not a finite number >= 0.
 */
function normalizeStartMs(value) {
  const ms = Number(value);
  const usable = Number.isFinite(ms) && ms >= 0;
  return usable ? Math.floor(ms) : null;
}
|
|
106
|
-
|
|
107
|
-
/**
 * Find the earliest start time among events that share the segment's phase.
 *
 * The phase is read from `phase`, `phase_id` or `phaseId`; an event's start
 * from `t_ms_start`, `tMsStart`, `t_ms` or `tMs` (first one that is not nullish).
 *
 * @param {object} segment - Audio segment that may name a phase.
 * @param {Iterable<object>} [eventsLog=[]] - Recorded events.
 * @returns {number|null} Earliest valid start in ms, or `null` when the segment has
 *   no phase or no matching event has a usable start.
 */
function resolveStartMsFromEvents(segment, eventsLog = []) {
  const phaseOf = (item) => normalizeText(item?.phase ?? item?.phase_id ?? item?.phaseId);

  const wantedPhase = phaseOf(segment);
  if (wantedPhase === '') return null;

  let earliest = null;
  for (const entry of eventsLog) {
    // An event without a phase normalizes to '' and can never equal the non-empty wanted phase.
    if (phaseOf(entry) !== wantedPhase) continue;

    const startedAt = normalizeStartMs(
      entry?.t_ms_start ?? entry?.tMsStart ?? entry?.t_ms ?? entry?.tMs
    );
    if (startedAt == null) continue;
    if (earliest == null || startedAt < earliest) {
      earliest = startedAt;
    }
  }

  return earliest;
}
|
|
128
|
-
|
|
129
|
-
/**
 * Validate audio segments and reduce each to `{ audioPath, startMs }`.
 *
 * The start time comes from the segment's own `start_ms`/`startMs`; when that
 * is unusable, it is looked up from the events log by phase.
 *
 * @param {object[]} [audioSegments=[]] - Raw segments.
 * @param {Iterable<object>} [eventsLog=[]] - Events used as a fallback start-time source.
 * @returns {Array<{audioPath: string, startMs: number}>} Normalized segments, in input order.
 * @throws {Error} When the input is not an array, an entry is not an object, the audio
 *   path is missing, or no start time can be determined.
 */
function normalizeAudioSegments(audioSegments = [], eventsLog = []) {
  if (!Array.isArray(audioSegments)) {
    throw new Error('audio_segments must be an array');
  }

  return audioSegments.map((entry, idx) => {
    const isPlainObject = Boolean(entry) && typeof entry === 'object' && !Array.isArray(entry);
    if (!isPlainObject) {
      throw new Error(`audio_segments[${idx}] must be an object`);
    }

    const audioPath = normalizePath(entry.audio_path ?? entry.audioPath, `audio_segments[${idx}].audio_path`);

    const explicitStart = normalizeStartMs(entry.start_ms ?? entry.startMs);
    const startMs = explicitStart ?? resolveStartMsFromEvents(entry, eventsLog);
    if (startMs == null) {
      throw new Error(`audio_segments[${idx}].start_ms missing (and no matching events_log entry found)`);
    }

    return { audioPath, startMs };
  });
}
|
|
152
|
-
|
|
153
|
-
/**
 * Derive a sibling output path by inserting `.suffix` before the extension.
 *
 * @param {string} inputPath - Source file path.
 * @param {string} suffix - Tag to insert, e.g. `muxed`.
 * @returns {string} `<dir>/<name>.<suffix><ext>`; the extension defaults to `.mp4` when the input has none.
 */
function defaultOutputPath(inputPath, suffix) {
  const extension = path.extname(inputPath) || '.mp4';
  const stem = path.basename(inputPath, extension);
  const fileName = `${stem}.${suffix}${extension}`;
  return path.join(path.dirname(inputPath), fileName);
}
|
|
158
|
-
|
|
159
|
-
/**
 * Escape single quotes so a path can sit inside a single-quoted `file '...'`
 * line of a concat list: every `'` becomes `'\''`.
 *
 * @param {string} filePath - Path to escape.
 * @returns {string} Escaped path.
 */
function escapeConcatPath(filePath) {
  return filePath.split("'").join("'\\''");
}
|
|
162
|
-
|
|
163
|
-
/**
 * Mix timed audio segments onto a video with ffmpeg, copying the video stream.
 *
 * Each segment is delayed to its start time, all segments are mixed into one
 * track encoded as AAC, and the result is written to `output`. With no
 * segments the video is copied without any audio track.
 *
 * @param {object} [params]
 * @param {string} params.video_path - Source video.
 * @param {object[]} [params.audio_segments=[]] - Segments with an audio path and a start time (or a phase resolvable via `events_log`).
 * @param {object[]} [params.events_log=[]] - Events used to resolve missing start times.
 * @param {string|null} [params.output=null] - Output path; defaults to `<video>.muxed<ext>`.
 * @returns {Promise<string>} Absolute path of the written file.
 * @throws {Error} On invalid input, unreadable files, output equal to the input, or ffmpeg failure.
 */
export async function muxAudioToVideo({
  video_path,
  audio_segments = [],
  events_log = [],
  output = null,
} = {}) {
  const videoPath = normalizePath(video_path, 'video_path');
  await ensureReadableFile(videoPath, 'video_path');

  const timeline = normalizeAudioSegments(audio_segments, events_log);
  timeline.sort((left, right) => left.startMs - right.startMs);
  for (const { audioPath } of timeline) {
    await ensureReadableFile(audioPath, `audio segment (${audioPath})`);
  }

  const outputPath = output ? normalizePath(output, 'output') : defaultOutputPath(videoPath, 'muxed');
  if (outputPath === videoPath) throw new Error('output must not equal video_path');
  await ensureParentDir(outputPath);

  if (timeline.length === 0) {
    // Nothing to mix: copy the video stream and drop any audio.
    const silentArgs = ['-y', '-i', videoPath, '-map', '0:v:0', '-c:v', 'copy', '-an', outputPath];
    await runProcess('ffmpeg', silentArgs, { name: 'ffmpeg mux(no-audio)' });
    return outputPath;
  }

  // Input 0 is the video, so audio segment k is ffmpeg input k + 1.
  const delayFilters = timeline.map(({ startMs }, idx) => (
    `[${idx + 1}:a]adelay=${startMs}|${startMs},aresample=async=1:first_pts=0[a${idx}]`
  ));
  const mixLabels = timeline.map((_, idx) => `[a${idx}]`).join('');
  const filterGraph = [
    ...delayFilters,
    `${mixLabels}amix=inputs=${timeline.length}:duration=longest:dropout_transition=0,apad[a]`,
  ].join(';');

  const audioInputs = timeline.flatMap(({ audioPath }) => ['-i', audioPath]);
  const args = [
    '-y',
    '-i', videoPath,
    ...audioInputs,
    '-filter_complex', filterGraph,
    '-map', '0:v:0',
    '-map', '[a]',
    '-c:v', 'copy',
    '-c:a', 'aac',
    '-shortest',
    '-movflags', '+faststart',
    outputPath,
  ];

  await runProcess('ffmpeg', args, { name: 'ffmpeg mux' });
  return outputPath;
}
|
|
228
|
-
|
|
229
|
-
/**
 * Concatenate videos with ffmpeg using stream copy (no re-encode).
 *
 * A single input is simply copied to the output. Multiple inputs are joined
 * through ffmpeg's concat demuxer using a temporary list file, which is
 * removed afterwards.
 *
 * @param {object} [params]
 * @param {string[]} [params.inputs=[]] - Paths of the videos, in playback order.
 * @param {string} [params.output] - Output path; defaults to `<first input>.concat<ext>`.
 * @returns {Promise<string>} Absolute path of the written file.
 * @throws {Error} When `inputs` is empty or not an array, an input is unreadable,
 *   the output path equals one of the inputs, or ffmpeg fails.
 */
export async function concatVideos({
  inputs = [],
  output,
} = {}) {
  if (!Array.isArray(inputs) || inputs.length === 0) {
    throw new Error('inputs must be a non-empty array');
  }

  const normalizedInputs = inputs.map((input, index) => normalizePath(input, `inputs[${index}]`));
  for (const inputPath of normalizedInputs) {
    await ensureReadableFile(inputPath, `concat input (${inputPath})`);
  }

  const outputPath = output
    ? normalizePath(output, 'output')
    : defaultOutputPath(normalizedInputs[0], 'concat');
  // ffmpeg runs with -y and, in the concat-demuxer path, only sees the list file as its
  // input, so it cannot notice that it is about to overwrite a file it still has to read.
  if (normalizedInputs.includes(outputPath)) {
    throw new Error('output must not equal any input');
  }
  await ensureParentDir(outputPath);

  if (normalizedInputs.length === 1) {
    await runProcess('ffmpeg', [
      '-y',
      '-i', normalizedInputs[0],
      '-c', 'copy',
      outputPath,
    ], { name: 'ffmpeg concat(single-input)' });
    return outputPath;
  }

  const tempDir = await mkdtemp(path.join(os.tmpdir(), 'lightcone-concat-'));
  const listPath = path.join(tempDir, 'inputs.txt');

  try {
    const content = normalizedInputs
      .map((inputPath) => `file '${escapeConcatPath(inputPath)}'`)
      .join('\n');
    await writeFile(listPath, `${content}\n`, 'utf8');

    await runProcess('ffmpeg', [
      '-y',
      '-f', 'concat',
      '-safe', '0', // the list holds absolute paths, which the demuxer rejects in safe mode
      '-i', listPath,
      '-c', 'copy',
      '-movflags', '+faststart',
      outputPath,
    ], { name: 'ffmpeg concat' });
  } finally {
    // Best-effort cleanup; a failure to remove the temp dir must not mask the real result.
    await rm(tempDir, { recursive: true, force: true }).catch(() => {});
  }

  return outputPath;
}
|
|
281
|
-
|
|
282
|
-
/**
 * Map a target name to its encoding preset.
 *
 * @param {*} target - Target name (case-insensitive). Empty or non-string values select the default preset.
 * @returns {object} The `short_video_cn` preset, also used for `douyin` and `xhs`.
 * @throws {Error} For any other non-empty target name.
 */
function resolveTranscodeTarget(target) {
  const key = normalizeText(target).toLowerCase();
  switch (key) {
    case '':
    case 'short_video_cn':
    case 'douyin':
    case 'xhs':
      return TRANSCODE_TARGETS.short_video_cn;
    default:
      throw new Error(`unsupported transcode target: ${target}`);
  }
}
|
|
292
|
-
|
|
293
|
-
/**
 * Format a millisecond offset as an ASS timestamp `H:MM:SS.cc` (centiseconds).
 *
 * @param {number} ms - Offset in milliseconds. Negative, missing, or non-finite
 *   values are treated as 0 so the output is always a well-formed timestamp.
 * @returns {string} Timestamp such as `1:02:03.45`.
 */
function msToAssTimestamp(ms) {
  const numericMs = Number(ms);
  // Without this guard, undefined/NaN would format as "NaN:NaN:NaN.NaN".
  const safeMs = Number.isFinite(numericMs) ? Math.max(0, numericMs) : 0;

  const totalCs = Math.round(safeMs / 10);
  const cs = totalCs % 100;
  const totalSec = Math.floor(totalCs / 100);
  const sec = totalSec % 60;
  const totalMin = Math.floor(totalSec / 60);
  const min = totalMin % 60;
  const hr = Math.floor(totalMin / 60);

  const pad2 = (part) => String(part).padStart(2, '0');
  return `${hr}:${pad2(min)}:${pad2(sec)}.${pad2(cs)}`;
}
|
|
303
|
-
|
|
304
|
-
// Hard-wrap CJK subtitle text so it never overflows the video frame.
// Chinese text has no word boundaries for automatic wrapping to use, so an
// explicit \N break is inserted every maxChars characters (code points).
// maxChars is floored and clamped to at least 1: a zero or negative step would
// never advance the loop, and a fractional step would produce overlapping slices.
function wrapSubtitleText(text, maxChars = 14) {
  const requested = Number(maxChars);
  const lineLength = Number.isNaN(requested) ? 14 : Math.max(1, Math.floor(requested));

  const chars = Array.from(String(text ?? ''));
  if (chars.length <= lineLength) return chars.join('');

  const lines = [];
  for (let i = 0; i < chars.length; i += lineLength) {
    lines.push(chars.slice(i, i + lineLength).join(''));
  }
  return lines.join('\\N');
}
|
|
316
|
-
|
|
317
|
-
/**
 * Build the text of an ASS subtitle file from timed subtitle entries.
 *
 * @param {Array<{text: string, start_ms: number, end_ms: number}>} [subtitles=[]] - Entries to
 *   render; a non-array value is treated as empty.
 * @param {object} [options]
 * @param {number} [options.playResX=1080] - Script horizontal resolution.
 * @param {number} [options.playResY=1920] - Script vertical resolution.
 * @returns {string} Full ASS document: script info, one `Default` style, and one Dialogue line per entry.
 */
export function buildAssContent(subtitles = [], { playResX = 1080, playResY = 1920 } = {}) {
  // Max chars per line: (playResX - marginL - marginR) / fontSizePx.
  // 1080 - 30 - 30 = 1020px, fontsize 72 ≈ 72px/char → 14 chars.
  // Clamped to 1 so a very narrow frame cannot produce a zero or negative wrap width.
  const maxCharsPerLine = Math.max(1, Math.floor((playResX - 60) / SUBTITLE_FONT_SIZE));

  const header = [
    '[Script Info]',
    'ScriptType: v4.00+',
    `PlayResX: ${playResX}`,
    `PlayResY: ${playResY}`,
    'WrapStyle: 2',
    '',
    '[V4+ Styles]',
    'Format: Name, Fontname, Fontsize, PrimaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding',
    `Style: Default,${SUBTITLE_FONT},${SUBTITLE_FONT_SIZE},&H00FFFFFF,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,4,0,2,30,30,${SUBTITLE_MARGIN_V},1`,
    '',
    '[Events]',
    'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
  ].join('\n');

  const entries = Array.isArray(subtitles) ? subtitles : [];
  const events = entries.map(({ text, start_ms, end_ms }) => {
    const wrapped = wrapSubtitleText(text, maxCharsPerLine);
    // Text is the last Dialogue field, so commas need no escaping. Wrapping a comma in
    // `{...}` would make it an override block, which is not rendered, hiding the comma.
    // Only raw line breaks are converted, since they would end the Dialogue line.
    const safeText = wrapped.replace(/\r?\n/g, '\\N');
    return `Dialogue: 0,${msToAssTimestamp(start_ms)},${msToAssTimestamp(end_ms)},Default,,0,0,0,,${safeText}`;
  });

  return `${header}\n${events.join('\n')}\n`;
}
|
|
345
|
-
|
|
346
|
-
/**
 * Re-encode a video to a platform preset with ffmpeg, optionally burning in ASS subtitles.
 *
 * The picture is scaled to fit the preset frame and padded with black to the
 * exact size; audio is re-encoded per the preset.
 *
 * @param {object} [params]
 * @param {string} params.input - Source video path.
 * @param {string} [params.output] - Output path; defaults to `<input>.platform<ext>`.
 * @param {string} [params.target='short_video_cn'] - Preset name.
 * @param {string|null} [params.subtitlesAssPath=null] - ASS file to burn in, if any.
 * @returns {Promise<string>} Absolute path of the written file.
 * @throws {Error} On invalid input, output equal to input, unsupported target, or ffmpeg failure.
 */
export async function transcodeForPlatform({
  input,
  output,
  target = 'short_video_cn',
  subtitlesAssPath = null,
} = {}) {
  const inputPath = normalizePath(input, 'input');
  await ensureReadableFile(inputPath, 'input');

  const outputPath = output ? normalizePath(output, 'output') : defaultOutputPath(inputPath, 'platform');
  if (outputPath === inputPath) throw new Error('output must not equal input');
  await ensureParentDir(outputPath);

  const preset = resolveTranscodeTarget(target);
  const { width, height } = preset;

  const filters = [
    `scale=${width}:${height}:force_original_aspect_ratio=decrease`,
    `pad=${width}:${height}:(ow-iw)/2:(oh-ih)/2:black`,
    'setsar=1',
  ];
  if (subtitlesAssPath) {
    // Forward slashes plus escaped ':' and "'" so the path can sit inside the quoted filter argument.
    const escapedAssPath = subtitlesAssPath
      .replace(/\\/g, '/')
      .replace(/:/g, '\\:')
      .replace(/'/g, "\\'");
    filters.push(`subtitles='${escapedAssPath}'`);
  }

  const ffmpegArgs = [
    '-y',
    '-i', inputPath,
    '-vf', filters.join(','),
    '-r', String(preset.fps),
    '-c:v', preset.videoCodec,
    '-profile:v', preset.profile,
    '-level', preset.level,
    '-pix_fmt', preset.pixelFormat,
    '-preset', preset.preset,
    '-crf', String(preset.crf),
    '-c:a', preset.audioCodec,
    '-b:a', preset.audioBitrate,
    '-ar', String(preset.audioSampleRate),
    '-ac', String(preset.audioChannels),
    '-movflags', '+faststart',
    outputPath,
  ];
  await runProcess('ffmpeg', ffmpegArgs, { name: 'ffmpeg transcode' });

  return outputPath;
}
|
|
394
|
-
|
|
395
|
-
/**
 * Read a media file's duration with ffprobe.
 *
 * @param {string} inputPath - Media file path.
 * @returns {Promise<number>} Duration in whole milliseconds (floored).
 * @throws {Error} When the file is unreadable, ffprobe fails, or the reported
 *   duration is not a positive finite number.
 */
export async function probeDurationMs(inputPath) {
  const mediaPath = normalizePath(inputPath, 'input');
  await ensureReadableFile(mediaPath, 'input');

  // Print only the container duration, with no key name or section wrapper.
  const probeArgs = [
    '-v', 'error',
    '-show_entries', 'format=duration',
    '-of', 'default=noprint_wrappers=1:nokey=1',
    mediaPath,
  ];
  const { stdout } = await runProcess('ffprobe', probeArgs, { name: 'ffprobe duration' });

  const seconds = Number.parseFloat(String(stdout ?? '').trim());
  const isUsable = Number.isFinite(seconds) && seconds > 0;
  if (!isUsable) {
    throw new Error(`ffprobe returned invalid duration for ${mediaPath}`);
  }
  return Math.floor(seconds * 1000);
}
|
|
412
|
-
|
|
413
|
-
/**
 * Read basic properties of a file's first video stream with ffprobe.
 *
 * @param {string} inputPath - Media file path.
 * @returns {Promise<{width: number|null, height: number|null, frame_rate: string,
 *   pixel_format: string, video_codec: string}>} Stream properties; numeric fields are
 *   `null` and string fields are `''` when ffprobe reports nothing for them.
 * @throws {Error} When the file is unreadable, ffprobe fails, or its output is not valid JSON.
 */
export async function readMediaSpec(inputPath) {
  const mediaPath = normalizePath(inputPath, 'input');
  await ensureReadableFile(mediaPath, 'input');

  const probeArgs = [
    '-v', 'error',
    '-select_streams', 'v:0',
    '-show_entries', 'stream=width,height,r_frame_rate,pix_fmt,codec_name',
    '-of', 'json',
    mediaPath,
  ];
  const { stdout } = await runProcess('ffprobe', probeArgs, { name: 'ffprobe spec' });

  let report;
  try {
    report = JSON.parse(stdout);
  } catch {
    throw new Error(`Failed to parse ffprobe spec output for ${mediaPath}`);
  }

  const videoStream = report?.streams?.[0] ?? {};
  const toNumberOrNull = (raw) => Number(raw) || null;
  const toText = (raw) => String(raw ?? '');

  return {
    width: toNumberOrNull(videoStream.width),
    height: toNumberOrNull(videoStream.height),
    frame_rate: toText(videoStream.r_frame_rate),
    pixel_format: toText(videoStream.pix_fmt),
    video_codec: toText(videoStream.codec_name),
  };
}
|