@lightcone-ai/daemon 0.18.0 → 0.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -278,9 +278,10 @@ server.tool(
|
|
|
278
278
|
+ 'When any segment has audio_path, MUST be preceded by plan_video_segments in the same session '
|
|
279
279
|
+ '(plan_video_segments fills duration/subtitle_text/audio_path mechanically; manual alignment is rejected). '
|
|
280
280
|
+ 'Returns a local mp4 path + size_bytes.\n\n'
|
|
281
|
-
+ 'Dual-version delivery (subtitled + clean):
|
|
282
|
-
+ '
|
|
283
|
-
+ '
|
|
281
|
+
+ 'Dual / multi-version delivery (e.g. subtitled+voiced + clean silent): pass the variants[] array — one call '
|
|
282
|
+
+ 'runs the heavy per-segment ffmpeg work ONCE and only diverges at audio mux + concat + subtitle burn per '
|
|
283
|
+
+ 'variant. That is ~1.2-1.4× single-version time vs ~2× when calling this tool twice. Each variant chooses '
|
|
284
|
+
+ 'its own burn_subtitles and include_audio independently.',
|
|
284
285
|
{
|
|
285
286
|
segments: z.array(z.object({
|
|
286
287
|
visual_path: z.string().optional().describe('Absolute path to a single image / video / gif.'),
|
|
@@ -297,10 +298,18 @@ server.tool(
|
|
|
297
298
|
})).describe('Ordered list of video segments.'),
|
|
298
299
|
outro_paths: z.array(z.string()).optional().describe('Absolute paths to outro video clips appended at end.'),
|
|
299
300
|
resolution: z.string().optional().describe('Output resolution WxH. Default "1080x1920".'),
|
|
300
|
-
output_path: z.string().optional().describe('Absolute output path. Auto-generated if omitted.'),
|
|
301
|
-
burn_subtitles: z.boolean().optional().describe('
|
|
302
|
-
+ '
|
|
303
|
-
|
|
301
|
+
output_path: z.string().optional().describe('Absolute output path (single-output mode). Auto-generated if omitted. Ignored when variants[] is provided.'),
|
|
302
|
+
burn_subtitles: z.boolean().optional().describe('Single-output mode only: whether to burn subtitle_text. Default true. '
|
|
303
|
+
+ 'For producing multiple variants in one call, use variants[] instead.'),
|
|
304
|
+
variants: z.array(z.object({
|
|
305
|
+
output_path: z.string().describe('Absolute output path for this variant. Each variant must use a unique path.'),
|
|
306
|
+
burn_subtitles: z.boolean().optional().describe('Whether to burn subtitle_text into THIS variant. Default true.'),
|
|
307
|
+
include_audio: z.boolean().optional().describe('Whether to mux segment.audio_path into THIS variant. Default true. '
|
|
308
|
+
+ 'Pass false for a fully silent copy (skips audio mux entirely; segment.audio_path is ignored for this variant).'),
|
|
309
|
+
})).optional().describe('Multi-output mode: one call produces all variants. '
|
|
310
|
+
+ 'Visual segment processing (the heavy work) runs once; each variant only repeats audio mux + concat + optional subtitle burn. '
|
|
311
|
+
+ 'Typical use: [{output_path:"with-sub.mp4"}, {output_path:"clean.mp4", burn_subtitles:false, include_audio:false}] '
|
|
312
|
+
+ 'to deliver a subtitled+voiced version and a silent clean version together.'),
|
|
304
313
|
},
|
|
305
314
|
async (args) => {
|
|
306
315
|
const segments = Array.isArray(args?.segments) ? args.segments : [];
|
package/package.json
CHANGED
|
@@ -249,17 +249,64 @@ async function applyFadeTransition({ clipA, clipB, tmpDir, style = 'fade' }) {
|
|
|
249
249
|
return outPath;
|
|
250
250
|
}
|
|
251
251
|
|
|
252
|
+
// compose_video_v2 supports two modes:
|
|
253
|
+
//
|
|
254
|
+
// 1. Legacy single-output: pass output_path (+ optional burn_subtitles).
|
|
255
|
+
// Returns { path, duration_ms, size_bytes, variants: [..1 entry..] }.
|
|
256
|
+
//
|
|
257
|
+
// 2. Multi-variant: pass variants=[{output_path, burn_subtitles?, include_audio?}, ...].
|
|
258
|
+
// Visual segment processing runs ONCE (the heavy part — per-segment ffmpeg
|
|
259
|
+
// transcode/scale/scroll). Each variant then diverges only at audio mux +
|
|
260
|
+
// concat + subtitle burn — typically a few seconds per extra variant.
|
|
261
|
+
// Returns { variants: [{path, duration_ms, size_bytes, burn_subtitles,
|
|
262
|
+
//    include_audio}, ...] }, plus top-level path/duration_ms/size_bytes mirroring variants[0].
|
|
263
|
+
//
|
|
264
|
+
// Use the multi-variant mode when shipping the same content with different
|
|
265
|
+
// subtitle/audio combinations (e.g. subtitled+voiced + clean silent). Calling
|
|
266
|
+
// the legacy mode twice produces correct outputs but redoes per-segment work.
|
|
252
267
|
export async function composeVideoV2({
|
|
253
268
|
segments = [],
|
|
254
269
|
outro_paths = [],
|
|
255
270
|
resolution = '1080x1920',
|
|
256
271
|
output_path,
|
|
257
272
|
burn_subtitles = true,
|
|
273
|
+
variants,
|
|
258
274
|
}) {
|
|
259
275
|
if (!Array.isArray(segments) || segments.length === 0) {
|
|
260
276
|
throw new Error('segments must be a non-empty array');
|
|
261
277
|
}
|
|
262
278
|
|
|
279
|
+
// Normalize variants. If caller did not pass an explicit variants array,
|
|
280
|
+
// synthesize a single variant from the legacy output_path + burn_subtitles.
|
|
281
|
+
// include_audio defaults to true (auto-include any segment.audio_path).
|
|
282
|
+
const normalizedVariants = (Array.isArray(variants) && variants.length > 0)
|
|
283
|
+
? variants.map((v, idx) => {
|
|
284
|
+
if (!v || typeof v !== 'object') {
|
|
285
|
+
throw new Error(`variants[${idx}]: must be an object`);
|
|
286
|
+
}
|
|
287
|
+
const outPath = String(v.output_path ?? '').trim();
|
|
288
|
+
if (!outPath) throw new Error(`variants[${idx}]: output_path is required`);
|
|
289
|
+
return {
|
|
290
|
+
output_path: outPath,
|
|
291
|
+
burn_subtitles: v.burn_subtitles !== false,
|
|
292
|
+
include_audio: v.include_audio !== false,
|
|
293
|
+
};
|
|
294
|
+
})
|
|
295
|
+
: [{
|
|
296
|
+
output_path: output_path ?? path.join(os.tmpdir(), `lightcone-video-${Date.now()}.mp4`),
|
|
297
|
+
burn_subtitles: burn_subtitles !== false,
|
|
298
|
+
include_audio: true,
|
|
299
|
+
}];
|
|
300
|
+
|
|
301
|
+
// Disallow two variants writing to the same file — would race on disk.
|
|
302
|
+
const seenOutputs = new Set();
|
|
303
|
+
for (const v of normalizedVariants) {
|
|
304
|
+
if (seenOutputs.has(v.output_path)) {
|
|
305
|
+
throw new Error(`variants share output_path "${v.output_path}" — each variant needs a unique destination`);
|
|
306
|
+
}
|
|
307
|
+
seenOutputs.add(v.output_path);
|
|
308
|
+
}
|
|
309
|
+
|
|
263
310
|
const [widthStr, heightStr] = String(resolution).split('x');
|
|
264
311
|
const width = parseInt(widthStr, 10) || DEFAULT_WIDTH;
|
|
265
312
|
const height = parseInt(heightStr, 10) || DEFAULT_HEIGHT;
|
|
@@ -268,23 +315,19 @@ export async function composeVideoV2({
|
|
|
268
315
|
const tmpDir = path.join(os.tmpdir(), `compose-v2-${randomUUID().slice(0, 8)}`);
|
|
269
316
|
await mkdir(tmpDir, { recursive: true });
|
|
270
317
|
|
|
271
|
-
const outPath = output_path ?? path.join(os.tmpdir(), `lightcone-video-${Date.now()}.mp4`);
|
|
272
|
-
await mkdir(path.dirname(outPath), { recursive: true });
|
|
273
|
-
|
|
274
318
|
try {
|
|
275
|
-
|
|
276
|
-
|
|
319
|
+
// ── Shared phase: generate visual clips per segment ONCE ──────────────────
|
|
320
|
+
// This is the heavy work (image scaling, scroll rendering, video resize +
|
|
321
|
+
// re-encode). Reused across every variant.
|
|
322
|
+
const visualClips = [];
|
|
277
323
|
for (let i = 0; i < segments.length; i++) {
|
|
278
324
|
const seg = segments[i];
|
|
279
325
|
const kind = String(seg.visual_kind ?? 'image');
|
|
280
326
|
const presentation = seg.presentation ?? {};
|
|
281
327
|
const style = String(presentation.style ?? 'static');
|
|
282
328
|
const duration = Number(presentation.duration ?? presentation.per_card_duration ?? 4);
|
|
283
|
-
const audioPath = seg.audio_path ?? null;
|
|
284
|
-
const transition = String(seg.transition ?? 'cut');
|
|
285
329
|
|
|
286
330
|
let visualClip;
|
|
287
|
-
|
|
288
331
|
if (kind === 'image') {
|
|
289
332
|
const imgPath = String(seg.visual_path ?? '');
|
|
290
333
|
if (!imgPath) throw new Error(`segments[${i}]: visual_path required for kind=image`);
|
|
@@ -312,119 +355,145 @@ export async function composeVideoV2({
|
|
|
312
355
|
} else {
|
|
313
356
|
throw new Error(`segments[${i}]: unknown visual_kind "${kind}"`);
|
|
314
357
|
}
|
|
315
|
-
|
|
316
|
-
let finalClip;
|
|
317
|
-
if (audioPath && await fileExists(audioPath)) {
|
|
318
|
-
finalClip = await muxAudio({ videoPath: visualClip.path, audioPath, duration: visualClip.duration, tmpDir });
|
|
319
|
-
} else {
|
|
320
|
-
finalClip = await silentClip({ videoPath: visualClip.path, duration: visualClip.duration, tmpDir });
|
|
321
|
-
}
|
|
322
|
-
|
|
323
|
-
// Accept `text` as an alias for `subtitle_text`: plan_video_segments takes
|
|
324
|
-
// segment narration as `text` on input, compose_video_v2's canonical name is
|
|
325
|
-
// `subtitle_text`. Either reaches the burn pass so subtitles aren't silently dropped.
|
|
326
|
-
// burn_subtitles=false (dual-version delivery: same segments composed once
|
|
327
|
-
// with subtitles and once without) drops the text here so the burn-in pass
|
|
328
|
-
// skips entirely — saves the second compose having to mutate the segment array.
|
|
329
|
-
const subtitleText = burn_subtitles
|
|
330
|
-
? (
|
|
331
|
-
typeof seg.subtitle_text === 'string' ? seg.subtitle_text
|
|
332
|
-
: typeof seg.text === 'string' ? seg.text
|
|
333
|
-
: ''
|
|
334
|
-
).trim()
|
|
335
|
-
: '';
|
|
336
|
-
readyClips.push({ path: finalClip, duration: visualClip.duration, transition, subtitleText });
|
|
358
|
+
visualClips.push(visualClip);
|
|
337
359
|
}
|
|
338
360
|
|
|
339
|
-
|
|
361
|
+
// Outros are also shared — they don't depend on subtitle/audio choices.
|
|
362
|
+
const outroClipPaths = [];
|
|
340
363
|
for (const outroPath of (outro_paths ?? [])) {
|
|
341
364
|
if (outroPath && await fileExists(outroPath)) {
|
|
342
365
|
const c = await videoToClip({ videoPath: outroPath, tmpDir, width, height, fps });
|
|
343
|
-
|
|
366
|
+
outroClipPaths.push(c.path);
|
|
344
367
|
}
|
|
345
368
|
}
|
|
346
369
|
|
|
347
|
-
//
|
|
348
|
-
//
|
|
349
|
-
//
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
370
|
+
// ── Per-variant phase ─────────────────────────────────────────────────────
|
|
371
|
+
// For each variant: mux audio (or silent), build subtitle text, concat with
|
|
372
|
+
// transitions, optionally burn subtitles. Writes to variant.output_path.
|
|
373
|
+
const variantOutputs = [];
|
|
374
|
+
for (let vi = 0; vi < normalizedVariants.length; vi++) {
|
|
375
|
+
const variant = normalizedVariants[vi];
|
|
376
|
+
await mkdir(path.dirname(variant.output_path), { recursive: true });
|
|
377
|
+
|
|
378
|
+
const variantTag = `v${vi}`;
|
|
379
|
+
const readyClips = [];
|
|
380
|
+
|
|
381
|
+
for (let i = 0; i < segments.length; i++) {
|
|
382
|
+
const seg = segments[i];
|
|
383
|
+
const visualClip = visualClips[i];
|
|
384
|
+
const transition = String(seg.transition ?? 'cut');
|
|
385
|
+
const audioPath = variant.include_audio ? (seg.audio_path ?? null) : null;
|
|
386
|
+
|
|
387
|
+
let finalClip;
|
|
388
|
+
if (audioPath && await fileExists(audioPath)) {
|
|
389
|
+
finalClip = await muxAudio({ videoPath: visualClip.path, audioPath, duration: visualClip.duration, tmpDir });
|
|
390
|
+
} else {
|
|
391
|
+
finalClip = await silentClip({ videoPath: visualClip.path, duration: visualClip.duration, tmpDir });
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
// Accept `text` as an alias for `subtitle_text`: plan_video_segments takes
|
|
395
|
+
// segment narration as `text` on input, compose_video_v2's canonical name is
|
|
396
|
+
// `subtitle_text`. burn_subtitles=false drops the text here so the burn-in
|
|
397
|
+
// pass skips entirely.
|
|
398
|
+
const subtitleText = variant.burn_subtitles
|
|
399
|
+
? (
|
|
400
|
+
typeof seg.subtitle_text === 'string' ? seg.subtitle_text
|
|
401
|
+
: typeof seg.text === 'string' ? seg.text
|
|
402
|
+
: ''
|
|
403
|
+
).trim()
|
|
404
|
+
: '';
|
|
405
|
+
readyClips.push({ path: finalClip, duration: visualClip.duration, transition, subtitleText });
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
// Subtitle entries: cumulative timeline timestamps. Same logic as before,
|
|
409
|
+
// scoped per variant (subtitleText is already gated by variant.burn_subtitles).
|
|
410
|
+
let cursorMs = 0;
|
|
411
|
+
const subtitleEntries = [];
|
|
412
|
+
for (const clip of readyClips) {
|
|
413
|
+
if (clip.subtitleText) {
|
|
414
|
+
const clipMs = Math.round(clip.duration * 1000);
|
|
415
|
+
const sentences = splitSubtitleSentences(clip.subtitleText);
|
|
416
|
+
const totalLen = sentences.reduce((sum, s) => sum + Array.from(s).length, 0) || 1;
|
|
417
|
+
let offsetMs = 0;
|
|
418
|
+
sentences.forEach((sentence, idx) => {
|
|
419
|
+
const share = Array.from(sentence).length / totalLen;
|
|
420
|
+
const isLast = idx === sentences.length - 1;
|
|
421
|
+
const spanMs = isLast ? clipMs - offsetMs : Math.max(1, Math.round(clipMs * share));
|
|
422
|
+
subtitleEntries.push({
|
|
423
|
+
text: sentence,
|
|
424
|
+
start_ms: cursorMs + offsetMs,
|
|
425
|
+
end_ms: cursorMs + offsetMs + spanMs,
|
|
426
|
+
});
|
|
427
|
+
offsetMs += spanMs;
|
|
368
428
|
});
|
|
369
|
-
|
|
370
|
-
|
|
429
|
+
}
|
|
430
|
+
cursorMs += Math.round(clip.duration * 1000);
|
|
371
431
|
}
|
|
372
|
-
cursorMs += Math.round(clip.duration * 1000);
|
|
373
|
-
}
|
|
374
432
|
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
433
|
+
const allClips = [];
|
|
434
|
+
let accumulated = readyClips[0].path;
|
|
435
|
+
for (let i = 1; i < readyClips.length; i++) {
|
|
436
|
+
const { path: nextClip, transition } = readyClips[i];
|
|
437
|
+
if (transition === 'fade' || transition === 'crossfade') {
|
|
438
|
+
accumulated = await applyFadeTransition({ clipA: accumulated, clipB: nextClip, tmpDir, style: transition });
|
|
439
|
+
} else {
|
|
440
|
+
allClips.push(accumulated);
|
|
441
|
+
accumulated = nextClip;
|
|
442
|
+
}
|
|
384
443
|
}
|
|
385
|
-
|
|
386
|
-
allClips.push(accumulated);
|
|
444
|
+
allClips.push(accumulated);
|
|
387
445
|
|
|
388
|
-
|
|
446
|
+
const finalSequence = [...allClips, ...outroClipPaths];
|
|
389
447
|
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
448
|
+
// Compose without subtitles first (subtitles are burned in a separate pass)
|
|
449
|
+
const preSubPath = subtitleEntries.length > 0
|
|
450
|
+
? path.join(tmpDir, `${variantTag}-pre-sub-${randomUUID().slice(0, 8)}.mp4`)
|
|
451
|
+
: variant.output_path;
|
|
394
452
|
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
453
|
+
if (finalSequence.length === 1) {
|
|
454
|
+
await runFfmpeg(['-i', finalSequence[0], '-c', 'copy', '-movflags', '+faststart', preSubPath], `ffmpeg copy ${variantTag}`);
|
|
455
|
+
} else {
|
|
456
|
+
await concatWithCuts({ clips: finalSequence, outputPath: preSubPath });
|
|
457
|
+
}
|
|
400
458
|
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
}
|
|
459
|
+
if (subtitleEntries.length > 0) {
|
|
460
|
+
const assPath = path.join(tmpDir, `${variantTag}-subs-${randomUUID().slice(0, 8)}.ass`);
|
|
461
|
+
await writeFile(assPath, buildAssContent(subtitleEntries, { playResX: width, playResY: height }));
|
|
462
|
+
const escapedAssPath = assPath.replace(/\\/g, '/').replace(/:/g, '\\:').replace(/'/g, "\\'");
|
|
463
|
+
await runFfmpeg([
|
|
464
|
+
'-i', preSubPath,
|
|
465
|
+
'-vf', `subtitles='${escapedAssPath}'`,
|
|
466
|
+
'-c:a', 'copy',
|
|
467
|
+
'-movflags', '+faststart',
|
|
468
|
+
variant.output_path,
|
|
469
|
+
], `ffmpeg burn-subtitles ${variantTag}`);
|
|
470
|
+
}
|
|
414
471
|
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
472
|
+
const totalDuration = await probeDurationSec(variant.output_path);
|
|
473
|
+
const finalStat = await statAsync(variant.output_path);
|
|
474
|
+
const sizeBytes = Number(finalStat.size ?? 0);
|
|
475
|
+
if (!Number.isFinite(sizeBytes) || sizeBytes < 1024) {
|
|
476
|
+
throw new Error(`compose_video_v2 produced an invalid output: ${variant.output_path} size=${sizeBytes} bytes (variant ${vi})`);
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
variantOutputs.push({
|
|
480
|
+
path: variant.output_path,
|
|
481
|
+
duration_ms: Math.round(totalDuration * 1000),
|
|
482
|
+
size_bytes: sizeBytes,
|
|
483
|
+
burn_subtitles: variant.burn_subtitles,
|
|
484
|
+
include_audio: variant.include_audio,
|
|
485
|
+
});
|
|
426
486
|
}
|
|
427
|
-
|
|
487
|
+
|
|
488
|
+
// Legacy single-output callers (didn't pass variants) get the same flat
|
|
489
|
+
// shape they used to get, plus the variants array for forward-compat.
|
|
490
|
+
const first = variantOutputs[0];
|
|
491
|
+
return {
|
|
492
|
+
path: first.path,
|
|
493
|
+
duration_ms: first.duration_ms,
|
|
494
|
+
size_bytes: first.size_bytes,
|
|
495
|
+
variants: variantOutputs,
|
|
496
|
+
};
|
|
428
497
|
} finally {
|
|
429
498
|
await rm(tmpDir, { recursive: true, force: true });
|
|
430
499
|
}
|
|
@@ -22,8 +22,16 @@ function statSizeOrNull(p) {
|
|
|
22
22
|
try { return fs.statSync(p).size; } catch { return null; }
|
|
23
23
|
}
|
|
24
24
|
|
|
25
|
-
export async function runComposeVideoV2Tool({
|
|
26
|
-
|
|
25
|
+
export async function runComposeVideoV2Tool({
|
|
26
|
+
segments,
|
|
27
|
+
outro_paths,
|
|
28
|
+
format,
|
|
29
|
+
resolution,
|
|
30
|
+
output_path,
|
|
31
|
+
burn_subtitles,
|
|
32
|
+
variants,
|
|
33
|
+
workspaceDir,
|
|
34
|
+
}) {
|
|
27
35
|
if (!Array.isArray(segments) || segments.length === 0) {
|
|
28
36
|
return toolError('segments must be a non-empty array.');
|
|
29
37
|
}
|
|
@@ -60,6 +68,37 @@ export async function runComposeVideoV2Tool({ segments, outro_paths, format, res
|
|
|
60
68
|
);
|
|
61
69
|
}
|
|
62
70
|
}
|
|
71
|
+
|
|
72
|
+
// Normalize variants. If caller passed a variants[] array, that takes
|
|
73
|
+
// priority — multi-output mode. Otherwise build a single-element variants
|
|
74
|
+
// array from the legacy output_path + burn_subtitles params.
|
|
75
|
+
const outDir = workspaceDir
|
|
76
|
+
? path.join(workspaceDir, 'artifacts', 'video')
|
|
77
|
+
: path.join(os.tmpdir(), 'lightcone-video');
|
|
78
|
+
|
|
79
|
+
let normalizedVariants;
|
|
80
|
+
if (Array.isArray(variants) && variants.length > 0) {
|
|
81
|
+
normalizedVariants = variants.map((v, idx) => {
|
|
82
|
+
if (!v || typeof v !== 'object') {
|
|
83
|
+
return null; // surfaced below
|
|
84
|
+
}
|
|
85
|
+
const outPath = String(v.output_path ?? '').trim()
|
|
86
|
+
|| path.join(outDir, `composed-${Date.now()}-${idx}-${randomUUID().slice(0, 8)}.mp4`);
|
|
87
|
+
return {
|
|
88
|
+
output_path: outPath,
|
|
89
|
+
burn_subtitles: v.burn_subtitles !== false,
|
|
90
|
+
include_audio: v.include_audio !== false,
|
|
91
|
+
};
|
|
92
|
+
});
|
|
93
|
+
if (normalizedVariants.some(v => v === null)) {
|
|
94
|
+
return toolError('variants must be an array of objects, each with { output_path, burn_subtitles?, include_audio? }.');
|
|
95
|
+
}
|
|
96
|
+
} else {
|
|
97
|
+
const burnSubtitles = burn_subtitles !== false;
|
|
98
|
+
const outPath = output_path ?? path.join(outDir, `composed-${Date.now()}-${randomUUID().slice(0, 8)}.mp4`);
|
|
99
|
+
normalizedVariants = [{ output_path: outPath, burn_subtitles: burnSubtitles, include_audio: true }];
|
|
100
|
+
}
|
|
101
|
+
|
|
63
102
|
const warnings = [];
|
|
64
103
|
// Heuristic warning: a multi-segment image video that reuses one single image
|
|
65
104
|
// will look near-static — usually a sign the source page didn't render and the
|
|
@@ -70,48 +109,59 @@ export async function runComposeVideoV2Tool({ segments, outro_paths, format, res
|
|
|
70
109
|
+ 'The output will be near-static — verify the source page actually rendered before submitting this video.'
|
|
71
110
|
);
|
|
72
111
|
}
|
|
73
|
-
// Warn when narration is present but no subtitle text is —
|
|
74
|
-
//
|
|
75
|
-
//
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
if (burnSubtitles) {
|
|
112
|
+
// Warn when narration is present but no subtitle text is — only meaningful
|
|
113
|
+
// for variants that ARE supposed to burn subtitles. Variants that explicitly
|
|
114
|
+
// ask for burn_subtitles=false are the "clean" path and shouldn't trigger it.
|
|
115
|
+
const variantsWithBurn = normalizedVariants.filter(v => v.burn_subtitles && v.include_audio);
|
|
116
|
+
if (variantsWithBurn.length > 0) {
|
|
79
117
|
const hasSubText = s => (typeof s?.subtitle_text === 'string' && s.subtitle_text.trim())
|
|
80
118
|
|| (typeof s?.text === 'string' && s.text.trim());
|
|
81
119
|
const narratedNoSub = segments.filter(s =>
|
|
82
120
|
(typeof s?.audio_path === 'string' && s.audio_path.trim()) && !hasSubText(s)).length;
|
|
83
121
|
if (narratedNoSub > 0) {
|
|
84
122
|
warnings.push(
|
|
85
|
-
`WARNING: ${narratedNoSub} segment(s) have narration audio but no subtitle text —
|
|
123
|
+
`WARNING: ${narratedNoSub} segment(s) have narration audio but no subtitle text — `
|
|
124
|
+
+ `the subtitled variant${variantsWithBurn.length > 1 ? 's' : ''} will have NO subtitles. `
|
|
86
125
|
+ 'If subtitles are wanted, set subtitle_text per segment (or pass the plan_video_segments output array verbatim).'
|
|
87
126
|
);
|
|
88
127
|
}
|
|
89
128
|
}
|
|
90
129
|
|
|
91
|
-
const outDir = workspaceDir
|
|
92
|
-
? path.join(workspaceDir, 'artifacts', 'video')
|
|
93
|
-
: path.join(os.tmpdir(), 'lightcone-video');
|
|
94
|
-
|
|
95
|
-
const outPath = output_path ?? path.join(outDir, `composed-${Date.now()}-${randomUUID().slice(0, 8)}.mp4`);
|
|
96
|
-
|
|
97
130
|
try {
|
|
98
131
|
const result = await composeVideoV2({
|
|
99
132
|
segments,
|
|
100
133
|
outro_paths: outro_paths ?? [],
|
|
101
134
|
resolution: resolution ?? '1080x1920',
|
|
102
|
-
|
|
103
|
-
burn_subtitles: burnSubtitles,
|
|
135
|
+
variants: normalizedVariants,
|
|
104
136
|
});
|
|
105
137
|
|
|
106
|
-
const
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
138
|
+
const outputs = Array.isArray(result?.variants) && result.variants.length > 0
|
|
139
|
+
? result.variants
|
|
140
|
+
: [{ path: result.path, duration_ms: result.duration_ms, size_bytes: result.size_bytes,
|
|
141
|
+
burn_subtitles: normalizedVariants[0].burn_subtitles,
|
|
142
|
+
include_audio: normalizedVariants[0].include_audio }];
|
|
143
|
+
|
|
144
|
+
const lines = ['compose_video_v2 completed.'];
|
|
145
|
+
if (outputs.length === 1) {
|
|
146
|
+
const v = outputs[0];
|
|
147
|
+
lines.push(`path=${v.path}`);
|
|
148
|
+
lines.push(`duration_ms=${v.duration_ms}`);
|
|
149
|
+
lines.push(`size_bytes=${v.size_bytes ?? 'unknown'}`);
|
|
150
|
+
lines.push(`burn_subtitles=${v.burn_subtitles}`);
|
|
151
|
+
lines.push(`include_audio=${v.include_audio}`);
|
|
152
|
+
} else {
|
|
153
|
+
lines.push(`variants=${outputs.length}`);
|
|
154
|
+
outputs.forEach((v, idx) => {
|
|
155
|
+
lines.push(`--- variant ${idx} ---`);
|
|
156
|
+
lines.push(`path=${v.path}`);
|
|
157
|
+
lines.push(`duration_ms=${v.duration_ms}`);
|
|
158
|
+
lines.push(`size_bytes=${v.size_bytes ?? 'unknown'}`);
|
|
159
|
+
lines.push(`burn_subtitles=${v.burn_subtitles}`);
|
|
160
|
+
lines.push(`include_audio=${v.include_audio}`);
|
|
161
|
+
});
|
|
162
|
+
}
|
|
163
|
+
lines.push(`segments=${segments.length}`);
|
|
164
|
+
lines.push(`outro_clips=${(outro_paths ?? []).length}`);
|
|
115
165
|
for (const w of warnings) lines.push(w);
|
|
116
166
|
return toolText(lines.join('\n'));
|
|
117
167
|
} catch (error) {
|