@lightcone-ai/daemon 0.18.0 → 0.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -278,9 +278,10 @@ server.tool(
278
278
  + 'When any segment has audio_path, MUST be preceded by plan_video_segments in the same session '
279
279
  + '(plan_video_segments fills duration/subtitle_text/audio_path mechanically; manual alignment is rejected). '
280
280
  + 'Returns a local mp4 path + size_bytes.\n\n'
281
- + 'Dual-version delivery (subtitled + clean): call this tool twice with the SAME segments — first with default '
282
- + 'burn_subtitles=true (or omitted), then again with burn_subtitles=false and a different output_path. Only the '
283
- + 'final ffmpeg pass repeats; audio, source clips, and the plan_video_segments alignment are all reused.',
281
+ + 'Dual / multi-version delivery (e.g. subtitled+voiced + clean silent): pass the variants[] array — one call '
282
+ + 'runs the heavy per-segment ffmpeg work ONCE and only diverges at audio mux + concat + subtitle burn per '
283
+ + 'variant. That is ~1.2-1.4× single-version time vs ~2× when calling this tool twice. Each variant chooses '
284
+ + 'its own burn_subtitles and include_audio independently.',
284
285
  {
285
286
  segments: z.array(z.object({
286
287
  visual_path: z.string().optional().describe('Absolute path to a single image / video / gif.'),
@@ -297,10 +298,18 @@ server.tool(
297
298
  })).describe('Ordered list of video segments.'),
298
299
  outro_paths: z.array(z.string()).optional().describe('Absolute paths to outro video clips appended at end.'),
299
300
  resolution: z.string().optional().describe('Output resolution WxH. Default "1080x1920".'),
300
- output_path: z.string().optional().describe('Absolute output path. Auto-generated if omitted.'),
301
- burn_subtitles: z.boolean().optional().describe('Whether to burn subtitle_text into the video. Default true. '
302
- + 'Pass false to produce a clean no-subtitle copy (dual-version delivery: run compose_video_v2 twice — '
303
- + 'once with default true, once with false + a different output_path — same segments, only one extra ffmpeg pass).'),
301
+ output_path: z.string().optional().describe('Absolute output path (single-output mode). Auto-generated if omitted. Ignored when variants[] is provided.'),
302
+ burn_subtitles: z.boolean().optional().describe('Single-output mode only: whether to burn subtitle_text. Default true. '
303
+ + 'For producing multiple variants in one call, use variants[] instead.'),
304
+ variants: z.array(z.object({
305
+ output_path: z.string().describe('Absolute output path for this variant. Each variant must use a unique path.'),
306
+ burn_subtitles: z.boolean().optional().describe('Whether to burn subtitle_text into THIS variant. Default true.'),
307
+ include_audio: z.boolean().optional().describe('Whether to mux segment.audio_path into THIS variant. Default true. '
308
+ + 'Pass false for a fully silent copy (skips audio mux entirely; segment.audio_path is ignored for this variant).'),
309
+ })).optional().describe('Multi-output mode: one call produces all variants. '
310
+ + 'Visual segment processing (the heavy work) runs once; each variant only repeats audio mux + concat + optional subtitle burn. '
311
+ + 'Typical use: [{output_path:"with-sub.mp4"}, {output_path:"clean.mp4", burn_subtitles:false, include_audio:false}] '
312
+ + 'to deliver a subtitled+voiced version and a silent clean version together.'),
304
313
  },
305
314
  async (args) => {
306
315
  const segments = Array.isArray(args?.segments) ? args.segments : [];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lightcone-ai/daemon",
3
- "version": "0.18.0",
3
+ "version": "0.18.1",
4
4
  "type": "module",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -249,17 +249,64 @@ async function applyFadeTransition({ clipA, clipB, tmpDir, style = 'fade' }) {
249
249
  return outPath;
250
250
  }
251
251
 
252
+ // compose_video_v2 supports two modes:
253
+ //
254
+ // 1. Legacy single-output: pass output_path (+ optional burn_subtitles).
255
+ // Returns { path, duration_ms, size_bytes, variants: [..1 entry..] }.
256
+ //
257
+ // 2. Multi-variant: pass variants=[{output_path, burn_subtitles?, include_audio?}, ...].
258
+ // Visual segment processing runs ONCE (the heavy part — per-segment ffmpeg
259
+ // transcode/scale/scroll). Each variant then diverges only at audio mux +
260
+ // concat + subtitle burn — typically a few seconds per extra variant.
261
+ // Returns { variants: [{path, duration_ms, size_bytes, burn_subtitles,
262
+ // include_audio}, ...] }.
263
+ //
264
+ // Use the multi-variant mode when shipping the same content with different
265
+ // subtitle/audio combinations (e.g. subtitled+voiced + clean silent). Calling
266
+ // the legacy mode twice produces correct outputs but redoes per-segment work.
252
267
  export async function composeVideoV2({
253
268
  segments = [],
254
269
  outro_paths = [],
255
270
  resolution = '1080x1920',
256
271
  output_path,
257
272
  burn_subtitles = true,
273
+ variants,
258
274
  }) {
259
275
  if (!Array.isArray(segments) || segments.length === 0) {
260
276
  throw new Error('segments must be a non-empty array');
261
277
  }
262
278
 
279
+ // Normalize variants. If caller did not pass an explicit variants array,
280
+ // synthesize a single variant from the legacy output_path + burn_subtitles.
281
+ // include_audio defaults to true (auto-include any segment.audio_path).
282
+ const normalizedVariants = (Array.isArray(variants) && variants.length > 0)
283
+ ? variants.map((v, idx) => {
284
+ if (!v || typeof v !== 'object') {
285
+ throw new Error(`variants[${idx}]: must be an object`);
286
+ }
287
+ const outPath = String(v.output_path ?? '').trim();
288
+ if (!outPath) throw new Error(`variants[${idx}]: output_path is required`);
289
+ return {
290
+ output_path: outPath,
291
+ burn_subtitles: v.burn_subtitles !== false,
292
+ include_audio: v.include_audio !== false,
293
+ };
294
+ })
295
+ : [{
296
+ output_path: output_path ?? path.join(os.tmpdir(), `lightcone-video-${Date.now()}.mp4`),
297
+ burn_subtitles: burn_subtitles !== false,
298
+ include_audio: true,
299
+ }];
300
+
301
+ // Disallow two variants writing to the same file — would race on disk.
302
+ const seenOutputs = new Set();
303
+ for (const v of normalizedVariants) {
304
+ if (seenOutputs.has(v.output_path)) {
305
+ throw new Error(`variants share output_path "${v.output_path}" — each variant needs a unique destination`);
306
+ }
307
+ seenOutputs.add(v.output_path);
308
+ }
309
+
263
310
  const [widthStr, heightStr] = String(resolution).split('x');
264
311
  const width = parseInt(widthStr, 10) || DEFAULT_WIDTH;
265
312
  const height = parseInt(heightStr, 10) || DEFAULT_HEIGHT;
@@ -268,23 +315,19 @@ export async function composeVideoV2({
268
315
  const tmpDir = path.join(os.tmpdir(), `compose-v2-${randomUUID().slice(0, 8)}`);
269
316
  await mkdir(tmpDir, { recursive: true });
270
317
 
271
- const outPath = output_path ?? path.join(os.tmpdir(), `lightcone-video-${Date.now()}.mp4`);
272
- await mkdir(path.dirname(outPath), { recursive: true });
273
-
274
318
  try {
275
- const readyClips = [];
276
-
319
+ // ── Shared phase: generate visual clips per segment ONCE ──────────────────
320
+ // This is the heavy work (image scaling, scroll rendering, video resize +
321
+ // re-encode). Reused across every variant.
322
+ const visualClips = [];
277
323
  for (let i = 0; i < segments.length; i++) {
278
324
  const seg = segments[i];
279
325
  const kind = String(seg.visual_kind ?? 'image');
280
326
  const presentation = seg.presentation ?? {};
281
327
  const style = String(presentation.style ?? 'static');
282
328
  const duration = Number(presentation.duration ?? presentation.per_card_duration ?? 4);
283
- const audioPath = seg.audio_path ?? null;
284
- const transition = String(seg.transition ?? 'cut');
285
329
 
286
330
  let visualClip;
287
-
288
331
  if (kind === 'image') {
289
332
  const imgPath = String(seg.visual_path ?? '');
290
333
  if (!imgPath) throw new Error(`segments[${i}]: visual_path required for kind=image`);
@@ -312,119 +355,145 @@ export async function composeVideoV2({
312
355
  } else {
313
356
  throw new Error(`segments[${i}]: unknown visual_kind "${kind}"`);
314
357
  }
315
-
316
- let finalClip;
317
- if (audioPath && await fileExists(audioPath)) {
318
- finalClip = await muxAudio({ videoPath: visualClip.path, audioPath, duration: visualClip.duration, tmpDir });
319
- } else {
320
- finalClip = await silentClip({ videoPath: visualClip.path, duration: visualClip.duration, tmpDir });
321
- }
322
-
323
- // Accept `text` as an alias for `subtitle_text`: plan_video_segments takes
324
- // segment narration as `text` on input, compose_video_v2's canonical name is
325
- // `subtitle_text`. Either reaches the burn pass so subtitles aren't silently dropped.
326
- // burn_subtitles=false (dual-version delivery: same segments composed once
327
- // with subtitles and once without) drops the text here so the burn-in pass
328
- // skips entirely — saves the second compose having to mutate the segment array.
329
- const subtitleText = burn_subtitles
330
- ? (
331
- typeof seg.subtitle_text === 'string' ? seg.subtitle_text
332
- : typeof seg.text === 'string' ? seg.text
333
- : ''
334
- ).trim()
335
- : '';
336
- readyClips.push({ path: finalClip, duration: visualClip.duration, transition, subtitleText });
358
+ visualClips.push(visualClip);
337
359
  }
338
360
 
339
- const outroClips = [];
361
+ // Outros are also shared — they don't depend on subtitle/audio choices.
362
+ const outroClipPaths = [];
340
363
  for (const outroPath of (outro_paths ?? [])) {
341
364
  if (outroPath && await fileExists(outroPath)) {
342
365
  const c = await videoToClip({ videoPath: outroPath, tmpDir, width, height, fps });
343
- outroClips.push(c.path);
366
+ outroClipPaths.push(c.path);
344
367
  }
345
368
  }
346
369
 
347
- // Build subtitle entries with cumulative timeline timestamps. When a clip's
348
- // subtitle text spans several sentences, split it into one event per sentence
349
- // and spread them across the clip in proportion to their length, so a long
350
- // beat reads as sequential lines roughly tracking the narration instead of one
351
- // static wall of text.
352
- let cursorMs = 0;
353
- const subtitleEntries = [];
354
- for (const clip of readyClips) {
355
- if (clip.subtitleText) {
356
- const clipMs = Math.round(clip.duration * 1000);
357
- const sentences = splitSubtitleSentences(clip.subtitleText);
358
- const totalLen = sentences.reduce((sum, s) => sum + Array.from(s).length, 0) || 1;
359
- let offsetMs = 0;
360
- sentences.forEach((sentence, idx) => {
361
- const share = Array.from(sentence).length / totalLen;
362
- const isLast = idx === sentences.length - 1;
363
- const spanMs = isLast ? clipMs - offsetMs : Math.max(1, Math.round(clipMs * share));
364
- subtitleEntries.push({
365
- text: sentence,
366
- start_ms: cursorMs + offsetMs,
367
- end_ms: cursorMs + offsetMs + spanMs,
370
+ // ── Per-variant phase ─────────────────────────────────────────────────────
371
+ // For each variant: mux audio (or silent), build subtitle text, concat with
372
+ // transitions, optionally burn subtitles. Writes to variant.output_path.
373
+ const variantOutputs = [];
374
+ for (let vi = 0; vi < normalizedVariants.length; vi++) {
375
+ const variant = normalizedVariants[vi];
376
+ await mkdir(path.dirname(variant.output_path), { recursive: true });
377
+
378
+ const variantTag = `v${vi}`;
379
+ const readyClips = [];
380
+
381
+ for (let i = 0; i < segments.length; i++) {
382
+ const seg = segments[i];
383
+ const visualClip = visualClips[i];
384
+ const transition = String(seg.transition ?? 'cut');
385
+ const audioPath = variant.include_audio ? (seg.audio_path ?? null) : null;
386
+
387
+ let finalClip;
388
+ if (audioPath && await fileExists(audioPath)) {
389
+ finalClip = await muxAudio({ videoPath: visualClip.path, audioPath, duration: visualClip.duration, tmpDir });
390
+ } else {
391
+ finalClip = await silentClip({ videoPath: visualClip.path, duration: visualClip.duration, tmpDir });
392
+ }
393
+
394
+ // Accept `text` as an alias for `subtitle_text`: plan_video_segments takes
395
+ // segment narration as `text` on input, compose_video_v2's canonical name is
396
+ // `subtitle_text`. burn_subtitles=false drops the text here so the burn-in
397
+ // pass skips entirely.
398
+ const subtitleText = variant.burn_subtitles
399
+ ? (
400
+ typeof seg.subtitle_text === 'string' ? seg.subtitle_text
401
+ : typeof seg.text === 'string' ? seg.text
402
+ : ''
403
+ ).trim()
404
+ : '';
405
+ readyClips.push({ path: finalClip, duration: visualClip.duration, transition, subtitleText });
406
+ }
407
+
408
+ // Subtitle entries: cumulative timeline timestamps. Same logic as before,
409
+ // scoped per variant (subtitleText is already gated by variant.burn_subtitles).
410
+ let cursorMs = 0;
411
+ const subtitleEntries = [];
412
+ for (const clip of readyClips) {
413
+ if (clip.subtitleText) {
414
+ const clipMs = Math.round(clip.duration * 1000);
415
+ const sentences = splitSubtitleSentences(clip.subtitleText);
416
+ const totalLen = sentences.reduce((sum, s) => sum + Array.from(s).length, 0) || 1;
417
+ let offsetMs = 0;
418
+ sentences.forEach((sentence, idx) => {
419
+ const share = Array.from(sentence).length / totalLen;
420
+ const isLast = idx === sentences.length - 1;
421
+ const spanMs = isLast ? clipMs - offsetMs : Math.max(1, Math.round(clipMs * share));
422
+ subtitleEntries.push({
423
+ text: sentence,
424
+ start_ms: cursorMs + offsetMs,
425
+ end_ms: cursorMs + offsetMs + spanMs,
426
+ });
427
+ offsetMs += spanMs;
368
428
  });
369
- offsetMs += spanMs;
370
- });
429
+ }
430
+ cursorMs += Math.round(clip.duration * 1000);
371
431
  }
372
- cursorMs += Math.round(clip.duration * 1000);
373
- }
374
432
 
375
- const allClips = [];
376
- let accumulated = readyClips[0].path;
377
- for (let i = 1; i < readyClips.length; i++) {
378
- const { path: nextClip, transition } = readyClips[i];
379
- if (transition === 'fade' || transition === 'crossfade') {
380
- accumulated = await applyFadeTransition({ clipA: accumulated, clipB: nextClip, tmpDir, style: transition });
381
- } else {
382
- allClips.push(accumulated);
383
- accumulated = nextClip;
433
+ const allClips = [];
434
+ let accumulated = readyClips[0].path;
435
+ for (let i = 1; i < readyClips.length; i++) {
436
+ const { path: nextClip, transition } = readyClips[i];
437
+ if (transition === 'fade' || transition === 'crossfade') {
438
+ accumulated = await applyFadeTransition({ clipA: accumulated, clipB: nextClip, tmpDir, style: transition });
439
+ } else {
440
+ allClips.push(accumulated);
441
+ accumulated = nextClip;
442
+ }
384
443
  }
385
- }
386
- allClips.push(accumulated);
444
+ allClips.push(accumulated);
387
445
 
388
- const finalSequence = [...allClips, ...outroClips];
446
+ const finalSequence = [...allClips, ...outroClipPaths];
389
447
 
390
- // Compose without subtitles first (subtitles are burned in a separate pass)
391
- const preSubPath = subtitleEntries.length > 0
392
- ? path.join(tmpDir, `pre-sub-${randomUUID().slice(0, 8)}.mp4`)
393
- : outPath;
448
+ // Compose without subtitles first (subtitles are burned in a separate pass)
449
+ const preSubPath = subtitleEntries.length > 0
450
+ ? path.join(tmpDir, `${variantTag}-pre-sub-${randomUUID().slice(0, 8)}.mp4`)
451
+ : variant.output_path;
394
452
 
395
- if (finalSequence.length === 1) {
396
- await runFfmpeg(['-i', finalSequence[0], '-c', 'copy', '-movflags', '+faststart', preSubPath], 'ffmpeg copy');
397
- } else {
398
- await concatWithCuts({ clips: finalSequence, outputPath: preSubPath });
399
- }
453
+ if (finalSequence.length === 1) {
454
+ await runFfmpeg(['-i', finalSequence[0], '-c', 'copy', '-movflags', '+faststart', preSubPath], `ffmpeg copy ${variantTag}`);
455
+ } else {
456
+ await concatWithCuts({ clips: finalSequence, outputPath: preSubPath });
457
+ }
400
458
 
401
- // Burn subtitles into final output
402
- if (subtitleEntries.length > 0) {
403
- const assPath = path.join(tmpDir, `subs-${randomUUID().slice(0, 8)}.ass`);
404
- await writeFile(assPath, buildAssContent(subtitleEntries, { playResX: width, playResY: height }));
405
- const escapedAssPath = assPath.replace(/\\/g, '/').replace(/:/g, '\\:').replace(/'/g, "\\'");
406
- await runFfmpeg([
407
- '-i', preSubPath,
408
- '-vf', `subtitles='${escapedAssPath}'`,
409
- '-c:a', 'copy',
410
- '-movflags', '+faststart',
411
- outPath,
412
- ], 'ffmpeg burn-subtitles');
413
- }
459
+ if (subtitleEntries.length > 0) {
460
+ const assPath = path.join(tmpDir, `${variantTag}-subs-${randomUUID().slice(0, 8)}.ass`);
461
+ await writeFile(assPath, buildAssContent(subtitleEntries, { playResX: width, playResY: height }));
462
+ const escapedAssPath = assPath.replace(/\\/g, '/').replace(/:/g, '\\:').replace(/'/g, "\\'");
463
+ await runFfmpeg([
464
+ '-i', preSubPath,
465
+ '-vf', `subtitles='${escapedAssPath}'`,
466
+ '-c:a', 'copy',
467
+ '-movflags', '+faststart',
468
+ variant.output_path,
469
+ ], `ffmpeg burn-subtitles ${variantTag}`);
470
+ }
414
471
 
415
- const totalDuration = await probeDurationSec(outPath);
416
-
417
- // Stat the final file before returning so the caller can rely on size and
418
- // so we can detect the (rare but observed) case where ffmpeg's `close`
419
- // arrived but the kernel writeback wasn't complete. A 0-byte / tiny mp4
420
- // here means the burn-subtitles pass produced nothing usable — fail loudly
421
- // instead of letting a broken file flow into write_workspace_file / submit.
422
- const finalStat = await statAsync(outPath);
423
- const sizeBytes = Number(finalStat.size ?? 0);
424
- if (!Number.isFinite(sizeBytes) || sizeBytes < 1024) {
425
- throw new Error(`compose_video_v2 produced an invalid output: ${outPath} size=${sizeBytes} bytes`);
472
+ const totalDuration = await probeDurationSec(variant.output_path);
473
+ const finalStat = await statAsync(variant.output_path);
474
+ const sizeBytes = Number(finalStat.size ?? 0);
475
+ if (!Number.isFinite(sizeBytes) || sizeBytes < 1024) {
476
+ throw new Error(`compose_video_v2 produced an invalid output: ${variant.output_path} size=${sizeBytes} bytes (variant ${vi})`);
477
+ }
478
+
479
+ variantOutputs.push({
480
+ path: variant.output_path,
481
+ duration_ms: Math.round(totalDuration * 1000),
482
+ size_bytes: sizeBytes,
483
+ burn_subtitles: variant.burn_subtitles,
484
+ include_audio: variant.include_audio,
485
+ });
426
486
  }
427
- return { path: outPath, duration_ms: Math.round(totalDuration * 1000), size_bytes: sizeBytes };
487
+
488
+ // Legacy single-output callers (didn't pass variants) get the same flat
489
+ // shape they used to get, plus the variants array for forward-compat.
490
+ const first = variantOutputs[0];
491
+ return {
492
+ path: first.path,
493
+ duration_ms: first.duration_ms,
494
+ size_bytes: first.size_bytes,
495
+ variants: variantOutputs,
496
+ };
428
497
  } finally {
429
498
  await rm(tmpDir, { recursive: true, force: true });
430
499
  }
@@ -22,8 +22,16 @@ function statSizeOrNull(p) {
22
22
  try { return fs.statSync(p).size; } catch { return null; }
23
23
  }
24
24
 
25
- export async function runComposeVideoV2Tool({ segments, outro_paths, format, resolution, output_path, burn_subtitles, workspaceDir }) {
26
- const burnSubtitles = burn_subtitles !== false;
25
+ export async function runComposeVideoV2Tool({
26
+ segments,
27
+ outro_paths,
28
+ format,
29
+ resolution,
30
+ output_path,
31
+ burn_subtitles,
32
+ variants,
33
+ workspaceDir,
34
+ }) {
27
35
  if (!Array.isArray(segments) || segments.length === 0) {
28
36
  return toolError('segments must be a non-empty array.');
29
37
  }
@@ -60,6 +68,37 @@ export async function runComposeVideoV2Tool({ segments, outro_paths, format, res
60
68
  );
61
69
  }
62
70
  }
71
+
72
+ // Normalize variants. If caller passed a variants[] array, that takes
73
+ // priority — multi-output mode. Otherwise build a single-element variants
74
+ // array from the legacy output_path + burn_subtitles params.
75
+ const outDir = workspaceDir
76
+ ? path.join(workspaceDir, 'artifacts', 'video')
77
+ : path.join(os.tmpdir(), 'lightcone-video');
78
+
79
+ let normalizedVariants;
80
+ if (Array.isArray(variants) && variants.length > 0) {
81
+ normalizedVariants = variants.map((v, idx) => {
82
+ if (!v || typeof v !== 'object') {
83
+ return null; // surfaced below
84
+ }
85
+ const outPath = String(v.output_path ?? '').trim()
86
+ || path.join(outDir, `composed-${Date.now()}-${idx}-${randomUUID().slice(0, 8)}.mp4`);
87
+ return {
88
+ output_path: outPath,
89
+ burn_subtitles: v.burn_subtitles !== false,
90
+ include_audio: v.include_audio !== false,
91
+ };
92
+ });
93
+ if (normalizedVariants.some(v => v === null)) {
94
+ return toolError('variants must be an array of objects, each with { output_path, burn_subtitles?, include_audio? }.');
95
+ }
96
+ } else {
97
+ const burnSubtitles = burn_subtitles !== false;
98
+ const outPath = output_path ?? path.join(outDir, `composed-${Date.now()}-${randomUUID().slice(0, 8)}.mp4`);
99
+ normalizedVariants = [{ output_path: outPath, burn_subtitles: burnSubtitles, include_audio: true }];
100
+ }
101
+
63
102
  const warnings = [];
64
103
  // Heuristic warning: a multi-segment image video that reuses one single image
65
104
  // will look near-static — usually a sign the source page didn't render and the
@@ -70,48 +109,59 @@ export async function runComposeVideoV2Tool({ segments, outro_paths, format, res
70
109
  + 'The output will be near-static — verify the source page actually rendered before submitting this video.'
71
110
  );
72
111
  }
73
- // Warn when narration is present but no subtitle text is — compose_video_v2 burns
74
- // subtitles only from `subtitle_text` (or its `text` alias); without it the video
75
- // ships with no captions. Simplest fix: pass plan_video_segments' output verbatim.
76
- // burn_subtitles=false is the explicit "no subtitles" path (dual-version delivery),
77
- // so the warning would be noise — suppress it.
78
- if (burnSubtitles) {
112
+ // Warn when narration is present but no subtitle text is — only meaningful
113
+ // for variants that ARE supposed to burn subtitles. Variants that explicitly
114
+ // ask for burn_subtitles=false are the "clean" path and shouldn't trigger it.
115
+ const variantsWithBurn = normalizedVariants.filter(v => v.burn_subtitles && v.include_audio);
116
+ if (variantsWithBurn.length > 0) {
79
117
  const hasSubText = s => (typeof s?.subtitle_text === 'string' && s.subtitle_text.trim())
80
118
  || (typeof s?.text === 'string' && s.text.trim());
81
119
  const narratedNoSub = segments.filter(s =>
82
120
  (typeof s?.audio_path === 'string' && s.audio_path.trim()) && !hasSubText(s)).length;
83
121
  if (narratedNoSub > 0) {
84
122
  warnings.push(
85
- `WARNING: ${narratedNoSub} segment(s) have narration audio but no subtitle text — the output will have NO subtitles. `
123
+ `WARNING: ${narratedNoSub} segment(s) have narration audio but no subtitle text — `
124
+ + `the subtitled variant${variantsWithBurn.length > 1 ? 's' : ''} will have NO subtitles. `
86
125
  + 'If subtitles are wanted, set subtitle_text per segment (or pass the plan_video_segments output array verbatim).'
87
126
  );
88
127
  }
89
128
  }
90
129
 
91
- const outDir = workspaceDir
92
- ? path.join(workspaceDir, 'artifacts', 'video')
93
- : path.join(os.tmpdir(), 'lightcone-video');
94
-
95
- const outPath = output_path ?? path.join(outDir, `composed-${Date.now()}-${randomUUID().slice(0, 8)}.mp4`);
96
-
97
130
  try {
98
131
  const result = await composeVideoV2({
99
132
  segments,
100
133
  outro_paths: outro_paths ?? [],
101
134
  resolution: resolution ?? '1080x1920',
102
- output_path: outPath,
103
- burn_subtitles: burnSubtitles,
135
+ variants: normalizedVariants,
104
136
  });
105
137
 
106
- const lines = [
107
- 'compose_video_v2 completed.',
108
- `path=${result.path}`,
109
- `duration_ms=${result.duration_ms}`,
110
- `size_bytes=${result.size_bytes ?? 'unknown'}`,
111
- `segments=${segments.length}`,
112
- `outro_clips=${(outro_paths ?? []).length}`,
113
- `burn_subtitles=${burnSubtitles}`,
114
- ];
138
+ const outputs = Array.isArray(result?.variants) && result.variants.length > 0
139
+ ? result.variants
140
+ : [{ path: result.path, duration_ms: result.duration_ms, size_bytes: result.size_bytes,
141
+ burn_subtitles: normalizedVariants[0].burn_subtitles,
142
+ include_audio: normalizedVariants[0].include_audio }];
143
+
144
+ const lines = ['compose_video_v2 completed.'];
145
+ if (outputs.length === 1) {
146
+ const v = outputs[0];
147
+ lines.push(`path=${v.path}`);
148
+ lines.push(`duration_ms=${v.duration_ms}`);
149
+ lines.push(`size_bytes=${v.size_bytes ?? 'unknown'}`);
150
+ lines.push(`burn_subtitles=${v.burn_subtitles}`);
151
+ lines.push(`include_audio=${v.include_audio}`);
152
+ } else {
153
+ lines.push(`variants=${outputs.length}`);
154
+ outputs.forEach((v, idx) => {
155
+ lines.push(`--- variant ${idx} ---`);
156
+ lines.push(`path=${v.path}`);
157
+ lines.push(`duration_ms=${v.duration_ms}`);
158
+ lines.push(`size_bytes=${v.size_bytes ?? 'unknown'}`);
159
+ lines.push(`burn_subtitles=${v.burn_subtitles}`);
160
+ lines.push(`include_audio=${v.include_audio}`);
161
+ });
162
+ }
163
+ lines.push(`segments=${segments.length}`);
164
+ lines.push(`outro_clips=${(outro_paths ?? []).length}`);
115
165
  for (const w of warnings) lines.push(w);
116
166
  return toolText(lines.join('\n'));
117
167
  } catch (error) {