@argo-video/cli 0.19.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +45 -3
  2. package/dist/camera-move.d.ts +53 -0
  3. package/dist/camera-move.d.ts.map +1 -0
  4. package/dist/camera-move.js +107 -0
  5. package/dist/camera-move.js.map +1 -0
  6. package/dist/camera.d.ts +9 -0
  7. package/dist/camera.d.ts.map +1 -1
  8. package/dist/camera.js +38 -0
  9. package/dist/camera.js.map +1 -1
  10. package/dist/cli.d.ts.map +1 -1
  11. package/dist/cli.js +45 -0
  12. package/dist/cli.js.map +1 -1
  13. package/dist/config.d.ts +16 -0
  14. package/dist/config.d.ts.map +1 -1
  15. package/dist/config.js.map +1 -1
  16. package/dist/export.d.ts +13 -1
  17. package/dist/export.d.ts.map +1 -1
  18. package/dist/export.js +113 -11
  19. package/dist/export.js.map +1 -1
  20. package/dist/freeze.d.ts +59 -0
  21. package/dist/freeze.d.ts.map +1 -0
  22. package/dist/freeze.js +113 -0
  23. package/dist/freeze.js.map +1 -0
  24. package/dist/index.d.ts +4 -1
  25. package/dist/index.d.ts.map +1 -1
  26. package/dist/index.js +6 -0
  27. package/dist/index.js.map +1 -1
  28. package/dist/media.d.ts +1 -0
  29. package/dist/media.d.ts.map +1 -1
  30. package/dist/media.js +20 -0
  31. package/dist/media.js.map +1 -1
  32. package/dist/music/musicgen.d.ts +43 -0
  33. package/dist/music/musicgen.d.ts.map +1 -0
  34. package/dist/music/musicgen.js +120 -0
  35. package/dist/music/musicgen.js.map +1 -0
  36. package/dist/narration.d.ts +10 -0
  37. package/dist/narration.d.ts.map +1 -1
  38. package/dist/narration.js +33 -1
  39. package/dist/narration.js.map +1 -1
  40. package/dist/pipeline.d.ts.map +1 -1
  41. package/dist/pipeline.js +94 -14
  42. package/dist/pipeline.js.map +1 -1
  43. package/dist/preview.d.ts +4 -0
  44. package/dist/preview.d.ts.map +1 -1
  45. package/dist/preview.js +641 -9
  46. package/dist/preview.js.map +1 -1
  47. package/dist/record.d.ts.map +1 -1
  48. package/dist/record.js +26 -1
  49. package/dist/record.js.map +1 -1
  50. package/package.json +1 -1
package/dist/preview.js CHANGED
@@ -9,7 +9,7 @@
9
9
  */
10
10
  import { execFile } from 'node:child_process';
11
11
  import { createServer } from 'node:http';
12
- import { readFileSync, existsSync, readdirSync, writeFileSync, statSync, createReadStream, unlinkSync } from 'node:fs';
12
+ import { readFileSync, existsSync, readdirSync, writeFileSync, statSync, createReadStream, unlinkSync, mkdirSync } from 'node:fs';
13
13
  import { dirname, extname, join, relative, resolve } from 'node:path';
14
14
  import { renderTemplate } from './overlays/templates.js';
15
15
  import { alignClips, schedulePlacements } from './tts/align.js';
@@ -19,6 +19,8 @@ import { generateSrt, generateVtt } from './subtitles.js';
19
19
  import { generateChapterMetadata } from './chapters.js';
20
20
  import { exportVideo, checkFfmpeg } from './export.js';
21
21
  import { applySpeedRampToTimeline } from './speed-ramp.js';
22
+ import { shiftCameraMoves, scaleCameraMoves } from './camera-move.js';
23
+ import { resolveFreezes, adjustPlacementsForFreezes, totalFreezeDurationMs } from './freeze.js';
22
24
  const MIME_TYPES = {
23
25
  '.html': 'text/html',
24
26
  '.js': 'text/javascript',
@@ -129,7 +131,7 @@ function createSceneReportFromPlacements(placements, persisted) {
129
131
  })),
130
132
  };
131
133
  }
132
- function loadPreviewData(demoName, argoDir, demosDir, outputDir = 'videos') {
134
+ function loadPreviewData(demoName, argoDir, demosDir, outputDir = 'videos', exportConfig, activeMusicPath) {
133
135
  const demoDir = join(argoDir, demoName);
134
136
  // Required files
135
137
  const timingPath = join(demoDir, '.timing.json');
@@ -178,7 +180,26 @@ function loadPreviewData(demoName, argoDir, demosDir, outputDir = 'videos') {
178
180
  const renderedOverlays = buildRenderedOverlays(overlays);
179
181
  // Pipeline metadata (reuse meta loaded above for headTrimMs)
180
182
  const pipelineMeta = Object.keys(meta).length > 0 ? meta : null;
181
- return { demoName, timing, voiceover, overlays, effects, sceneDurations, sceneReport, renderedOverlays, pipelineMeta };
183
+ const hasGenerated = Boolean(activeMusicPath && existsSync(activeMusicPath));
184
+ const hasConfig = Boolean(exportConfig?.musicPath);
185
+ const bgm = {
186
+ hasGenerated,
187
+ hasConfig,
188
+ include: hasGenerated || hasConfig,
189
+ volume: exportConfig?.musicVolume ?? 0.15,
190
+ };
191
+ return {
192
+ demoName,
193
+ timing,
194
+ voiceover,
195
+ overlays,
196
+ effects,
197
+ sceneDurations,
198
+ sceneReport,
199
+ renderedOverlays,
200
+ pipelineMeta,
201
+ bgm,
202
+ };
182
203
  }
183
204
  /** List WAV clip files available for a demo. */
184
205
  function listClips(argoDir, demoName) {
@@ -306,12 +327,18 @@ export async function startPreviewServer(options) {
306
327
  throw new Error(`No recording found for '${demoName}'. Run 'argo pipeline ${demoName}' first.`);
307
328
  }
308
329
  let videoMime = videoPath.endsWith('.mp4') ? 'video/mp4' : 'video/webm';
330
+ // Track BGM saved from the music generator panel
331
+ let activeMusicPath;
332
+ // Check if a previously saved BGM exists
333
+ const savedBgmPath = join(demoDir, 'music', 'bgm.wav');
334
+ if (existsSync(savedBgmPath))
335
+ activeMusicPath = savedBgmPath;
309
336
  const server = createServer(async (req, res) => {
310
337
  const url = req.url ?? '/';
311
338
  try {
312
339
  // --- API routes ---
313
340
  if (url === '/api/data') {
314
- const data = loadPreviewData(demoName, argoDir, demosDir, outputDir);
341
+ const data = loadPreviewData(demoName, argoDir, demosDir, outputDir, options.exportConfig, activeMusicPath);
315
342
  res.writeHead(200, { 'Content-Type': 'application/json' });
316
343
  res.end(JSON.stringify(data));
317
344
  return;
@@ -376,7 +403,7 @@ export async function startPreviewServer(options) {
376
403
  writeFileSync(scenesPath, JSON.stringify(scenes, null, 2) + '\n', 'utf-8');
377
404
  }
378
405
  // Reload and re-render overlays
379
- const data = loadPreviewData(demoName, argoDir, demosDir, outputDir);
406
+ const data = loadPreviewData(demoName, argoDir, demosDir, outputDir, options.exportConfig, activeMusicPath);
380
407
  res.writeHead(200, { 'Content-Type': 'application/json' });
381
408
  res.end(JSON.stringify({ ok: true, changed, renderedOverlays: data.renderedOverlays }));
382
409
  return;
@@ -457,6 +484,11 @@ export async function startPreviewServer(options) {
457
484
  res.writeHead(200, { 'Content-Type': 'application/json', 'Transfer-Encoding': 'chunked' });
458
485
  try {
459
486
  checkFfmpeg();
487
+ const chunks = [];
488
+ for await (const chunk of req)
489
+ chunks.push(chunk);
490
+ const bodyText = Buffer.concat(chunks).toString('utf-8').trim();
491
+ const body = bodyText ? JSON.parse(bodyText) : {};
460
492
  // Refresh aligned audio from current clips + timing
461
493
  const refreshed = refreshPreviewAudioArtifacts(demoName, argoDir, demosDir, options.ttsDefaults);
462
494
  // Read timing for head-trim + placement computation
@@ -496,6 +528,11 @@ export async function startPreviewServer(options) {
496
528
  // Apply speed ramp to timeline if configured (must happen before
497
529
  // chapters/subtitles/export so all artifacts reflect ramped timing)
498
530
  const ec = options.exportConfig;
531
+ const includeBgm = body.includeBgm !== false;
532
+ const requestedMusicVolume = typeof body.musicVolume === 'number' && Number.isFinite(body.musicVolume)
533
+ ? Math.max(0, Math.min(1, body.musicVolume))
534
+ : (ec?.musicVolume ?? 0.15);
535
+ const exportMusicPath = includeBgm ? (activeMusicPath ?? ec?.musicPath) : undefined;
499
536
  const rampResult = applySpeedRampToTimeline(placements, shiftedDurationMs, ec?.speedRamp);
500
537
  const finalPlacements = rampResult.placements;
501
538
  const finalDurationMs = rampResult.totalDurationMs;
@@ -507,6 +544,45 @@ export async function startPreviewServer(options) {
507
544
  writeFileSync(join(outputDir, `${demoName}.vtt`), generateVtt(finalPlacements, sceneTexts), 'utf-8');
508
545
  }
509
546
  catch { /* subtitles are best-effort */ }
547
+ // Resolve freeze-frame holds from scenes manifest
548
+ const previewFreezeSpecs = [];
549
+ for (const entry of scenes) {
550
+ if (!entry.scene || !Array.isArray(entry.post))
551
+ continue;
552
+ for (const effect of entry.post) {
553
+ if (effect.type === 'freeze' && typeof effect.atMs === 'number' && typeof effect.durationMs === 'number') {
554
+ previewFreezeSpecs.push({ scene: entry.scene, atMs: effect.atMs, durationMs: effect.durationMs });
555
+ }
556
+ }
557
+ }
558
+ const previewResolvedFreezes = resolveFreezes(previewFreezeSpecs, finalPlacements);
559
+ let freezeAdjustedPlacements = finalPlacements;
560
+ let freezeAdjustedDurationMs = finalDurationMs;
561
+ if (previewResolvedFreezes.length > 0) {
562
+ freezeAdjustedPlacements = adjustPlacementsForFreezes(finalPlacements, previewResolvedFreezes);
563
+ freezeAdjustedDurationMs += totalFreezeDurationMs(previewResolvedFreezes);
564
+ // Regenerate chapters/subtitles with freeze-adjusted timing
565
+ writeFileSync(chapterMetadataPath, generateChapterMetadata(freezeAdjustedPlacements, freezeAdjustedDurationMs), 'utf-8');
566
+ try {
567
+ writeFileSync(join(outputDir, `${demoName}.srt`), generateSrt(freezeAdjustedPlacements, sceneTexts), 'utf-8');
568
+ writeFileSync(join(outputDir, `${demoName}.vtt`), generateVtt(freezeAdjustedPlacements, sceneTexts), 'utf-8');
569
+ }
570
+ catch { /* best-effort */ }
571
+ }
572
+ // Read camera moves if recorded by zoomTo with narration option
573
+ let cameraMoves;
574
+ const cameraMovesPath = join(demoDir, '.timing.camera-moves.json');
575
+ try {
576
+ if (existsSync(cameraMovesPath)) {
577
+ let moves = JSON.parse(readFileSync(cameraMovesPath, 'utf-8'));
578
+ if (headTrimMs > 0)
579
+ moves = shiftCameraMoves(moves, headTrimMs);
580
+ moves = scaleCameraMoves(moves, ec?.deviceScaleFactor ?? 1);
581
+ if (moves.length > 0)
582
+ cameraMoves = moves;
583
+ }
584
+ }
585
+ catch { /* optional */ }
510
586
  // Export — use full config so output matches argo pipeline
511
587
  await exportVideo({
512
588
  demoName,
@@ -522,10 +598,16 @@ export async function startPreviewServer(options) {
522
598
  chapterMetadataPath,
523
599
  formats: ec?.formats,
524
600
  transition: ec?.transition,
525
- placements: finalPlacements,
526
- totalDurationMs: finalDurationMs,
601
+ placements: freezeAdjustedPlacements,
602
+ totalDurationMs: freezeAdjustedDurationMs,
527
603
  headTrimMs: headTrimMs > 0 ? headTrimMs : undefined,
528
604
  speedRampSegments,
605
+ loudnorm: ec?.loudnorm,
606
+ musicPath: exportMusicPath,
607
+ musicVolume: requestedMusicVolume,
608
+ cameraMoves,
609
+ watermark: ec?.watermark,
610
+ freezeSpecs: previewResolvedFreezes.length > 0 ? previewResolvedFreezes : undefined,
529
611
  });
530
612
  // Switch to serving the new MP4
531
613
  if (existsSync(mp4Path)) {
@@ -539,6 +621,147 @@ export async function startPreviewServer(options) {
539
621
  }
540
622
  return;
541
623
  }
624
+ // Serve the MusicGen Web Worker script (same-origin so ESM imports work)
625
+ if (url === '/musicgen-worker.js') {
626
+ const workerScript = `
627
+ import { AutoTokenizer, MusicgenForConditionalGeneration } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@4.0.0-next.7';
628
+
629
+ let tokenizer = null;
630
+ let model = null;
631
+ let backend = null;
632
+
633
+ const WEBGPU_DTYPE = {
634
+ text_encoder: 'fp32',
635
+ decoder_model_merged: 'fp32',
636
+ encodec_decode: 'fp32',
637
+ };
638
+
639
+ const WASM_DTYPE = {
640
+ text_encoder: 'q8',
641
+ decoder_model_merged: 'q8',
642
+ encodec_decode: 'fp32',
643
+ };
644
+
645
+ async function ensureTokenizer() {
646
+ if (tokenizer) return;
647
+ self.postMessage({ type: 'progress', message: 'Loading MusicGen tokenizer...' });
648
+ tokenizer = await AutoTokenizer.from_pretrained('Xenova/musicgen-small');
649
+ }
650
+
651
+ async function loadModel(preferredBackend = 'webgpu') {
652
+ await ensureTokenizer();
653
+
654
+ if (preferredBackend === 'webgpu' && typeof navigator !== 'undefined' && 'gpu' in navigator) {
655
+ try {
656
+ self.postMessage({ type: 'progress', message: 'Loading model weights with WebGPU (~1.8GB first time)...' });
657
+ model = await MusicgenForConditionalGeneration.from_pretrained('Xenova/musicgen-small', {
658
+ dtype: WEBGPU_DTYPE,
659
+ device: 'webgpu',
660
+ });
661
+ backend = 'webgpu';
662
+ self.postMessage({ type: 'progress', message: 'Model loaded (WebGPU).' });
663
+ return;
664
+ } catch (err) {
665
+ self.postMessage({
666
+ type: 'progress',
667
+ message: 'WebGPU init failed, falling back to CPU/WASM...',
668
+ });
669
+ model = null;
670
+ backend = null;
671
+ }
672
+ }
673
+
674
+ self.postMessage({ type: 'progress', message: 'Loading model weights on CPU/WASM...' });
675
+ model = await MusicgenForConditionalGeneration.from_pretrained('Xenova/musicgen-small', {
676
+ dtype: WASM_DTYPE,
677
+ });
678
+ backend = 'wasm';
679
+ self.postMessage({ type: 'progress', message: 'Model loaded (CPU/WASM).' });
680
+ }
681
+
682
+ function shouldRetryOnWasm(err) {
683
+ const msg = err?.message || String(err);
684
+ return backend === 'webgpu' && /(OrtRun|webgpu|TensorShape|Cannot reduce shape|ERROR_CODE:\\s*1)/i.test(msg);
685
+ }
686
+
687
+ async function generateAudio(prompt, durationSec, guidanceScale, temperature) {
688
+ if (!model) await loadModel();
689
+
690
+ const inputs = tokenizer(prompt);
691
+ const maxNewTokens = Math.ceil(durationSec * 50);
692
+
693
+ try {
694
+ return await model.generate({
695
+ ...inputs,
696
+ max_new_tokens: maxNewTokens,
697
+ do_sample: true,
698
+ guidance_scale: guidanceScale,
699
+ temperature,
700
+ });
701
+ } catch (err) {
702
+ if (!shouldRetryOnWasm(err)) throw err;
703
+
704
+ self.postMessage({
705
+ type: 'progress',
706
+ message: 'WebGPU generation failed, retrying on CPU/WASM...',
707
+ });
708
+ model = null;
709
+ await loadModel('wasm');
710
+ return await model.generate({
711
+ ...inputs,
712
+ max_new_tokens: maxNewTokens,
713
+ do_sample: true,
714
+ guidance_scale: guidanceScale,
715
+ temperature,
716
+ });
717
+ }
718
+ }
719
+
720
// Worker entry point. Only messages with type "generate" are handled; each
// request posts a "progress" message and then ends with either "complete"
// (samples + sample rate) or "error" (message text).
self.onmessage = async (e) => {
  if (e.data.type !== 'generate') return;
  try {
    // Missing or falsy options fall back to the defaults below.
    const durationSec = e.data.durationSec || 30;
    const guidanceScale = e.data.guidanceScale || 3;
    const temperature = e.data.temperature || 1.0;
    self.postMessage({ type: 'progress', message: 'Generating ' + durationSec + 's of music...' });
    const output = await generateAudio(
      e.data.prompt,
      durationSec,
      guidanceScale,
      temperature,
    );
    const audioData = output.data instanceof Float32Array ? output.data : new Float32Array(output.data);
    const sampleRate = model?.config?.audio_encoder?.sampling_rate || 32000;
    // The sample buffer is passed in the transfer list rather than copied.
    self.postMessage({ type: 'complete', audioData, sampleRate }, [audioData.buffer]);
  } catch (err) {
    // Optional chaining (as in shouldRetryOnWasm): a nullish rejection must
    // not throw here, otherwise no "error" message would ever be posted.
    self.postMessage({ type: 'error', message: err?.message || String(err) });
  }
};
741
+ `;
742
+ res.writeHead(200, {
743
+ 'Content-Type': 'application/javascript',
744
+ 'Cache-Control': 'no-store',
745
+ });
746
+ res.end(workerScript);
747
+ return;
748
+ }
749
+ // Save generated background music WAV — overwrites previous to avoid orphans
750
+ if (url === '/api/save-music' && req.method === 'POST') {
751
+ const chunks = [];
752
+ for await (const chunk of req)
753
+ chunks.push(chunk);
754
+ const wavData = Buffer.concat(chunks);
755
+ const musicDir = join(demoDir, 'music');
756
+ mkdirSync(musicDir, { recursive: true });
757
+ const filePath = join(musicDir, 'bgm.wav');
758
+ writeFileSync(filePath, wavData);
759
+ // Track the active music path so /api/export uses it
760
+ activeMusicPath = filePath;
761
+ res.writeHead(200, { 'Content-Type': 'application/json' });
762
+ res.end(JSON.stringify({ ok: true, path: filePath }));
763
+ return;
764
+ }
542
765
  // --- Static file serving ---
543
766
  // Serve video with Range request support (required for seeking)
544
767
  if (url === '/video' || url === '/video.webm') {
@@ -578,7 +801,7 @@ export async function startPreviewServer(options) {
578
801
  }
579
802
  // Root — serve the preview HTML
580
803
  if (url === '/' || url === '/index.html') {
581
- const data = loadPreviewData(demoName, argoDir, demosDir, outputDir);
804
+ const data = loadPreviewData(demoName, argoDir, demosDir, outputDir, options.exportConfig, activeMusicPath);
582
805
  const html = getPreviewHtml(data);
583
806
  res.writeHead(200, { 'Content-Type': 'text/html' });
584
807
  res.end(html);
@@ -986,6 +1209,179 @@ const PREVIEW_HTML = `<!DOCTYPE html>
986
1209
  .sidebar-tab:hover { color: var(--text); }
987
1210
  .sidebar-tab.active { color: var(--text); border-bottom-color: var(--accent); }
988
1211
  .sidebar-panel { overflow-y: auto; flex: 1; }
1212
+
1213
+ /* Music panel */
1214
+ .music-panel {
1215
+ border-top: 1px solid var(--border);
1216
+ padding: 0;
1217
+ }
1218
+ .music-panel-header {
1219
+ display: flex;
1220
+ align-items: center;
1221
+ padding: 12px 16px;
1222
+ cursor: pointer;
1223
+ user-select: none;
1224
+ font-size: 12px;
1225
+ font-weight: 600;
1226
+ color: var(--text-muted);
1227
+ text-transform: uppercase;
1228
+ letter-spacing: 0.05em;
1229
+ }
1230
+ .music-panel-header:hover { color: var(--text); }
1231
+ .music-panel-header .expand-icon {
1232
+ margin-left: auto;
1233
+ font-size: 10px;
1234
+ color: var(--text-dim);
1235
+ transition: transform var(--transition);
1236
+ }
1237
+ .music-panel.expanded .music-panel-header .expand-icon { transform: rotate(90deg); }
1238
+ .music-panel-body {
1239
+ display: none;
1240
+ padding: 0 16px 16px;
1241
+ }
1242
+ .music-panel.expanded .music-panel-body { display: block; }
1243
+ .music-prompt-input {
1244
+ width: 100%;
1245
+ padding: 8px 10px;
1246
+ font-family: var(--sans);
1247
+ font-size: 13px;
1248
+ color: var(--text);
1249
+ background: var(--surface2);
1250
+ border: 1px solid var(--border);
1251
+ border-radius: var(--radius);
1252
+ outline: none;
1253
+ resize: vertical;
1254
+ min-height: 36px;
1255
+ margin-bottom: 8px;
1256
+ }
1257
+ .music-prompt-input:focus { border-color: var(--accent); }
1258
+ .music-presets {
1259
+ display: flex;
1260
+ flex-wrap: wrap;
1261
+ gap: 4px;
1262
+ margin-bottom: 10px;
1263
+ }
1264
+ .music-preset-btn {
1265
+ padding: 4px 8px;
1266
+ font-size: 11px;
1267
+ font-family: var(--sans);
1268
+ color: var(--text-muted);
1269
+ background: var(--surface3);
1270
+ border: 1px solid var(--border);
1271
+ border-radius: var(--radius);
1272
+ cursor: pointer;
1273
+ transition: background var(--transition), color var(--transition);
1274
+ }
1275
+ .music-preset-btn:hover { background: var(--accent-glow); color: var(--text); }
1276
+ .music-duration-row {
1277
+ display: flex;
1278
+ align-items: center;
1279
+ gap: 8px;
1280
+ margin-bottom: 10px;
1281
+ font-size: 12px;
1282
+ color: var(--text-muted);
1283
+ }
1284
+ .music-duration-row input[type="range"] {
1285
+ flex: 1;
1286
+ accent-color: var(--accent);
1287
+ }
1288
+ .music-option-row,
1289
+ .music-volume-row {
1290
+ display: flex;
1291
+ align-items: center;
1292
+ gap: 8px;
1293
+ margin-bottom: 10px;
1294
+ font-size: 12px;
1295
+ color: var(--text-muted);
1296
+ }
1297
+ .music-option-row {
1298
+ justify-content: space-between;
1299
+ }
1300
+ .music-volume-row input[type="range"] {
1301
+ flex: 1;
1302
+ accent-color: var(--accent);
1303
+ }
1304
+ .music-volume-value {
1305
+ min-width: 40px;
1306
+ text-align: right;
1307
+ font-family: var(--mono);
1308
+ font-size: 11px;
1309
+ color: var(--text);
1310
+ }
1311
+ .music-help {
1312
+ margin-bottom: 10px;
1313
+ font-size: 11px;
1314
+ color: var(--text-dim);
1315
+ line-height: 1.4;
1316
+ }
1317
+ .music-duration-row .music-dur-label {
1318
+ min-width: 32px;
1319
+ text-align: right;
1320
+ font-family: var(--mono);
1321
+ font-size: 11px;
1322
+ }
1323
+ .music-generate-btn, .music-save-btn {
1324
+ width: 100%;
1325
+ padding: 8px 0;
1326
+ font-size: 13px;
1327
+ font-weight: 600;
1328
+ font-family: var(--sans);
1329
+ border: none;
1330
+ border-radius: var(--radius);
1331
+ cursor: pointer;
1332
+ transition: background var(--transition), opacity var(--transition);
1333
+ }
1334
+ .music-generate-btn {
1335
+ background: var(--accent);
1336
+ color: white;
1337
+ margin-bottom: 8px;
1338
+ }
1339
+ .music-generate-btn:hover:not(:disabled) { background: var(--accent-hover); }
1340
+ .music-generate-btn:disabled { opacity: 0.5; cursor: not-allowed; }
1341
+ .music-save-btn {
1342
+ background: var(--success);
1343
+ color: white;
1344
+ display: none;
1345
+ }
1346
+ .music-save-btn:hover:not(:disabled) { opacity: 0.85; }
1347
+ .music-save-btn:disabled { opacity: 0.5; cursor: not-allowed; }
1348
+ .music-progress {
1349
+ margin-bottom: 8px;
1350
+ display: none;
1351
+ }
1352
+ .music-progress-bar {
1353
+ width: 100%;
1354
+ height: 4px;
1355
+ background: var(--surface3);
1356
+ border-radius: 2px;
1357
+ overflow: hidden;
1358
+ margin-bottom: 4px;
1359
+ }
1360
+ .music-progress-fill {
1361
+ height: 100%;
1362
+ background: var(--accent);
1363
+ width: 0%;
1364
+ transition: width 0.3s ease;
1365
+ }
1366
+ .music-progress-text {
1367
+ font-size: 11px;
1368
+ font-family: var(--mono);
1369
+ color: var(--text-muted);
1370
+ }
1371
+ .music-audio-player {
1372
+ width: 100%;
1373
+ margin-bottom: 8px;
1374
+ display: none;
1375
+ height: 36px;
1376
+ }
1377
+ .music-status {
1378
+ font-size: 11px;
1379
+ font-family: var(--mono);
1380
+ color: var(--text-muted);
1381
+ margin-top: 4px;
1382
+ min-height: 16px;
1383
+ }
1384
+
989
1385
  .scene-card {
990
1386
  padding: 14px 16px;
991
1387
  border-bottom: 1px solid var(--border);
@@ -1342,6 +1738,45 @@ const PREVIEW_HTML = `<!DOCTYPE html>
1342
1738
  </div>
1343
1739
  <div class="sidebar-panel" id="panel-scenes">
1344
1740
  <div id="scene-list"></div>
1741
+ <div class="music-panel" id="music-panel">
1742
+ <div class="music-panel-header" id="music-panel-header">
1743
+ Background Music
1744
+ <span class="expand-icon">&#9654;</span>
1745
+ </div>
1746
+ <div class="music-panel-body">
1747
+ <input type="text" class="music-prompt-input" id="music-prompt" placeholder="Describe the music style..." value="lofi chill ambient">
1748
+ <div class="music-presets">
1749
+ <button class="music-preset-btn" data-preset="lofi chill">lofi chill</button>
1750
+ <button class="music-preset-btn" data-preset="corporate upbeat">corporate upbeat</button>
1751
+ <button class="music-preset-btn" data-preset="ambient minimal">ambient minimal</button>
1752
+ <button class="music-preset-btn" data-preset="cinematic epic">cinematic epic</button>
1753
+ <button class="music-preset-btn" data-preset="acoustic warm">acoustic warm</button>
1754
+ </div>
1755
+ <div class="music-duration-row">
1756
+ <span>Duration</span>
1757
+ <input type="range" id="music-duration" min="10" max="60" value="30" step="5">
1758
+ <span class="music-dur-label" id="music-dur-label">30s</span>
1759
+ </div>
1760
+ <button class="music-generate-btn" id="music-generate-btn">Generate Music</button>
1761
+ <div class="music-progress" id="music-progress">
1762
+ <div class="music-progress-bar"><div class="music-progress-fill" id="music-progress-fill"></div></div>
1763
+ <div class="music-progress-text" id="music-progress-text"></div>
1764
+ </div>
1765
+ <audio class="music-audio-player" id="music-audio" controls></audio>
1766
+ <div class="music-option-row">
1767
+ <label for="music-include">Include in export</label>
1768
+ <input type="checkbox" id="music-include">
1769
+ </div>
1770
+ <div class="music-volume-row">
1771
+ <label for="music-volume">Music volume</label>
1772
+ <input type="range" id="music-volume" min="0" max="0.30" value="0.15" step="0.01">
1773
+ <span class="music-volume-value" id="music-volume-label">0.15</span>
1774
+ </div>
1775
+ <div class="music-help" id="music-help">Preview export mixes background music at a fixed low level. No re-record needed.</div>
1776
+ <button class="music-save-btn" id="music-save-btn">Use as BGM</button>
1777
+ <div class="music-status" id="music-status"></div>
1778
+ </div>
1779
+ </div>
1345
1780
  </div>
1346
1781
  <div class="sidebar-panel" id="panel-metadata" style="display:none">
1347
1782
  <div id="metadata-content" style="padding:16px;font-family:var(--mono);font-size:12px;color:var(--text-muted);white-space:pre-wrap;word-break:break-word;"></div>
@@ -2542,7 +2977,16 @@ document.getElementById('btn-export').addEventListener('click', async () => {
2542
2977
  stopAudio();
2543
2978
  showPlayIcon();
2544
2979
  try {
2545
- const resp = await fetch('/api/export', { method: 'POST' });
2980
+ const musicInclude = document.getElementById('music-include');
2981
+ const musicVolume = document.getElementById('music-volume');
2982
+ const resp = await fetch('/api/export', {
2983
+ method: 'POST',
2984
+ headers: { 'Content-Type': 'application/json' },
2985
+ body: JSON.stringify({
2986
+ includeBgm: musicInclude ? musicInclude.checked : true,
2987
+ musicVolume: musicVolume ? Number(musicVolume.value) : undefined,
2988
+ }),
2989
+ });
2546
2990
  const result = await resp.json();
2547
2991
  if (!result.ok) throw new Error(result.error);
2548
2992
  overlay.classList.add('success');
@@ -2672,6 +3116,194 @@ if (DATA.pipelineMeta) {
2672
3116
  document.getElementById('metadata-content').textContent = 'No pipeline metadata found.\\n\\nRun argo pipeline to generate metadata.';
2673
3117
  }
2674
3118
 
3119
+ // ─── Background Music (MusicGen via Transformers.js) ───────────────────────
3120
+ (function initMusicPanel() {
3121
+ const musicPanel = document.getElementById('music-panel');
3122
+ const musicHeader = document.getElementById('music-panel-header');
3123
+ const musicPrompt = document.getElementById('music-prompt');
3124
+ const musicDuration = document.getElementById('music-duration');
3125
+ const musicDurLabel = document.getElementById('music-dur-label');
3126
+ const musicGenerateBtn = document.getElementById('music-generate-btn');
3127
+ const musicProgress = document.getElementById('music-progress');
3128
+ const musicProgressFill = document.getElementById('music-progress-fill');
3129
+ const musicProgressText = document.getElementById('music-progress-text');
3130
+ const musicAudio = document.getElementById('music-audio');
3131
+ const musicInclude = document.getElementById('music-include');
3132
+ const musicVolume = document.getElementById('music-volume');
3133
+ const musicVolumeLabel = document.getElementById('music-volume-label');
3134
+ const musicHelp = document.getElementById('music-help');
3135
+ const musicSaveBtn = document.getElementById('music-save-btn');
3136
+ const musicStatus = document.getElementById('music-status');
3137
+
3138
+ let generatedWavBlob = null;
3139
+ let hasGeneratedBgm = DATA.bgm?.hasGenerated ?? false;
3140
+ const hasConfigBgm = DATA.bgm?.hasConfig ?? false;
3141
+
3142
+ function updateMusicVolumeLabel() {
3143
+ musicVolumeLabel.textContent = Number(musicVolume.value).toFixed(2);
3144
+ }
3145
+
3146
+ function updateMusicHelp() {
3147
+ const source = hasGeneratedBgm ? 'generated BGM' : (hasConfigBgm ? 'config music' : 'no music source');
3148
+ musicHelp.textContent = musicInclude.checked
3149
+ ? 'Export will include ' + source + ' at a fixed mix level. No re-record needed.'
3150
+ : 'Export will skip background music. No re-record needed.';
3151
+ }
3152
+
3153
+ // Toggle panel
3154
+ musicHeader.addEventListener('click', () => {
3155
+ musicPanel.classList.toggle('expanded');
3156
+ });
3157
+
3158
+ // Preset buttons
3159
+ document.querySelectorAll('.music-preset-btn').forEach(btn => {
3160
+ btn.addEventListener('click', () => {
3161
+ musicPrompt.value = btn.dataset.preset;
3162
+ });
3163
+ });
3164
+
3165
+ // Duration slider
3166
+ musicDuration.addEventListener('input', () => {
3167
+ musicDurLabel.textContent = musicDuration.value + 's';
3168
+ });
3169
+ musicInclude.checked = DATA.bgm?.include ?? false;
3170
+ musicVolume.value = String(DATA.bgm?.volume ?? 0.15);
3171
+ updateMusicVolumeLabel();
3172
+ updateMusicHelp();
3173
+ musicInclude.addEventListener('change', updateMusicHelp);
3174
+ musicVolume.addEventListener('input', updateMusicVolumeLabel);
3175
+ musicVolume.addEventListener('change', updateMusicHelp);
3176
+
3177
+ // WAV encoder (Float32, mono)
3178
+ function encodeWavFloat32(samples, sampleRate) {
3179
+ const numSamples = samples.length;
3180
+ const byteRate = sampleRate * 4; // Float32 = 4 bytes
3181
+ const blockAlign = 4;
3182
+ const dataSize = numSamples * 4;
3183
+ const buffer = new ArrayBuffer(44 + dataSize);
3184
+ const view = new DataView(buffer);
3185
+ // RIFF header
3186
+ view.setUint32(0, 0x52494646, false); // "RIFF"
3187
+ view.setUint32(4, 36 + dataSize, true);
3188
+ view.setUint32(8, 0x57415645, false); // "WAVE"
3189
+ // fmt chunk
3190
+ view.setUint32(12, 0x666d7420, false); // "fmt "
3191
+ view.setUint32(16, 16, true); // chunk size
3192
+ view.setUint16(20, 3, true); // format = IEEE Float
3193
+ view.setUint16(22, 1, true); // channels = 1
3194
+ view.setUint32(24, sampleRate, true);
3195
+ view.setUint32(28, byteRate, true);
3196
+ view.setUint16(32, blockAlign, true);
3197
+ view.setUint16(34, 32, true); // bits per sample
3198
+ // data chunk
3199
+ view.setUint32(36, 0x64617461, false); // "data"
3200
+ view.setUint32(40, dataSize, true);
3201
+ const floatView = new Float32Array(buffer, 44);
3202
+ floatView.set(samples);
3203
+ return new Blob([buffer], { type: 'audio/wav' });
3204
+ }
3205
+
3206
+ // MusicGen runs in a Web Worker served from /musicgen-worker.js (same-origin).
3207
+ // This avoids blob URL cross-origin import restrictions and keeps the UI responsive.
3208
+ let musicWorker = null;
3209
+
3210
+ function createMusicWorker() {
3211
+ const w = new Worker('/musicgen-worker.js', { type: 'module' });
3212
+ return w;
3213
+ }
3214
+
3215
+ function showProgress(msg) {
3216
+ musicProgress.style.display = 'block';
3217
+ musicProgressText.textContent = msg;
3218
+ }
3219
+
3220
+ function setProgressBar(pct) {
3221
+ musicProgressFill.style.width = Math.min(100, Math.max(0, pct)) + '%';
3222
+ }
3223
+
3224
// Generate button: validates the prompt, resets the panel, lazily creates the
// MusicGen worker and posts a "generate" request. Worker replies drive the
// progress bar, the audio preview and the status line.
// Timer that hides the progress row shortly after a successful run.
let hideProgressTimer = null;
musicGenerateBtn.addEventListener('click', () => {
  const prompt = musicPrompt.value.trim();
  if (!prompt) {
    musicStatus.textContent = 'Please enter a music prompt.';
    return;
  }
  const durationSec = parseInt(musicDuration.value, 10);

  // A pending hide from the previous run must not hide this run's progress.
  clearTimeout(hideProgressTimer);
  musicGenerateBtn.disabled = true;
  musicSaveBtn.style.display = 'none';
  musicAudio.style.display = 'none';
  generatedWavBlob = null;
  showProgress('Initializing...');
  setProgressBar(10);
  musicStatus.textContent = '';

  if (!musicWorker) {
    const worker = createMusicWorker();
    musicWorker = worker;
    worker.onmessage = (e) => {
      const msg = e.data;
      if (msg.type === 'progress') {
        showProgress(msg.message);
        // Coarse progress steps keyed off the worker's progress text.
        if (msg.message.includes('tokenizer')) setProgressBar(20);
        else if (msg.message.includes('Model loaded')) setProgressBar(50);
        else if (msg.message.includes('Generating')) setProgressBar(60);
      } else if (msg.type === 'complete') {
        setProgressBar(100);
        showProgress('Done!');
        generatedWavBlob = encodeWavFloat32(msg.audioData, msg.sampleRate);
        // Release the previous preview's object URL so regenerated clips
        // do not accumulate in memory.
        const previousUrl = musicAudio.src;
        musicAudio.src = URL.createObjectURL(generatedWavBlob);
        if (previousUrl && previousUrl.startsWith('blob:')) {
          URL.revokeObjectURL(previousUrl);
        }
        musicAudio.style.display = 'block';
        musicSaveBtn.style.display = 'inline-block';
        musicGenerateBtn.disabled = false;
        hideProgressTimer = setTimeout(() => { musicProgress.style.display = 'none'; }, 1500);
      } else if (msg.type === 'error') {
        musicGenerateBtn.disabled = false;
        musicProgress.style.display = 'none';
        musicStatus.textContent = 'Error: ' + msg.message;
      }
    };
    worker.onerror = (err) => {
      // An error event can mean the worker script never loaded; drop the
      // worker so the next click starts a fresh one instead of posting to a
      // worker that will never answer.
      worker.terminate();
      if (musicWorker === worker) musicWorker = null;
      musicGenerateBtn.disabled = false;
      musicProgress.style.display = 'none';
      musicStatus.textContent = 'Worker error: ' + (err.message || 'Unknown error');
    };
  }
  musicWorker.postMessage({
    type: 'generate',
    prompt: prompt,
    durationSec: durationSec,
  });
});
3278
+
3279
// Save button: uploads the generated WAV to /api/save-music and, on success,
// marks generated BGM as the export source and ticks "Include in export".
musicSaveBtn.addEventListener('click', async () => {
  if (!generatedWavBlob) return;
  musicSaveBtn.disabled = true;
  musicStatus.textContent = 'Saving...';
  try {
    const resp = await fetch('/api/save-music', {
      method: 'POST',
      headers: { 'Content-Type': 'audio/wav' },
      body: generatedWavBlob,
    });
    // A failed request may not carry a JSON body; treat that as "no details"
    // and report the HTTP status instead of surfacing a parse error.
    let result = null;
    try {
      result = await resp.json();
    } catch (parseErr) {
      result = null;
    }
    if (resp.ok && result && result.ok) {
      hasGeneratedBgm = true;
      musicInclude.checked = true;
      updateMusicHelp();
      musicStatus.textContent = 'Saved to ' + result.path;
    } else {
      const reason = result && result.error ? result.error : 'HTTP ' + resp.status;
      musicStatus.textContent = 'Save failed: ' + reason;
    }
  } catch (err) {
    musicStatus.textContent = 'Save error: ' + err.message;
  } finally {
    musicSaveBtn.disabled = false;
  }
});
3305
+ })();
3306
+
2675
3307
  // ─── Init ──────────────────────────────────────────────────────────────────
2676
3308
  renderSceneList();
2677
3309
  snapshotAllScenes();