comfy-qa 2.4.0 → 2.4.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "comfy-qa",
3
- "version": "2.4.0",
3
+ "version": "2.4.1",
4
4
  "description": "ComfyUI QA automation CLI",
5
5
  "repository": {
6
6
  "type": "git",
@@ -2,9 +2,11 @@
2
2
  * Post-mix narration audio onto the recorded video.
3
3
  *
4
4
  * Two modes:
5
- * - Timed: each narration segment is placed at its measured video timestamp
6
- * (startMs from meta.json). Achieves perfect sync with pre-planned recording.
7
- * - Sequential (fallback): segments are concatenated and delayed by offsetMs.
5
+ * - Timed: each narration segment placed at its measured video timestamp (startMs).
6
+ * - Sequential (fallback): concatenated track delayed by offsetMs.
7
+ *
8
+ * Subtitles are embedded as mov_text soft stream (visible in VLC, browsers, Gemini)
9
+ * AND written as a VTT sidecar for the web player <track> element.
8
10
  */
9
11
  import { $ } from "bun";
10
12
  import * as fs from "fs";
@@ -22,22 +24,20 @@ interface Meta {
22
24
  totalDurationMs: number;
23
25
  }
24
26
 
25
- /** Format ms as SRT timestamp HH:MM:SS,mmm */
26
27
  function srtTime(ms: number): string {
27
28
  const h = Math.floor(ms / 3600000);
28
29
  const m = Math.floor((ms % 3600000) / 60000);
29
30
  const s = Math.floor((ms % 60000) / 1000);
30
- const msr = ms % 1000;
31
- return `${String(h).padStart(2, "0")}:${String(m).padStart(2, "0")}:${String(s).padStart(2, "0")},${String(msr).padStart(3, "0")}`;
31
+ const r = ms % 1000;
32
+ return `${String(h).padStart(2,"0")}:${String(m).padStart(2,"0")}:${String(s).padStart(2,"0")},${String(r).padStart(3,"0")}`;
32
33
  }
33
34
 
34
- /** Format ms as WebVTT timestamp HH:MM:SS.mmm */
35
35
  function vttTime(ms: number): string {
36
36
  return srtTime(ms).replace(",", ".");
37
37
  }
38
38
 
39
- /** Generate SRT + WebVTT subtitle files. Uses startMs if available, else sequential. */
40
- function generateSubtitles(meta: Meta, fallbackOffsetMs: number, outDir: string): void {
39
+ /** Write narration.srt + narration.vtt to outDir. Returns srtPath. */
40
+ function writeSubtitleFiles(meta: Meta, fallbackOffsetMs: number, outDir: string): string {
41
41
  const srtLines: string[] = [];
42
42
  const vttLines: string[] = ["WEBVTT", ""];
43
43
  let cursor = fallbackOffsetMs;
@@ -45,29 +45,17 @@ function generateSubtitles(meta: Meta, fallbackOffsetMs: number, outDir: string)
45
45
  meta.segments.forEach((seg, i) => {
46
46
  const start = seg.startMs ?? cursor;
47
47
  const end = start + seg.durationMs;
48
- cursor = seg.startMs != null ? end : end; // advance cursor either way
49
-
48
+ cursor = end;
50
49
  srtLines.push(String(i + 1), `${srtTime(start)} --> ${srtTime(end)}`, seg.text, "");
51
50
  vttLines.push(String(i + 1), `${vttTime(start)} --> ${vttTime(end)}`, seg.text, "");
52
51
  });
53
52
 
54
- fs.writeFileSync(path.join(outDir, "narration.srt"), srtLines.join("\n"));
53
+ const srtPath = path.join(outDir, "narration.srt");
54
+ fs.writeFileSync(srtPath, srtLines.join("\n"));
55
55
  fs.writeFileSync(path.join(outDir, "narration.vtt"), vttLines.join("\n"));
56
+ return srtPath;
56
57
  }
57
58
 
58
- /**
59
- * Mix narration segments onto video.
60
- *
61
- * If meta.json segments have `startMs`, each segment is placed at that exact
62
- * video timestamp (timed mode — perfectly synced with pre-planned recording).
63
- * Otherwise falls back to a single adelay of `offsetMs` on the concatenated track.
64
- *
65
- * @param videoPath recorded video (webm or mp4)
66
- * @param trackPath concatenated narration_track.wav
67
- * @param metaPath meta.json with segments array
68
- * @param offsetMs fallback global adelay (0 in timed mode)
69
- * @param outPath output mp4
70
- */
71
59
  export async function postMix(
72
60
  videoPath: string,
73
61
  trackPath: string,
@@ -78,43 +66,53 @@ export async function postMix(
78
66
  const meta: Meta = JSON.parse(fs.readFileSync(metaPath, "utf-8"));
79
67
  const outDir = path.dirname(outPath);
80
68
 
81
- const timedMode = meta.segments.every((s) => s.startMs != null);
82
- console.log(` [post-mix] Mode: ${timedMode ? "timed (real timestamps)" : "sequential (adelay)"}`);
69
+ const timedMode = meta.segments.length > 0 && meta.segments.every((s) => s.startMs != null);
70
+ console.log(` [post-mix] Mode: ${timedMode ? "timed" : "sequential"} | ${meta.segments.length} segments`);
83
71
 
84
- // Build ffmpeg audio filter
85
- let audioFilter: string;
86
- let inputFlag: string[];
72
+ // Write subtitle files first (SRT embedded into mp4, VTT served as sidecar)
73
+ const srtPath = writeSubtitleFiles(meta, offsetMs, outDir);
87
74
 
88
75
  if (timedMode) {
89
- // Each segment WAV file is delayed to its measured video timestamp.
90
- // We need individual WAV paths — stored alongside narration_track in narration/ dir.
91
76
  const narrationDir = path.dirname(trackPath);
92
77
  const segWavs = meta.segments.map((s) => path.join(narrationDir, `${s.id}.wav`));
93
78
  const missing = segWavs.filter((p) => !fs.existsSync(p));
94
- if (missing.length > 0) {
95
- // Fall back to sequential if individual WAVs are missing
96
- console.log(` [post-mix] Missing ${missing.length} segment WAVs — falling back to sequential`);
97
- timedFallback();
98
- } else {
99
- // -i video -i seg0.wav -i seg1.wav ...
100
- inputFlag = segWavs.flatMap((p) => ["-i", p]);
101
- const delays = meta.segments.map((s, i) => `[${i + 1}:a]adelay=${s.startMs}|${s.startMs}[a${i}]`).join(";");
102
- const mix = meta.segments.map((_, i) => `[a${i}]`).join("");
103
- audioFilter = `${delays};${mix}amix=inputs=${meta.segments.length}:normalize=0[aout]`;
104
-
105
- console.log(` [post-mix] Mixing ${meta.segments.length} timed segments…`);
106
- await $`ffmpeg -y -i ${videoPath} ${inputFlag} -filter_complex ${audioFilter} -map 0:v -map [aout] -c:v libx264 -preset fast -pix_fmt yuv420p -c:a aac -b:a 128k -shortest ${outPath}`.quiet();
107
- generateSubtitles(meta, 0, outDir);
108
- console.log(` [post-mix] Final video → ${outPath}`);
79
+
80
+ if (missing.length === 0) {
81
+ // Build per-segment adelay filter
82
+ const audioInputs = segWavs.flatMap((p) => ["-i", p]);
83
+ const n = meta.segments.length;
84
+ const delays = meta.segments.map((s, i) =>
85
+ `[${i + 1}:a]adelay=${s.startMs}|${s.startMs}[a${i}]`
86
+ ).join(";");
87
+ const mixIn = meta.segments.map((_, i) => `[a${i}]`).join("");
88
+ const audioFilter = `${delays};${mixIn}amix=inputs=${n}:normalize=0[aout]`;
89
+ const srtInputIdx = n + 1;
90
+
91
+ console.log(` [post-mix] Timed mix + subtitle embed…`);
92
+ await $`ffmpeg -y -i ${videoPath} ${audioInputs} -i ${srtPath} \
93
+ -filter_complex ${audioFilter} \
94
+ -map 0:v -map [aout] -map ${String(srtInputIdx)}:s \
95
+ -c:v libx264 -preset fast -pix_fmt yuv420p \
96
+ -c:a aac -b:a 128k \
97
+ -c:s mov_text -metadata:s:s:0 language=eng \
98
+ -shortest ${outPath}`.quiet();
99
+
100
+ console.log(` [post-mix] ✓ ${outPath}`);
109
101
  return;
110
102
  }
103
+ console.log(` [post-mix] Missing ${missing.length} WAVs — falling back to sequential`);
111
104
  }
112
105
 
113
- // Sequential mode (or timed fallback)
114
- function timedFallback() {}
106
+ // Sequential mode
115
107
  const adelay = `${offsetMs}|${offsetMs}`;
116
- console.log(` [post-mix] Overlaying track (adelay=${offsetMs}ms)…`);
117
- await $`ffmpeg -y -i ${videoPath} -i ${trackPath} -filter_complex ${`[1:a]adelay=${adelay}[aout]`} -map 0:v -map [aout] -c:v libx264 -preset fast -pix_fmt yuv420p -c:a aac -b:a 128k -shortest ${outPath}`.quiet();
118
- generateSubtitles(meta, offsetMs, outDir);
119
- console.log(` [post-mix] Final video → ${outPath}`);
108
+ console.log(` [post-mix] Sequential mix + subtitle embed…`);
109
+ await $`ffmpeg -y -i ${videoPath} -i ${trackPath} -i ${srtPath} \
110
+ -filter_complex ${`[1:a]adelay=${adelay}[aout]`} \
111
+ -map 0:v -map [aout] -map 2:s \
112
+ -c:v libx264 -preset fast -pix_fmt yuv420p \
113
+ -c:a aac -b:a 128k \
114
+ -c:s mov_text -metadata:s:s:0 language=eng \
115
+ -shortest ${outPath}`.quiet();
116
+
117
+ console.log(` [post-mix] ✓ ${outPath}`);
120
118
  }