comfy-qa 2.4.0 → 2.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/recorder/post-mix.ts +51 -53
package/package.json
CHANGED
package/src/recorder/post-mix.ts
CHANGED
|
@@ -2,9 +2,11 @@
|
|
|
2
2
|
* Post-mix narration audio onto the recorded video.
|
|
3
3
|
*
|
|
4
4
|
* Two modes:
|
|
5
|
-
* - Timed: each narration segment
|
|
6
|
-
*
|
|
7
|
-
*
|
|
5
|
+
* - Timed: each narration segment placed at its measured video timestamp (startMs).
|
|
6
|
+
* - Sequential (fallback): concatenated track delayed by offsetMs.
|
|
7
|
+
*
|
|
8
|
+
* Subtitles are embedded as mov_text soft stream (visible in VLC, browsers, Gemini)
|
|
9
|
+
* AND written as a VTT sidecar for the web player <track> element.
|
|
8
10
|
*/
|
|
9
11
|
import { $ } from "bun";
|
|
10
12
|
import * as fs from "fs";
|
|
@@ -22,22 +24,20 @@ interface Meta {
|
|
|
22
24
|
totalDurationMs: number;
|
|
23
25
|
}
|
|
24
26
|
|
|
25
|
-
/** Format ms as SRT timestamp HH:MM:SS,mmm */
|
|
26
27
|
function srtTime(ms: number): string {
|
|
27
28
|
const h = Math.floor(ms / 3600000);
|
|
28
29
|
const m = Math.floor((ms % 3600000) / 60000);
|
|
29
30
|
const s = Math.floor((ms % 60000) / 1000);
|
|
30
|
-
const
|
|
31
|
-
return `${String(h).padStart(2,
|
|
31
|
+
const r = ms % 1000;
|
|
32
|
+
return `${String(h).padStart(2,"0")}:${String(m).padStart(2,"0")}:${String(s).padStart(2,"0")},${String(r).padStart(3,"0")}`;
|
|
32
33
|
}
|
|
33
34
|
|
|
34
|
-
/** Format ms as WebVTT timestamp HH:MM:SS.mmm */
|
|
35
35
|
function vttTime(ms: number): string {
|
|
36
36
|
return srtTime(ms).replace(",", ".");
|
|
37
37
|
}
|
|
38
38
|
|
|
39
|
-
/**
|
|
40
|
-
function
|
|
39
|
+
/** Write narration.srt + narration.vtt to outDir. Returns srtPath. */
|
|
40
|
+
function writeSubtitleFiles(meta: Meta, fallbackOffsetMs: number, outDir: string): string {
|
|
41
41
|
const srtLines: string[] = [];
|
|
42
42
|
const vttLines: string[] = ["WEBVTT", ""];
|
|
43
43
|
let cursor = fallbackOffsetMs;
|
|
@@ -45,29 +45,17 @@ function generateSubtitles(meta: Meta, fallbackOffsetMs: number, outDir: string)
|
|
|
45
45
|
meta.segments.forEach((seg, i) => {
|
|
46
46
|
const start = seg.startMs ?? cursor;
|
|
47
47
|
const end = start + seg.durationMs;
|
|
48
|
-
cursor =
|
|
49
|
-
|
|
48
|
+
cursor = end;
|
|
50
49
|
srtLines.push(String(i + 1), `${srtTime(start)} --> ${srtTime(end)}`, seg.text, "");
|
|
51
50
|
vttLines.push(String(i + 1), `${vttTime(start)} --> ${vttTime(end)}`, seg.text, "");
|
|
52
51
|
});
|
|
53
52
|
|
|
54
|
-
|
|
53
|
+
const srtPath = path.join(outDir, "narration.srt");
|
|
54
|
+
fs.writeFileSync(srtPath, srtLines.join("\n"));
|
|
55
55
|
fs.writeFileSync(path.join(outDir, "narration.vtt"), vttLines.join("\n"));
|
|
56
|
+
return srtPath;
|
|
56
57
|
}
|
|
57
58
|
|
|
58
|
-
/**
|
|
59
|
-
* Mix narration segments onto video.
|
|
60
|
-
*
|
|
61
|
-
* If meta.json segments have `startMs`, each segment is placed at that exact
|
|
62
|
-
* video timestamp (timed mode — perfectly synced with pre-planned recording).
|
|
63
|
-
* Otherwise falls back to a single adelay of `offsetMs` on the concatenated track.
|
|
64
|
-
*
|
|
65
|
-
* @param videoPath recorded video (webm or mp4)
|
|
66
|
-
* @param trackPath concatenated narration_track.wav
|
|
67
|
-
* @param metaPath meta.json with segments array
|
|
68
|
-
* @param offsetMs fallback global adelay (0 in timed mode)
|
|
69
|
-
* @param outPath output mp4
|
|
70
|
-
*/
|
|
71
59
|
export async function postMix(
|
|
72
60
|
videoPath: string,
|
|
73
61
|
trackPath: string,
|
|
@@ -78,43 +66,53 @@ export async function postMix(
|
|
|
78
66
|
const meta: Meta = JSON.parse(fs.readFileSync(metaPath, "utf-8"));
|
|
79
67
|
const outDir = path.dirname(outPath);
|
|
80
68
|
|
|
81
|
-
const timedMode = meta.segments.every((s) => s.startMs != null);
|
|
82
|
-
console.log(` [post-mix] Mode: ${timedMode ? "timed
|
|
69
|
+
const timedMode = meta.segments.length > 0 && meta.segments.every((s) => s.startMs != null);
|
|
70
|
+
console.log(` [post-mix] Mode: ${timedMode ? "timed" : "sequential"} | ${meta.segments.length} segments`);
|
|
83
71
|
|
|
84
|
-
//
|
|
85
|
-
|
|
86
|
-
let inputFlag: string[];
|
|
72
|
+
// Write subtitle files first (SRT embedded into mp4, VTT served as sidecar)
|
|
73
|
+
const srtPath = writeSubtitleFiles(meta, offsetMs, outDir);
|
|
87
74
|
|
|
88
75
|
if (timedMode) {
|
|
89
|
-
// Each segment WAV file is delayed to its measured video timestamp.
|
|
90
|
-
// We need individual WAV paths — stored alongside narration_track in narration/ dir.
|
|
91
76
|
const narrationDir = path.dirname(trackPath);
|
|
92
77
|
const segWavs = meta.segments.map((s) => path.join(narrationDir, `${s.id}.wav`));
|
|
93
78
|
const missing = segWavs.filter((p) => !fs.existsSync(p));
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
const
|
|
103
|
-
audioFilter = `${delays};${
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
79
|
+
|
|
80
|
+
if (missing.length === 0) {
|
|
81
|
+
// Build per-segment adelay filter
|
|
82
|
+
const audioInputs = segWavs.flatMap((p) => ["-i", p]);
|
|
83
|
+
const n = meta.segments.length;
|
|
84
|
+
const delays = meta.segments.map((s, i) =>
|
|
85
|
+
`[${i + 1}:a]adelay=${s.startMs}|${s.startMs}[a${i}]`
|
|
86
|
+
).join(";");
|
|
87
|
+
const mixIn = meta.segments.map((_, i) => `[a${i}]`).join("");
|
|
88
|
+
const audioFilter = `${delays};${mixIn}amix=inputs=${n}:normalize=0[aout]`;
|
|
89
|
+
const srtInputIdx = n + 1;
|
|
90
|
+
|
|
91
|
+
console.log(` [post-mix] Timed mix + subtitle embed…`);
|
|
92
|
+
await $`ffmpeg -y -i ${videoPath} ${audioInputs} -i ${srtPath} \
|
|
93
|
+
-filter_complex ${audioFilter} \
|
|
94
|
+
-map 0:v -map [aout] -map ${String(srtInputIdx)}:s \
|
|
95
|
+
-c:v libx264 -preset fast -pix_fmt yuv420p \
|
|
96
|
+
-c:a aac -b:a 128k \
|
|
97
|
+
-c:s mov_text -metadata:s:s:0 language=eng \
|
|
98
|
+
-shortest ${outPath}`.quiet();
|
|
99
|
+
|
|
100
|
+
console.log(` [post-mix] ✓ ${outPath}`);
|
|
109
101
|
return;
|
|
110
102
|
}
|
|
103
|
+
console.log(` [post-mix] Missing ${missing.length} WAVs — falling back to sequential`);
|
|
111
104
|
}
|
|
112
105
|
|
|
113
|
-
// Sequential mode
|
|
114
|
-
function timedFallback() {}
|
|
106
|
+
// Sequential mode
|
|
115
107
|
const adelay = `${offsetMs}|${offsetMs}`;
|
|
116
|
-
console.log(` [post-mix]
|
|
117
|
-
await $`ffmpeg -y -i ${videoPath} -i ${trackPath} -
|
|
118
|
-
|
|
119
|
-
|
|
108
|
+
console.log(` [post-mix] Sequential mix + subtitle embed…`);
|
|
109
|
+
await $`ffmpeg -y -i ${videoPath} -i ${trackPath} -i ${srtPath} \
|
|
110
|
+
-filter_complex ${`[1:a]adelay=${adelay}[aout]`} \
|
|
111
|
+
-map 0:v -map [aout] -map 2:s \
|
|
112
|
+
-c:v libx264 -preset fast -pix_fmt yuv420p \
|
|
113
|
+
-c:a aac -b:a 128k \
|
|
114
|
+
-c:s mov_text -metadata:s:s:0 language=eng \
|
|
115
|
+
-shortest ${outPath}`.quiet();
|
|
116
|
+
|
|
117
|
+
console.log(` [post-mix] ✓ ${outPath}`);
|
|
120
118
|
}
|