polysync 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
polysync/__init__.py ADDED
@@ -0,0 +1,19 @@
1
+ """polysync — multicam audio sync + director-style auto-edit.
2
+
3
+ Align N recordings of one event by audio cross-correlation (envelope-based,
4
+ robust at low SNR), emit reversible `.sync.json` sidecars (originals are never
5
+ re-encoded), then auto-cut / picture-in-picture them into a single MP4.
6
+
7
+ Public API:
8
+ from polysync import compute_sync, SyncResult, SyncError
9
+ from polysync.sidecar import read_sidecar, write_sidecar
10
+ """
11
+ from .sync import compute_sync, SyncResult, SyncError
12
+ from .sidecar import read_sidecar, write_sidecar, sidecar_path, SCHEMA_VERSION
13
+
14
+ __version__ = "0.1.0"  # package version string; also listed in __all__ below
15
+ __all__ = [  # public names: everything imported above from .sync and .sidecar, plus the version
16
+ "compute_sync", "SyncResult", "SyncError",
17
+ "read_sidecar", "write_sidecar", "sidecar_path", "SCHEMA_VERSION",
18
+ "__version__",
19
+ ]
polysync/audio.py ADDED
@@ -0,0 +1,130 @@
1
+ """Shared audio primitives — the pieces sync, verify, and edit all need.
2
+
3
+ Everything here is either pure numpy/scipy (unit-testable without media) or a
4
+ thin ffmpeg/ffprobe wrapper. Keeping these in one place is the whole reason
5
+ polysync is a package and not three copy-pasted scripts.
6
+ """
7
+ import subprocess
8
+ from pathlib import Path
9
+
10
+ import numpy as np
11
+ from scipy import signal
12
+
13
+
14
def loudest_audio_stream(video_path):
    """Return the index N of the audio stream (`0:a:N`) with the highest mean
    volume, measured with ffmpeg's `volumedetect` over a 60 s probe window.

    Why this matters: pro cameras often record multiple audio tracks where the
    first one is dead. Sony FX6 MXF clips carry 4 mono PCM tracks and commonly
    leave a:0 / a:1 silent (~-90 dB) with the real room mic on a:2 / a:3.
    Hard-coding `0:a:0` would cross-correlate silence and fail to sync, so pick
    the loudest track instead. Single-stream files (most MP4 cams) short-circuit
    to a:0.

    The probe window starts 5 min in when the file is long enough to contain
    it (or when the duration cannot be determined). Shorter files are probed
    around their midpoint instead: seeking beyond the end of a short clip
    produces no `mean_volume` reading, every track ties, and the function
    would silently fall back to a:0.

    Raises subprocess.CalledProcessError if ffprobe cannot list the streams.
    """
    video_path = Path(video_path)
    streams = subprocess.run(
        ["ffprobe", "-v", "error", "-select_streams", "a",
         "-show_entries", "stream=index", "-of", "csv=p=0", str(video_path)],
        check=True, capture_output=True, text=True,
    ).stdout.strip().splitlines()
    if len(streams) <= 1:
        return 0

    probe_len = 60.0
    preferred_start = 300.0
    # Best-effort duration lookup: a failure here must not abort stream
    # selection, so no check=True and unparsable output is tolerated.
    raw_duration = subprocess.run(
        ["ffprobe", "-v", "error", "-show_entries", "format=duration",
         "-of", "default=nw=1:nk=1", str(video_path)],
        capture_output=True, text=True,
    ).stdout.strip()
    try:
        duration = float(raw_duration)
    except ValueError:
        duration = None
    if duration is None or duration >= preferred_start + probe_len:
        start = preferred_start
    else:
        # Centre the window in a short clip (clamped to the file start).
        start = max(0.0, (duration - probe_len) / 2.0)

    best_idx, best_db = 0, -1e9
    for ch in range(len(streams)):
        err = subprocess.run(
            ["ffmpeg", "-nostdin", "-hide_banner",
             "-ss", "%.3f" % start, "-t", "%.3f" % probe_len,
             "-i", str(video_path), "-map", "0:a:%d" % ch,
             "-af", "volumedetect", "-f", "null", "-"],
            capture_output=True, text=True,
        ).stderr
        for line in err.splitlines():
            if "mean_volume" in line:
                try:
                    db = float(line.split("mean_volume:")[1].strip().split()[0])
                except (IndexError, ValueError):
                    db = -1e9
                if db > best_db:
                    best_db, best_idx = db, ch
                break  # only the first mean_volume line per stream is used
    print(" [%s] loudest audio stream: a:%d (%.1f dB)"
          % (video_path.name, best_idx, best_db))
    return best_idx
53
+
54
+
55
def extract_pcm(video_path, dst, sr, stream=None):
    """Write one audio track of `video_path` to `dst` as mono s16le PCM at `sr` Hz.

    `stream` selects the `0:a:N` track; when it is None the loudest track is
    chosen automatically. The ffmpeg command never includes `-itsoffset`:
    offsets stay pure metadata and are applied downstream (index arithmetic /
    `-itsoffset` at consume time).

    Raises subprocess.CalledProcessError when ffmpeg exits non-zero.
    """
    source = Path(video_path)
    if stream is None:
        track = loudest_audio_stream(source)
    else:
        track = stream

    command = ["ffmpeg", "-nostdin", "-y", "-i", str(source)]
    command += ["-map", "0:a:%d" % track]
    command += ["-ac", "1", "-ar", str(sr)]
    command += ["-f", "s16le", str(dst)]
    # ffmpeg's stderr chatter is discarded; failures surface via check=True.
    subprocess.run(command, check=True, stderr=subprocess.DEVNULL)
70
+
71
+
72
def read_pcm(path):
    """Load a raw signed-16-bit PCM file and return its samples as float32."""
    samples = np.fromfile(str(path), dtype=np.int16)
    return samples.astype(np.float32)
75
+
76
+
77
def media_duration(path):
    """Return the container-level duration of `path` in seconds (via ffprobe).

    Raises subprocess.CalledProcessError if ffprobe fails, and ValueError if
    its output is not a number.
    """
    probe_cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=nw=1:nk=1",
        str(path),
    ]
    result = subprocess.run(probe_cmd, check=True, capture_output=True, text=True)
    seconds_text = result.stdout.strip()
    return float(seconds_text)
85
+
86
+
87
def frame_rms(x, sr, hop_ms=10, win_ms=50):
    """Sliding-window RMS of `x`. Returns (rms_per_frame, frame_sr_hz).

    Frame i covers samples [i*hop, i*hop + win), where hop and win are
    `hop_ms` / `win_ms` converted to whole samples at `sr` Hz. Window sums are
    read off a cumulative sum of squares, so the cost is O(n) regardless of
    window size and no per-frame Python loop is needed. This is the shared
    primitive behind both the sync envelope (log of this, high-passed) and the
    edit per-second loudness.

    The mean square is floored at 1e-9 before the square root. When `x` is
    shorter than one window an empty float32 array is returned.

    Raises ValueError if the hop or window truncates to zero samples (e.g. a
    very low `sr`), which would otherwise divide by zero.
    """
    hop = int(sr * hop_ms / 1000)
    win = int(sr * win_ms / 1000)
    if hop <= 0 or win <= 0:
        raise ValueError(
            "hop_ms=%r / win_ms=%r are shorter than one sample at sr=%r"
            % (hop_ms, win_ms, sr))
    frame_sr = sr / hop
    n = (len(x) - win) // hop + 1
    if n <= 0:
        return np.zeros(0, dtype=np.float32), frame_sr
    sq = np.asarray(x, dtype=np.float64) ** 2
    # csq[j] is the sum of the first j squared samples.
    csq = np.concatenate([[0.0], np.cumsum(sq)])
    starts = np.arange(n) * hop
    mean_sq = (csq[starts + win] - csq[starts]) / win
    rms = np.sqrt(np.maximum(1e-9, mean_sq)).astype(np.float32)
    return rms, frame_sr
106
+
107
+
108
def log_envelope(x, sr, hop_ms=10, win_ms=50, highpass_hz=0.05):
    """Return (envelope, frame_rate_hz): the log of the framed RMS of `x`.

    The envelope is log(rms + 1e-3); when `highpass_hz` is truthy it is then
    high-passed at that cutoff to strip slow gain/drift offsets.

    This is what sync cross-correlates: it captures dialogue/music dynamics
    that BOTH mics hear regardless of their frequency response — the reason
    the matcher is robust even when the two cameras have very different mics.
    """
    frame_levels, frame_sr = frame_rms(x, sr, hop_ms, win_ms)
    envelope = np.log(frame_levels + 1e-3)
    if not highpass_hz:
        # A cutoff of 0 / None disables the filter entirely.
        return envelope, frame_sr
    return highpass(envelope, frame_sr, highpass_hz), frame_sr
120
+
121
+
122
def highpass(x, fs, cut_hz=0.05):
    """High-pass `x` (sampled at `fs` Hz) at `cut_hz` and return float32.

    A 2nd-order Butterworth design in second-order-section form is applied
    with scipy's forward-backward `sosfiltfilt`.
    """
    sections = signal.butter(2, cut_hz, btype="high", fs=fs, output="sos")
    filtered = signal.sosfiltfilt(sections, x)
    return filtered.astype(np.float32)
125
+
126
+
127
def normalize(x):
    """Centre `x` on zero and scale it to unit standard deviation.

    A constant input (standard deviation 0) is only centred, not scaled.
    """
    centered = x - x.mean()
    spread = centered.std()
    if spread > 0:
        return centered / spread
    return centered
polysync/cli.py ADDED
@@ -0,0 +1,79 @@
1
+ """`polysync` command-line entry point.
2
+
3
+ polysync sync REFERENCE SOURCE [--partial]
4
+ polysync verify REFERENCE SOURCE SIDECAR [--apply-drift]
5
+ polysync edit IN1 IN2 ... --out edl.json [--mode rotation|greedy]
6
+ polysync render-cuts EDL --out out.mp4
7
+ polysync render-pip EDL --out out.mp4 [--pip bottom-right]
8
+ """
9
+ import argparse
10
+ import sys
11
+
12
+ from . import __version__
13
+ from .sync import sync_files, SyncError
14
+ from .verify import verify_files
15
+ from .edit import autoedit, render_cuts, render_pip
16
+
17
+ USAGE = __doc__  # the module docstring doubles as the help text printed by main()
18
+
19
+
20
def _cmd_sync(argv):
    """Handle `polysync sync REFERENCE SOURCE [--partial]`.

    Returns 0 on success, or 1 after printing the message of a SyncError to
    stderr.
    """
    parser = argparse.ArgumentParser(prog="polysync sync")
    parser.add_argument(
        "reference", help="Reference recording (defines the timeline)")
    parser.add_argument(
        "source", help="Source to align to the reference")
    parser.add_argument(
        "--partial", action="store_true",
        help="Lenient mode for a source covering only part of the "
             "reference's span; degrades gracefully, writes only the "
             "source sidecar.")
    opts = parser.parse_args(argv)

    try:
        sync_files(opts.reference, opts.source, partial=opts.partial)
    except SyncError as exc:
        print("ERROR: %s" % exc, file=sys.stderr)
        return 1
    else:
        return 0
35
+
36
+
37
def _cmd_verify(argv):
    """Handle `polysync verify REFERENCE SOURCE SIDECAR [--apply-drift] [--step S]`.

    Returns 0 when verification passes, 1 when it does not, and 2 after
    printing the message of a ValueError to stderr.
    """
    parser = argparse.ArgumentParser(prog="polysync verify")
    for positional in ("reference", "source"):
        parser.add_argument(positional)
    parser.add_argument("sidecar", help="The source's <source>.sync.json")
    parser.add_argument("--apply-drift", action="store_true")
    parser.add_argument(
        "--step", type=float, default=600.0,
        help="Probe spacing in seconds (default 10 min)")
    opts = parser.parse_args(argv)

    try:
        # Only the pass/fail flag is used; the second element is ignored.
        passed, _ = verify_files(
            opts.reference, opts.source, opts.sidecar,
            step=opts.step, apply_drift=opts.apply_drift)
    except ValueError as exc:
        print("ERROR: %s" % exc, file=sys.stderr)
        return 2
    if passed:
        return 0
    return 1
53
+
54
+
55
def main(argv=None):
    """Entry point of the `polysync` command; returns the process exit code.

    `argv` defaults to `sys.argv[1:]`. With no arguments or a help flag the
    usage text is printed; `-V` / `--version` prints the version. Otherwise
    the first argument names a sub-command and the rest is handed to it. An
    unknown sub-command prints the usage to stderr and returns 2.
    """
    args = list(sys.argv[1:] if argv is None else argv)

    if not args or args[0] in ("-h", "--help", "help"):
        print(USAGE)
        return 0

    command = args[0]
    if command in ("-V", "--version"):
        print("polysync %s" % __version__)
        return 0

    # The edit/render entry points may return None, hence the `or 0`.
    handlers = {
        "sync": _cmd_sync,
        "verify": _cmd_verify,
        "edit": lambda a: autoedit.main(a) or 0,
        "render-cuts": lambda a: render_cuts.main(a) or 0,
        "render-pip": lambda a: render_pip.main(a) or 0,
    }
    handler = handlers.get(command)
    if handler is None:
        print("Unknown command %r.\n%s" % (command, USAGE), file=sys.stderr)
        return 2
    return handler(args[1:])


if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,9 @@
1
+ """Director-style multicam auto-edit on top of polysync sidecars.
2
+
3
+ autoedit — build an EDL (which cam is on screen each second) from synced inputs
4
+ render_cuts — render the EDL to one MP4 (hard cuts)
5
+ render_pip — render the EDL with a picture-in-picture inset
6
+ """
7
+ from .autoedit import build_edl
8
+
9
+ __all__ = ["build_edl"]  # only build_edl (imported above from .autoedit) is exported here
@@ -0,0 +1,321 @@
1
+ """Build a director-style EDL from N synced camera angles.
2
+
3
+ Inputs are ORIGINAL untouched media; each should have a `<input>.sync.json`
4
+ sidecar (from `polysync sync`). Sidecars give per-cam `delta_seconds` and
5
+ `overlap_in_reference`. Missing sidecar => cam assumed at delta=0, full coverage.
6
+
7
+ Decisions are audio-energy-driven only: per second, the cam whose mic is
8
+ loudest relative to the others (active-speaker proxy) wins, subject to dwell
9
+ hysteresis and coverage. No face/framing detection.
10
+ """
11
+ import argparse
12
+ import json
13
+ import tempfile
14
+ import warnings
15
+ from pathlib import Path
16
+
17
+ import numpy as np
18
+
19
+ from .. import audio
20
+ from ..sidecar import read_sidecar, SCHEMA_VERSION
21
+
22
+ SR = 16000  # sample rate (Hz) passed to audio.extract_pcm and audio.frame_rms
23
+ FRAME_HZ = 1.0  # rate of the reference-timeline grid: _lift_to_reference averages env_sr / FRAME_HZ envelope frames per slot
24
+ ENV_HOP_MS = 100  # hop_ms handed to audio.frame_rms when building the loudness envelope
25
+ ENV_WIN_MS = 200  # win_ms handed to audio.frame_rms when building the loudness envelope
26
+
27
+
28
def _per_sec_envelope(x):
    """Return (log_rms, frame_rate_hz) for the samples `x`.

    The envelope is log(rms + 1e-3) of the framed RMS computed with the
    module's ENV_HOP_MS / ENV_WIN_MS settings at SR. It is still at the frame
    rate here; averaging down to the reference grid happens in
    `_lift_to_reference`.
    """
    levels, frame_hz = audio.frame_rms(
        x, SR, hop_ms=ENV_HOP_MS, win_ms=ENV_WIN_MS)
    return np.log(levels + 1e-3), frame_hz
33
+
34
+
35
def _lift_to_reference(env, env_sr, delta_sec, total_ref_sec):
    """Lift a cam-local per-frame envelope into the reference timeline at 1 Hz.

    `env` (at `env_sr` frames per second) is first averaged into whole local
    seconds; a trailing partial second is dropped. Reference second t then
    reads the cam's local second floor(t - delta_sec). Seconds outside the
    cam's recorded range become -inf so the editor never picks them.

    Flooring (rather than truncating toward zero) matters for a fractional
    positive `delta_sec`: truncation would map the reference second just
    before the cam starts onto local second 0, using that second twice.

    Returns a float32 array of length `total_ref_sec`.
    """
    n_per = int(env_sr / FRAME_HZ)
    take = (len(env) // n_per) * n_per
    local = env[:take].reshape(-1, n_per).mean(axis=1) if take else np.zeros(0)
    out = np.full(total_ref_sec, -np.inf, dtype=np.float32)
    # Local-second index for every reference second, computed in one shot.
    src = np.floor(np.arange(total_ref_sec) - delta_sec).astype(np.int64)
    inside = (src >= 0) & (src < len(local))
    out[inside] = local[src[inside]]
    return out
50
+
51
+
52
def _coverage_from_sidecar(input_path, total):
    """Return the (start, end) coverage window for `input_path`.

    The window is the overlap read from the input's sidecar, clamped to
    [0, total]. When the sidecar carries no overlap the whole span
    (0.0, total) is returned.
    """
    _delta, overlap, _found = read_sidecar(input_path)
    span_end = float(total)
    if overlap is None:
        return (0.0, span_end)
    begin = max(0.0, overlap[0])
    finish = min(span_end, overlap[1])
    return (begin, finish)
57
+
58
+
59
def _parse_coverage_flag(values, k_total, total):
    """Parse repeated `--coverage CAM:START:END` values into per-cam windows.

    Returns a list of `k_total` (start, end) tuples. Cams that are not
    mentioned keep the full window (0.0, total); a later flag for the same
    cam replaces an earlier one. `values` may be None or empty.

    Raises SystemExit with a usage message when a value does not have three
    colon-separated fields, when a field is not numeric, or when the cam
    index is outside [0, k_total).
    """
    def malformed(raw):
        return SystemExit("--coverage expects CAM:START:END, got %r" % raw)

    cov = [(0.0, float(total))] * k_total
    for v in (values or []):
        parts = v.split(":")
        if len(parts) != 3:
            raise malformed(v)
        try:
            k = int(parts[0])
        except ValueError:
            # Report bad numbers like any other malformed flag, not as a traceback.
            raise malformed(v) from None
        if not (0 <= k < k_total):
            raise SystemExit("--coverage cam %d out of range" % k)
        try:
            window = (float(parts[1]), float(parts[2]))
        except ValueError:
            raise malformed(v) from None
        cov[k] = window
    return cov
70
+
71
+
72
def _covered_at(cov, t):
    """Return the indices of the cams whose window contains `t` (start <= t < end)."""
    active = []
    for cam, (start, end) in enumerate(cov):
        if start <= t < end:
            active.append(cam)
    return active
74
+
75
+
76
def rotation_edit(scores, coverage, min_dwell=8, max_dwell=15,
                  opening_dwell=10, seed=42):
    """Alternate among covered cams with randomly varying dwell.

    `scores` is a (seconds, cams) array and `coverage` a per-cam list of
    (start, end) windows. Each shot lasts a dwell drawn uniformly from
    [min_dwell, max_dwell] (seeded by `seed`), cut short when the active cam
    leaves coverage. The next cam is the covered cam other than the current
    one with the best mean score over the following 6 seconds; the opening
    cam is chosen over `opening_dwell` seconds.

    Returns an int32 array with one cam index per second. Raises SystemExit
    if no camera is covered at any second.
    """
    n_sec, _ = scores.shape
    rng = np.random.default_rng(seed)
    plan = np.full(n_sec, -1, dtype=np.int32)

    def mean_score(cam, lo, hi):
        # Mean over [lo, hi); an empty window falls back to the sample at lo.
        if hi > lo:
            return scores[lo:hi, cam].mean()
        return scores[lo, cam]

    # The overlap window often starts a few seconds in (no cam covers t=0).
    # Open at the first covered second; leading seconds are backfilled below.
    start = 0
    live = _covered_at(coverage, 0)
    if not live:
        start = -1
        for sec in range(n_sec):
            if _covered_at(coverage, sec):
                start = sec
                break
        if start < 0:
            raise SystemExit("No camera is covered at any time")
        live = _covered_at(coverage, start)
    opening_end = min(n_sec, start + opening_dwell)
    active = max(live, key=lambda cam: mean_score(cam, start, opening_end))

    pos = start
    while pos < n_sec:
        hold = int(rng.integers(min_dwell, max_dwell + 1))
        stop = pos
        # Hold the active cam for `hold` seconds or until it drops out.
        while (stop < pos + hold and stop < n_sec
               and active in _covered_at(coverage, stop)):
            plan[stop] = active
            stop += 1
        if stop >= n_sec:
            break
        here = _covered_at(coverage, stop)
        options = [cam for cam in here if cam != active] or here
        if not options:
            # Nobody is covered: keep the current cam and move on one second.
            plan[stop] = active
            pos = stop + 1
            continue
        ahead = min(n_sec, stop + 6)
        active = max(options, key=lambda cam: mean_score(cam, stop, ahead))
        pos = stop

    # Backfill seconds that were never assigned (those before the opening).
    for sec in range(n_sec):
        if plan[sec] == -1:
            here = _covered_at(coverage, sec)
            plan[sec] = here[0] if here else 0
    return plan
126
+
127
+
128
def greedy_edit(scores, coverage, min_dwell=4, max_dwell=18, lookahead=4,
                switch_threshold=0.0, opening_dwell=8):
    """Greedy hard-cut editor with min/max dwell hysteresis.

    `scores` is a (seconds, cams) array and `coverage` a per-cam list of
    (start, end) windows. Second by second the current cam is kept until:
    it leaves coverage (forced switch), it has been on screen for
    `max_dwell` seconds (forced switch), or - once `min_dwell` seconds have
    passed - another covered cam beats it by more than `switch_threshold`
    on the mean score over the next `lookahead` seconds.

    Returns an int32 array with one cam index per second. Raises SystemExit
    if no camera is covered at any second.
    """
    n_sec, _ = scores.shape

    def window_score(sec, cam, width):
        # Mean over [sec, sec + width); an empty window uses the sample at sec.
        stop = min(n_sec, sec + width)
        if stop > sec:
            return scores[sec:stop, cam].mean()
        return scores[sec, cam]

    def loudest(cams, sec):
        return max(cams, key=lambda cam: window_score(sec, cam, lookahead))

    plan = np.full(n_sec, -1, dtype=np.int32)
    start = 0
    opening = _covered_at(coverage, 0)
    if not opening:
        start = next(
            (sec for sec in range(n_sec) if _covered_at(coverage, sec)), -1)
        if start < 0:
            raise SystemExit("No camera is covered at any time")
        opening = _covered_at(coverage, start)
    plan[start] = max(
        opening, key=lambda cam: window_score(start, cam, opening_dwell))

    held = 1  # seconds the current cam has been on screen
    for sec in range(start + 1, n_sec):
        active = plan[sec - 1]
        live = _covered_at(coverage, sec)
        rivals = [cam for cam in live if cam != active]

        if active not in live:
            # Current cam dropped out: jump to the best covered cam, if any.
            options = rivals or live
            if options:
                plan[sec] = loudest(options, sec)
                held = 1
            else:
                plan[sec] = active
                held += 1
        elif held < min_dwell or not rivals:
            plan[sec] = active
            held += 1
        elif held >= max_dwell:
            plan[sec] = loudest(rivals, sec)
            held = 1
        else:
            challenger = loudest(rivals, sec)
            bar = window_score(sec, active, lookahead) + switch_threshold
            if window_score(sec, challenger, lookahead) > bar:
                plan[sec] = challenger
                held = 1
            else:
                plan[sec] = active
                held += 1

    # Backfill any leading uncovered seconds (before the opening) with a covered cam.
    for sec in range(n_sec):
        if plan[sec] == -1:
            live = _covered_at(coverage, sec)
            plan[sec] = live[0] if live else 0
    return plan
175
+
176
+
177
def edl_from_seq(seq):
    """Collapse a per-second cam sequence into EDL rows.

    Each run of equal values becomes one dict
    {"start": float, "end": float, "cam": int}, where start/end are the
    run's index range [start, end).
    """
    rows = []
    count = len(seq)
    run_start = 0
    for pos in range(1, count + 1):
        # Close the current run at the end of the sequence or on a change.
        if pos == count or seq[pos] != seq[run_start]:
            rows.append({
                "start": float(run_start),
                "end": float(pos),
                "cam": int(seq[run_start]),
            })
            run_start = pos
    return rows
187
+
188
+
189
def build_edl(inputs, mode="rotation", audio_source=None, min_dwell=8,
              max_dwell=15, switch_threshold=0.0, seed=42, coverage_flags=None,
              verbose=True):
    """Compute the EDL plan dict for a list of input paths.

    Steps: read each input's sidecar, extract its audio to a temporary PCM
    file and build a loudness envelope, lift the envelopes onto the reference
    timeline, score each cam against the mean of the others, then run the
    `rotation` editor (or the greedy editor for any other `mode`).

    `audio_source` forces the master-audio cam index; when None the cam with
    the largest 90th-10th percentile envelope spread (plus half its coverage
    share) is picked. `coverage_flags` are `CAM:START:END` overrides.
    `verbose` controls the progress/warning prints.

    Returns a dict with keys `_about`, `schema_version`, `inputs`, `deltas`,
    `duration_sec`, `audio_source`, `coverage` and `edl`.
    """
    paths = [Path(item) for item in inputs]
    n_cams = len(paths)

    # Sidecar metadata per input: offset, overlap window (or None), found flag.
    deltas, overlaps, present = [], [], []
    for path in paths:
        delta, overlap, found = read_sidecar(path)
        deltas.append(delta)
        overlaps.append(overlap)
        present.append(found)
    missing = [path.name for path, found in zip(paths, present) if not found]
    if missing and verbose:
        print("WARN: no sidecar for %s; assuming delta=0, full coverage. "
              "Run `polysync sync` first if these should be offset." % missing)

    durations, envelopes = [], []
    with tempfile.TemporaryDirectory() as scratch:
        scratch_dir = Path(scratch)
        for idx, path in enumerate(paths):
            pcm_file = scratch_dir / ("%d.pcm" % idx)
            audio.extract_pcm(path, pcm_file, SR)
            samples = audio.read_pcm(pcm_file)
            durations.append(len(samples) / SR)
            envelopes.append(_per_sec_envelope(samples))

    # Timeline length: latest sidecar overlap end, else the shortest input.
    overlap_ends = [overlap[1] for overlap in overlaps if overlap is not None]
    if overlap_ends:
        total = int(max(overlap_ends))
    else:
        total = int(min(durations))

    per_sec = np.full((total, n_cams), -np.inf, dtype=np.float32)
    for cam, (env, env_sr) in enumerate(envelopes):
        per_sec[:, cam] = _lift_to_reference(env, env_sr, deltas[cam], total)

    coverage = [_coverage_from_sidecar(path, total) for path in paths]
    if coverage_flags:
        overrides = _parse_coverage_flag(coverage_flags, n_cams, total)
        for flag in coverage_flags:
            cam = int(flag.split(":")[0])
            coverage[cam] = overrides[cam]

    if verbose:
        print("Cameras (%d):" % n_cams)
        for cam, (path, (begin, finish)) in enumerate(zip(paths, coverage)):
            print(" cam%d: %s coverage [%.1f .. %.1f]s"
                  % (cam, path.name, begin, finish))

    # Score = this cam's level minus the mean level of the other cams.
    finite = np.where(np.isfinite(per_sec), per_sec, np.nan)
    if n_cams > 1:
        scores = np.full_like(per_sec, -np.inf)
        with warnings.catch_warnings():  # all-nan seconds -> nan, handled below
            warnings.simplefilter("ignore", RuntimeWarning)
            for cam in range(n_cams):
                rest_mean = np.nanmean(np.delete(finite, cam, axis=1), axis=1)
                margin = finite[:, cam] - rest_mean
                scores[:, cam] = np.where(np.isfinite(margin), margin, -np.inf)
    else:
        scores = per_sec.copy()

    if audio_source is not None:
        audio_src = audio_source
    else:
        spreads = []
        for cam in range(n_cams):
            column = finite[:, cam]
            heard = column[np.isfinite(column)]
            if len(heard) == 0:
                spreads.append(0.0)
            else:
                spreads.append(
                    float(np.percentile(heard, 90) - np.percentile(heard, 10)))
        cov_share = np.array([(finish - begin) / max(1, total)
                              for begin, finish in coverage])
        audio_src = int(np.argmax(np.array(spreads) + 0.5 * cov_share))

    if n_cams == 1:
        seq = np.zeros(total, dtype=np.int32)
    elif mode == "rotation":
        seq = rotation_edit(scores, coverage, min_dwell=min_dwell,
                            max_dwell=max_dwell, seed=seed)
    else:
        seq = greedy_edit(scores, coverage, min_dwell=min_dwell,
                          max_dwell=max_dwell,
                          switch_threshold=switch_threshold)

    plan = {}
    plan["_about"] = ("EDL produced by polysync.edit.autoedit. Times are in the "
                      "reference timeline. deltas[k] is the per-input offset; "
                      "render scripts apply ffmpeg -itsoffset deltas[k] so they "
                      "read original (un-trimmed) files.")
    plan["schema_version"] = SCHEMA_VERSION
    plan["inputs"] = [str(path) for path in paths]
    plan["deltas"] = [float(delta) for delta in deltas]
    plan["duration_sec"] = total
    plan["audio_source"] = audio_src
    plan["coverage"] = [list(window) for window in coverage]
    plan["edl"] = edl_from_seq(seq)
    return plan
283
+
284
+
285
def main(argv=None):
    """CLI for `polysync edit`: build an EDL, write it as JSON, print a summary.

    The plan from `build_edl` is written to `--out`; the summary lists the
    segment count, the chosen audio source and each cam's on-screen time.
    """
    parser = argparse.ArgumentParser(
        prog="polysync edit", description="Build a multicam auto-edit EDL.")
    parser.add_argument("inputs", type=Path, nargs="+",
                        help="Synced video files (camera 0, 1, ...)")
    parser.add_argument("--audio-source", type=int, default=None,
                        help="Cam index to use as master audio (default: highest "
                             "dynamic-range covered cam)")
    parser.add_argument("--mode", choices=["rotation", "greedy"],
                        default="rotation")
    parser.add_argument("--min-dwell", type=int, default=8)
    parser.add_argument("--max-dwell", type=int, default=15)
    parser.add_argument("--switch-threshold", type=float, default=0.0)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--coverage", action="append", default=None,
                        help="Override per-cam coverage CAM:START:END (repeatable)")
    parser.add_argument("--out", type=Path, required=True,
                        help="output EDL json")
    opts = parser.parse_args(argv)

    plan = build_edl(
        opts.inputs,
        mode=opts.mode,
        audio_source=opts.audio_source,
        min_dwell=opts.min_dwell,
        max_dwell=opts.max_dwell,
        switch_threshold=opts.switch_threshold,
        seed=opts.seed,
        coverage_flags=opts.coverage,
    )
    opts.out.write_text(json.dumps(plan, indent=2))

    segments = plan["edl"]
    total = plan["duration_sec"]
    print("\nEDL: %d segments; audio_source=cam%d; saved %s"
          % (len(segments), plan["audio_source"], opts.out))

    # Total on-screen seconds per cam, reported in cam-index order.
    screen_time = {}
    for seg in segments:
        cam = seg["cam"]
        screen_time[cam] = screen_time.get(cam, 0) + (seg["end"] - seg["start"])
    for cam in sorted(screen_time):
        secs = screen_time[cam]
        print(" cam%d: %.0fs on screen (%.0f%%)" % (cam, secs, 100 * secs / total))


if __name__ == "__main__":
    main()
@@ -0,0 +1,72 @@
1
+ """Render an autoedit EDL into one MP4 with hard cuts (no transitions / PiP).
2
+
3
+ Applies each input's `delta` via `ffmpeg -itsoffset` so EDL times (reference
4
+ timeline) work directly inside the filter graph — originals are read untouched.
5
+ """
6
+ import argparse
7
+ import json
8
+ import subprocess
9
+ from pathlib import Path
10
+
11
+
12
def render_cuts(edl_path, out, encoder="hevc_videotoolbox", bitrate="12M",
                width=1920, height=1080, fps=30, run=True):
    """Build (and optionally run) the ffmpeg command that renders an EDL.

    Reads the plan JSON at `edl_path`. Each input is opened with
    `-itsoffset <delta>` when its delta is non-zero, so EDL times (reference
    timeline) apply directly inside the filter graph. Every EDL row becomes a
    trimmed, scaled and padded `width`x`height` segment at `fps`; the segments
    are concatenated, and the audio comes from stream a:0 of the plan's
    `audio_source` input.

    The `hvc1` video tag is only added for HEVC encoders (name contains
    "hevc" or "265"); forcing it on another codec such as H.264 makes the
    MP4 muxer reject the stream.

    Returns the argument list. With `run=True` the command is also printed
    and executed, raising subprocess.CalledProcessError on failure.
    """
    plan = json.loads(Path(edl_path).read_text())
    inputs = plan["inputs"]
    deltas = plan.get("deltas", [0.0] * len(inputs))
    edl = plan["edl"]
    audio_src = plan["audio_source"]
    W, H = width, height

    cmd = ["ffmpeg", "-nostdin", "-y"]
    for src, dlt in zip(inputs, deltas):
        if abs(dlt) > 1e-9:
            cmd.extend(["-itsoffset", "%.6f" % dlt])
        cmd.extend(["-i", src])

    filters = []
    for i, row in enumerate(edl):
        filters.append(
            "[%d:v]trim=start=%s:end=%s,setpts=PTS-STARTPTS,"
            "scale=%d:%d:force_original_aspect_ratio=decrease,"
            "pad=%d:%d:(ow-iw)/2:(oh-ih)/2,setsar=1,fps=%d[v%d]"
            % (row["cam"], row["start"], row["end"], W, H, W, H, fps, i)
        )
    concat = "".join("[v%d]" % i for i in range(len(edl)))
    filters.append("%sconcat=n=%d:v=1:a=0[vout]" % (concat, len(edl)))
    fc = ";".join(filters)

    # Audio starts where the first EDL row starts and runs for the plan duration.
    audio_offset = edl[0]["start"] if edl else 0.0
    duration = plan["duration_sec"]
    fc += (";[%d:a:0]atrim=start=%s:duration=%s,asetpts=PTS-STARTPTS[aout]"
           % (audio_src, audio_offset, duration))

    video_codec = ["-c:v", encoder, "-b:v", bitrate]
    encoder_name = str(encoder).lower()
    if "hevc" in encoder_name or "265" in encoder_name:
        video_codec.extend(["-tag:v", "hvc1"])

    cmd.extend([
        "-filter_complex", fc,
        "-map", "[vout]", "-map", "[aout]",
        "-t", str(duration),
    ])
    cmd.extend(video_codec)
    cmd.extend([
        "-c:a", "aac", "-b:a", "192k",
        "-movflags", "+faststart", str(out),
    ])
    if run:
        print(" ".join(cmd))
        subprocess.run(cmd, check=True)
    return cmd
55
+
56
+
57
def main(argv=None):
    """CLI for `polysync render-cuts`: parse the options and render the EDL."""
    parser = argparse.ArgumentParser(prog="polysync render-cuts")
    parser.add_argument("edl", type=Path)
    parser.add_argument("--out", type=Path, required=True)
    parser.add_argument("--encoder", default="hevc_videotoolbox")
    parser.add_argument("--bitrate", default="12M")
    # Output geometry and frame rate are all plain integers.
    for flag, default in (("--width", 1920), ("--height", 1080), ("--fps", 30)):
        parser.add_argument(flag, type=int, default=default)
    opts = parser.parse_args(argv)

    render_cuts(
        opts.edl,
        opts.out,
        encoder=opts.encoder,
        bitrate=opts.bitrate,
        width=opts.width,
        height=opts.height,
        fps=opts.fps,
    )


if __name__ == "__main__":
    main()