polysync 0.1.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. {polysync-0.1.0/src/polysync.egg-info → polysync-0.3.0}/PKG-INFO +1 -1
  2. {polysync-0.1.0 → polysync-0.3.0}/pyproject.toml +1 -1
  3. {polysync-0.1.0 → polysync-0.3.0}/src/polysync/__init__.py +1 -1
  4. polysync-0.3.0/src/polysync/edit/audiomix.py +124 -0
  5. polysync-0.3.0/src/polysync/edit/grade.py +110 -0
  6. polysync-0.3.0/src/polysync/edit/render_cuts.py +112 -0
  7. {polysync-0.1.0 → polysync-0.3.0}/src/polysync/edit/render_pip.py +52 -15
  8. {polysync-0.1.0 → polysync-0.3.0/src/polysync.egg-info}/PKG-INFO +1 -1
  9. {polysync-0.1.0 → polysync-0.3.0}/src/polysync.egg-info/SOURCES.txt +2 -0
  10. polysync-0.1.0/src/polysync/edit/render_cuts.py +0 -72
  11. {polysync-0.1.0 → polysync-0.3.0}/LICENSE +0 -0
  12. {polysync-0.1.0 → polysync-0.3.0}/README.md +0 -0
  13. {polysync-0.1.0 → polysync-0.3.0}/setup.cfg +0 -0
  14. {polysync-0.1.0 → polysync-0.3.0}/src/polysync/audio.py +0 -0
  15. {polysync-0.1.0 → polysync-0.3.0}/src/polysync/cli.py +0 -0
  16. {polysync-0.1.0 → polysync-0.3.0}/src/polysync/edit/__init__.py +0 -0
  17. {polysync-0.1.0 → polysync-0.3.0}/src/polysync/edit/autoedit.py +0 -0
  18. {polysync-0.1.0 → polysync-0.3.0}/src/polysync/sidecar.py +0 -0
  19. {polysync-0.1.0 → polysync-0.3.0}/src/polysync/sync.py +0 -0
  20. {polysync-0.1.0 → polysync-0.3.0}/src/polysync/verify.py +0 -0
  21. {polysync-0.1.0 → polysync-0.3.0}/src/polysync.egg-info/dependency_links.txt +0 -0
  22. {polysync-0.1.0 → polysync-0.3.0}/src/polysync.egg-info/entry_points.txt +0 -0
  23. {polysync-0.1.0 → polysync-0.3.0}/src/polysync.egg-info/requires.txt +0 -0
  24. {polysync-0.1.0 → polysync-0.3.0}/src/polysync.egg-info/top_level.txt +0 -0
  25. {polysync-0.1.0 → polysync-0.3.0}/tests/test_sync_synthetic.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: polysync
3
- Version: 0.1.0
3
+ Version: 0.3.0
4
4
  Summary: Multicam audio sync and director-style auto-edit — align N angles of one event by audio cross-correlation, then cut/PiP them into one MP4. Reversible sidecars, never re-encodes the originals.
5
5
  Author: 王建硕 (Jian Shuo Wang)
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "polysync"
7
- version = "0.1.0"
7
+ version = "0.3.0"
8
8
  description = "Multicam audio sync and director-style auto-edit — align N angles of one event by audio cross-correlation, then cut/PiP them into one MP4. Reversible sidecars, never re-encodes the originals."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -11,7 +11,7 @@ Public API:
11
11
  from .sync import compute_sync, SyncResult, SyncError
12
12
  from .sidecar import read_sidecar, write_sidecar, sidecar_path, SCHEMA_VERSION
13
13
 
14
- __version__ = "0.1.0"
14
+ __version__ = "0.3.0"
15
15
  __all__ = [
16
16
  "compute_sync", "SyncResult", "SyncError",
17
17
  "read_sidecar", "write_sidecar", "sidecar_path", "SCHEMA_VERSION",
@@ -0,0 +1,124 @@
1
+ """Speaker-gated ("ducked") audio mix for multicam interviews.
2
+
3
+ The default render takes a single camera's mic as the soundtrack. With close,
4
+ bleeding mics that's noisy: every mic also picks up the *other* speaker plus room
5
+ tone, so a constant sum sounds muddy and loudness-normalization pumps the bleed
6
+ up during pauses. This builds a cleaner track instead: per moment, keep only the
7
+ ACTIVE speaker's mic at full level and duck the rest.
8
+
9
+ Who's active is decided by each mic's energy relative to ITS OWN baseline (not
10
+ absolute level) — that's what tracks the talker despite a louder close mic
11
+ bleeding. Far/room mics (e.g. a wide establishing cam) are auto-excluded: any
12
+ mic whose overall level is >`exclude_db` below the loudest is dropped as an
13
+ audio candidate so the reverby room mic is never selected.
14
+
15
+ `build_ducked_audio` returns a finished wav (gated → high-pass → light denoise →
16
+ loudness-normalized). The renderers use it in place of the single-cam audio when
17
+ `--duck-audio` is passed.
18
+ """
19
+ import subprocess
20
+ import tempfile
21
+ from pathlib import Path
22
+
23
+ import numpy as np
24
+
25
+ from .. import audio
26
+
27
+
28
def _aligned_mic(path, delta, sr, n):
    """Extract one camera's mic track and place it on the reference timeline.

    The audio is decoded to a scratch ``.pcm`` file via ``audio.extract_pcm``
    and read back with ``audio.read_pcm``. It is then shifted by `delta`
    seconds so that sample index ``t`` corresponds to reference second
    ``t / sr``, and finally padded with zeros or truncated to exactly `n`
    samples.

    Args:
        path: source media file handed to ``audio.extract_pcm``.
        delta: offset in seconds; positive prepends silence, negative drops
            the leading samples.
        sr: sample rate passed to the extractor and used to convert `delta`
            to samples.
        n: length of the returned array in samples.

    Returns:
        A 1-D array of length `n`.
    """
    with tempfile.NamedTemporaryFile(suffix=".pcm", delete=False) as tf:
        tmp = tf.name
    try:
        audio.extract_pcm(path, tmp, sr)  # loudest stream, mono
        x = audio.read_pcm(tmp)
    finally:
        # Remove the scratch file even when extraction or reading fails, so a
        # bad input does not leave PCM dumps behind in the temp directory.
        Path(tmp).unlink(missing_ok=True)

    pad = int(round(delta * sr))
    if pad > 0:
        x = np.concatenate([np.zeros(pad, np.float32), x])
    elif pad < 0:
        x = x[-pad:]
    if len(x) < n:
        x = np.pad(x, (0, n - len(x)))
    return x[:n]
44
+
45
+
46
def build_ducked_audio(inputs, deltas, coverage, duration, out_path, sr=48000,
                       duck_db=-18.0, frame_ms=100.0, margin=0.20,
                       exclude_db=14.0, audio_cams=None, verbose=True):
    """Write a speaker-gated, cleaned wav to `out_path`. Returns out_path.

    Every input's mic is aligned to the reference timeline, one "active" mic
    is chosen per analysis frame, and the others are attenuated by `duck_db`.
    The mix is peak-normalized, dumped as 16-bit PCM and handed to ffmpeg for
    high-pass, denoise and loudness normalization.

    Args:
        inputs: media paths, one per camera.
        deltas: per-input offsets in seconds (paired with `inputs` by zip).
        coverage: per-camera ``(start, end)`` in reference seconds; cameras
            without an entry are treated as covering ``(0, duration)``.
        duration: length of the output in seconds.
        out_path: destination wav path.
        sr: working sample rate.
        duck_db: gain in dB applied to the non-active mics.
        frame_ms: analysis frame length in milliseconds.
        margin: accepted for interface compatibility; not used in this body.
        exclude_db: when `audio_cams` is not given, mics more than this many
            dB below the loudest are dropped as candidates.
        audio_cams: explicit list of cam indices to gate among — use it to
            exclude a wide/room mic that sits at a similar level to a real
            speaker mic. Out-of-range indices are ignored. If empty or None,
            candidates are chosen by level using `exclude_db`.
        verbose: print the measured levels and the chosen candidates.

    Raises:
        ValueError: if there are no inputs, no usable candidate mic, or the
            duration / frame size leave no analysis frame to work with.
        subprocess.CalledProcessError: if ffmpeg fails.
    """
    n = int(duration * sr)
    hop = int(frame_ms / 1000 * sr)
    # Validate the framing before doing any (slow) audio extraction.
    if hop < 1:
        raise ValueError("frame_ms=%r is shorter than one sample at sr=%r"
                         % (frame_ms, sr))
    nf = n // hop
    if nf < 1:
        raise ValueError("duration=%r is shorter than one analysis frame "
                         "(frame_ms=%r)" % (duration, frame_ms))

    mics = [_aligned_mic(p, d, sr, n) for p, d in zip(inputs, deltas)]
    if not mics:
        raise ValueError("no inputs to build a ducked mix from")

    # Overall RMS level per mic in dB; the epsilon keeps silence finite.
    lvl = np.array([20 * np.log10(np.sqrt(np.mean(m ** 2)) + 1e-6) for m in mics])
    if audio_cams:
        keep = [k for k in audio_cams if 0 <= k < len(mics)]
    else:
        keep = [k for k in range(len(mics)) if lvl[k] >= lvl.max() - exclude_db]
    if verbose:
        print(" mic levels(dB): %s; audio candidates: %s"
              % ([round(float(x), 1) for x in lvl], keep))
    if not keep:
        # Previously this surfaced later as a bare IndexError on keep[0].
        raise ValueError("no candidate mics left: audio_cams=%r selects none of "
                         "the %d inputs" % (audio_cams, len(mics)))

    def frame_logE(x):
        # Per-frame log energy: log(sqrt(mean(x^2) + 1) + 1) over each hop.
        return np.array([np.log(np.sqrt(np.mean(x[i*hop:(i+1)*hop] ** 2) + 1) + 1)
                         for i in range(nf)])

    # coverage mask per cam (frames where the cam has valid footage in ref time)
    cov = np.zeros((len(mics), nf), dtype=bool)
    for k in range(len(mics)):
        s, e = coverage[k] if k < len(coverage) else (0.0, duration)
        cov[k, max(0, int(s / (frame_ms / 1000))): int(e / (frame_ms / 1000))] = True

    # baseline-normalized energy per candidate: each mic is compared against
    # its own median so a hot mic does not win every frame.
    norm = np.full((len(mics), nf), -1e9)
    for k in keep:
        E = frame_logE(mics[k])
        base = np.median(E[cov[k]]) if cov[k].any() else np.median(E)
        norm[k] = np.where(cov[k], E - base, -1e9)

    # active cam per frame = argmax normalized among covered candidates;
    # frames nobody covers fall back to the first candidate.
    active = np.full(nf, keep[0], dtype=int)
    for f in range(nf):
        vals = [(norm[k, f], k) for k in keep if cov[k, f]]
        if vals:
            active[f] = max(vals)[1]

    # gain mask per cam (active=1 else duck), smoothed to crossfade
    duck = 10 ** (duck_db / 20.0)
    out = np.zeros(n, dtype=np.float32)
    # 0.2 s moving average. Clamp to the signal length: with a kernel longer
    # than the signal, np.convolve(..., "same") returns the kernel's length
    # and the multiply below would no longer line up.
    klen = max(1, min(int(0.2 * sr), n))
    ker = np.ones(klen) / klen
    for k in range(len(mics)):
        if k not in keep:
            continue
        gf = np.where(active == k, 1.0, duck)
        gs = np.repeat(gf, hop)
        gs = np.pad(gs, (0, n - len(gs)), mode="edge")
        gs = np.convolve(gs, ker, "same")
        out += mics[k] * gs

    # Peak-normalize into the int16 range with a little headroom.
    pk = np.max(np.abs(out))
    if pk > 0:
        out *= 0.95 * 32767 / pk
    with tempfile.NamedTemporaryFile(suffix=".pcm", delete=False) as tf:
        raw = tf.name
    try:
        out.astype(np.int16).tofile(raw)

        # high-pass rumble, light FFT denoise, loudness-normalize -> finished wav
        subprocess.run(
            ["ffmpeg", "-nostdin", "-y", "-hide_banner", "-loglevel", "error",
             "-f", "s16le", "-ar", str(sr), "-ac", "1", "-i", raw,
             "-af", "highpass=f=70,afftdn=nr=10,loudnorm=I=-16:TP=-1.5:LRA=11",
             "-ar", str(sr), "-ac", "2", str(out_path)],
            check=True,
        )
    finally:
        # Drop the intermediate PCM even when ffmpeg exits non-zero.
        Path(raw).unlink(missing_ok=True)
    return out_path
@@ -0,0 +1,110 @@
1
+ """Color grading + orientation helpers for the renderers.
2
+
3
+ Raw camera footage almost never renders correctly straight off the card. Two
4
+ things bite every time and are handled here:
5
+
6
+ 1. **Log color.** Sony cameras (FX3/FX6) shoot S-Log3 / S-Gamut3.Cine by default
7
+ — flat, grey, low-contrast. It MUST be converted to Rec.709 with a LUT or it
8
+ looks broken. Check the `.XML` sidecar's `CaptureGammaEquation` (`s-log3-cine`)
9
+ or run `ffprobe ... color_transfer`. `--log slog3` generates and applies the
10
+ conversion LUT for you.
11
+ 2. **Orientation.** Phones / vertically-mounted cameras record rotated. Some
12
+ (FX3) write a rotation flag and ffmpeg auto-rotates; others (FX6 turned on its
13
+ side) write NO flag and come out lying down. `--rotate cam:deg` fixes those.
14
+
15
+ Performance note baked into `segment_filter`: the LUT is applied AFTER the
16
+ downscale, not before. A 3D LUT on 4K (8 MP) is ~4x slower than on 1080p — and
17
+ the result is visually identical. Always scale, then grade.
18
+ """
19
+ import os
20
+ import tempfile
21
+
22
+ import numpy as np
23
+
24
+
25
def make_slog3_709_lut(path, size=33):
    """Generate a Sony S-Log3 / S-Gamut3.Cine -> Rec.709 3D LUT as a .cube file.

    The lattice has `size` nodes per axis and is written to `path` with red
    varying fastest, then green, then blue. Returns `path`.
    """
    knee = 171.2102946929  # 10-bit code value where the S-Log3 curve switches segment

    # S-Gamut3.Cine -> Rec.709 (linear) matrix, D65
    gamut = np.array([[1.6269, -0.3576, -0.2693],
                      [-0.0928, 1.3478, -0.2550],
                      [0.0387, -0.1622, 1.1235]])

    def decode(code):
        # code in [0,1] == 10-bit code value / 1023
        cv = code * 1023.0
        log_segment = (10 ** ((cv - 420.0) / 261.5)) * 0.19 - 0.01
        linear_segment = (cv - 95.0) * 0.01125000 / (knee - 95.0)
        return np.where(cv >= knee, log_segment, linear_segment)

    def encode(linear):
        # Rec.709 OETF on values clipped to [0, 1]
        clipped = np.clip(linear, 0, 1)
        return np.where(clipped < 0.018,
                        4.5 * clipped,
                        1.099 * np.power(clipped, 0.45) - 0.099)

    top = size - 1
    rows = [
        "TITLE \"SLog3 SGamut3Cine to Rec709\"",
        f"LUT_3D_SIZE {size}",
        "DOMAIN_MIN 0 0 0",
        "DOMAIN_MAX 1 1 1",
    ]
    rows.extend(
        "%.6f %.6f %.6f" % tuple(encode(gamut @ decode(np.array([red, green, blue]) / top)))
        for blue in range(size)
        for green in range(size)
        for red in range(size)
    )
    with open(path, "w") as fh:
        fh.write("\n".join(rows) + "\n")
    return path
52
+
53
+
54
+ # Built-in log profiles -> on-the-fly LUT generators. Cached in tempdir so
55
+ # repeated render calls in one session don't regenerate.
56
+ _BUILTIN = {"slog3": make_slog3_709_lut}
57
+
58
+
59
def resolve_lut(lut=None, log=None):
    """Return a .cube path: explicit `lut` file wins; else generate from `log`.

    Args:
        lut: path to a user-supplied LUT; returned unchanged when truthy.
        log: name of a built-in log profile (case-insensitive key of
            `_BUILTIN`); ignored when `lut` is given.

    Returns:
        The LUT path, or None when neither argument is set.

    Raises:
        SystemExit: if `log` is not a known built-in profile.
    """
    if lut:
        return lut
    if not log:
        return None
    key = log.lower()
    if key not in _BUILTIN:
        raise SystemExit("unknown --log %r (known: %s)" % (log, ", ".join(_BUILTIN)))
    cache = os.path.join(tempfile.gettempdir(), "polysync_%s_709.cube" % key)
    if not os.path.exists(cache):
        # Generate next to the final path and publish with an atomic rename.
        # Writing straight to `cache` would let an interrupted run leave a
        # truncated file that every later call accepts as a valid LUT.
        partial = "%s.%d.tmp" % (cache, os.getpid())
        try:
            _BUILTIN[key](partial)
            os.replace(partial, cache)
        finally:
            # Only still present if generation or the rename failed.
            if os.path.exists(partial):
                os.remove(partial)
    return cache
72
+
73
+
74
def parse_rotate(values):
    """Parse repeatable `--rotate cam:deg` into {cam_index: degrees}.

    Degrees must be a multiple of 90 (e.g. 90, 180, 270, -90); 90 = clockwise.
    A later entry for the same camera overrides an earlier one.

    Args:
        values: iterable of ``"CAM:DEG"`` strings, or None.

    Returns:
        Dict mapping camera index to the requested rotation in degrees.

    Raises:
        ValueError: if an entry is not ``CAM:DEG`` with integer parts, the
            camera index is negative, or the angle is not a multiple of 90.
    """
    out = {}
    for v in (values or []):
        cam, _, deg = v.partition(":")
        try:
            cam_idx, angle = int(cam), int(deg)
        except ValueError:
            raise ValueError(
                "bad --rotate %r (expected CAM:DEG, e.g. 1:90)" % (v,)) from None
        if cam_idx < 0:
            raise ValueError("bad --rotate %r (camera index must be >= 0)" % (v,))
        # Anything else would be dropped without notice when the rotation is
        # turned into a filter fragment, so reject it here where the user sees it.
        if angle % 90 != 0:
            raise ValueError(
                "bad --rotate %r (degrees must be a multiple of 90)" % (v,))
        out[cam_idx] = angle
    return out
82
+
83
+
84
def _transpose_chain(deg):
    """Return the ffmpeg filter fragment (with trailing comma) that rotates
    `deg` degrees clockwise. Angles are taken modulo 360; anything other than
    90, 180 or 270 yields an empty fragment."""
    by_angle = {
        90: "transpose=1,",
        180: "transpose=1,transpose=1,",
        270: "transpose=2,",
    }
    return by_angle.get(deg % 360, "")


def segment_filter(cam, start, end, idx, W, H, fps, rotate_deg=0, lut=None,
                   pip=False):
    """Assemble the video filter chain for a single segment, labelled [v<idx>].

    Stages, in order: trim -> rotate -> scale -> crop/pad -> LUT (after the
    downscale, for speed) -> sar -> fps. `pip=True` scales up and crops so the
    frame fills WxH; otherwise it scales down and pads to WxH.
    """
    if pip:
        aspect_mode = "increase"
        fit_tail = "crop=%d:%d" % (W, H)
    else:
        aspect_mode = "decrease"
        fit_tail = "pad=%d:%d:(ow-iw)/2:(oh-ih)/2" % (W, H)

    pieces = [
        "[%d:v]trim=start=%s:end=%s,setpts=PTS-STARTPTS," % (cam, start, end),
        _transpose_chain(rotate_deg),
        "scale=%d:%d:force_original_aspect_ratio=%s,%s," % (W, H, aspect_mode, fit_tail),
        ("lut3d=%s," % lut) if lut else "",
        "setsar=1,fps=%d[v%d]" % (fps, idx),
    ]
    return "".join(pieces)
@@ -0,0 +1,112 @@
1
+ """Render an autoedit EDL into one MP4 with hard cuts (no transitions / PiP).
2
+
3
+ Applies each input's `delta` via `ffmpeg -itsoffset` so EDL times (reference
4
+ timeline) work directly inside the filter graph — originals are read untouched.
5
+
6
+ Raw footage usually needs `--log slog3` (Sony S-Log3 -> Rec.709 grade) and, for
7
+ vertically-shot cameras with no rotation flag, `--rotate cam:90`. For vertical
8
+ delivery (小红书 / Reels / Shorts) pass `--width 1080 --height 1920 --fill`.
9
+ """
10
+ import argparse
11
+ import json
12
+ import subprocess
13
+ import tempfile
14
+ from pathlib import Path
15
+
16
+ from .grade import resolve_lut, parse_rotate, segment_filter
17
+ from .audiomix import build_ducked_audio
18
+
19
+
20
def render_cuts(edl_path, out, encoder="hevc_videotoolbox", bitrate="12M",
                width=1920, height=1080, fps=30, lut=None, log=None,
                rotate=None, fill=False, duck_audio=False, duck_db=-18.0,
                audio_cams=None, run=True):
    """Build (and optionally run) the ffmpeg command that renders an EDL with hard cuts.

    Args:
        edl_path: JSON plan with keys ``inputs``, ``edl``, ``audio_source``,
            ``duration_sec`` and optionally ``deltas`` / ``coverage``.
        out: output file path.
        encoder: ffmpeg video encoder name.
        bitrate: video bitrate passed to ``-b:v``.
        width, height, fps: output geometry and frame rate.
        lut: explicit .cube path; takes precedence over `log`.
        log: built-in log profile name used to generate a LUT.
        rotate: list of ``"CAM:DEG"`` strings for per-camera rotation.
        fill: crop to fill the frame instead of letterbox-padding.
        duck_audio: replace the single-camera soundtrack with the
            speaker-gated mix from `build_ducked_audio`.
        duck_db: attenuation of inactive mics for the ducked mix.
        audio_cams: cam indices to gate among for the ducked mix.
        run: execute ffmpeg; when False the command is only built.

    Returns:
        The ffmpeg argument list. With ``run=False`` and ``duck_audio=True``
        the list references a temporary ducked wav that is left on disk for
        the caller to use.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits non-zero.
    """
    plan = json.loads(Path(edl_path).read_text())
    inputs = plan["inputs"]
    deltas = plan.get("deltas", [0.0] * len(inputs))
    edl = plan["edl"]
    audio_src = plan["audio_source"]
    duration = plan["duration_sec"]
    W, H = width, height
    lut_path = resolve_lut(lut, log)
    rot = parse_rotate(rotate)

    ducked_wav = None
    keep_ducked = False
    try:
        # Speaker-gated soundtrack: build a cleaned wav up front, use it as the audio.
        # NOTE(review): the ducked wav is built from reference time 0, while the
        # single-camera path below trims audio from edl[0]["start"]. If an EDL
        # can start later than 0 the ducked track would be offset — confirm.
        if duck_audio:
            coverage = plan.get("coverage", [[0.0, duration]] * len(inputs))
            ducked_wav = str(Path(tempfile.mkdtemp()) / "ducked.wav")
            build_ducked_audio(inputs, deltas, coverage, duration, ducked_wav,
                               duck_db=duck_db, audio_cams=audio_cams)

        cmd = ["ffmpeg", "-nostdin", "-y"]
        for src, dlt in zip(inputs, deltas):
            if abs(dlt) > 1e-9:
                cmd.extend(["-itsoffset", "%.6f" % dlt])
            cmd.extend(["-i", src])
        if ducked_wav:
            cmd.extend(["-i", ducked_wav])  # extra input, already ref-aligned

        filters = [
            segment_filter(row["cam"], row["start"], row["end"], i, W, H, fps,
                           rotate_deg=rot.get(row["cam"], 0), lut=lut_path, pip=fill)
            for i, row in enumerate(edl)
        ]
        concat = "".join("[v%d]" % i for i in range(len(edl)))
        filters.append("%sconcat=n=%d:v=1:a=0[vout]" % (concat, len(edl)))

        if ducked_wav:
            # The ducked wav is the input right after the cameras.
            audio_map = "%d:a:0" % len(inputs)
        else:
            audio_offset = edl[0]["start"] if edl else 0.0
            filters.append(
                "[%d:a:0]atrim=start=%s:duration=%s,asetpts=PTS-STARTPTS[aout]"
                % (audio_src, audio_offset, duration))
            audio_map = "[aout]"

        cmd.extend(["-filter_complex", ";".join(filters),
                    "-map", "[vout]", "-map", audio_map])
        cmd.extend(["-t", str(duration), "-c:v", encoder, "-b:v", bitrate])
        # hvc1 is an HEVC sample-entry tag; the MP4 muxer rejects it for other
        # codecs, so only request it when the encoder actually produces HEVC.
        enc_name = str(encoder).lower()
        if "hevc" in enc_name or "265" in enc_name:
            cmd.extend(["-tag:v", "hvc1"])
        cmd.extend([
            "-c:a", "aac", "-b:a", "192k",
            "-movflags", "+faststart", str(out),
        ])

        if run:
            print(" ".join(cmd))
            subprocess.run(cmd, check=True)
        else:
            # The caller will run the command later and still needs the wav.
            keep_ducked = ducked_wav is not None
        return cmd
    finally:
        if ducked_wav and not keep_ducked:
            # Remove the scratch wav and its directory after rendering or on failure.
            wav = Path(ducked_wav)
            wav.unlink(missing_ok=True)
            try:
                wav.parent.rmdir()
            except OSError:
                pass
78
+
79
+
80
def main(argv=None):
    """Command-line entry point: parse `argv` and hand the options to render_cuts."""
    parser = argparse.ArgumentParser(prog="polysync render-cuts")
    parser.add_argument("edl", type=Path)
    parser.add_argument("--out", type=Path, required=True)

    # Plain string options, then integer geometry options, in help order.
    for flag, default in (("--encoder", "hevc_videotoolbox"), ("--bitrate", "12M")):
        parser.add_argument(flag, default=default)
    for flag, default in (("--width", 1920), ("--height", 1080), ("--fps", 30)):
        parser.add_argument(flag, type=int, default=default)

    parser.add_argument("--lut", help="3D LUT (.cube) applied after downscale")
    parser.add_argument("--log", help="built-in log->Rec.709 grade (e.g. slog3)")
    parser.add_argument(
        "--rotate", action="append",
        help="per-cam rotation CAM:DEG (90=CW), repeatable")
    parser.add_argument(
        "--fill", action="store_true",
        help="crop to fill instead of letterbox-pad (use for vertical)")
    parser.add_argument(
        "--duck-audio", action="store_true",
        help="speaker-gated soundtrack: keep the active speaker's mic, "
             "duck the rest (cleaner than a single-cam mic for interviews)")
    parser.add_argument(
        "--duck-db", type=float, default=-18.0,
        help="level of ducked (inactive) mics, dB (default -18)")
    parser.add_argument(
        "--audio-cams",
        help="comma-separated cam indices to gate among (e.g. 0,1) — "
             "exclude wide/room mics; default = auto by level")

    opts = parser.parse_args(argv)

    # "--audio-cams 0,1" -> [0, 1]; absent or empty -> None (auto by level).
    cams = None
    if opts.audio_cams:
        cams = [int(token) for token in opts.audio_cams.split(",")]

    render_cuts(
        opts.edl, opts.out,
        encoder=opts.encoder, bitrate=opts.bitrate,
        width=opts.width, height=opts.height, fps=opts.fps,
        lut=opts.lut, log=opts.log, rotate=opts.rotate, fill=opts.fill,
        duck_audio=opts.duck_audio, duck_db=opts.duck_db, audio_cams=cams,
    )
111
+ if __name__ == "__main__":
112
+ main()
@@ -9,8 +9,12 @@ Per-segment EDL rows may carry a `pip` field (cam index) to override the picker.
9
9
  import argparse
10
10
  import json
11
11
  import subprocess
12
+ import tempfile
12
13
  from pathlib import Path
13
14
 
15
+ from .grade import resolve_lut, parse_rotate, _transpose_chain
16
+ from .audiomix import build_ducked_audio
17
+
14
18
  POSITIONS = {
15
19
  "bottom-right": ("W-w-{m}", "H-h-{m}"),
16
20
  "top-right": ("W-w-{m}", "{m}"),
@@ -41,14 +45,25 @@ def pick_pip(row, K, coverage, mode="next"):
41
45
  def render_pip(edl_path, out, encoder="hevc_videotoolbox", bitrate="12M",
42
46
  width=1920, height=1080, fps=30, pip="bottom-right",
43
47
  pip_width=480, pip_margin=24, border_px=4, pip_pick="next",
44
- run=True):
48
+ lut=None, log=None, rotate=None, duck_audio=False, duck_db=-18.0,
49
+ audio_cams=None, run=True):
45
50
  plan = json.loads(Path(edl_path).read_text())
46
51
  inputs = plan["inputs"]
47
52
  deltas = plan.get("deltas", [0.0] * len(inputs))
48
53
  edl = plan["edl"]
49
54
  audio_src = plan["audio_source"]
55
+ duration = plan["duration_sec"]
50
56
  K = len(inputs)
51
- coverage = plan.get("coverage", [[0.0, plan["duration_sec"]]] * K)
57
+ coverage = plan.get("coverage", [[0.0, duration]] * K)
58
+ lut_path = resolve_lut(lut, log)
59
+ rot = parse_rotate(rotate)
60
+ grade = ("lut3d=%s," % lut_path) if lut_path else ""
61
+
62
+ ducked_wav = None
63
+ if duck_audio:
64
+ ducked_wav = str(Path(tempfile.mkdtemp()) / "ducked.wav")
65
+ build_ducked_audio(inputs, deltas, coverage, duration, ducked_wav,
66
+ duck_db=duck_db, audio_cams=audio_cams)
52
67
 
53
68
  W, H = width, height
54
69
  pw = pip_width
@@ -63,6 +78,8 @@ def render_pip(edl_path, out, encoder="hevc_videotoolbox", bitrate="12M",
63
78
  if abs(dlt) > 1e-9:
64
79
  cmd.extend(["-itsoffset", "%.6f" % dlt])
65
80
  cmd.extend(["-i", src])
81
+ if ducked_wav:
82
+ cmd.extend(["-i", ducked_wav]) # extra input, already ref-aligned
66
83
 
67
84
  filters = []
68
85
  for i, row in enumerate(edl):
@@ -70,10 +87,11 @@ def render_pip(edl_path, out, encoder="hevc_videotoolbox", bitrate="12M",
70
87
  s, e = row["start"], row["end"]
71
88
  main_label = "m%d" % i if K > 1 else "v%d" % i
72
89
  filters.append(
73
- "[%d:v]trim=start=%s:end=%s,setpts=PTS-STARTPTS,"
90
+ "[%d:v]trim=start=%s:end=%s,setpts=PTS-STARTPTS,%s"
74
91
  "scale=%d:%d:force_original_aspect_ratio=decrease,"
75
- "pad=%d:%d:(ow-iw)/2:(oh-ih)/2,setsar=1,fps=%d[%s]"
76
- % (cam, s, e, W, H, W, H, fps, main_label)
92
+ "pad=%d:%d:(ow-iw)/2:(oh-ih)/2,%ssetsar=1,fps=%d[%s]"
93
+ % (cam, s, e, _transpose_chain(rot.get(cam, 0)),
94
+ W, H, W, H, grade, fps, main_label)
77
95
  )
78
96
  if K == 1:
79
97
  continue
@@ -82,10 +100,11 @@ def render_pip(edl_path, out, encoder="hevc_videotoolbox", bitrate="12M",
82
100
  filters.append("[m%d]copy[v%d]" % (i, i))
83
101
  continue
84
102
  chain = (
85
- "[%d:v]trim=start=%s:end=%s,setpts=PTS-STARTPTS,"
103
+ "[%d:v]trim=start=%s:end=%s,setpts=PTS-STARTPTS,%s"
86
104
  "scale=%d:%d:force_original_aspect_ratio=decrease,"
87
- "pad=%d:%d:(ow-iw)/2:(oh-ih)/2,"
88
- % (pip_cam, s, e, pw, ph, pw, ph)
105
+ "pad=%d:%d:(ow-iw)/2:(oh-ih)/2,%s"
106
+ % (pip_cam, s, e, _transpose_chain(rot.get(pip_cam, 0)),
107
+ pw, ph, pw, ph, grade)
89
108
  )
90
109
  if bw > 0:
91
110
  chain += "pad=%d:%d:%d:%d:white," % (pw + 2 * bw, ph + 2 * bw, bw, bw)
@@ -96,14 +115,19 @@ def render_pip(edl_path, out, encoder="hevc_videotoolbox", bitrate="12M",
96
115
 
97
116
  concat = "".join("[v%d]" % i for i in range(len(edl)))
98
117
  filters.append("%sconcat=n=%d:v=1:a=0[vout]" % (concat, len(edl)))
99
- audio_offset = edl[0]["start"] if edl else 0.0
100
- dur = plan["duration_sec"]
118
+ dur = duration
101
119
  fc = ";".join(filters)
102
- fc += (";[%d:a:0]atrim=start=%s:duration=%s,asetpts=PTS-STARTPTS[aout]"
103
- % (audio_src, audio_offset, dur))
120
+ cmd.extend(["-filter_complex", None, "-map", "[vout]"]) # fc filled below
121
+ if ducked_wav:
122
+ cmd[cmd.index("-filter_complex") + 1] = fc
123
+ cmd.extend(["-map", "%d:a:0" % K])
124
+ else:
125
+ audio_offset = edl[0]["start"] if edl else 0.0
126
+ fc += (";[%d:a:0]atrim=start=%s:duration=%s,asetpts=PTS-STARTPTS[aout]"
127
+ % (audio_src, audio_offset, dur))
128
+ cmd[cmd.index("-filter_complex") + 1] = fc
129
+ cmd.extend(["-map", "[aout]"])
104
130
  cmd.extend([
105
- "-filter_complex", fc,
106
- "-map", "[vout]", "-map", "[aout]",
107
131
  "-t", str(dur),
108
132
  "-c:v", encoder, "-b:v", bitrate, "-tag:v", "hvc1",
109
133
  "-c:a", "aac", "-b:a", "192k",
@@ -130,11 +154,24 @@ def main(argv=None):
130
154
  ap.add_argument("--pip-margin", type=int, default=24)
131
155
  ap.add_argument("--border-px", type=int, default=4)
132
156
  ap.add_argument("--pip-pick", choices=["next", "second-best"], default="next")
157
+ ap.add_argument("--lut", help="3D LUT (.cube) applied after downscale")
158
+ ap.add_argument("--log", help="built-in log->Rec.709 grade (e.g. slog3)")
159
+ ap.add_argument("--rotate", action="append",
160
+ help="per-cam rotation CAM:DEG (90=CW), repeatable")
161
+ ap.add_argument("--duck-audio", action="store_true",
162
+ help="speaker-gated soundtrack (keep active speaker's mic, duck rest)")
163
+ ap.add_argument("--duck-db", type=float, default=-18.0,
164
+ help="level of ducked (inactive) mics, dB (default -18)")
165
+ ap.add_argument("--audio-cams",
166
+ help="comma-separated cam indices to gate among (e.g. 0,1)")
133
167
  args = ap.parse_args(argv)
168
+ cams = [int(x) for x in args.audio_cams.split(",")] if args.audio_cams else None
134
169
  render_pip(args.edl, args.out, encoder=args.encoder, bitrate=args.bitrate,
135
170
  width=args.width, height=args.height, fps=args.fps, pip=args.pip,
136
171
  pip_width=args.pip_width, pip_margin=args.pip_margin,
137
- border_px=args.border_px, pip_pick=args.pip_pick)
172
+ border_px=args.border_px, pip_pick=args.pip_pick,
173
+ lut=args.lut, log=args.log, rotate=args.rotate,
174
+ duck_audio=args.duck_audio, duck_db=args.duck_db, audio_cams=cams)
138
175
 
139
176
 
140
177
  if __name__ == "__main__":
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: polysync
3
- Version: 0.1.0
3
+ Version: 0.3.0
4
4
  Summary: Multicam audio sync and director-style auto-edit — align N angles of one event by audio cross-correlation, then cut/PiP them into one MP4. Reversible sidecars, never re-encodes the originals.
5
5
  Author: 王建硕 (Jian Shuo Wang)
6
6
  License: MIT
@@ -14,7 +14,9 @@ src/polysync.egg-info/entry_points.txt
14
14
  src/polysync.egg-info/requires.txt
15
15
  src/polysync.egg-info/top_level.txt
16
16
  src/polysync/edit/__init__.py
17
+ src/polysync/edit/audiomix.py
17
18
  src/polysync/edit/autoedit.py
19
+ src/polysync/edit/grade.py
18
20
  src/polysync/edit/render_cuts.py
19
21
  src/polysync/edit/render_pip.py
20
22
  tests/test_sync_synthetic.py
@@ -1,72 +0,0 @@
1
- """Render an autoedit EDL into one MP4 with hard cuts (no transitions / PiP).
2
-
3
- Applies each input's `delta` via `ffmpeg -itsoffset` so EDL times (reference
4
- timeline) work directly inside the filter graph — originals are read untouched.
5
- """
6
- import argparse
7
- import json
8
- import subprocess
9
- from pathlib import Path
10
-
11
-
12
- def render_cuts(edl_path, out, encoder="hevc_videotoolbox", bitrate="12M",
13
- width=1920, height=1080, fps=30, run=True):
14
- plan = json.loads(Path(edl_path).read_text())
15
- inputs = plan["inputs"]
16
- deltas = plan.get("deltas", [0.0] * len(inputs))
17
- edl = plan["edl"]
18
- audio_src = plan["audio_source"]
19
- W, H = width, height
20
-
21
- cmd = ["ffmpeg", "-nostdin", "-y"]
22
- for src, dlt in zip(inputs, deltas):
23
- if abs(dlt) > 1e-9:
24
- cmd.extend(["-itsoffset", "%.6f" % dlt])
25
- cmd.extend(["-i", src])
26
-
27
- filters = []
28
- for i, row in enumerate(edl):
29
- filters.append(
30
- "[%d:v]trim=start=%s:end=%s,setpts=PTS-STARTPTS,"
31
- "scale=%d:%d:force_original_aspect_ratio=decrease,"
32
- "pad=%d:%d:(ow-iw)/2:(oh-ih)/2,setsar=1,fps=%d[v%d]"
33
- % (row["cam"], row["start"], row["end"], W, H, W, H, fps, i)
34
- )
35
- concat = "".join("[v%d]" % i for i in range(len(edl)))
36
- filters.append("%sconcat=n=%d:v=1:a=0[vout]" % (concat, len(edl)))
37
- fc = ";".join(filters)
38
-
39
- audio_offset = edl[0]["start"] if edl else 0.0
40
- duration = plan["duration_sec"]
41
- fc += (";[%d:a:0]atrim=start=%s:duration=%s,asetpts=PTS-STARTPTS[aout]"
42
- % (audio_src, audio_offset, duration))
43
- cmd.extend([
44
- "-filter_complex", fc,
45
- "-map", "[vout]", "-map", "[aout]",
46
- "-t", str(duration),
47
- "-c:v", encoder, "-b:v", bitrate, "-tag:v", "hvc1",
48
- "-c:a", "aac", "-b:a", "192k",
49
- "-movflags", "+faststart", str(out),
50
- ])
51
- if run:
52
- print(" ".join(cmd))
53
- subprocess.run(cmd, check=True)
54
- return cmd
55
-
56
-
57
- def main(argv=None):
58
- ap = argparse.ArgumentParser(prog="polysync render-cuts")
59
- ap.add_argument("edl", type=Path)
60
- ap.add_argument("--out", type=Path, required=True)
61
- ap.add_argument("--encoder", default="hevc_videotoolbox")
62
- ap.add_argument("--bitrate", default="12M")
63
- ap.add_argument("--width", type=int, default=1920)
64
- ap.add_argument("--height", type=int, default=1080)
65
- ap.add_argument("--fps", type=int, default=30)
66
- args = ap.parse_args(argv)
67
- render_cuts(args.edl, args.out, encoder=args.encoder, bitrate=args.bitrate,
68
- width=args.width, height=args.height, fps=args.fps)
69
-
70
-
71
- if __name__ == "__main__":
72
- main()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes