PyPI - polysync - Versions diffs - 0.1.0__tar.gz - Mend

polysync 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

polysync-0.1.0/LICENSE +21 -0
polysync-0.1.0/PKG-INFO +115 -0
polysync-0.1.0/README.md +89 -0
polysync-0.1.0/pyproject.toml +43 -0
polysync-0.1.0/setup.cfg +4 -0
polysync-0.1.0/src/polysync/__init__.py +19 -0
polysync-0.1.0/src/polysync/audio.py +130 -0
polysync-0.1.0/src/polysync/cli.py +79 -0
polysync-0.1.0/src/polysync/edit/__init__.py +9 -0
polysync-0.1.0/src/polysync/edit/autoedit.py +321 -0
polysync-0.1.0/src/polysync/edit/render_cuts.py +72 -0
polysync-0.1.0/src/polysync/edit/render_pip.py +141 -0
polysync-0.1.0/src/polysync/sidecar.py +88 -0
polysync-0.1.0/src/polysync/sync.py +206 -0
polysync-0.1.0/src/polysync/verify.py +118 -0
polysync-0.1.0/src/polysync.egg-info/PKG-INFO +115 -0
polysync-0.1.0/src/polysync.egg-info/SOURCES.txt +20 -0
polysync-0.1.0/src/polysync.egg-info/dependency_links.txt +1 -0
polysync-0.1.0/src/polysync.egg-info/entry_points.txt +2 -0
polysync-0.1.0/src/polysync.egg-info/requires.txt +5 -0
polysync-0.1.0/src/polysync.egg-info/top_level.txt +1 -0
polysync-0.1.0/tests/test_sync_synthetic.py +89 -0

polysync-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 王建硕 (Jian Shuo Wang)
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

polysync-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,115 @@
+Metadata-Version: 2.4
+Name: polysync
+Version: 0.1.0
+Summary: Multicam audio sync and director-style auto-edit — align N angles of one event by audio cross-correlation, then cut/PiP them into one MP4. Reversible sidecars, never re-encodes the originals.
+Author: 王建硕 (Jian Shuo Wang)
+License: MIT
+Project-URL: Homepage, https://github.com/jianshuo/polysync
+Project-URL: Issues, https://github.com/jianshuo/polysync/issues
+Keywords: multicam,audio-sync,video-editing,cross-correlation,ffmpeg,picture-in-picture,podcast,interview
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Console
+Classifier: Intended Audience :: End Users/Desktop
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Topic :: Multimedia :: Video
+Classifier: Topic :: Multimedia :: Sound/Audio :: Analysis
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: numpy>=1.21
+Requires-Dist: scipy>=1.7
+Provides-Extra: dev
+Requires-Dist: pytest>=7; extra == "dev"
+Dynamic: license-file
+# polysync
+**Multicam audio sync + director-style auto-edit.** Align N recordings of the
+same event by audio cross-correlation, then cut or picture-in-picture them into
+a single MP4 — driven entirely by who's talking.
+What makes it different from "yet another sync tool":
+- **Reversible sidecars, never re-encodes the originals.** Sync writes a tiny
+  `<input>.sync.json` next to each file holding a single offset. A 75-min 4K
+  3-camera shoot is 250+ GB; baking offsets into re-encoded copies would double
+  that and lose quality. Downstream applies the offset with `ffmpeg -itsoffset`
+  at consume time. Originals are touched read-only, always.
+- **Envelope cross-correlation, not raw waveform.** Matches the log-energy
+  envelope, which both mics hear regardless of their frequency response — robust
+  even when a second camera's on-board mic sounds nothing like the main one.
+- **Clock-drift aware.** Cheap recorders drift 5–50 ppm; polysync fits the drift
+  across the recording and reports it separately, so long-form lip-sync can
+  correct it while camera-cut editing can ignore it.
+- **Handles the messy real cases.** Auto-picks the loudest audio track (pro
+  cameras often leave track 1 dead), supports partial-coverage clips that only
+  span part of the session, and offers independent verification of the result.
+## Install
+```bash
+pip install polysync          # once published
+# or, from a checkout:
+pip install -e ".[dev]"
+```
+Requires **Python ≥ 3.9** and **ffmpeg / ffprobe** on your `PATH`
+(`brew install ffmpeg`, `apt install ffmpeg`, …). Python deps: `numpy`, `scipy`.
+## Quickstart
+```bash
+# 1. Sync each angle to a reference camera (writes <file>.sync.json sidecars)
+polysync sync  CAM_A.mp4 CAM_B.mxf
+polysync sync  CAM_A.mp4 CAM_C.mxf
+# 2. (optional) Verify the alignment — re-checks residual independently
+polysync verify CAM_A.mp4 CAM_B.mxf CAM_B.mxf.sync.json
+# 3. Build an auto-edit decision list (who's on screen each second)
+polysync edit  CAM_A.mp4 CAM_B.mxf CAM_C.mxf --out edl.json
+# 4. Render — hard cuts, or with a picture-in-picture inset
+polysync render-cuts edl.json --out final.mp4
+polysync render-pip  edl.json --out final.mp4 --pip bottom-right
+```
+A clip that only covers **part** of the session (a Riverside / phone / lavalier
+recording that started mid-way):
+```bash
+polysync sync REFERENCE.mp4 PARTIAL.m4a --partial
+```
+## How it consumes the sidecar
+`delta_seconds` is the source's `t=0` in the reference's timeline (positive =
+source starts later). To align by hand:
+```bash
+ffmpeg -itsoffset $(jq -r .delta_seconds CAM_B.mxf.sync.json) -i CAM_B.mxf \
+       -i CAM_A.mp4 -filter_complex "[0:v][1:v]hstack" out.mp4
+```
+The `edit` / `render-*` commands read every sidecar automatically.
+## Python API
+```python
+from polysync import compute_sync           # pure-numpy core, unit-testable
+from polysync.sync import sync_files         # file → sidecar
+from polysync.verify import verify_files
+from polysync.edit import build_edl
+```
+## Status
+Beta (0.1). Sync + verify are battle-tested on real Sony FX3/FX6 multicam
+interview footage; the auto-edit is audio-energy-driven (no face detection).
+Issues and PRs welcome.
+## License
+MIT © 王建硕 (Jian Shuo Wang)

polysync-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,89 @@
+# polysync
+**Multicam audio sync + director-style auto-edit.** Align N recordings of the
+same event by audio cross-correlation, then cut or picture-in-picture them into
+a single MP4 — driven entirely by who's talking.
+What makes it different from "yet another sync tool":
+- **Reversible sidecars, never re-encodes the originals.** Sync writes a tiny
+  `<input>.sync.json` next to each file holding a single offset. A 75-min 4K
+  3-camera shoot is 250+ GB; baking offsets into re-encoded copies would double
+  that and lose quality. Downstream applies the offset with `ffmpeg -itsoffset`
+  at consume time. Originals are touched read-only, always.
+- **Envelope cross-correlation, not raw waveform.** Matches the log-energy
+  envelope, which both mics hear regardless of their frequency response — robust
+  even when a second camera's on-board mic sounds nothing like the main one.
+- **Clock-drift aware.** Cheap recorders drift 5–50 ppm; polysync fits the drift
+  across the recording and reports it separately, so long-form lip-sync can
+  correct it while camera-cut editing can ignore it.
+- **Handles the messy real cases.** Auto-picks the loudest audio track (pro
+  cameras often leave track 1 dead), supports partial-coverage clips that only
+  span part of the session, and offers independent verification of the result.
+## Install
+```bash
+pip install polysync          # once published
+# or, from a checkout:
+pip install -e ".[dev]"
+```
+Requires **Python ≥ 3.9** and **ffmpeg / ffprobe** on your `PATH`
+(`brew install ffmpeg`, `apt install ffmpeg`, …). Python deps: `numpy`, `scipy`.
+## Quickstart
+```bash
+# 1. Sync each angle to a reference camera (writes <file>.sync.json sidecars)
+polysync sync  CAM_A.mp4 CAM_B.mxf
+polysync sync  CAM_A.mp4 CAM_C.mxf
+# 2. (optional) Verify the alignment — re-checks residual independently
+polysync verify CAM_A.mp4 CAM_B.mxf CAM_B.mxf.sync.json
+# 3. Build an auto-edit decision list (who's on screen each second)
+polysync edit  CAM_A.mp4 CAM_B.mxf CAM_C.mxf --out edl.json
+# 4. Render — hard cuts, or with a picture-in-picture inset
+polysync render-cuts edl.json --out final.mp4
+polysync render-pip  edl.json --out final.mp4 --pip bottom-right
+```
+A clip that only covers **part** of the session (a Riverside / phone / lavalier
+recording that started mid-way):
+```bash
+polysync sync REFERENCE.mp4 PARTIAL.m4a --partial
+```
+## How it consumes the sidecar
+`delta_seconds` is the source's `t=0` in the reference's timeline (positive =
+source starts later). To align by hand:
+```bash
+ffmpeg -itsoffset $(jq -r .delta_seconds CAM_B.mxf.sync.json) -i CAM_B.mxf \
+       -i CAM_A.mp4 -filter_complex "[0:v][1:v]hstack" out.mp4
+```
+The `edit` / `render-*` commands read every sidecar automatically.
+## Python API
+```python
+from polysync import compute_sync           # pure-numpy core, unit-testable
+from polysync.sync import sync_files         # file → sidecar
+from polysync.verify import verify_files
+from polysync.edit import build_edl
+```
+## Status
+Beta (0.1). Sync + verify are battle-tested on real Sony FX3/FX6 multicam
+interview footage; the auto-edit is audio-energy-driven (no face detection).
+Issues and PRs welcome.
+## License
+MIT © 王建硕 (Jian Shuo Wang)

polysync-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,43 @@
+[build-system]
+requires = ["setuptools>=61"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "polysync"
+version = "0.1.0"
+description = "Multicam audio sync and director-style auto-edit — align N angles of one event by audio cross-correlation, then cut/PiP them into one MP4. Reversible sidecars, never re-encodes the originals."
+readme = "README.md"
+requires-python = ">=3.9"
+license = { text = "MIT" }
+authors = [{ name = "王建硕 (Jian Shuo Wang)" }]
+keywords = ["multicam", "audio-sync", "video-editing", "cross-correlation", "ffmpeg", "picture-in-picture", "podcast", "interview"]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Environment :: Console",
+    "Intended Audience :: End Users/Desktop",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Topic :: Multimedia :: Video",
+    "Topic :: Multimedia :: Sound/Audio :: Analysis",
+]
+dependencies = [
+    "numpy>=1.21",
+    "scipy>=1.7",
+]
+[project.optional-dependencies]
+dev = ["pytest>=7"]
+[project.urls]
+Homepage = "https://github.com/jianshuo/polysync"
+Issues = "https://github.com/jianshuo/polysync/issues"
+[project.scripts]
+polysync = "polysync.cli:main"
+[tool.setuptools.packages.find]
+where = ["src"]
+[tool.pytest.ini_options]
+testpaths = ["tests"]

polysync-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

polysync-0.1.0/src/polysync/__init__.py ADDED Viewed

@@ -0,0 +1,19 @@
+"""polysync — multicam audio sync + director-style auto-edit.
+Align N recordings of one event by audio cross-correlation (envelope-based,
+robust at low SNR), emit reversible `.sync.json` sidecars (originals are never
+re-encoded), then auto-cut / picture-in-picture them into a single MP4.
+Public API:
+    from polysync import compute_sync, SyncResult, SyncError
+    from polysync.sidecar import read_sidecar, write_sidecar
+"""
+from .sync import compute_sync, SyncResult, SyncError
+from .sidecar import read_sidecar, write_sidecar, sidecar_path, SCHEMA_VERSION
+__version__ = "0.1.0"
+__all__ = [
+    "compute_sync", "SyncResult", "SyncError",
+    "read_sidecar", "write_sidecar", "sidecar_path", "SCHEMA_VERSION",
+    "__version__",
+]

polysync-0.1.0/src/polysync/audio.py ADDED Viewed

@@ -0,0 +1,130 @@
+"""Shared audio primitives — the pieces sync, verify, and edit all need.
+Everything here is either pure numpy/scipy (unit-testable without media) or a
+thin ffmpeg/ffprobe wrapper. Keeping these in one place is the whole reason
+polysync is a package and not three copy-pasted scripts.
+"""
+import subprocess
+from pathlib import Path
+import numpy as np
+from scipy import signal
+def loudest_audio_stream(video_path):
+    """Return the index N of the audio stream (`0:a:N`) with the highest mean
+    volume, probed over a 60 s window mid-file.
+    Why this matters: pro cameras often record multiple audio tracks where the
+    first one is dead. Sony FX6 MXF clips carry 4 mono PCM tracks and commonly
+    leave a:0 / a:1 silent (~-90 dB) with the real room mic on a:2 / a:3.
+    Hard-coding `0:a:0` would cross-correlate silence and fail to sync, so pick
+    the loudest track instead. Single-stream files (most MP4 cams) short-circuit
+    to a:0.
+    """
+    video_path = Path(video_path)
+    streams = subprocess.run(
+        ["ffprobe", "-v", "error", "-select_streams", "a",
+         "-show_entries", "stream=index", "-of", "csv=p=0", str(video_path)],
+        check=True, capture_output=True, text=True,
+    ).stdout.strip().splitlines()
+    if len(streams) <= 1:
+        return 0
+    best_idx, best_db = 0, -1e9
+    for ch in range(len(streams)):
+        err = subprocess.run(
+            ["ffmpeg", "-nostdin", "-hide_banner", "-ss", "300", "-t", "60",
+             "-i", str(video_path), "-map", "0:a:%d" % ch,
+             "-af", "volumedetect", "-f", "null", "-"],
+            capture_output=True, text=True,
+        ).stderr
+        for line in err.splitlines():
+            if "mean_volume" in line:
+                try:
+                    db = float(line.split("mean_volume:")[1].strip().split()[0])
+                except (IndexError, ValueError):
+                    db = -1e9
+                if db > best_db:
+                    best_db, best_idx = db, ch
+                break
+    print("  [%s] loudest audio stream: a:%d (%.1f dB)"
+          % (video_path.name, best_idx, best_db))
+    return best_idx
+def extract_pcm(video_path, dst, sr, stream=None):
+    """Extract one audio track as mono signed-16 PCM at `sr` Hz.
+    `stream` is the `0:a:N` index; if None, auto-select the loudest track.
+    No `-itsoffset` is ever applied here — offsets are pure metadata and are
+    handled by index arithmetic / `-itsoffset` at consume time downstream.
+    """
+    video_path = Path(video_path)
+    ch = loudest_audio_stream(video_path) if stream is None else stream
+    subprocess.run(
+        ["ffmpeg", "-nostdin", "-y", "-i", str(video_path),
+         "-map", "0:a:%d" % ch, "-ac", "1", "-ar", str(sr),
+         "-f", "s16le", str(dst)],
+        check=True, stderr=subprocess.DEVNULL,
+    )
+def read_pcm(path):
+    """Read a raw s16le file into a float32 array."""
+    return np.fromfile(str(path), dtype=np.int16).astype(np.float32)
+def media_duration(path):
+    """Container duration in seconds, via ffprobe."""
+    out = subprocess.run(
+        ["ffprobe", "-v", "error", "-show_entries", "format=duration",
+         "-of", "default=nw=1:nk=1", str(path)],
+        check=True, capture_output=True, text=True,
+    )
+    return float(out.stdout.strip())
+def frame_rms(x, sr, hop_ms=10, win_ms=50):
+    """Sliding-window RMS of `x`. Returns (rms_per_frame, frame_sr_hz).
+    Uses a cumulative-sum trick so it's O(n) regardless of window size. This is
+    the shared primitive behind both the sync envelope (log of this, high-passed)
+    and the edit per-second loudness.
+    """
+    hop = int(sr * hop_ms / 1000)
+    win = int(sr * win_ms / 1000)
+    n = (len(x) - win) // hop + 1
+    if n <= 0:
+        return np.zeros(0, dtype=np.float32), sr / hop
+    sq = x.astype(np.float64) ** 2
+    csq = np.concatenate([[0.0], np.cumsum(sq)])
+    out = np.empty(n, dtype=np.float32)
+    for i in range(n):
+        s = i * hop
+        out[i] = np.sqrt(max(1e-9, (csq[s + win] - csq[s]) / win))
+    return out, sr / hop
+def log_envelope(x, sr, hop_ms=10, win_ms=50, highpass_hz=0.05):
+    """Log-energy envelope, high-passed to strip slow gain/drift offsets.
+    This is what sync cross-correlates: it captures dialogue/music dynamics
+    that BOTH mics hear regardless of their frequency response — the reason
+    the matcher is robust even when the two cameras have very different mics.
+    """
+    rms, fsr = frame_rms(x, sr, hop_ms, win_ms)
+    env = np.log(rms + 1e-3)
+    if highpass_hz:
+        env = highpass(env, fsr, highpass_hz)
+    return env, fsr
+def highpass(x, fs, cut_hz=0.05):
+    sos = signal.butter(2, cut_hz, btype="high", fs=fs, output="sos")
+    return signal.sosfiltfilt(sos, x).astype(np.float32)
+def normalize(x):
+    x = x - x.mean()
+    s = x.std()
+    return x / s if s > 0 else x

polysync-0.1.0/src/polysync/cli.py ADDED Viewed

@@ -0,0 +1,79 @@
+"""`polysync` command-line entry point.
+    polysync sync        REFERENCE SOURCE [--partial]
+    polysync verify      REFERENCE SOURCE SIDECAR [--apply-drift]
+    polysync edit        IN1 IN2 ... --out edl.json [--mode rotation|greedy]
+    polysync render-cuts EDL --out out.mp4
+    polysync render-pip  EDL --out out.mp4 [--pip bottom-right]
+"""
+import argparse
+import sys
+from . import __version__
+from .sync import sync_files, SyncError
+from .verify import verify_files
+from .edit import autoedit, render_cuts, render_pip
+USAGE = __doc__
+def _cmd_sync(argv):
+    ap = argparse.ArgumentParser(prog="polysync sync")
+    ap.add_argument("reference", help="Reference recording (defines the timeline)")
+    ap.add_argument("source", help="Source to align to the reference")
+    ap.add_argument("--partial", action="store_true",
+                    help="Lenient mode for a source covering only part of the "
+                         "reference's span; degrades gracefully, writes only the "
+                         "source sidecar.")
+    args = ap.parse_args(argv)
+    try:
+        sync_files(args.reference, args.source, partial=args.partial)
+    except SyncError as e:
+        print("ERROR: %s" % e, file=sys.stderr)
+        return 1
+    return 0
+def _cmd_verify(argv):
+    ap = argparse.ArgumentParser(prog="polysync verify")
+    ap.add_argument("reference")
+    ap.add_argument("source")
+    ap.add_argument("sidecar", help="The source's <source>.sync.json")
+    ap.add_argument("--apply-drift", action="store_true")
+    ap.add_argument("--step", type=float, default=600.0,
+                    help="Probe spacing in seconds (default 10 min)")
+    args = ap.parse_args(argv)
+    try:
+        passed, _ = verify_files(args.reference, args.source, args.sidecar,
+                                 step=args.step, apply_drift=args.apply_drift)
+    except ValueError as e:
+        print("ERROR: %s" % e, file=sys.stderr)
+        return 2
+    return 0 if passed else 1
+def main(argv=None):
+    argv = list(sys.argv[1:] if argv is None else argv)
+    if not argv or argv[0] in ("-h", "--help", "help"):
+        print(USAGE)
+        return 0
+    if argv[0] in ("-V", "--version"):
+        print("polysync %s" % __version__)
+        return 0
+    cmd, rest = argv[0], argv[1:]
+    dispatch = {
+        "sync": _cmd_sync,
+        "verify": _cmd_verify,
+        "edit": lambda a: autoedit.main(a) or 0,
+        "render-cuts": lambda a: render_cuts.main(a) or 0,
+        "render-pip": lambda a: render_pip.main(a) or 0,
+    }
+    if cmd not in dispatch:
+        print("Unknown command %r.\n%s" % (cmd, USAGE), file=sys.stderr)
+        return 2
+    return dispatch[cmd](rest)
+# Running this module as a script: main()'s return value becomes the exit status.
+if __name__ == "__main__":
+    sys.exit(main())

polysync-0.1.0/src/polysync/edit/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""Director-style multicam auto-edit on top of polysync sidecars.
+autoedit  — build an EDL (which cam is on screen each second) from synced inputs
+render_cuts — render the EDL to one MP4 (hard cuts)
+render_pip  — render the EDL with a picture-in-picture inset
+"""
+from .autoedit import build_edl
+__all__ = ["build_edl"]