double-ender-sync 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- double_ender_sync-0.1.0/LICENSE +21 -0
- double_ender_sync-0.1.0/MANIFEST.in +32 -0
- double_ender_sync-0.1.0/PKG-INFO +316 -0
- double_ender_sync-0.1.0/README.md +280 -0
- double_ender_sync-0.1.0/THIRD_PARTY_NOTICES.md +31 -0
- double_ender_sync-0.1.0/pyproject.toml +67 -0
- double_ender_sync-0.1.0/setup.cfg +4 -0
- double_ender_sync-0.1.0/src/double_ender_sync/__init__.py +6 -0
- double_ender_sync-0.1.0/src/double_ender_sync/alignment/local_adjust.py +143 -0
- double_ender_sync-0.1.0/src/double_ender_sync/alignment/offset.py +80 -0
- double_ender_sync-0.1.0/src/double_ender_sync/alignment/timeline.py +104 -0
- double_ender_sync-0.1.0/src/double_ender_sync/analysis/anchors.py +48 -0
- double_ender_sync-0.1.0/src/double_ender_sync/analysis/drift.py +145 -0
- double_ender_sync-0.1.0/src/double_ender_sync/analysis/features.py +43 -0
- double_ender_sync-0.1.0/src/double_ender_sync/analysis/vad.py +55 -0
- double_ender_sync-0.1.0/src/double_ender_sync/api.py +78 -0
- double_ender_sync-0.1.0/src/double_ender_sync/audio/io.py +94 -0
- double_ender_sync-0.1.0/src/double_ender_sync/audio/normalize.py +10 -0
- double_ender_sync-0.1.0/src/double_ender_sync/audio/render.py +21 -0
- double_ender_sync-0.1.0/src/double_ender_sync/audio/resample.py +15 -0
- double_ender_sync-0.1.0/src/double_ender_sync/cli.py +313 -0
- double_ender_sync-0.1.0/src/double_ender_sync/gui.py +399 -0
- double_ender_sync-0.1.0/src/double_ender_sync/i18n/__init__.py +4 -0
- double_ender_sync-0.1.0/src/double_ender_sync/i18n/catalog.py +67 -0
- double_ender_sync-0.1.0/src/double_ender_sync/i18n/locales/en.json +46 -0
- double_ender_sync-0.1.0/src/double_ender_sync/i18n/locales/ja.json +46 -0
- double_ender_sync-0.1.0/src/double_ender_sync/i18n/resolver.py +39 -0
- double_ender_sync-0.1.0/src/double_ender_sync/i18n/validate.py +89 -0
- double_ender_sync-0.1.0/src/double_ender_sync/report/report.py +183 -0
- double_ender_sync-0.1.0/src/double_ender_sync/types.py +17 -0
- double_ender_sync-0.1.0/src/double_ender_sync.egg-info/PKG-INFO +316 -0
- double_ender_sync-0.1.0/src/double_ender_sync.egg-info/SOURCES.txt +34 -0
- double_ender_sync-0.1.0/src/double_ender_sync.egg-info/dependency_links.txt +1 -0
- double_ender_sync-0.1.0/src/double_ender_sync.egg-info/entry_points.txt +4 -0
- double_ender_sync-0.1.0/src/double_ender_sync.egg-info/requires.txt +21 -0
- double_ender_sync-0.1.0/src/double_ender_sync.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 double-ender-sync contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
include README.md
|
|
2
|
+
include LICENSE
|
|
3
|
+
include THIRD_PARTY_NOTICES.md
|
|
4
|
+
|
|
5
|
+
# package source
|
|
6
|
+
graft src
|
|
7
|
+
|
|
8
|
+
# exclude development/editor/agent metadata
|
|
9
|
+
prune .devcontainer
|
|
10
|
+
prune .vscode
|
|
11
|
+
exclude .editorconfig
|
|
12
|
+
exclude .gitleaks.toml
|
|
13
|
+
exclude .pre-commit-config.yaml
|
|
14
|
+
exclude AGENTS.md
|
|
15
|
+
exclude CLAUDE.md
|
|
16
|
+
exclude GEMINI.md
|
|
17
|
+
|
|
18
|
+
# exclude tests, docs, and VCS/CI/build/cache artifacts
|
|
19
|
+
prune tests
|
|
20
|
+
prune docs
|
|
21
|
+
prune .git
|
|
22
|
+
prune .github
|
|
23
|
+
prune .venv
|
|
24
|
+
prune venv
|
|
25
|
+
prune dist
|
|
26
|
+
prune build
|
|
27
|
+
prune *.egg-info
|
|
28
|
+
prune .pytest_cache
|
|
29
|
+
prune .mypy_cache
|
|
30
|
+
prune .ruff_cache
|
|
31
|
+
|
|
32
|
+
global-exclude __pycache__ *.py[cod] .DS_Store *.log
|
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: double-ender-sync
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Double-ender time alignment engine for podcast production
|
|
5
|
+
Author: Toshiyuki Ogura
|
|
6
|
+
Keywords: podcast,double-ender,audio,alignment,sync
|
|
7
|
+
Classifier: Development Status :: 3 - Alpha
|
|
8
|
+
Classifier: Environment :: Console
|
|
9
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
15
|
+
Requires-Python: >=3.11
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Requires-Dist: numpy<2,>=1.26
|
|
19
|
+
Requires-Dist: scipy>=1.11
|
|
20
|
+
Requires-Dist: soundfile>=0.12
|
|
21
|
+
Requires-Dist: librosa>=0.10
|
|
22
|
+
Requires-Dist: pytest
|
|
23
|
+
Requires-Dist: PySide6>=6.7
|
|
24
|
+
Provides-Extra: stretch
|
|
25
|
+
Requires-Dist: librosa>=0.10; extra == "stretch"
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: pytest; extra == "dev"
|
|
28
|
+
Requires-Dist: PySide6>=6.7; extra == "dev"
|
|
29
|
+
Provides-Extra: gui
|
|
30
|
+
Requires-Dist: PySide6>=6.7; extra == "gui"
|
|
31
|
+
Provides-Extra: all
|
|
32
|
+
Requires-Dist: librosa>=0.10; extra == "all"
|
|
33
|
+
Requires-Dist: pytest; extra == "all"
|
|
34
|
+
Requires-Dist: PySide6>=6.7; extra == "all"
|
|
35
|
+
Dynamic: license-file
|
|
36
|
+
|
|
37
|
+
# double-ender-sync
|
|
38
|
+
|
|
39
|
+
`double-ender-sync` is a CLI tool that aligns each speaker's local recording to a mixed reference recording ("master") for podcast post-production.
|
|
40
|
+
|
|
41
|
+
It focuses on **time alignment and diagnostics**, not final audio mixing.
|
|
42
|
+
|
|
43
|
+
## Project status
|
|
44
|
+
|
|
45
|
+
This project is currently **experimental (alpha)**.
|
|
46
|
+
|
|
47
|
+
It can produce useful alignment results for some double-ender podcast recordings, but it is not yet a fully validated production-grade editor. Always review generated reports, markers, warnings, and synced audio manually before using outputs in final production.
|
|
48
|
+
|
|
49
|
+
## What this tool does
|
|
50
|
+
|
|
51
|
+
- Detects initial timing offset between each local track and the master.
|
|
52
|
+
- Estimates long-duration clock drift from multiple anchor points.
|
|
53
|
+
- Applies global time correction and exports synced WAV files.
|
|
54
|
+
- Produces alignment diagnostics (`sync-report.json`, markers, warnings) so editors can review confidence and problem areas.
|
|
55
|
+
|
|
56
|
+
Offset definition:
|
|
57
|
+
|
|
58
|
+
```text
|
|
59
|
+
offset_seconds = master_time - local_time
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Install
|
|
63
|
+
|
|
64
|
+
### Requirements
|
|
65
|
+
|
|
66
|
+
- Python 3.11+
|
|
67
|
+
- WAV input files for master and local tracks
|
|
68
|
+
|
|
69
|
+
### From source
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install .
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Development install
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
python -m venv .venv
|
|
79
|
+
source .venv/bin/activate
|
|
80
|
+
python -m pip install --upgrade pip
|
|
81
|
+
pip install -e .
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Run tests:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
pip install -e ".[dev]"
|
|
88
|
+
pytest
|
|
89
|
+
|
|
90
|
+
# optional: install the extra used by the pitch-preserving stretch method (--stretch-method pitch_preserving)
|
|
91
|
+
pip install -e ".[stretch]"
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
After installation, the command is available as:
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
double-ender-sync --help
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Quick start
|
|
101
|
+
|
|
102
|
+
Input example:
|
|
103
|
+
|
|
104
|
+
```text
|
|
105
|
+
input/
|
|
106
|
+
master.wav
|
|
107
|
+
speaker-a.wav
|
|
108
|
+
speaker-b.wav
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Run:
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
double-ender-sync \
|
|
115
|
+
--master input/master.wav \
|
|
116
|
+
--track input/speaker-a.wav \
|
|
117
|
+
--track input/speaker-b.wav \
|
|
118
|
+
--out output/
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## Output files
|
|
122
|
+
|
|
123
|
+
Typical output:
|
|
124
|
+
|
|
125
|
+
```text
|
|
126
|
+
output/
|
|
127
|
+
speaker-a.synced.wav
|
|
128
|
+
speaker-b.synced.wav
|
|
129
|
+
sync-report.json
|
|
130
|
+
sync-markers.csv
|
|
131
|
+
warnings.txt
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
## Useful options
|
|
135
|
+
|
|
136
|
+
- `--analysis-sample-rate 16000`
|
|
137
|
+
Set analysis sample rate used for feature extraction/matching.
|
|
138
|
+
- `--local-adjust-enabled`
|
|
139
|
+
Enable **experimental** optional local adjustment around large residual errors. This is disabled by default and should only be used after manual report/audio review.
|
|
140
|
+
- `--local-adjust-threshold-ms 80`
|
|
141
|
+
Residual-error threshold, in milliseconds, for triggering local adjustment diagnostics/correction.
|
|
142
|
+
- `--normalize-output`
|
|
143
|
+
Normalize final synced WAV peak level before writing. Disabled by default.
|
|
144
|
+
- `--stretch-ratio-warning-threshold 0.003`
|
|
145
|
+
Warn when `abs(stretch_ratio - 1.0)` exceeds threshold (default `0.003` = 0.3%).
|
|
146
|
+
- `--stretch-ratio-auto-continue`
|
|
147
|
+
Skip interactive confirmation and continue even when stretch ratio warning threshold is exceeded.
|
|
148
|
+
- `--stretch-method {resample,pitch_preserving}`
|
|
149
|
+
Global correction method. `resample` is default. `pitch_preserving` uses librosa and prioritizes pitch stability for larger drift corrections.
|
|
150
|
+
- `--debug`
|
|
151
|
+
Enable debug logging to identify which stage is running when resource usage spikes.
|
|
152
|
+
- `--log-file output/debug.log`
|
|
153
|
+
Write logs to a specific file path (default: `output/double-ender-sync.log`).
|
|
154
|
+
|
|
155
|
+
Use `double-ender-sync --help` for the full option list.
|
|
156
|
+
|
|
157
|
+
### GUI (PySide6, drag & drop)
|
|
158
|
+
|
|
159
|
+
This project also provides an optional desktop GUI built with PySide6.
|
|
160
|
+
|
|
161
|
+
Install with GUI dependency:
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
pip install -e ".[gui]"
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
Launch GUI:
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
double-ender-sync-gui
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
### Language option (`--lang`) common specification
|
|
174
|
+
|
|
175
|
+
Project-wide behavior for language resolution is fixed as follows:
|
|
176
|
+
|
|
177
|
+
- `--lang <code>` is accepted (for example: `en`, `ja`).
|
|
178
|
+
- If `--lang` is omitted, the system locale is used (`LC_ALL`, then `LANG`).
|
|
179
|
+
- If the normalized language is unsupported, the resolver falls back to `en`.
|
|
180
|
+
- Regional codes are normalized to their language part before support checks (for example: `en-US` -> `en`, `ja_JP.UTF-8` -> `ja`).
|
|
181
|
+
- The GUI applies this resolver first, and the same resolver is reusable from the CLI/API, so each entry point does not need separate language detection logic.
|
|
182
|
+
|
|
183
|
+
Examples:
|
|
184
|
+
|
|
185
|
+
```bash
|
|
186
|
+
double-ender-sync-gui --lang en
|
|
187
|
+
double-ender-sync-gui --lang ja
|
|
188
|
+
double-ender-sync-gui
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
GUI features (current):
|
|
192
|
+
|
|
193
|
+
- Select `master.wav`
|
|
194
|
+
- Drag and drop multiple speaker `.wav` tracks
|
|
195
|
+
- Choose output directory
|
|
196
|
+
- Run the same alignment pipeline as CLI
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
## Python API (import from another project)
|
|
201
|
+
|
|
202
|
+
In addition to CLI usage, you can run the same pipeline from Python.
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
from pathlib import Path
|
|
206
|
+
|
|
207
|
+
from double_ender_sync import AlignmentOptions, run_alignment
|
|
208
|
+
|
|
209
|
+
options = AlignmentOptions(
|
|
210
|
+
master=Path("input/master.wav"),
|
|
211
|
+
tracks=[Path("input/speaker-a.wav"), Path("input/speaker-b.wav")],
|
|
212
|
+
out=Path("output"),
|
|
213
|
+
analysis_sample_rate=16000,
|
|
214
|
+
local_adjust_enabled=False,
|
|
215
|
+
normalize_output=False,
|
|
216
|
+
)
|
|
217
|
+
|
|
218
|
+
exit_code = run_alignment(options)
|
|
219
|
+
if exit_code != 0:
|
|
220
|
+
raise RuntimeError(f"alignment failed with exit code {exit_code}")
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
`run_alignment(...)` returns an exit code with the same semantics as the CLI `main(...)`.
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
## Translation operations rules
|
|
227
|
+
|
|
228
|
+
- Translation keys are domain-prefixed and stable (`gui.*`, `cli.*`, `api.*`, `errors.*`, `warnings.*`).
|
|
229
|
+
- Never use display text itself as a key.
|
|
230
|
+
- Missing key behavior is unified:
|
|
231
|
+
- If the target locale does not have the key, fall back to `en`.
|
|
232
|
+
- If `en` also does not have the key, show the key string and emit a warning log.
|
|
233
|
+
- Placeholder formatting is unified (for example: `"File not found: {path}"`).
|
|
234
|
+
- Placeholder names must match exactly across all languages for the same key.
|
|
235
|
+
|
|
236
|
+
### Adding a new language
|
|
237
|
+
|
|
238
|
+
1. Add a locale file: `src/double_ender_sync/i18n/locales/<lang>.json`.
|
|
239
|
+
2. Add `<lang>` to `SUPPORTED_LANGUAGES` in `src/double_ender_sync/i18n/resolver.py`.
|
|
240
|
+
3. Run required key validation: `double-ender-sync-validate-locales` (or `python -m double_ender_sync.i18n.validate`).
|
|
241
|
+
4. Verify UI rendering manually:
|
|
242
|
+
- launch `double-ender-sync-gui` with your locale selected,
|
|
243
|
+
- confirm labels/dialogs/errors render correctly,
|
|
244
|
+
- run one alignment and check runtime messages/logs.
|
|
245
|
+
|
|
246
|
+
## Intended use case
|
|
247
|
+
|
|
248
|
+
This tool is intended for podcast double-ender workflows where:
|
|
249
|
+
|
|
250
|
+
- each participant records a local WAV file,
|
|
251
|
+
- a mixed call recording is available as timing reference,
|
|
252
|
+
- local recordings contain enough speech anchors across the session,
|
|
253
|
+
- final output is reviewed and edited by a human in a DAW.
|
|
254
|
+
|
|
255
|
+
It may perform poorly when:
|
|
256
|
+
|
|
257
|
+
- the master recording is heavily compressed/noisy or missing large sections,
|
|
258
|
+
- a local track contains very little speech,
|
|
259
|
+
- local and master recordings contain different edits,
|
|
260
|
+
- long dropouts or repeated phrases confuse anchor matching,
|
|
261
|
+
- timing changes are non-linear and not well approximated by a simple drift model.
|
|
262
|
+
|
|
263
|
+
## Reviewing the result
|
|
264
|
+
|
|
265
|
+
After running the tool, inspect:
|
|
266
|
+
|
|
267
|
+
- `warnings.txt` for low-confidence regions and skipped adjustments,
|
|
268
|
+
- `sync-markers.csv` for anchor/residual positions,
|
|
269
|
+
- `sync-report.json` for per-track offset/stretch/residual diagnostics,
|
|
270
|
+
- exported `.synced.wav` files by listening in your DAW.
|
|
271
|
+
|
|
272
|
+
Do not treat generated synced files as final mastered audio.
|
|
273
|
+
|
|
274
|
+
## Temporary files
|
|
275
|
+
|
|
276
|
+
This tool creates temporary memory-mapped files during analysis to reduce peak RAM usage for long recordings. These temporary files are cleaned up at the end of a normal CLI run.
|
|
277
|
+
|
|
278
|
+
## Current implementation status
|
|
279
|
+
|
|
280
|
+
Implemented pipeline includes:
|
|
281
|
+
|
|
282
|
+
1. audio loading and normalization for analysis,
|
|
283
|
+
2. speech-region detection (RMS-based),
|
|
284
|
+
3. anchor selection and matching against master,
|
|
285
|
+
4. initial offset estimation,
|
|
286
|
+
5. multi-anchor linear drift estimation,
|
|
287
|
+
6. global correction and synced WAV export,
|
|
288
|
+
7. detailed reporting with warnings/errors.
|
|
289
|
+
|
|
290
|
+
## Scope and non-goals
|
|
291
|
+
|
|
292
|
+
This project does **not** do final podcast mastering tasks such as:
|
|
293
|
+
|
|
294
|
+
- noise reduction,
|
|
295
|
+
- EQ/compression/loudness normalization,
|
|
296
|
+
- transcript-based editing,
|
|
297
|
+
- final mixdown/publishing.
|
|
298
|
+
|
|
299
|
+
The expected workflow is:
|
|
300
|
+
|
|
301
|
+
```text
|
|
302
|
+
raw recordings -> double-ender-sync -> synced WAV + report -> human DAW edit
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
## Licensing and distribution policy
|
|
306
|
+
|
|
307
|
+
Project code is MIT licensed.
|
|
308
|
+
|
|
309
|
+
Current policy is **source-only distribution** from this repository. No official prebuilt binaries are published.
|
|
310
|
+
|
|
311
|
+
Before publishing any binary builds in the future, review third-party obligations (especially LGPL-related components) and update distribution/legal documentation accordingly.
|
|
312
|
+
|
|
313
|
+
See:
|
|
314
|
+
|
|
315
|
+
- `THIRD_PARTY_NOTICES.md`
|
|
316
|
+
- `docs/licensing-source-only.md`
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
# double-ender-sync
|
|
2
|
+
|
|
3
|
+
`double-ender-sync` is a CLI tool that aligns each speaker's local recording to a mixed reference recording ("master") for podcast post-production.
|
|
4
|
+
|
|
5
|
+
It focuses on **time alignment and diagnostics**, not final audio mixing.
|
|
6
|
+
|
|
7
|
+
## Project status
|
|
8
|
+
|
|
9
|
+
This project is currently **experimental (alpha)**.
|
|
10
|
+
|
|
11
|
+
It can produce useful alignment results for some double-ender podcast recordings, but it is not yet a fully validated production-grade editor. Always review generated reports, markers, warnings, and synced audio manually before using outputs in final production.
|
|
12
|
+
|
|
13
|
+
## What this tool does
|
|
14
|
+
|
|
15
|
+
- Detects initial timing offset between each local track and the master.
|
|
16
|
+
- Estimates long-duration clock drift from multiple anchor points.
|
|
17
|
+
- Applies global time correction and exports synced WAV files.
|
|
18
|
+
- Produces alignment diagnostics (`sync-report.json`, markers, warnings) so editors can review confidence and problem areas.
|
|
19
|
+
|
|
20
|
+
Offset definition:
|
|
21
|
+
|
|
22
|
+
```text
|
|
23
|
+
offset_seconds = master_time - local_time
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Install
|
|
27
|
+
|
|
28
|
+
### Requirements
|
|
29
|
+
|
|
30
|
+
- Python 3.11+
|
|
31
|
+
- WAV input files for master and local tracks
|
|
32
|
+
|
|
33
|
+
### From source
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install .
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Development install
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
python -m venv .venv
|
|
43
|
+
source .venv/bin/activate
|
|
44
|
+
python -m pip install --upgrade pip
|
|
45
|
+
pip install -e .
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Run tests:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install -e ".[dev]"
|
|
52
|
+
pytest
|
|
53
|
+
|
|
54
|
+
# optional: install the extra used by the pitch-preserving stretch method (--stretch-method pitch_preserving)
|
|
55
|
+
pip install -e ".[stretch]"
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
After installation, the command is available as:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
double-ender-sync --help
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Quick start
|
|
65
|
+
|
|
66
|
+
Input example:
|
|
67
|
+
|
|
68
|
+
```text
|
|
69
|
+
input/
|
|
70
|
+
master.wav
|
|
71
|
+
speaker-a.wav
|
|
72
|
+
speaker-b.wav
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Run:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
double-ender-sync \
|
|
79
|
+
--master input/master.wav \
|
|
80
|
+
--track input/speaker-a.wav \
|
|
81
|
+
--track input/speaker-b.wav \
|
|
82
|
+
--out output/
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Output files
|
|
86
|
+
|
|
87
|
+
Typical output:
|
|
88
|
+
|
|
89
|
+
```text
|
|
90
|
+
output/
|
|
91
|
+
speaker-a.synced.wav
|
|
92
|
+
speaker-b.synced.wav
|
|
93
|
+
sync-report.json
|
|
94
|
+
sync-markers.csv
|
|
95
|
+
warnings.txt
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Useful options
|
|
99
|
+
|
|
100
|
+
- `--analysis-sample-rate 16000`
|
|
101
|
+
Set analysis sample rate used for feature extraction/matching.
|
|
102
|
+
- `--local-adjust-enabled`
|
|
103
|
+
Enable **experimental** optional local adjustment around large residual errors. This is disabled by default and should only be used after manual report/audio review.
|
|
104
|
+
- `--local-adjust-threshold-ms 80`
|
|
105
|
+
Residual-error threshold, in milliseconds, for triggering local adjustment diagnostics/correction.
|
|
106
|
+
- `--normalize-output`
|
|
107
|
+
Normalize final synced WAV peak level before writing. Disabled by default.
|
|
108
|
+
- `--stretch-ratio-warning-threshold 0.003`
|
|
109
|
+
Warn when `abs(stretch_ratio - 1.0)` exceeds threshold (default `0.003` = 0.3%).
|
|
110
|
+
- `--stretch-ratio-auto-continue`
|
|
111
|
+
Skip interactive confirmation and continue even when stretch ratio warning threshold is exceeded.
|
|
112
|
+
- `--stretch-method {resample,pitch_preserving}`
|
|
113
|
+
Global correction method. `resample` is default. `pitch_preserving` uses librosa and prioritizes pitch stability for larger drift corrections.
|
|
114
|
+
- `--debug`
|
|
115
|
+
Enable debug logging to identify which stage is running when resource usage spikes.
|
|
116
|
+
- `--log-file output/debug.log`
|
|
117
|
+
Write logs to a specific file path (default: `output/double-ender-sync.log`).
|
|
118
|
+
|
|
119
|
+
Use `double-ender-sync --help` for the full option list.
|
|
120
|
+
|
|
121
|
+
### GUI (PySide6, drag & drop)
|
|
122
|
+
|
|
123
|
+
This project also provides an optional desktop GUI built with PySide6.
|
|
124
|
+
|
|
125
|
+
Install with GUI dependency:
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
pip install -e ".[gui]"
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Launch GUI:
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
double-ender-sync-gui
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Language option (`--lang`) common specification
|
|
138
|
+
|
|
139
|
+
Project-wide behavior for language resolution is fixed as follows:
|
|
140
|
+
|
|
141
|
+
- `--lang <code>` is accepted (for example: `en`, `ja`).
|
|
142
|
+
- If `--lang` is omitted, the system locale is used (`LC_ALL`, then `LANG`).
|
|
143
|
+
- If the normalized language is unsupported, the resolver falls back to `en`.
|
|
144
|
+
- Regional codes are normalized to their language part before support checks (for example: `en-US` -> `en`, `ja_JP.UTF-8` -> `ja`).
|
|
145
|
+
- The GUI applies this resolver first, and the same resolver is reusable from the CLI/API, so each entry point does not need separate language detection logic.
|
|
146
|
+
|
|
147
|
+
Examples:
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
double-ender-sync-gui --lang en
|
|
151
|
+
double-ender-sync-gui --lang ja
|
|
152
|
+
double-ender-sync-gui
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
GUI features (current):
|
|
156
|
+
|
|
157
|
+
- Select `master.wav`
|
|
158
|
+
- Drag and drop multiple speaker `.wav` tracks
|
|
159
|
+
- Choose output directory
|
|
160
|
+
- Run the same alignment pipeline as CLI
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
## Python API (import from another project)
|
|
165
|
+
|
|
166
|
+
In addition to CLI usage, you can run the same pipeline from Python.
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
from pathlib import Path
|
|
170
|
+
|
|
171
|
+
from double_ender_sync import AlignmentOptions, run_alignment
|
|
172
|
+
|
|
173
|
+
options = AlignmentOptions(
|
|
174
|
+
master=Path("input/master.wav"),
|
|
175
|
+
tracks=[Path("input/speaker-a.wav"), Path("input/speaker-b.wav")],
|
|
176
|
+
out=Path("output"),
|
|
177
|
+
analysis_sample_rate=16000,
|
|
178
|
+
local_adjust_enabled=False,
|
|
179
|
+
normalize_output=False,
|
|
180
|
+
)
|
|
181
|
+
|
|
182
|
+
exit_code = run_alignment(options)
|
|
183
|
+
if exit_code != 0:
|
|
184
|
+
raise RuntimeError(f"alignment failed with exit code {exit_code}")
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
`run_alignment(...)` returns an exit code with the same semantics as the CLI `main(...)`.
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
## Translation operations rules
|
|
191
|
+
|
|
192
|
+
- Translation keys are domain-prefixed and stable (`gui.*`, `cli.*`, `api.*`, `errors.*`, `warnings.*`).
|
|
193
|
+
- Never use display text itself as a key.
|
|
194
|
+
- Missing key behavior is unified:
|
|
195
|
+
- If the target locale does not have the key, fall back to `en`.
|
|
196
|
+
- If `en` also does not have the key, show the key string and emit a warning log.
|
|
197
|
+
- Placeholder formatting is unified (for example: `"File not found: {path}"`).
|
|
198
|
+
- Placeholder names must match exactly across all languages for the same key.
|
|
199
|
+
|
|
200
|
+
### Adding a new language
|
|
201
|
+
|
|
202
|
+
1. Add a locale file: `src/double_ender_sync/i18n/locales/<lang>.json`.
|
|
203
|
+
2. Add `<lang>` to `SUPPORTED_LANGUAGES` in `src/double_ender_sync/i18n/resolver.py`.
|
|
204
|
+
3. Run required key validation: `double-ender-sync-validate-locales` (or `python -m double_ender_sync.i18n.validate`).
|
|
205
|
+
4. Verify UI rendering manually:
|
|
206
|
+
- launch `double-ender-sync-gui` with your locale selected,
|
|
207
|
+
- confirm labels/dialogs/errors render correctly,
|
|
208
|
+
- run one alignment and check runtime messages/logs.
|
|
209
|
+
|
|
210
|
+
## Intended use case
|
|
211
|
+
|
|
212
|
+
This tool is intended for podcast double-ender workflows where:
|
|
213
|
+
|
|
214
|
+
- each participant records a local WAV file,
|
|
215
|
+
- a mixed call recording is available as timing reference,
|
|
216
|
+
- local recordings contain enough speech anchors across the session,
|
|
217
|
+
- final output is reviewed and edited by a human in a DAW.
|
|
218
|
+
|
|
219
|
+
It may perform poorly when:
|
|
220
|
+
|
|
221
|
+
- the master recording is heavily compressed/noisy or missing large sections,
|
|
222
|
+
- a local track contains very little speech,
|
|
223
|
+
- local and master recordings contain different edits,
|
|
224
|
+
- long dropouts or repeated phrases confuse anchor matching,
|
|
225
|
+
- timing changes are non-linear and not well approximated by a simple drift model.
|
|
226
|
+
|
|
227
|
+
## Reviewing the result
|
|
228
|
+
|
|
229
|
+
After running the tool, inspect:
|
|
230
|
+
|
|
231
|
+
- `warnings.txt` for low-confidence regions and skipped adjustments,
|
|
232
|
+
- `sync-markers.csv` for anchor/residual positions,
|
|
233
|
+
- `sync-report.json` for per-track offset/stretch/residual diagnostics,
|
|
234
|
+
- exported `.synced.wav` files by listening in your DAW.
|
|
235
|
+
|
|
236
|
+
Do not treat generated synced files as final mastered audio.
|
|
237
|
+
|
|
238
|
+
## Temporary files
|
|
239
|
+
|
|
240
|
+
This tool creates temporary memory-mapped files during analysis to reduce peak RAM usage for long recordings. These temporary files are cleaned up at the end of a normal CLI run.
|
|
241
|
+
|
|
242
|
+
## Current implementation status
|
|
243
|
+
|
|
244
|
+
Implemented pipeline includes:
|
|
245
|
+
|
|
246
|
+
1. audio loading and normalization for analysis,
|
|
247
|
+
2. speech-region detection (RMS-based),
|
|
248
|
+
3. anchor selection and matching against master,
|
|
249
|
+
4. initial offset estimation,
|
|
250
|
+
5. multi-anchor linear drift estimation,
|
|
251
|
+
6. global correction and synced WAV export,
|
|
252
|
+
7. detailed reporting with warnings/errors.
|
|
253
|
+
|
|
254
|
+
## Scope and non-goals
|
|
255
|
+
|
|
256
|
+
This project does **not** do final podcast mastering tasks such as:
|
|
257
|
+
|
|
258
|
+
- noise reduction,
|
|
259
|
+
- EQ/compression/loudness normalization,
|
|
260
|
+
- transcript-based editing,
|
|
261
|
+
- final mixdown/publishing.
|
|
262
|
+
|
|
263
|
+
The expected workflow is:
|
|
264
|
+
|
|
265
|
+
```text
|
|
266
|
+
raw recordings -> double-ender-sync -> synced WAV + report -> human DAW edit
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
## Licensing and distribution policy
|
|
270
|
+
|
|
271
|
+
Project code is MIT licensed.
|
|
272
|
+
|
|
273
|
+
Current policy is **source-only distribution** from this repository. No official prebuilt binaries are published.
|
|
274
|
+
|
|
275
|
+
Before publishing any binary builds in the future, review third-party obligations (especially LGPL-related components) and update distribution/legal documentation accordingly.
|
|
276
|
+
|
|
277
|
+
See:
|
|
278
|
+
|
|
279
|
+
- `THIRD_PARTY_NOTICES.md`
|
|
280
|
+
- `docs/licensing-source-only.md`
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Third-Party Notices
|
|
2
|
+
|
|
3
|
+
This project includes and depends on third-party software components. Each component is licensed under its own terms.
|
|
4
|
+
|
|
5
|
+
## Runtime dependencies
|
|
6
|
+
|
|
7
|
+
- **PySide6 (optional GUI dependency)**
|
|
8
|
+
- License: LGPL (or commercial alternatives depending on usage terms)
|
|
9
|
+
- Upstream: https://doc.qt.io/qtforpython-6/
|
|
10
|
+
|
|
11
|
+
- **soundfile**
|
|
12
|
+
- License: BSD-3-Clause
|
|
13
|
+
- Upstream: https://github.com/bastibe/python-soundfile
|
|
14
|
+
|
|
15
|
+
- **libsndfile**
|
|
16
|
+
- License: LGPL-2.1-or-later
|
|
17
|
+
- Upstream: https://libsndfile.github.io/libsndfile/
|
|
18
|
+
|
|
19
|
+
- **numpy**
|
|
20
|
+
- License: BSD-3-Clause
|
|
21
|
+
- Upstream: https://numpy.org/
|
|
22
|
+
|
|
23
|
+
- **scipy**
|
|
24
|
+
- License: BSD-3-Clause
|
|
25
|
+
- Upstream: https://scipy.org/
|
|
26
|
+
|
|
27
|
+
## Project distribution policy
|
|
28
|
+
|
|
29
|
+
Current project policy is source-only distribution. No official prebuilt binaries are published by this repository.
|
|
30
|
+
|
|
31
|
+
If you redistribute built artifacts, you are responsible for compliance with all applicable third-party licenses.
|