millet-record 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- millet_record/__init__.py +76 -0
- millet_record/_bin/.gitignore +11 -0
- millet_record/audio.py +252 -0
- millet_record/capture.py +1068 -0
- millet_record/cli.py +532 -0
- millet_record/languages.py +99 -0
- millet_record/utils.py +54 -0
- millet_record-0.4.0.dist-info/METADATA +150 -0
- millet_record-0.4.0.dist-info/RECORD +14 -0
- millet_record-0.4.0.dist-info/WHEEL +5 -0
- millet_record-0.4.0.dist-info/entry_points.txt +3 -0
- millet_record-0.4.0.dist-info/licenses/LICENSE +12 -0
- millet_record-0.4.0.dist-info/licenses/NOTICE +47 -0
- millet_record-0.4.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""millet-record — lightweight capture-only subset of millet (formerly meetscribe-record).
|
|
2
|
+
|
|
3
|
+
Public modules:
|
|
4
|
+
millet_record.capture — RecordingSession, dual-channel capture
|
|
5
|
+
(Linux: ffmpeg+PulseAudio; macOS 14.4+
|
|
6
|
+
arm64: meet-record-mac sidecar)
|
|
7
|
+
millet_record.audio — stereo channel reading + ffmpeg compression
|
|
8
|
+
millet_record.utils — formatting helpers
|
|
9
|
+
millet_record.languages — language constants
|
|
10
|
+
millet_record.cli — `millet` console-script entry point
|
|
11
|
+
(with deprecation-aliased `meet` for two
|
|
12
|
+
minor versions)
|
|
13
|
+
|
|
14
|
+
Named after the Ottoman millet system. Part of the vezir ecosystem.
|
|
15
|
+
|
|
16
|
+
Version is the single source of truth here; pyproject.toml's
|
|
17
|
+
[project] section pulls it dynamically via setuptools.dynamic.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
__version__ = "0.4.0"
|
|
21
|
+
|
|
22
|
+
# ── Backward-compat: meet_record alias ──────────────────────────────────────
|
|
23
|
+
# Existing code (e.g. older meetscribe-offline 0.8.3 compatibility shims,
|
|
24
|
+
# third-party scripts) imports ``from meet_record.X import …``. We register
|
|
25
|
+
# this package as both ``millet_record`` (canonical) and ``meet_record``
|
|
26
|
+
# (legacy) in sys.modules so both import paths resolve to the same package.
|
|
27
|
+
# Submodules are also aliased lazily via a MetaPathFinder so we don't pay
|
|
28
|
+
# the import cost up-front (capture pulls ffmpeg detection on Linux).
|
|
29
|
+
#
|
|
30
|
+
# Scheduled for removal in millet-record 0.6.0 (matches the `meet` console-script
|
|
31
|
+
# deprecation timeline).
|
|
32
|
+
import sys as _sys
|
|
33
|
+
import importlib as _importlib
|
|
34
|
+
import importlib.abc as _abc
|
|
35
|
+
import importlib.machinery as _machinery
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class _MeetRecordAliasFinder(_abc.MetaPathFinder):
    """Meta-path finder mapping legacy ``meet_record`` names onto ``millet_record``.

    ``meet_record`` itself and any dotted ``meet_record.X`` name are handled;
    every other module name is declined by returning ``None``.
    """

    def find_spec(self, fullname, path, target=None):
        """Return an alias spec for *fullname*, or ``None`` if it is not ours.

        For a ``meet_record.X`` request the canonical ``millet_record.X``
        module is imported immediately and stored in ``sys.modules`` under the
        legacy name; an ``ImportError`` during that import yields ``None``.
        *path* and *target* are part of the finder signature and are not used.
        """
        legacy_root = "meet_record"
        if fullname == legacy_root:
            return _machinery.ModuleSpec(
                fullname, loader=_MeetRecordAliasLoader(), is_package=True
            )

        legacy_prefix = legacy_root + "."
        if not fullname.startswith(legacy_prefix):
            return None

        canonical_name = "millet_record." + fullname[len(legacy_prefix):]
        try:
            canonical_module = _importlib.import_module(canonical_name)
        except ImportError:
            return None

        # Publish the canonical module under the legacy dotted name as well.
        _sys.modules[fullname] = canonical_module
        return _machinery.ModuleSpec(
            fullname,
            loader=_MeetRecordAliasLoader(),
            is_package=hasattr(canonical_module, "__path__"),
        )
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class _MeetRecordAliasLoader(_abc.Loader):
    """Loader that hands back already-existing ``millet_record`` module objects."""

    def create_module(self, spec):
        """Return the canonical module object that *spec* should alias."""
        legacy_root = "meet_record"
        if spec.name == legacy_root:
            # The top-level alias is the module this code lives in.
            return _sys.modules[__name__]

        # Strip "meet_record." (root plus the dot) and re-root the remainder.
        canonical_name = "millet_record." + spec.name[len(legacy_root) + 1:]
        already_loaded = _sys.modules.get(canonical_name)
        if already_loaded:
            return already_loaded
        return _importlib.import_module(canonical_name)

    def exec_module(self, module):  # noqa: D401 - aliased; nothing to exec
        """Do nothing: the module returned by ``create_module`` is already loaded."""
        return None
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# Expose this package under the legacy top-level name too; ``setdefault``
# leaves any module already registered as ``meet_record`` untouched.
_sys.modules.setdefault("meet_record", _sys.modules[__name__])
# Install the alias finder only when no instance is on ``sys.meta_path`` yet.
# It is appended, so every finder already on the path is consulted first.
if not any(isinstance(f, _MeetRecordAliasFinder) for f in _sys.meta_path):
    _sys.meta_path.append(_MeetRecordAliasFinder())
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# Ignore everything in this directory except this file.
|
|
2
|
+
#
|
|
3
|
+
# millet_record/_bin/ holds the macOS arm64 meet-record-mac binary in
|
|
4
|
+
# the macOS wheel. The binary itself is built by mac.yml in CI and
|
|
5
|
+
# copied here at wheel-build time by release.yml; it's never
|
|
6
|
+
# committed. This .gitignore both keeps the directory present in the
|
|
7
|
+
# tree (so setuptools.package-data can reference _bin/* during build)
|
|
8
|
+
# and prevents accidental commits of a locally-built artefact.
|
|
9
|
+
|
|
10
|
+
*
|
|
11
|
+
!.gitignore
|
millet_record/audio.py
ADDED
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
"""Audio utilities for meetscribe.
|
|
2
|
+
|
|
3
|
+
Low-level helpers for reading stereo audio files, computing per-speaker
|
|
4
|
+
channel energy, and compressing recordings.
|
|
5
|
+
|
|
6
|
+
Extracted from label.py and transcribe.py to eliminate duplication.
|
|
7
|
+
All I/O uses ffmpeg/ffprobe (via subprocess) so that any audio format
|
|
8
|
+
supported by ffmpeg (WAV, OGG/Opus, FLAC, …) can be read transparently.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import logging
|
|
15
|
+
import subprocess
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import NamedTuple
|
|
18
|
+
|
|
19
|
+
import numpy as np
|
|
20
|
+
|
|
21
|
+
log = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class StereoChannels(NamedTuple):
    """Parsed stereo audio data returned by :func:`read_stereo_channels`.

    Being a ``NamedTuple``, it also unpacks positionally as
    ``(mic, system, sample_rate, sampwidth)``.
    """

    # As produced by read_stereo_channels, both channel arrays hold the decoded
    # int16 sample values converted to float32 without rescaling.
    mic: np.ndarray     # Left channel (your microphone), float32
    system: np.ndarray  # Right channel (system/remote audio), float32
    sample_rate: int    # Frames per second
    sampwidth: int      # Bytes per sample (always 2 — decoded to int16)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def read_stereo_channels(audio_path: Path) -> StereoChannels | None:
    """Read a stereo audio file and return separate mic and system channels.

    Uses ffprobe to inspect the file and ffmpeg to decode it to raw PCM, so
    any format ffmpeg supports (WAV, OGG/Opus, FLAC, …) works transparently.

    Args:
        audio_path: Path to the audio file to decode.

    Returns:
        A :class:`StereoChannels` with the left channel as ``mic`` and the
        right channel as ``system``, or ``None`` (instead of raising) when:

        * ffprobe/ffmpeg cannot be run or exits non-zero,
        * the first stream reported by ffprobe does not have exactly two
          channels or has no sample rate,
        * the decoded output does not contain at least one complete stereo
          frame.

        Callers should fall back to a safe default in that case.

    The returned arrays are float32 conversions of the decoded int16 samples
    (values are not rescaled) backed by a freshly allocated array, so they
    are safe to modify.
    """
    # Probe channel count and sample rate first.
    probe_cmd = [
        "ffprobe", "-v", "quiet",
        "-show_entries", "stream=channels,sample_rate",
        "-of", "json",
        str(audio_path),
    ]
    try:
        probe = subprocess.run(probe_cmd, capture_output=True, text=True)
        if probe.returncode != 0:
            return None
        info = json.loads(probe.stdout)
        # Only the first listed stream is inspected; an empty list raises
        # IndexError, which the handler below turns into None.
        stream = info.get("streams", [{}])[0]
        n_channels = int(stream.get("channels", 0))
        sample_rate = int(stream.get("sample_rate", 0))
    except Exception:
        # Deliberate best-effort: missing ffprobe, malformed JSON, bad values.
        return None

    if n_channels != 2 or sample_rate == 0:
        return None

    # Decode full file to raw s16le PCM via ffmpeg.
    decode_cmd = [
        "ffmpeg", "-v", "quiet",
        "-i", str(audio_path),
        "-f", "s16le",
        "-acodec", "pcm_s16le",
        "-ar", str(sample_rate),
        "-ac", "2",
        "-",  # write to stdout
    ]
    try:
        result = subprocess.run(decode_cmd, capture_output=True)
        if result.returncode != 0:
            return None
        raw = result.stdout
    except Exception:
        return None

    # One stereo frame is 2 channels x 2 bytes. Count whole frames in BYTES
    # before decoding: np.frombuffer raises ValueError on a buffer whose size
    # is not a multiple of the item size, which would break the
    # "return None instead of raising" contract on truncated output.
    bytes_per_sample = 2
    frame_bytes = 2 * bytes_per_sample
    n_frames = len(raw) // frame_bytes
    if n_frames == 0:
        return None

    # ``count`` limits the read to whole frames, dropping a trailing partial one.
    samples = np.frombuffer(raw, dtype=np.int16, count=n_frames * 2)
    samples = samples.reshape(-1, 2).astype(np.float32)

    return StereoChannels(
        mic=samples[:, 0],
        system=samples[:, 1],
        sample_rate=sample_rate,
        sampwidth=bytes_per_sample,
    )
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# ─── Audio compression ─────────────────────────────────────────────────────
|
|
101
|
+
|
|
102
|
+
def _get_audio_duration(path: Path) -> float | None:
    """Ask ffprobe for the duration of *path* in seconds.

    Returns ``None`` when ffprobe cannot be run, exits non-zero, prints
    nothing, or prints something that cannot be parsed as a float.
    """
    probe_args = [
        "ffprobe", "-v", "quiet",
        "-show_entries", "format=duration",
        "-of", "csv=p=0",
        str(path),
    ]
    try:
        proc = subprocess.run(probe_args, capture_output=True, text=True)
        if proc.returncode != 0:
            return None
        printed = proc.stdout.strip()
        if not printed:
            return None
        return float(printed)
    except Exception:
        # Best-effort probe: any failure simply means "duration unknown".
        return None
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def compress_audio(
    wav_path: Path,
    *,
    keep_wav: bool = False,
    bitrate: str = "48k",
) -> Path:
    """Compress a WAV file to OGG/Opus and optionally delete the original.

    The output is written next to the input with the suffix replaced by
    ``.ogg`` (an existing file at that path is overwritten). After encoding,
    the durations of input and output are compared via ffprobe and must agree
    within one second; if either duration cannot be probed the comparison is
    skipped and a warning is logged.

    Args:
        wav_path: Path to the stereo WAV recording.
        keep_wav: If True, keep the WAV file after compression.
        bitrate: Opus bitrate (default 48k — transparent for speech).

    Returns:
        Path to the compressed .ogg file.

    Raises:
        RuntimeError: If the output path would be the input file itself, if
            ffmpeg fails, or if duration validation fails.
        FileNotFoundError: If the WAV file does not exist.
    """
    wav_path = Path(wav_path)
    if not wav_path.exists():
        raise FileNotFoundError(f"Audio file not found: {wav_path}")

    ogg_path = wav_path.with_suffix(".ogg")

    # Guard against input == output (e.g. the input already ends in ".ogg").
    # Without it ffmpeg fails and the failure cleanup below would unlink the
    # caller's only copy of the recording. samefile() also catches paths that
    # differ textually but name the same file.
    if ogg_path == wav_path or (
        ogg_path.exists() and ogg_path.samefile(wav_path)
    ):
        raise RuntimeError(
            f"Refusing to compress {wav_path}: output path {ogg_path} "
            "is the input file"
        )

    cmd = [
        "ffmpeg", "-y", "-v", "quiet",
        "-i", str(wav_path),
        "-c:a", "libopus",
        "-b:a", bitrate,
        "-vn",
        str(ogg_path),
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        # Clean up partial output.
        ogg_path.unlink(missing_ok=True)
        raise RuntimeError(
            f"Audio compression failed (ffmpeg exit {result.returncode}): "
            f"{result.stderr.strip()}"
        )

    # Validate: durations must match within 1 second.
    wav_dur = _get_audio_duration(wav_path)
    ogg_dur = _get_audio_duration(ogg_path)
    if wav_dur is None or ogg_dur is None:
        # Validation is best-effort; make the skipped check visible in the log
        # because the original may be deleted below.
        log.warning(
            "Could not verify duration of %s after compression; "
            "skipping validation",
            ogg_path.name,
        )
    elif abs(wav_dur - ogg_dur) > 1.0:
        ogg_path.unlink(missing_ok=True)
        raise RuntimeError(
            f"Duration mismatch after compression: WAV={wav_dur:.1f}s "
            f"vs OGG={ogg_dur:.1f}s (diff > 1s)"
        )

    # Gather sizes for logging before potentially deleting the WAV.
    wav_size = wav_path.stat().st_size
    ogg_size = ogg_path.stat().st_size
    ratio = wav_size / ogg_size if ogg_size > 0 else 0

    if not keep_wav:
        wav_path.unlink()
        log.info("Deleted %s after compression", wav_path.name)

    log.info(
        "Compressed %s -> %s (%.1f MB -> %.1f MB, %.0fx)",
        wav_path.name, ogg_path.name,
        wav_size / 1_048_576, ogg_size / 1_048_576, ratio,
    )

    return ogg_path
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def compute_speaker_channel_energy(
    mic_ch: np.ndarray,
    sys_ch: np.ndarray,
    segments: list,  # list[Segment] — avoid circular import
    sample_rate: int,
) -> dict[str, float]:
    """Compute the mic-channel energy ratio for each speaker.

    For each speaker, the RMS of every segment is computed on the mic channel
    and on the system channel, weighted by the segment's frame count and
    averaged over all of that speaker's segments. The result maps
    ``speaker_id -> mic_ratio`` where::

        mic_ratio = avg_mic_rms / (avg_mic_rms + avg_sys_rms)

    A ratio > 0.5 means the speaker is dominant on the mic (i.e. YOU).
    Speakers with no audio frames (all their segments fall outside the audio
    or are empty) and speakers whose audio is silent on both channels get a
    ratio of 0.5 (unknown).

    Args:
        mic_ch: Float32 array of left-channel (mic) samples.
        sys_ch: Float32 array of right-channel (system) samples.
        segments: List of Segment objects with .start, .end, .speaker.
            Segments with a falsy ``speaker`` are ignored.
        sample_rate: Frames per second (used to convert timestamps to indices).

    Returns:
        Dict mapping speaker ID to mic-ratio float in [0.0, 1.0].
    """
    # Clamp to the shorter channel so both slices always cover the same,
    # non-empty frame range (an empty slice would produce a NaN RMS).
    n = min(len(mic_ch), len(sys_ch))
    mic_energy: dict[str, float] = {}
    sys_energy: dict[str, float] = {}
    total_frames: dict[str, int] = {}

    for seg in segments:
        spk = seg.speaker
        if not spk:
            continue
        # Register the speaker before any skip so that speakers without
        # usable frames still receive the documented 0.5 below.
        total_frames.setdefault(spk, 0)

        start = max(0, min(int(seg.start * sample_rate), n))
        end = max(0, min(int(seg.end * sample_rate), n))
        if end <= start:
            continue

        count = end - start
        mic_rms = float(np.sqrt(np.mean(mic_ch[start:end] ** 2)))
        sys_rms = float(np.sqrt(np.mean(sys_ch[start:end] ** 2)))

        # Weight each segment's RMS by its length in frames.
        mic_energy[spk] = mic_energy.get(spk, 0.0) + mic_rms * count
        sys_energy[spk] = sys_energy.get(spk, 0.0) + sys_rms * count
        total_frames[spk] += count

    mic_ratio: dict[str, float] = {}
    for spk, frames in total_frames.items():
        if frames == 0:
            mic_ratio[spk] = 0.5
            continue
        avg_mic = mic_energy.get(spk, 0.0) / frames
        avg_sys = sys_energy.get(spk, 0.0) / frames
        denom = avg_mic + avg_sys
        # Silence on both channels carries no information either way.
        mic_ratio[spk] = avg_mic / denom if denom > 0 else 0.5

    return mic_ratio
|