millet-record 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,76 @@
1
+ """millet-record — lightweight capture-only subset of millet (formerly meetscribe-record).
2
+
3
+ Public modules:
4
+ millet_record.capture — RecordingSession, dual-channel capture
5
+ (Linux: ffmpeg+PulseAudio; macOS 14.4+
6
+ arm64: meet-record-mac sidecar)
7
+ millet_record.audio — stereo channel reading + ffmpeg compression
8
+ millet_record.utils — formatting helpers
9
+ millet_record.languages — language constants
10
+ millet_record.cli — `millet` console-script entry point
11
+ (with deprecation-aliased `meet` for two
12
+ minor versions)
13
+
14
+ Named after the Ottoman millet system. Part of the vezir ecosystem.
15
+
16
+ Version is the single source of truth here; pyproject.toml's
17
+ [project] section pulls it dynamically via setuptools.dynamic.
18
+ """
19
+
20
+ __version__ = "0.4.0"
21
+
22
+ # ── Backward-compat: meet_record alias ──────────────────────────────────────
23
+ # Existing code (e.g. older meetscribe-offline 0.8.3 compatibility shims,
24
+ # third-party scripts) imports ``from meet_record.X import …``. We register
25
+ # this package as both ``millet_record`` (canonical) and ``meet_record``
26
+ # (legacy) in sys.modules so both import paths resolve to the same package.
27
+ # Submodules are also aliased lazily via a MetaPathFinder so we don't pay
28
+ # the import cost up-front (capture pulls ffmpeg detection on Linux).
29
+ #
30
+ # Scheduled for removal in millet-record 0.6.0 (matches the `meet`
31
+ # console-script deprecation timeline).
32
+ import sys as _sys
33
+ import importlib as _importlib
34
+ import importlib.abc as _abc
35
+ import importlib.machinery as _machinery
36
+
37
+
38
class _MeetRecordAliasFinder(_abc.MetaPathFinder):
    """Resolve ``meet_record`` and ``meet_record.X`` to ``millet_record[.X]``.

    The finder never loads code itself: for a legacy name it imports the
    canonical module and hands back a spec whose loader simply returns
    that already-imported module object.
    """

    def find_spec(self, fullname, path, target=None):
        """Return an alias spec for legacy ``meet_record`` names.

        Args:
            fullname: Fully qualified name of the module being imported.
            path: Parent package ``__path__`` (unused).
            target: Optional module hint from the import system (unused).

        Returns:
            A ``ModuleSpec`` backed by ``_MeetRecordAliasLoader`` when
            *fullname* is ``meet_record`` or an existing
            ``meet_record.X`` submodule; ``None`` for every other name,
            and for legacy names whose canonical counterpart does not
            exist.

        Raises:
            ImportError: If the canonical module exists but fails to
                import (for example because one of its own dependencies
                is missing). The original error is propagated so the
                real cause is not masked by a misleading
                "No module named 'meet_record.X'".
        """
        if fullname == "meet_record":
            return _machinery.ModuleSpec(
                fullname,
                loader=_MeetRecordAliasLoader(),
                is_package=True,
            )
        if not fullname.startswith("meet_record."):
            return None

        new_name = "millet_record." + fullname[len("meet_record."):]
        try:
            mod = _importlib.import_module(new_name)
        except ModuleNotFoundError as exc:
            missing = exc.name
            # Only "the canonical module (or one of its parent packages)
            # does not exist" means there is nothing to alias. Any other
            # missing module is a real failure inside the target.
            if missing is not None and (
                new_name == missing or new_name.startswith(missing + ".")
            ):
                return None
            raise

        # Publish the alias right away so both names map to one object.
        _sys.modules[fullname] = mod
        return _machinery.ModuleSpec(
            fullname,
            loader=_MeetRecordAliasLoader(),
            is_package=hasattr(mod, "__path__"),
        )
61
+
62
+
63
class _MeetRecordAliasLoader(_abc.Loader):
    """Loader that hands back the already-imported canonical module.

    Nothing is executed: the "new" module for an alias spec is the
    existing ``millet_record`` module object itself.
    """

    def create_module(self, spec):
        """Return the canonical module that *spec* aliases.

        The canonical module's own ``__spec__`` is stashed in
        ``spec.loader_state`` because the import machinery overwrites
        ``module.__spec__`` with the alias spec right after this call;
        ``exec_module`` puts it back.
        """
        if spec.name == "meet_record":
            module = _sys.modules[__name__]
        else:
            new_name = "millet_record." + spec.name[len("meet_record."):]
            module = _sys.modules.get(new_name) or _importlib.import_module(new_name)

        original = getattr(module, "__spec__", None)
        # If an earlier alias load was interrupted before exec_module ran,
        # the module may still carry an alias spec; unwrap it so the
        # genuine spec is never lost.
        if isinstance(getattr(original, "loader", None), _MeetRecordAliasLoader):
            original = getattr(original, "loader_state", None)
        spec.loader_state = original
        return module

    def exec_module(self, module):  # noqa: D401 - aliased; nothing to exec
        """Restore the canonical ``__spec__``; there is no code to run."""
        alias_spec = getattr(module, "__spec__", None)
        # Only act when the module currently carries the spec this loader
        # belongs to, so a genuine spec is never touched.
        if getattr(alias_spec, "loader", None) is self:
            original = getattr(alias_spec, "loader_state", None)
            if original is not None:
                module.__spec__ = original
        return None
72
+
73
+
74
# Register the package itself under the legacy name (a no-op if something
# already claimed ``meet_record``).
_sys.modules.setdefault("meet_record", _sys.modules[__name__])

# The finder has to run *before* the default path-based finder. Once
# ``meet_record`` is aliased to this package, the path-based finder would
# otherwise resolve ``meet_record.X`` through this package's ``__path__``
# and load a second, independent copy of ``X`` under the legacy name.
#
# The duplicate check compares by module and class name rather than
# ``isinstance`` so that re-executing this file (e.g. importlib.reload),
# which defines a fresh class object, does not stack a second finder.
if not any(
    type(f).__module__ == __name__ and type(f).__name__ == "_MeetRecordAliasFinder"
    for f in _sys.meta_path
):
    _sys.meta_path.insert(0, _MeetRecordAliasFinder())
@@ -0,0 +1,11 @@
1
+ # Ignore everything in this directory except this file.
2
+ #
3
+ # meet_record/_bin/ holds the macOS arm64 meet-record-mac binary in
4
+ # the macOS wheel. The binary itself is built by mac.yml in CI and
5
+ # copied here at wheel-build time by release.yml; it's never
6
+ # committed. This .gitignore both keeps the directory present in the
7
+ # tree (so setuptools.package-data can reference _bin/* during build)
8
+ # and prevents accidental commits of a locally-built artefact.
9
+
10
+ *
11
+ !.gitignore
millet_record/audio.py ADDED
@@ -0,0 +1,252 @@
1
+ """Audio utilities for millet-record (originally written for meetscribe).
2
+
3
+ Low-level helpers for reading stereo audio files, computing per-speaker
4
+ channel energy, and compressing recordings.
5
+
6
+ Extracted from label.py and transcribe.py to eliminate duplication.
7
+ All I/O uses ffmpeg/ffprobe (via subprocess) so that any audio format
8
+ supported by ffmpeg (WAV, OGG/Opus, FLAC, …) can be read transparently.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import logging
15
+ import subprocess
16
+ from pathlib import Path
17
+ from typing import NamedTuple
18
+
19
+ import numpy as np
20
+
21
+ log = logging.getLogger(__name__)
22
+
23
+
24
class StereoChannels(NamedTuple):
    """Stereo recording split into its two channels.

    This is the value produced by :func:`read_stereo_channels`.
    """

    # Left channel (your microphone), float32 samples.
    mic: np.ndarray
    # Right channel (system / remote audio), float32 samples.
    system: np.ndarray
    # Frames per second.
    sample_rate: int
    # Bytes per sample; always 2 because audio is decoded to int16.
    sampwidth: int
31
+
32
+
33
def read_stereo_channels(audio_path: Path) -> StereoChannels | None:
    """Read a stereo audio file and return separate mic and system channels.

    The file is probed with ffprobe and decoded with ffmpeg to raw
    interleaved s16le PCM, so any format ffmpeg supports (WAV, OGG/Opus,
    FLAC, ...) works transparently.

    Args:
        audio_path: Path to the audio file to decode.

    Returns:
        A :class:`StereoChannels` whose ``mic`` (left) and ``system``
        (right) arrays are float32 and safe to modify, or ``None`` if
        the file is not stereo, cannot be probed or decoded, or does not
        contain at least one complete stereo frame. This function never
        raises for a bad file; callers should fall back to a safe
        default when they get ``None``.
    """
    # Probe channel count and sample rate first.
    probe_cmd = [
        "ffprobe", "-v", "quiet",
        "-show_entries", "stream=channels,sample_rate",
        "-of", "json",
        str(audio_path),
    ]
    try:
        probe = subprocess.run(probe_cmd, capture_output=True, text=True)
        if probe.returncode != 0:
            return None
        info = json.loads(probe.stdout)
        stream = info.get("streams", [{}])[0]
        n_channels = int(stream.get("channels", 0))
        sample_rate = int(stream.get("sample_rate", 0))
    except Exception:
        # Deliberate best-effort: missing ffprobe, malformed JSON and an
        # empty stream list all mean "cannot read this file".
        return None

    if n_channels != 2 or sample_rate <= 0:
        return None

    # Decode the full file to raw s16le PCM via ffmpeg.
    decode_cmd = [
        "ffmpeg", "-v", "quiet",
        "-i", str(audio_path),
        "-f", "s16le",
        "-acodec", "pcm_s16le",
        "-ar", str(sample_rate),
        "-ac", "2",
        "-",  # write to stdout
    ]
    try:
        result = subprocess.run(decode_cmd, capture_output=True)
        if result.returncode != 0:
            return None
        raw = result.stdout
    except Exception:
        return None

    # One stereo frame is two int16 samples (4 bytes). Trim a truncated
    # trailing frame *before* handing the buffer to NumPy: frombuffer
    # raises ValueError when the byte count is not a multiple of the item
    # size, which would break the "returns None instead of raising"
    # contract.
    frame_bytes = 2 * np.dtype(np.int16).itemsize
    usable = len(raw) - (len(raw) % frame_bytes)
    if usable == 0:
        return None

    samples = np.frombuffer(memoryview(raw)[:usable], dtype=np.int16)
    # astype() copies, so the returned channels are writable and do not
    # alias the read-only decode buffer.
    samples = samples.reshape(-1, 2).astype(np.float32)

    return StereoChannels(
        mic=samples[:, 0],
        system=samples[:, 1],
        sample_rate=sample_rate,
        sampwidth=2,
    )
98
+
99
+
100
+ # ─── Audio compression ─────────────────────────────────────────────────────
101
+
102
def _get_audio_duration(path: Path) -> float | None:
    """Ask ffprobe for the duration of *path* in seconds.

    Returns ``None`` whenever the duration cannot be determined: ffprobe
    could not be run, exited non-zero, printed nothing, or printed
    something that is not a number.
    """
    probe_args = [
        "ffprobe", "-v", "quiet",
        "-show_entries", "format=duration",
        "-of", "csv=p=0",
        str(path),
    ]
    try:
        completed = subprocess.run(probe_args, capture_output=True, text=True)
        reported = completed.stdout.strip()
        if completed.returncode != 0 or not reported:
            return None
        return float(reported)
    except Exception:
        # Best-effort probe: any failure just means "duration unknown".
        return None
117
+
118
+
119
def compress_audio(
    wav_path: Path,
    *,
    keep_wav: bool = False,
    bitrate: str = "48k",
) -> Path:
    """Compress a WAV file to OGG/Opus and optionally delete the original.

    Args:
        wav_path: Path to the stereo WAV recording.
        keep_wav: If True, keep the WAV file after compression.
        bitrate: Opus bitrate (default 48k — transparent for speech).

    Returns:
        Path to the compressed .ogg file (same location and stem as the
        input, with an ``.ogg`` suffix).

    Raises:
        RuntimeError: If the output path would be the input file itself,
            if ffmpeg fails, or if duration validation fails.
        FileNotFoundError: If the WAV file does not exist.
    """
    wav_path = Path(wav_path)
    if not wav_path.exists():
        raise FileNotFoundError(f"Audio file not found: {wav_path}")

    ogg_path = wav_path.with_suffix(".ogg")

    # Guard against input == output (e.g. the caller passed an .ogg file).
    # Without this, the failure path below would unlink "the partial
    # output" — which is the caller's only copy of the recording.
    # samefile() additionally catches case-insensitive filesystems and
    # links that point back at the input.
    if ogg_path == wav_path or (ogg_path.exists() and ogg_path.samefile(wav_path)):
        raise RuntimeError(
            f"Refusing to compress {wav_path}: the output path {ogg_path} "
            f"is the input file itself"
        )

    cmd = [
        # "-v error" (not "quiet") so that a failure leaves a diagnostic
        # on stderr for the exception message below.
        "ffmpeg", "-y", "-v", "error",
        "-i", str(wav_path),
        "-c:a", "libopus",
        "-b:a", bitrate,
        "-vn",
        str(ogg_path),
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        # Clean up partial output.
        ogg_path.unlink(missing_ok=True)
        raise RuntimeError(
            f"Audio compression failed (ffmpeg exit {result.returncode}): "
            f"{result.stderr.strip()}"
        )

    # Validate: durations must match within 1 second. The check is
    # skipped when either duration cannot be probed.
    wav_dur = _get_audio_duration(wav_path)
    ogg_dur = _get_audio_duration(ogg_path)
    if wav_dur is not None and ogg_dur is not None:
        if abs(wav_dur - ogg_dur) > 1.0:
            ogg_path.unlink(missing_ok=True)
            raise RuntimeError(
                f"Duration mismatch after compression: WAV={wav_dur:.1f}s "
                f"vs OGG={ogg_dur:.1f}s (diff > 1s)"
            )

    # Gather sizes for logging before potentially deleting the WAV.
    wav_size = wav_path.stat().st_size
    ogg_size = ogg_path.stat().st_size
    ratio = wav_size / ogg_size if ogg_size > 0 else 0

    if not keep_wav:
        wav_path.unlink()
        log.info("Deleted %s after compression", wav_path.name)

    log.info(
        "Compressed %s -> %s (%.1f MB -> %.1f MB, %.0fx)",
        wav_path.name, ogg_path.name,
        wav_size / 1_048_576, ogg_size / 1_048_576, ratio,
    )

    return ogg_path
189
+
190
+
191
def compute_speaker_channel_energy(
    mic_ch: np.ndarray,
    sys_ch: np.ndarray,
    segments: list,          # list[Segment] — avoid circular import
    sample_rate: int,
) -> dict[str, float]:
    """Compute the mic-channel energy ratio for each speaker.

    For each speaker, the per-segment RMS on the mic channel and on the
    system channel is averaged across all of that speaker's segments,
    weighted by segment length in frames. The result maps
    ``speaker_id -> mic_ratio`` where::

        mic_ratio = avg_mic_rms / (avg_mic_rms + avg_sys_rms)

    A ratio > 0.5 means the speaker is dominant on the mic (i.e. YOU).
    Speakers whose segments cover no audio frames (for example because
    they lie entirely past the end of the recording), or whose audio is
    completely silent on both channels, get a ratio of 0.5 (unknown).

    Args:
        mic_ch:      Float32 array of left-channel (mic) samples.
        sys_ch:      Float32 array of right-channel (system) samples.
        segments:    List of Segment objects with .start, .end, .speaker.
                     Segments with a falsy speaker are ignored.
        sample_rate: Frames per second (used to convert timestamps to indices).

    Returns:
        Dict mapping speaker ID to mic-ratio float in [0.0, 1.0].
    """
    # Clamp to the shorter channel so both slices always cover the same
    # frames; a slice that is empty on one side only would yield NaN.
    n = min(len(mic_ch), len(sys_ch))
    mic_energy: dict[str, float] = {}
    sys_energy: dict[str, float] = {}
    total_frames: dict[str, int] = {}

    for seg in segments:
        spk = seg.speaker
        if not spk:
            continue
        # Register the speaker before any skip below, so a speaker with
        # no usable frames still appears in the result as 0.5 rather
        # than being silently dropped.
        total_frames.setdefault(spk, 0)

        start = max(0, min(int(seg.start * sample_rate), n))
        end = max(0, min(int(seg.end * sample_rate), n))
        if end <= start:
            continue

        count = end - start
        mic_rms = float(np.sqrt(np.mean(mic_ch[start:end] ** 2)))
        sys_rms = float(np.sqrt(np.mean(sys_ch[start:end] ** 2)))

        # Weight each segment's RMS by its length so long segments count
        # proportionally more than short ones.
        mic_energy[spk] = mic_energy.get(spk, 0.0) + mic_rms * count
        sys_energy[spk] = sys_energy.get(spk, 0.0) + sys_rms * count
        total_frames[spk] += count

    mic_ratio: dict[str, float] = {}
    for spk, frames in total_frames.items():
        if frames == 0:
            mic_ratio[spk] = 0.5
            continue
        avg_mic = mic_energy.get(spk, 0.0) / frames
        avg_sys = sys_energy.get(spk, 0.0) / frames
        denom = avg_mic + avg_sys
        mic_ratio[spk] = avg_mic / denom if denom > 0 else 0.5

    return mic_ratio