millet-record 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- millet_record-0.4.0/LICENSE +12 -0
- millet_record-0.4.0/NOTICE +47 -0
- millet_record-0.4.0/PKG-INFO +150 -0
- millet_record-0.4.0/README.md +116 -0
- millet_record-0.4.0/millet_record/__init__.py +76 -0
- millet_record-0.4.0/millet_record/_bin/.gitignore +11 -0
- millet_record-0.4.0/millet_record/audio.py +252 -0
- millet_record-0.4.0/millet_record/capture.py +1068 -0
- millet_record-0.4.0/millet_record/cli.py +532 -0
- millet_record-0.4.0/millet_record/languages.py +99 -0
- millet_record-0.4.0/millet_record/utils.py +54 -0
- millet_record-0.4.0/millet_record.egg-info/PKG-INFO +150 -0
- millet_record-0.4.0/millet_record.egg-info/SOURCES.txt +19 -0
- millet_record-0.4.0/millet_record.egg-info/dependency_links.txt +1 -0
- millet_record-0.4.0/millet_record.egg-info/entry_points.txt +3 -0
- millet_record-0.4.0/millet_record.egg-info/requires.txt +8 -0
- millet_record-0.4.0/millet_record.egg-info/top_level.txt +1 -0
- millet_record-0.4.0/pyproject.toml +91 -0
- millet_record-0.4.0/setup.cfg +4 -0
- millet_record-0.4.0/tests/test_capture_darwin.py +598 -0
- millet_record-0.4.0/tests/test_imports.py +56 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
GNU GENERAL PUBLIC LICENSE
|
|
2
|
+
Version 3, 29 June 2007
|
|
3
|
+
|
|
4
|
+
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
|
|
5
|
+
Everyone is permitted to copy and distribute verbatim copies
|
|
6
|
+
of this license document, but changing it is not allowed.
|
|
7
|
+
|
|
8
|
+
This package (meetscribe-record) is licensed under the GPL-3.0-or-later,
|
|
9
|
+
the same license as the parent meetscribe-offline package.
|
|
10
|
+
|
|
11
|
+
For the full license text, see:
|
|
12
|
+
https://www.gnu.org/licenses/gpl-3.0.txt
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
meetscribe-record — NOTICE
|
|
2
|
+
|
|
3
|
+
The macOS sidecar `meet-record-mac` (in `mac/`) adapts code from the Recap
|
|
4
|
+
project under its MIT license:
|
|
5
|
+
|
|
6
|
+
https://github.com/RecapAI/Recap
|
|
7
|
+
Copyright (c) 2025 Rawand Ahmed Shaswar
|
|
8
|
+
|
|
9
|
+
Specifically, the Core Audio Process Tap + Aggregate Device setup in
|
|
10
|
+
`mac/Sources/MeetRecordMac/ProcessTap.swift` is based on:
|
|
11
|
+
|
|
12
|
+
Recap/Audio/Capture/Tap/ProcessTap.swift
|
|
13
|
+
|
|
14
|
+
and the structural shape of the recording pipeline draws on:
|
|
15
|
+
|
|
16
|
+
Recap/Audio/Capture/MicrophoneCapture+AudioEngine.swift
|
|
17
|
+
Recap/Audio/Processing/RecordingCoordinator.swift
|
|
18
|
+
|
|
19
|
+
The original MIT license text accompanying that source is preserved here:
|
|
20
|
+
|
|
21
|
+
----------------------------------------------------------------------
|
|
22
|
+
MIT License
|
|
23
|
+
|
|
24
|
+
Copyright (c) 2025 Rawand Ahmed Shaswar
|
|
25
|
+
|
|
26
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
27
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
28
|
+
in the Software without restriction, including without limitation the rights
|
|
29
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
30
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
31
|
+
furnished to do so, subject to the following conditions:
|
|
32
|
+
|
|
33
|
+
The above copyright notice and this permission notice shall be included in
|
|
34
|
+
all copies or substantial portions of the Software.
|
|
35
|
+
|
|
36
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
37
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
38
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
39
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
40
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
41
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
42
|
+
THE SOFTWARE.
|
|
43
|
+
----------------------------------------------------------------------
|
|
44
|
+
|
|
45
|
+
The adapted Swift source files in `mac/Sources/MeetRecordMac/` are
|
|
46
|
+
distributed as part of meetscribe-record under GPL-3.0-or-later, which is
|
|
47
|
+
compatible with incorporating MIT-licensed source.
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: millet-record
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Summary: Lightweight capture-only subset of millet (formerly meetscribe-record): record dual-channel meeting audio (mic + system) with no transcription/diarization/summarization deps. Named after the Ottoman millet system. Part of the vezir ecosystem.
|
|
5
|
+
Author: pretyflaco
|
|
6
|
+
License: GPL-3.0-or-later
|
|
7
|
+
Project-URL: Homepage, https://github.com/pretyflaco/millet-record
|
|
8
|
+
Project-URL: Repository, https://github.com/pretyflaco/millet-record
|
|
9
|
+
Project-URL: Issues, https://github.com/pretyflaco/millet-record/issues
|
|
10
|
+
Keywords: meeting,recording,audio,ffmpeg,pipewire,pulseaudio,core-audio,scribe,thin-client
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
14
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
15
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
16
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Multimedia :: Sound/Audio :: Capture/Recording
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
License-File: NOTICE
|
|
26
|
+
Requires-Dist: click>=8.0
|
|
27
|
+
Requires-Dist: numpy>=1.20
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: ruff; extra == "dev"
|
|
30
|
+
Requires-Dist: pytest; extra == "dev"
|
|
31
|
+
Requires-Dist: pytest-timeout; extra == "dev"
|
|
32
|
+
Requires-Dist: build; extra == "dev"
|
|
33
|
+
Dynamic: license-file
|
|
34
|
+
|
|
35
|
+
# millet-record
|
|
36
|
+
|
|
37
|
+
Lightweight capture-only subset of [millet](https://github.com/pretyflaco/millet)
|
|
38
|
+
(formerly meetscribe-record).
|
|
39
|
+
|
|
40
|
+
Records dual-channel meeting audio — your microphone on the left
|
|
41
|
+
channel, system/remote audio on the right — into a single stereo WAV
|
|
42
|
+
via PipeWire or PulseAudio + ffmpeg. Ships none of millet's
|
|
43
|
+
transcription, diarization, summarization, or PDF dependencies;
|
|
44
|
+
install footprint is ~30 MB instead of ~3 GB.
|
|
45
|
+
|
|
46
|
+
Full release history in [`CHANGELOG.md`](CHANGELOG.md). Named after
|
|
47
|
+
the Ottoman *millet system*. Part of the
|
|
48
|
+
[vezir](https://github.com/pretyflaco/vezir) ecosystem.
|
|
49
|
+
|
|
50
|
+
## When to use which
|
|
51
|
+
|
|
52
|
+
| Need | Install |
|
|
53
|
+
|---|---|
|
|
54
|
+
| Just record audio (e.g., for [vezir](https://github.com/pretyflaco/vezir) thin clients, or local archival) | `pip install millet-record` |
|
|
55
|
+
| Record + transcribe + diarize + summarize + PDF | `pip install millet-pipeline` (depends on millet-record) |
|
|
56
|
+
|
|
57
|
+
## Install
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pip install millet-record
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
System deps (apt example):
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
sudo apt install ffmpeg pulseaudio-utils
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## CLI
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
millet check # verify prerequisites
|
|
73
|
+
millet devices # list audio sources
|
|
74
|
+
millet record # record dual-channel WAV; Ctrl+C to stop
|
|
75
|
+
millet archive # compress past WAV recordings to OGG/Opus
|
|
76
|
+
millet request-permissions # macOS Sequoia 15+: trigger Microphone /
|
|
77
|
+
# System Audio Recording TCC prompts
|
|
78
|
+
# (Apple removed the manual '+' button in
|
|
79
|
+
# System Settings, so apps must request)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
`millet record` writes to `~/millet-recordings/meeting-YYYYMMDD-HHMMSS/...wav`
|
|
83
|
+
unless `-o` is passed. See `millet record --help` for options.
|
|
84
|
+
|
|
85
|
+
When `millet-pipeline` is also installed, additional subcommands
|
|
86
|
+
(`transcribe`, `run`, `label`, `sync`, `gui`, ...) become available
|
|
87
|
+
under the same `millet` command via Click entry-points.
|
|
88
|
+
|
|
89
|
+
### Legacy `meet` command
|
|
90
|
+
|
|
91
|
+
The pre-rename `meet` console script keeps working for two minor
|
|
92
|
+
versions (until `millet-record 0.6.0`). It prints a deprecation
|
|
93
|
+
warning on each invocation and forwards to the `millet` group. Set
|
|
94
|
+
`MILLET_SUPPRESS_DEPRECATION=1` to silence the warning during
|
|
95
|
+
transition.
|
|
96
|
+
|
|
97
|
+
## Architecture
|
|
98
|
+
|
|
99
|
+
`millet-record` exposes a stable package `millet_record` containing:
|
|
100
|
+
|
|
101
|
+
- `millet_record.capture` — ffmpeg-backed dual-channel capture
|
|
102
|
+
(RecordingSession, watchdog, drain buffer)
|
|
103
|
+
- `millet_record.audio` — stereo channel reading + ffmpeg-based audio
|
|
104
|
+
compression
|
|
105
|
+
- `millet_record.utils` — formatting helpers (HH:MM:SS, file sizes)
|
|
106
|
+
- `millet_record.languages` — language constants used by capture flow
|
|
107
|
+
- `millet_record.cli` — `millet` console-script entry point
|
|
108
|
+
|
|
109
|
+
The legacy `meet_record` package name is still importable via a
|
|
110
|
+
`sys.modules` alias + a meta-path finder, so existing
|
|
111
|
+
`from meet_record.X import …` keeps working unchanged. Removed in
|
|
112
|
+
`millet-record 0.6.0`.
|
|
113
|
+
|
|
114
|
+
`millet-pipeline` depends on this package and re-uses these modules,
|
|
115
|
+
plus its own heavy modules (transcribe, label, voiceprint, summarize,
|
|
116
|
+
sync, pdf, gui).
|
|
117
|
+
|
|
118
|
+
## macOS (Apple Silicon)
|
|
119
|
+
|
|
120
|
+
`pip install millet-record` on macOS 14.4+ Apple Silicon ships a
|
|
121
|
+
bundled `meet-record-mac` Swift sidecar that captures via Core Audio
|
|
122
|
+
Process Tap + AVAudioEngine — no PulseAudio, no BlackHole, no extra
|
|
123
|
+
install. `millet record` uses it by default.
|
|
124
|
+
|
|
125
|
+
> **Note:** the Swift binary itself is still named `meet-record-mac`
|
|
126
|
+
> for now — renaming would require macOS code-signing bundle-path
|
|
127
|
+
> changes that aren't worth doing as part of the package rename.
|
|
128
|
+
> Tracked as a follow-up; doesn't affect end users.
|
|
129
|
+
|
|
130
|
+
First run prompts for Microphone and System Audio Recording permissions
|
|
131
|
+
via the standard macOS TCC dialogs; both are required for full dual-
|
|
132
|
+
channel capture (mic on left, system on right). See
|
|
133
|
+
[`mac/README.md`](mac/README.md) for the sidecar's CLI surface, level
|
|
134
|
+
analysis recipes, and environment variables.
|
|
135
|
+
|
|
136
|
+
On **macOS Sequoia 15+**, Apple removed the manual `+` button from
|
|
137
|
+
System Settings → Privacy → Microphone, so users can no longer add
|
|
138
|
+
permissions before running the app. The `millet request-permissions`
|
|
139
|
+
subcommand explicitly calls `AVCaptureDevice.requestAccess(for: .audio)`
|
|
140
|
+
to trigger the TCC dialog. `millet check` will tell you which
|
|
141
|
+
permission is missing and suggest running `request-permissions`.
|
|
142
|
+
|
|
143
|
+
Set `MEET_RECORD_MAC=0` to force the legacy ffmpeg+PulseAudio path
|
|
144
|
+
(diagnostic kill switch only — that path will fail on a stock macOS
|
|
145
|
+
install because there is no PulseAudio device). Intel Macs and
|
|
146
|
+
macOS < 14.4 are unsupported.
|
|
147
|
+
|
|
148
|
+
## License
|
|
149
|
+
|
|
150
|
+
GPL-3.0-or-later, same as parent millet.
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# millet-record
|
|
2
|
+
|
|
3
|
+
Lightweight capture-only subset of [millet](https://github.com/pretyflaco/millet)
|
|
4
|
+
(formerly meetscribe-record).
|
|
5
|
+
|
|
6
|
+
Records dual-channel meeting audio — your microphone on the left
|
|
7
|
+
channel, system/remote audio on the right — into a single stereo WAV
|
|
8
|
+
via PipeWire or PulseAudio + ffmpeg. Ships none of millet's
|
|
9
|
+
transcription, diarization, summarization, or PDF dependencies;
|
|
10
|
+
install footprint is ~30 MB instead of ~3 GB.
|
|
11
|
+
|
|
12
|
+
Full release history in [`CHANGELOG.md`](CHANGELOG.md). Named after
|
|
13
|
+
the Ottoman *millet system*. Part of the
|
|
14
|
+
[vezir](https://github.com/pretyflaco/vezir) ecosystem.
|
|
15
|
+
|
|
16
|
+
## When to use which
|
|
17
|
+
|
|
18
|
+
| Need | Install |
|
|
19
|
+
|---|---|
|
|
20
|
+
| Just record audio (e.g., for [vezir](https://github.com/pretyflaco/vezir) thin clients, or local archival) | `pip install millet-record` |
|
|
21
|
+
| Record + transcribe + diarize + summarize + PDF | `pip install millet-pipeline` (depends on millet-record) |
|
|
22
|
+
|
|
23
|
+
## Install
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install millet-record
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
System deps (apt example):
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
sudo apt install ffmpeg pulseaudio-utils
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## CLI
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
millet check # verify prerequisites
|
|
39
|
+
millet devices # list audio sources
|
|
40
|
+
millet record # record dual-channel WAV; Ctrl+C to stop
|
|
41
|
+
millet archive # compress past WAV recordings to OGG/Opus
|
|
42
|
+
millet request-permissions # macOS Sequoia 15+: trigger Microphone /
|
|
43
|
+
# System Audio Recording TCC prompts
|
|
44
|
+
# (Apple removed the manual '+' button in
|
|
45
|
+
# System Settings, so apps must request)
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
`millet record` writes to `~/millet-recordings/meeting-YYYYMMDD-HHMMSS/...wav`
|
|
49
|
+
unless `-o` is passed. See `millet record --help` for options.
|
|
50
|
+
|
|
51
|
+
When `millet-pipeline` is also installed, additional subcommands
|
|
52
|
+
(`transcribe`, `run`, `label`, `sync`, `gui`, ...) become available
|
|
53
|
+
under the same `millet` command via Click entry-points.
|
|
54
|
+
|
|
55
|
+
### Legacy `meet` command
|
|
56
|
+
|
|
57
|
+
The pre-rename `meet` console script keeps working for two minor
|
|
58
|
+
versions (until `millet-record 0.6.0`). It prints a deprecation
|
|
59
|
+
warning on each invocation and forwards to the `millet` group. Set
|
|
60
|
+
`MILLET_SUPPRESS_DEPRECATION=1` to silence the warning during
|
|
61
|
+
transition.
|
|
62
|
+
|
|
63
|
+
## Architecture
|
|
64
|
+
|
|
65
|
+
`millet-record` exposes a stable package `millet_record` containing:
|
|
66
|
+
|
|
67
|
+
- `millet_record.capture` — ffmpeg-backed dual-channel capture
|
|
68
|
+
(RecordingSession, watchdog, drain buffer)
|
|
69
|
+
- `millet_record.audio` — stereo channel reading + ffmpeg-based audio
|
|
70
|
+
compression
|
|
71
|
+
- `millet_record.utils` — formatting helpers (HH:MM:SS, file sizes)
|
|
72
|
+
- `millet_record.languages` — language constants used by capture flow
|
|
73
|
+
- `millet_record.cli` — `millet` console-script entry point
|
|
74
|
+
|
|
75
|
+
The legacy `meet_record` package name is still importable via a
|
|
76
|
+
`sys.modules` alias + a meta-path finder, so existing
|
|
77
|
+
`from meet_record.X import …` keeps working unchanged. The alias will be removed in
|
|
78
|
+
`millet-record 0.6.0`.
|
|
79
|
+
|
|
80
|
+
`millet-pipeline` depends on this package and re-uses these modules,
|
|
81
|
+
plus its own heavy modules (transcribe, label, voiceprint, summarize,
|
|
82
|
+
sync, pdf, gui).
|
|
83
|
+
|
|
84
|
+
## macOS (Apple Silicon)
|
|
85
|
+
|
|
86
|
+
`pip install millet-record` on macOS 14.4+ Apple Silicon ships a
|
|
87
|
+
bundled `meet-record-mac` Swift sidecar that captures via Core Audio
|
|
88
|
+
Process Tap + AVAudioEngine — no PulseAudio, no BlackHole, no extra
|
|
89
|
+
install. `millet record` uses it by default.
|
|
90
|
+
|
|
91
|
+
> **Note:** the Swift binary itself is still named `meet-record-mac`
|
|
92
|
+
> for now — renaming would require macOS code-signing bundle-path
|
|
93
|
+
> changes that aren't worth doing as part of the package rename.
|
|
94
|
+
> Tracked as a follow-up; doesn't affect end users.
|
|
95
|
+
|
|
96
|
+
First run prompts for Microphone and System Audio Recording permissions
|
|
97
|
+
via the standard macOS TCC dialogs; both are required for full dual-
|
|
98
|
+
channel capture (mic on left, system on right). See
|
|
99
|
+
[`mac/README.md`](mac/README.md) for the sidecar's CLI surface, level
|
|
100
|
+
analysis recipes, and environment variables.
|
|
101
|
+
|
|
102
|
+
On **macOS Sequoia 15+**, Apple removed the manual `+` button from
|
|
103
|
+
System Settings → Privacy → Microphone, so users can no longer add
|
|
104
|
+
permissions before running the app. The `millet request-permissions`
|
|
105
|
+
subcommand explicitly calls `AVCaptureDevice.requestAccess(for: .audio)`
|
|
106
|
+
to trigger the TCC dialog. `millet check` will tell you which
|
|
107
|
+
permission is missing and suggest running `request-permissions`.
|
|
108
|
+
|
|
109
|
+
Set `MEET_RECORD_MAC=0` to force the legacy ffmpeg+PulseAudio path
|
|
110
|
+
(diagnostic kill switch only — that path will fail on a stock macOS
|
|
111
|
+
install because there is no PulseAudio device). Intel Macs and
|
|
112
|
+
macOS < 14.4 are unsupported.
|
|
113
|
+
|
|
114
|
+
## License
|
|
115
|
+
|
|
116
|
+
GPL-3.0-or-later, same as parent millet.
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""millet-record — lightweight capture-only subset of millet (formerly meetscribe-record).
|
|
2
|
+
|
|
3
|
+
Public modules:
|
|
4
|
+
millet_record.capture — RecordingSession, dual-channel capture
|
|
5
|
+
(Linux: ffmpeg+PulseAudio; macOS 14.4+
|
|
6
|
+
arm64: meet-record-mac sidecar)
|
|
7
|
+
millet_record.audio — stereo channel reading + ffmpeg compression
|
|
8
|
+
millet_record.utils — formatting helpers
|
|
9
|
+
millet_record.languages — language constants
|
|
10
|
+
millet_record.cli — `millet` console-script entry point
|
|
11
|
+
(with deprecation-aliased `meet` for two
|
|
12
|
+
minor versions)
|
|
13
|
+
|
|
14
|
+
Named after the Ottoman millet system. Part of the vezir ecosystem.
|
|
15
|
+
|
|
16
|
+
Version is the single source of truth here; pyproject.toml's
|
|
17
|
+
[project] section pulls it dynamically via setuptools.dynamic.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
__version__ = "0.4.0"
|
|
21
|
+
|
|
22
|
+
# ── Backward-compat: meet_record alias ──────────────────────────────────────
|
|
23
|
+
# Existing code (e.g. older meetscribe-offline 0.8.3 compatibility shims,
|
|
24
|
+
# third-party scripts) imports ``from meet_record.X import …``. We register
|
|
25
|
+
# this package as both ``millet_record`` (canonical) and ``meet_record``
|
|
26
|
+
# (legacy) in sys.modules so both import paths resolve to the same package.
|
|
27
|
+
# Submodules are also aliased lazily via a MetaPathFinder so we don't pay
|
|
28
|
+
# the import cost up-front (capture pulls ffmpeg detection on Linux).
|
|
29
|
+
#
|
|
30
|
+
# Removed in millet-record 0.6.0 (matches the `meet` console-script
|
|
31
|
+
# deprecation timeline).
|
|
32
|
+
import sys as _sys
|
|
33
|
+
import importlib as _importlib
|
|
34
|
+
import importlib.abc as _abc
|
|
35
|
+
import importlib.machinery as _machinery
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class _MeetRecordAliasFinder(_abc.MetaPathFinder):
    """Resolve ``meet_record`` and ``meet_record.X`` to ``millet_record[.X]``.

    The finder has to run *before* the default path-based finder.  Because
    ``sys.modules["meet_record"]`` is the real ``millet_record`` package, its
    ``__path__`` points at the real source directory; the path finder would
    therefore happily locate ``capture.py`` for ``meet_record.capture`` and
    execute it a second time under the legacy name, producing two unrelated
    module objects (duplicated globals, classes that fail ``isinstance``
    across the two import paths).
    """

    def find_spec(self, fullname, path, target=None):
        """Return an alias spec for legacy names, ``None`` for everything else.

        The canonical module is imported here so that a genuinely missing
        submodule can be reported as "not found".  The legacy name is
        deliberately *not* put into ``sys.modules`` at this point: if it
        were, the import machinery would swap in the canonical module's own
        spec and run its loader again, re-executing the module.
        """
        if fullname == "meet_record":
            return _machinery.ModuleSpec(
                fullname,
                loader=_MeetRecordAliasLoader(),
                is_package=True,
            )
        if not fullname.startswith("meet_record."):
            return None

        new_name = "millet_record." + fullname[len("meet_record."):]
        try:
            mod = _importlib.import_module(new_name)
        except ModuleNotFoundError as exc:
            if exc.name == new_name:
                # The canonical submodule simply does not exist.
                return None
            # The submodule exists but one of *its* imports is missing;
            # surface that real error instead of masking it.
            raise
        return _machinery.ModuleSpec(
            fullname,
            loader=_MeetRecordAliasLoader(mod),
            is_package=hasattr(mod, "__path__"),
        )


class _MeetRecordAliasLoader(_abc.Loader):
    """Loader that hands back the already-imported canonical module.

    Args:
        target: The canonical module to alias.  When omitted it is resolved
            from the spec name in :meth:`create_module`.
    """

    def __init__(self, target=None):
        self._target = target
        self._canonical_spec = None

    def create_module(self, spec):
        """Return the canonical module object for the legacy *spec*."""
        target = self._target
        if target is None:
            if spec.name == "meet_record":
                target = _sys.modules[__name__]
            else:
                new_name = "millet_record." + spec.name[len("meet_record."):]
                target = _sys.modules.get(new_name) or _importlib.import_module(new_name)
            self._target = target
        # importlib overwrites ``module.__spec__`` with the alias spec right
        # after this method returns; remember the real one so exec_module()
        # can put it back.
        self._canonical_spec = getattr(target, "__spec__", None)
        return target

    def exec_module(self, module):  # noqa: D401 - aliased; nothing to exec
        """Restore the canonical ``__spec__``; the module body already ran."""
        if self._canonical_spec is not None:
            module.__spec__ = self._canonical_spec
        return None


_sys.modules.setdefault("meet_record", _sys.modules[__name__])
if not any(isinstance(f, _MeetRecordAliasFinder) for f in _sys.meta_path):
    # Front of the list: see the class docstring for why the default path
    # finder must not get the first look at ``meet_record.*`` names.
    _sys.meta_path.insert(0, _MeetRecordAliasFinder())
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# Ignore everything in this directory except this file.
|
|
2
|
+
#
|
|
3
|
+
# millet_record/_bin/ holds the macOS arm64 meet-record-mac binary in
|
|
4
|
+
# the macOS wheel. The binary itself is built by mac.yml in CI and
|
|
5
|
+
# copied here at wheel-build time by release.yml; it's never
|
|
6
|
+
# committed. This .gitignore both keeps the directory present in the
|
|
7
|
+
# tree (so setuptools.package-data can reference _bin/* during build)
|
|
8
|
+
# and prevents accidental commits of a locally-built artefact.
|
|
9
|
+
|
|
10
|
+
*
|
|
11
|
+
!.gitignore
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
"""Audio utilities for millet-record (formerly meetscribe).
|
|
2
|
+
|
|
3
|
+
Low-level helpers for reading stereo audio files, computing per-speaker
|
|
4
|
+
channel energy, and compressing recordings.
|
|
5
|
+
|
|
6
|
+
Extracted from label.py and transcribe.py to eliminate duplication.
|
|
7
|
+
All I/O uses ffmpeg/ffprobe (via subprocess) so that any audio format
|
|
8
|
+
supported by ffmpeg (WAV, OGG/Opus, FLAC, …) can be read transparently.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import logging
|
|
15
|
+
import subprocess
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import NamedTuple
|
|
18
|
+
|
|
19
|
+
import numpy as np
|
|
20
|
+
|
|
21
|
+
log = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class StereoChannels(NamedTuple):
    """Parsed stereo audio data returned by :func:`read_stereo_channels`.

    ``mic`` and ``system`` hold one value per audio frame.  The samples are
    decoded as signed 16-bit PCM and then cast to float32 without any
    rescaling, so values stay in the int16 range rather than being
    normalised to [-1.0, 1.0].
    """

    mic: np.ndarray          # Left channel (your microphone), float32
    system: np.ndarray       # Right channel (system/remote audio), float32
    sample_rate: int         # Frames per second
    sampwidth: int           # Bytes per sample (always 2 — decoded to int16)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def read_stereo_channels(audio_path: Path) -> StereoChannels | None:
    """Read a stereo audio file and return separate mic and system channels.

    Uses ffmpeg to decode to raw PCM, so any format ffmpeg supports
    (WAV, OGG/Opus, FLAC, …) works transparently.  Only the first audio
    stream of the file is probed and decoded, so containers that carry a
    cover image or video stream ahead of the audio are handled too.

    Args:
        audio_path: Path to the audio file to decode.

    Returns:
        A :class:`StereoChannels` whose arrays are float32 copies (safe to
        modify), or ``None`` if the file is not stereo, cannot be opened,
        or decoding fails / yields no complete stereo frame.  This function
        never raises for those conditions; callers should fall back to a
        safe default.
    """
    # Probe channel count and sample rate of the first *audio* stream.
    probe_cmd = [
        "ffprobe", "-v", "quiet",
        "-select_streams", "a:0",
        "-show_entries", "stream=channels,sample_rate",
        "-of", "json",
        str(audio_path),
    ]
    try:
        probe = subprocess.run(probe_cmd, capture_output=True, text=True)
        if probe.returncode != 0:
            return None
        streams = json.loads(probe.stdout).get("streams") or [{}]
        stream = streams[0]
        n_channels = int(stream.get("channels", 0))
        sample_rate = int(stream.get("sample_rate", 0))
    except Exception:
        # Documented contract: any probe failure (ffprobe missing, bad JSON,
        # unexpected structure) is reported as None, never raised.
        return None

    if n_channels != 2 or sample_rate <= 0:
        return None

    # Decode the same audio stream to raw interleaved s16le PCM on stdout.
    decode_cmd = [
        "ffmpeg", "-v", "quiet",
        "-i", str(audio_path),
        "-map", "0:a:0",
        "-f", "s16le",
        "-acodec", "pcm_s16le",
        "-ar", str(sample_rate),
        "-ac", "2",
        "-",  # write to stdout
    ]
    try:
        result = subprocess.run(decode_cmd, capture_output=True)
        if result.returncode != 0:
            return None
        raw = result.stdout
    except Exception:
        return None

    # One stereo frame is 2 channels x 2 bytes.  Drop any trailing partial
    # frame at the byte level: np.frombuffer raises on a buffer whose size
    # is not a multiple of the element size.
    frame_bytes = 4
    usable = len(raw) - (len(raw) % frame_bytes)
    if usable == 0:
        return None

    samples = np.frombuffer(memoryview(raw)[:usable], dtype=np.int16)
    # astype() copies, so the returned channel arrays are writable.
    samples = samples.reshape(-1, 2).astype(np.float32)

    return StereoChannels(
        mic=samples[:, 0],
        system=samples[:, 1],
        sample_rate=sample_rate,
        sampwidth=2,
    )
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# ─── Audio compression ─────────────────────────────────────────────────────
|
|
101
|
+
|
|
102
|
+
def _get_audio_duration(path: Path) -> float | None:
|
|
103
|
+
"""Return duration in seconds via ffprobe, or None on failure."""
|
|
104
|
+
cmd = [
|
|
105
|
+
"ffprobe", "-v", "quiet",
|
|
106
|
+
"-show_entries", "format=duration",
|
|
107
|
+
"-of", "csv=p=0",
|
|
108
|
+
str(path),
|
|
109
|
+
]
|
|
110
|
+
try:
|
|
111
|
+
result = subprocess.run(cmd, capture_output=True, text=True)
|
|
112
|
+
if result.returncode == 0 and result.stdout.strip():
|
|
113
|
+
return float(result.stdout.strip())
|
|
114
|
+
except Exception:
|
|
115
|
+
pass
|
|
116
|
+
return None
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def compress_audio(
    wav_path: Path,
    *,
    keep_wav: bool = False,
    bitrate: str = "48k",
) -> Path:
    """Compress a WAV file to OGG/Opus and optionally delete the original.

    The output is written next to the input with the suffix replaced by
    ``.ogg``; an existing file at that path is overwritten.

    Args:
        wav_path: Path to the stereo WAV recording.
        keep_wav: If True, keep the WAV file after compression.
        bitrate: Opus bitrate (default 48k — transparent for speech).

    Returns:
        Path to the compressed .ogg file.

    Raises:
        RuntimeError: If the output path would be the input file itself,
            if ffmpeg fails, or if duration validation fails.
        FileNotFoundError: If the WAV file does not exist.
    """
    wav_path = Path(wav_path)
    if not wav_path.exists():
        raise FileNotFoundError(f"Audio file not found: {wav_path}")

    ogg_path = wav_path.with_suffix(".ogg")

    # Guard against input == output (e.g. the caller passed an .ogg, or the
    # names differ only by case on a case-insensitive filesystem).  Without
    # this, the failure clean-up or the final unlink below would delete the
    # caller's only copy of the recording.
    if ogg_path == wav_path or (ogg_path.exists() and ogg_path.samefile(wav_path)):
        raise RuntimeError(
            f"Refusing to compress {wav_path}: output path {ogg_path} "
            "is the input file itself"
        )

    cmd = [
        "ffmpeg", "-y", "-v", "quiet",
        "-i", str(wav_path),
        "-c:a", "libopus",
        "-b:a", bitrate,
        "-vn",
        str(ogg_path),
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        # Clean up partial output.
        ogg_path.unlink(missing_ok=True)
        raise RuntimeError(
            f"Audio compression failed (ffmpeg exit {result.returncode}): "
            f"{result.stderr.strip()}"
        )

    # Validate: durations must match within 1 second.
    wav_dur = _get_audio_duration(wav_path)
    ogg_dur = _get_audio_duration(ogg_path)
    if wav_dur is not None and ogg_dur is not None:
        if abs(wav_dur - ogg_dur) > 1.0:
            ogg_path.unlink(missing_ok=True)
            raise RuntimeError(
                f"Duration mismatch after compression: WAV={wav_dur:.1f}s "
                f"vs OGG={ogg_dur:.1f}s (diff > 1s)"
            )
    else:
        # Probing is best-effort; make the skipped check visible instead of
        # passing over it silently.
        log.warning(
            "Could not verify duration of %s against %s; skipping validation",
            ogg_path.name, wav_path.name,
        )

    # Gather sizes for logging before potentially deleting the WAV.
    wav_size = wav_path.stat().st_size
    ogg_size = ogg_path.stat().st_size
    ratio = wav_size / ogg_size if ogg_size > 0 else 0

    if not keep_wav:
        wav_path.unlink()
        log.info("Deleted %s after compression", wav_path.name)

    log.info(
        "Compressed %s -> %s (%.1f MB -> %.1f MB, %.0fx)",
        wav_path.name, ogg_path.name,
        wav_size / 1_048_576, ogg_size / 1_048_576, ratio,
    )

    return ogg_path
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def compute_speaker_channel_energy(
    mic_ch: np.ndarray,
    sys_ch: np.ndarray,
    segments: list,  # list[Segment] — avoid circular import
    sample_rate: int,
) -> dict[str, float]:
    """Compute the mic-channel energy ratio for each speaker.

    For each speaker, the per-segment RMS on the mic channel and on the
    system channel is averaged across all their segments, weighted by
    segment length in frames.  The result maps ``speaker_id -> mic_ratio``
    where::

        mic_ratio = avg_mic_rms / (avg_mic_rms + avg_sys_rms)

    A ratio > 0.5 means the speaker is dominant on the mic (i.e. YOU).
    A speaker whose segments are completely silent on both channels gets
    0.5 (unknown).  Segments without a speaker, and segments that fall
    entirely outside the audio, are ignored; a speaker with no usable
    segment does not appear in the result.

    Args:
        mic_ch: Float32 array of left-channel (mic) samples.
        sys_ch: Float32 array of right-channel (system) samples.
        segments: List of Segment objects with .start, .end, .speaker.
        sample_rate: Frames per second (used to convert timestamps to indices).

    Returns:
        Dict mapping speaker ID to mic-ratio float in [0.0, 1.0].
    """
    # Clamp to the frames present in *both* channels.  Clamping to the mic
    # length alone lets a segment index past a shorter system channel; the
    # empty slice then has a NaN mean, which poisons that speaker's totals
    # and forces their ratio to 0.5 regardless of their valid segments.
    n = min(len(mic_ch), len(sys_ch))
    mic_energy: dict[str, float] = {}
    sys_energy: dict[str, float] = {}
    total_frames: dict[str, int] = {}

    for seg in segments:
        if not seg.speaker:
            continue
        start = max(0, min(int(seg.start * sample_rate), n))
        end = max(0, min(int(seg.end * sample_rate), n))
        if end <= start:
            continue

        mic_slice = mic_ch[start:end]
        sys_slice = sys_ch[start:end]
        count = end - start

        mic_rms = float(np.sqrt(np.mean(mic_slice ** 2)))
        sys_rms = float(np.sqrt(np.mean(sys_slice ** 2)))

        # Weight each segment's RMS by its length so long segments count more.
        spk = seg.speaker
        mic_energy[spk] = mic_energy.get(spk, 0.0) + mic_rms * count
        sys_energy[spk] = sys_energy.get(spk, 0.0) + sys_rms * count
        total_frames[spk] = total_frames.get(spk, 0) + count

    mic_ratio: dict[str, float] = {}
    for spk, frames in total_frames.items():
        # frames is always > 0 here: only non-empty segments are accumulated.
        avg_mic = mic_energy[spk] / frames
        avg_sys = sys_energy[spk] / frames
        denom = avg_mic + avg_sys
        mic_ratio[spk] = avg_mic / denom if denom > 0 else 0.5

    return mic_ratio
|