bitrater 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bitrater-0.1.1/.github/workflows/publish.yml +50 -0
- bitrater-0.1.1/.gitignore +68 -0
- bitrater-0.1.1/LICENSE +21 -0
- bitrater-0.1.1/MODEL_CARD.md +155 -0
- bitrater-0.1.1/PKG-INFO +209 -0
- bitrater-0.1.1/README.md +163 -0
- bitrater-0.1.1/beetsplug/__init__.py +0 -0
- bitrater-0.1.1/beetsplug/bitrater/__init__.py +9 -0
- bitrater-0.1.1/beetsplug/bitrater/plugin.py +406 -0
- bitrater-0.1.1/bitrater/__init__.py +12 -0
- bitrater-0.1.1/bitrater/_threading.py +39 -0
- bitrater-0.1.1/bitrater/analyzer.py +217 -0
- bitrater-0.1.1/bitrater/cli.py +85 -0
- bitrater-0.1.1/bitrater/constants.py +80 -0
- bitrater-0.1.1/bitrater/cutoff_detector.py +215 -0
- bitrater-0.1.1/bitrater/dl_inference.py +257 -0
- bitrater-0.1.1/bitrater/dl_model.py +169 -0
- bitrater-0.1.1/bitrater/feature_cache.py +259 -0
- bitrater-0.1.1/bitrater/file_analyzer.py +170 -0
- bitrater-0.1.1/bitrater/models/stage1_cnn.onnx +0 -0
- bitrater-0.1.1/bitrater/models/stage2_seq.onnx +0 -0
- bitrater-0.1.1/bitrater/spectrum.py +727 -0
- bitrater-0.1.1/bitrater/transcode.py +703 -0
- bitrater-0.1.1/bitrater/transcode_detector.py +66 -0
- bitrater-0.1.1/bitrater/types.py +118 -0
- bitrater-0.1.1/pyproject.toml +123 -0
- bitrater-0.1.1/tests/__init__.py +0 -0
- bitrater-0.1.1/tests/conftest.py +204 -0
- bitrater-0.1.1/tests/test_analyzer.py +111 -0
- bitrater-0.1.1/tests/test_analyzer_coverage.py +258 -0
- bitrater-0.1.1/tests/test_cli.py +169 -0
- bitrater-0.1.1/tests/test_cutoff_detector.py +138 -0
- bitrater-0.1.1/tests/test_dl_inference.py +83 -0
- bitrater-0.1.1/tests/test_feature_cache.py +215 -0
- bitrater-0.1.1/tests/test_feature_cache_format.py +106 -0
- bitrater-0.1.1/tests/test_file_analyzer.py +235 -0
- bitrater-0.1.1/tests/test_file_analyzer_coverage.py +203 -0
- bitrater-0.1.1/tests/test_integration.py +60 -0
- bitrater-0.1.1/tests/test_plugin.py +312 -0
- bitrater-0.1.1/tests/test_plugin_no_print.py +42 -0
- bitrater-0.1.1/tests/test_psd_cache.py +78 -0
- bitrater-0.1.1/tests/test_spectrum.py +336 -0
- bitrater-0.1.1/tests/test_spectrum_coverage.py +267 -0
- bitrater-0.1.1/tests/test_threading.py +91 -0
- bitrater-0.1.1/tests/test_transcode.py +707 -0
- bitrater-0.1.1/tests/test_transcode_detector.py +138 -0
- bitrater-0.1.1/uv.lock +2148 -0
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
build:
|
|
10
|
+
name: Build distribution
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
|
|
15
|
+
- name: Set up Python
|
|
16
|
+
uses: actions/setup-python@v5
|
|
17
|
+
with:
|
|
18
|
+
python-version: "3.12"
|
|
19
|
+
|
|
20
|
+
- name: Install build tools
|
|
21
|
+
run: pip install build
|
|
22
|
+
|
|
23
|
+
- name: Build package
|
|
24
|
+
run: python -m build
|
|
25
|
+
|
|
26
|
+
- name: Upload distribution artifacts
|
|
27
|
+
uses: actions/upload-artifact@v4
|
|
28
|
+
with:
|
|
29
|
+
name: dist
|
|
30
|
+
path: dist/
|
|
31
|
+
|
|
32
|
+
publish:
|
|
33
|
+
name: Publish to PyPI
|
|
34
|
+
needs: build
|
|
35
|
+
runs-on: ubuntu-latest
|
|
36
|
+
environment:
|
|
37
|
+
name: pypi
|
|
38
|
+
url: https://pypi.org/p/bitrater
|
|
39
|
+
permissions:
|
|
40
|
+
id-token: write # Required for OIDC trusted publishing
|
|
41
|
+
|
|
42
|
+
steps:
|
|
43
|
+
- name: Download distribution artifacts
|
|
44
|
+
uses: actions/download-artifact@v4
|
|
45
|
+
with:
|
|
46
|
+
name: dist
|
|
47
|
+
path: dist/
|
|
48
|
+
|
|
49
|
+
- name: Publish to PyPI
|
|
50
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
develop-eggs/
|
|
9
|
+
dist/
|
|
10
|
+
downloads/
|
|
11
|
+
eggs/
|
|
12
|
+
.eggs/
|
|
13
|
+
lib/
|
|
14
|
+
lib64/
|
|
15
|
+
parts/
|
|
16
|
+
sdist/
|
|
17
|
+
var/
|
|
18
|
+
wheels/
|
|
19
|
+
*.egg-info/
|
|
20
|
+
.installed.cfg
|
|
21
|
+
*.egg
|
|
22
|
+
MANIFEST
|
|
23
|
+
|
|
24
|
+
# Virtual Environment
|
|
25
|
+
.venv/
|
|
26
|
+
venv/
|
|
27
|
+
ENV/
|
|
28
|
+
|
|
29
|
+
# IDE
|
|
30
|
+
.idea/
|
|
31
|
+
.vscode/
|
|
32
|
+
*.swp
|
|
33
|
+
*.swo
|
|
34
|
+
|
|
35
|
+
# Testing
|
|
36
|
+
.coverage
|
|
37
|
+
htmlcov/
|
|
38
|
+
.pytest_cache/
|
|
39
|
+
.tox/
|
|
40
|
+
|
|
41
|
+
# Misc
|
|
42
|
+
.DS_Store
|
|
43
|
+
|
|
44
|
+
# AI dev tools
|
|
45
|
+
.claude/
|
|
46
|
+
.serena/
|
|
47
|
+
.worktrees/
|
|
48
|
+
.mcp.json
|
|
49
|
+
CLAUDE.md
|
|
50
|
+
|
|
51
|
+
# Development docs and research papers
|
|
52
|
+
docs/
|
|
53
|
+
documents/
|
|
54
|
+
images/
|
|
55
|
+
|
|
56
|
+
# Training data (large files, user-provided)
|
|
57
|
+
training_data/lossless/
|
|
58
|
+
training_data/encoded/
|
|
59
|
+
training_data/cache/
|
|
60
|
+
training_data/analysis_results/
|
|
61
|
+
training_data/logs/
|
|
62
|
+
|
|
63
|
+
# Log files
|
|
64
|
+
encoding_log_*.txt
|
|
65
|
+
*.log
|
|
66
|
+
|
|
67
|
+
# VS Code workspace
|
|
68
|
+
*.code-workspace
|
bitrater-0.1.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 yamsnjams
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# Model Card: Bitrater Deep Learning Classifier
|
|
2
|
+
|
|
3
|
+
## Model Details
|
|
4
|
+
|
|
5
|
+
- **Architecture**: Two-stage CNN + BiLSTM with multi-head attention
|
|
6
|
+
- **Task**: 7-class audio bitrate classification
|
|
7
|
+
- **Version**: 1.0
|
|
8
|
+
- **Files**: `bitrater/models/stage1_cnn.onnx` (1.7 MB), `bitrater/models/stage2_seq.onnx` (3.1 MB)
|
|
9
|
+
- **Inference**: ONNX Runtime (no PyTorch required)
|
|
10
|
+
- **Training**: PyTorch (optional `[training]` extra)
|
|
11
|
+
- **License**: MIT (same as project)
|
|
12
|
+
|
|
13
|
+
## Architecture
|
|
14
|
+
|
|
15
|
+
### Stage 1: CNN Feature Extractor (~430K parameters)
|
|
16
|
+
|
|
17
|
+
Extracts a 128-dimensional feature vector from each 2-second spectrogram window.
|
|
18
|
+
|
|
19
|
+
| Layer | Output Shape | Notes |
|
|
20
|
+
|-------|-------------|-------|
|
|
21
|
+
| Conv2d(1, 32) + BN + ReLU + MaxPool2d(2) | (32, 64, T/2) | |
|
|
22
|
+
| Conv2d(32, 64) + BN + ReLU + MaxPool2d(2) | (64, 32, T/4) | |
|
|
23
|
+
| Conv2d(64, 128) + BN + ReLU + MaxPool2d(2) | (128, 16, T/8) | |
|
|
24
|
+
| Conv2d(128, 256) + BN + ReLU + MaxPool2d(2) | (256, 8, T/16) | |
|
|
25
|
+
| AdaptiveAvgPool2d(1) + Linear(256, 128) | (128,) | Feature vector |
|
|
26
|
+
|
|
27
|
+
Dropout (p=0.2) after each conv block.
|
|
28
|
+
|
|
29
|
+
### Stage 2: Sequence Classifier (~700K parameters)
|
|
30
|
+
|
|
31
|
+
Processes sequences of 48 CNN feature vectors with auxiliary features.
|
|
32
|
+
|
|
33
|
+
- **BiLSTM**: 2-layer, hidden_dim=128, bidirectional (output: 256-dim per timestep)
|
|
34
|
+
- **Multi-head attention pooling**: 4 heads over LSTM outputs
|
|
35
|
+
- **Auxiliary input**: 211-dim vector (181 SVM spectral features + 30 global modulation DCT features)
|
|
36
|
+
- **Classifier head**: Linear(256+211, 256) -> ReLU -> Linear(256, 7)
|
|
37
|
+
- **VBR auxiliary head**: Linear(256, 32) -> ReLU -> Linear(32, 1)
|
|
38
|
+
|
|
39
|
+
### Input Representation
|
|
40
|
+
|
|
41
|
+
**Dual-band spectrogram** (128 frequency bins x T time frames):
|
|
42
|
+
- Bins 0-63: Mel-scale filterbank, 0-16 kHz (captures overall spectral content)
|
|
43
|
+
- Bins 64-127: Linear-scale, 16-22 kHz with ~94 Hz resolution (captures MP3 compression cutoffs)
|
|
44
|
+
|
|
45
|
+
This gives 3x better frequency resolution in the critical 16-22 kHz region compared to a standard 128-bin mel spectrogram, enabling precise detection of:
|
|
46
|
+
- CBR 192 cutoff at ~18.6 kHz
|
|
47
|
+
- CBR 320 cutoff at ~20.5 kHz
|
|
48
|
+
- VBR's variable cutoff behavior vs CBR's fixed cutoff
|
|
49
|
+
|
|
50
|
+
### Inference Pipeline
|
|
51
|
+
|
|
52
|
+
For each file:
|
|
53
|
+
1. Load audio at 44.1 kHz mono
|
|
54
|
+
2. Compute shared STFT (n_fft=2048, hop=512)
|
|
55
|
+
3. Extract dual-band spectrograms for 2-second windows (1-second hop)
|
|
56
|
+
4. CNN forward pass on all needed windows -> 128-dim feature vectors
|
|
57
|
+
5. Extract global modulation features (2D DCT over log-mel, 30 coefficients)
|
|
58
|
+
6. Extract SVM spectral features (181-dim PSD/cutoff/temporal/artifact vector)
|
|
59
|
+
7. Sliding window over sequences of 48 CNN features + auxiliary features
|
|
60
|
+
8. Average softmax probabilities across all sequences -> final prediction
|
|
61
|
+
|
|
62
|
+
Typical inference time: 2-5 seconds per file on CPU.
|
|
63
|
+
|
|
64
|
+
## Training
|
|
65
|
+
|
|
66
|
+
### Dataset
|
|
67
|
+
|
|
68
|
+
Training data was generated by transcoding a collection of ~2,400 lossless FLAC files (85 GB) to MP3 at each target bitrate using LAME:
|
|
69
|
+
|
|
70
|
+
| Class | Encoding | Files | Description |
|
|
71
|
+
|-------|----------|-------|-------------|
|
|
72
|
+
| 128 | `lame -b 128` | ~2,400 | CBR 128 kbps |
|
|
73
|
+
| V2 | `lame -V 2` | ~2,400 | VBR preset 2 (~170-210 kbps) |
|
|
74
|
+
| 192 | `lame -b 192` | ~2,400 | CBR 192 kbps |
|
|
75
|
+
| V0 | `lame -V 0` | ~2,400 | VBR preset 0 (~220-260 kbps) |
|
|
76
|
+
| 256 | `lame -b 256` | ~2,400 | CBR 256 kbps |
|
|
77
|
+
| 320 | `lame -b 320` | ~2,400 | CBR 320 kbps |
|
|
78
|
+
| LOSSLESS | (original) | ~2,400 | Source FLAC files |
|
|
79
|
+
|
|
80
|
+
Total: ~16,800 files across 7 classes. Split: 80% train, 10% validation, 10% test.
|
|
81
|
+
|
|
82
|
+
Source material spans diverse genres and recording conditions.
|
|
83
|
+
|
|
84
|
+
### Training Procedure
|
|
85
|
+
|
|
86
|
+
**Stage 1 (CNN warmup):**
|
|
87
|
+
- Train `WindowClassifier` on individual 2-second spectrogram windows
|
|
88
|
+
- 25 epochs, OneCycleLR (max_lr=1e-3), batch size 64
|
|
89
|
+
- Focal loss (gamma=2.0) with class weighting [1.0, 2.0, 2.0, 1.5, 1.0, 1.0, 1.0]
|
|
90
|
+
- Label smoothing (0.05)
|
|
91
|
+
- Max 10 random windows sampled per file per epoch
|
|
92
|
+
|
|
93
|
+
**Stage 2 (sequence training):**
|
|
94
|
+
- Freeze CNN, train `SequenceClassifier` on sequences of 48 CNN feature vectors
|
|
95
|
+
- 60 epochs, OneCycleLR (max_lr=3e-4), batch size 32
|
|
96
|
+
- Same focal loss and class weighting as Stage 1
|
|
97
|
+
- 4 random sequences sampled per file per epoch
|
|
98
|
+
- Early stopping with patience 15 on validation accuracy
|
|
99
|
+
|
|
100
|
+
**Hardware**: Trained on NVIDIA GTX 1660 Super (6 GB VRAM), FP32 only (no Tensor Cores).
|
|
101
|
+
|
|
102
|
+
### Key Design Decisions
|
|
103
|
+
|
|
104
|
+
- **Dual-band filterbank**: Standard mel spectrograms lose resolution above 16 kHz. The linear high-frequency band provides precise cutoff detection.
|
|
105
|
+
- **Two-stage training**: Separates representation learning (CNN on windows) from temporal reasoning (BiLSTM on sequences).
|
|
106
|
+
- **Focal loss with class weighting**: V2 and 192 kbps are inherently hard to separate (overlapping bitrate ranges), so they receive 2x class weight.
|
|
107
|
+
- **SVM features as auxiliary input**: The hand-crafted SVM features (PSD bands, cutoff detection, temporal statistics) provide complementary information to the learned CNN features.
|
|
108
|
+
- **Global modulation features**: 2D DCT over the full-file spectrogram captures long-range spectral patterns that distinguish VBR's gradually varying lowpass filter from CBR's fixed cutoff.
|
|
109
|
+
- **Variance features disabled**: An ablation study showed that temporal variance/range features did not improve accuracy when the SVM and global modulation features are present.
|
|
110
|
+
|
|
111
|
+
## Performance
|
|
112
|
+
|
|
113
|
+
**Overall accuracy: 98.42%** (file-level evaluation on held-out test set, 1,391 files)
|
|
114
|
+
|
|
115
|
+
### Per-Class Metrics
|
|
116
|
+
|
|
117
|
+
| Class | Precision | Recall | F1 Score | Support |
|
|
118
|
+
|-------|-----------|--------|----------|---------|
|
|
119
|
+
| 128 | 98.5% | 100.0% | 99.2% | 195 |
|
|
120
|
+
| V2 | 98.4% | 97.3% | 97.8% | 184 |
|
|
121
|
+
| 192 | 97.7% | 98.2% | 97.9% | 219 |
|
|
122
|
+
| V0 | 98.1% | 97.7% | 97.9% | 217 |
|
|
123
|
+
| 256 | 99.4% | 99.4% | 99.4% | 170 |
|
|
124
|
+
| 320 | 97.7% | 98.6% | 98.1% | 213 |
|
|
125
|
+
| LOSSLESS | 99.5% | 97.9% | 98.7% | 193 |
|
|
126
|
+
|
|
127
|
+
### Confusion Matrix
|
|
128
|
+
|
|
129
|
+
Rows are true labels, columns are predictions.
|
|
130
|
+
|
|
131
|
+
| | 128 | V2 | 192 | V0 | 256 | 320 | LOSSLESS |
|
|
132
|
+
|----------|----:|---:|----:|---:|----:|----:|---------:|
|
|
133
|
+
| **128** | 195 | 0 | 0 | 0 | 0 | 0 | 0 |
|
|
134
|
+
| **V2** | 1 | 179 | 3 | 1 | 0 | 0 | 0 |
|
|
135
|
+
| **192** | 1 | 3 | 215 | 0 | 0 | 0 | 0 |
|
|
136
|
+
| **V0** | 0 | 0 | 0 | 212 | 0 | 4 | 1 |
|
|
137
|
+
| **256** | 0 | 0 | 1 | 0 | 169 | 0 | 0 |
|
|
138
|
+
| **320** | 1 | 0 | 0 | 1 | 1 | 210 | 0 |
|
|
139
|
+
| **LOSSLESS** | 0 | 0 | 1 | 2 | 0 | 1 | 189 |
|
|
140
|
+
|
|
141
|
+
Total errors: 22/1,391 (1.58%). V2/192 cross-confusion: 6 files.
|
|
142
|
+
|
|
143
|
+
### Known Limitations
|
|
144
|
+
|
|
145
|
+
- **V2/192 confusion**: These two classes have the most overlap — V2 VBR averages 170-210 kbps with a variable lowpass filter, while CBR 192 has a fixed ~18.6 kHz cutoff. The model achieves ~97.8% F1 on both, but some edge cases remain.
|
|
146
|
+
- **LAME encoder only**: Training data was generated with LAME. Other MP3 encoders (e.g., Fraunhofer FhG) may produce different spectral signatures.
|
|
147
|
+
- **44.1 kHz sample rate**: The model expects 44.1 kHz audio. Files at other sample rates are resampled to 44.1 kHz by librosa when loaded, which may reduce accuracy.
|
|
148
|
+
- **Minimum file length**: Files shorter than ~50 seconds may have fewer sequences for aggregation, potentially reducing confidence.
|
|
149
|
+
|
|
150
|
+
## Research Foundation
|
|
151
|
+
|
|
152
|
+
- **SVM baseline**: D'Alessandro & Shi (2009), "MP3 Bit Rate Quality Detection through Frequency Spectrum Analysis"
|
|
153
|
+
- **CNN approach**: Informed by Hennequin et al. (2017, Deezer/ICASSP) and Seichter et al. (2016, Fraunhofer)
|
|
154
|
+
- **Global modulation**: Inspired by "Generalized Spoofing Detection Inspired from Audio Generation Artifacts" (2021)
|
|
155
|
+
- **VBR vs CBR discrimination at similar bitrates**: Novel contribution — no published work addresses this specific problem
|
bitrater-0.1.1/PKG-INFO
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: bitrater
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Audio quality analysis and bitrate detection, with optional beets plugin
|
|
5
|
+
Project-URL: Homepage, https://github.com/yamsnjams/bitrater
|
|
6
|
+
Project-URL: Repository, https://github.com/yamsnjams/bitrater
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/yamsnjams/bitrater/issues
|
|
8
|
+
Author-email: yamsnjams <yamsnjams@protonmail.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: audio,beets,bitrate,mp3,spectral-analysis,transcoding
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
23
|
+
Classifier: Topic :: Multimedia :: Sound/Audio :: Analysis
|
|
24
|
+
Requires-Python: <3.14,>=3.10
|
|
25
|
+
Requires-Dist: joblib>=1.0.0
|
|
26
|
+
Requires-Dist: librosa>=0.10.0
|
|
27
|
+
Requires-Dist: mutagen>=1.45.0
|
|
28
|
+
Requires-Dist: numpy>=1.20.0
|
|
29
|
+
Requires-Dist: onnxruntime>=1.16.0
|
|
30
|
+
Requires-Dist: scikit-learn>=1.0.0
|
|
31
|
+
Requires-Dist: scipy>=1.7.0
|
|
32
|
+
Requires-Dist: tqdm>=4.60.0
|
|
33
|
+
Provides-Extra: beets
|
|
34
|
+
Requires-Dist: beets>=1.6.0; extra == 'beets'
|
|
35
|
+
Provides-Extra: dev
|
|
36
|
+
Requires-Dist: black>=22.0.0; extra == 'dev'
|
|
37
|
+
Requires-Dist: flake8>=4.0.0; extra == 'dev'
|
|
38
|
+
Requires-Dist: isort>=5.10.0; extra == 'dev'
|
|
39
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
40
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
41
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
42
|
+
Provides-Extra: training
|
|
43
|
+
Requires-Dist: torch>=2.0.0; extra == 'training'
|
|
44
|
+
Requires-Dist: torchaudio>=2.0.0; extra == 'training'
|
|
45
|
+
Description-Content-Type: text/markdown
|
|
46
|
+
|
|
47
|
+
# bitrater
|
|
48
|
+
|
|
49
|
+
Audio quality analysis and bitrate detection for audio files. Detects the true encoding quality of MP3, FLAC, WAV, AAC, and other formats using spectral analysis and deep learning. Identifies transcodes (e.g., a 128 kbps source re-encoded as a 320 kbps MP3 or converted to FLAC) and verifies that lossless files are genuinely lossless.
|
|
50
|
+
|
|
51
|
+
Available as a **standalone CLI tool** or as a **[beets](https://beets.io/) plugin**.
|
|
52
|
+
|
|
53
|
+
## Features
|
|
54
|
+
|
|
55
|
+
- **7-class bitrate classification**: 128, 192, 256, 320 kbps CBR, V0/V2 VBR presets, and lossless (FLAC/WAV/AIFF)
|
|
56
|
+
- **Lossless verification**: confirms whether lossless files are truly lossless or transcodes from lossy sources
|
|
57
|
+
- **Transcode detection**: identifies files whose stated bitrate doesn't match their true encoding quality
|
|
58
|
+
- **Pre-trained deep learning model**: ships with a CNN+BiLSTM model achieving 98.4% accuracy
|
|
59
|
+
- **Confidence scoring**: every prediction includes a confidence score
|
|
60
|
+
- **Feature caching**: thread-safe NPZ cache avoids redundant spectral analysis
|
|
61
|
+
- **Parallel processing**: multi-threaded analysis via joblib
|
|
62
|
+
|
|
63
|
+
## Installation
|
|
64
|
+
|
|
65
|
+
Requires Python 3.10-3.13 and [FFmpeg](https://ffmpeg.org/).
|
|
66
|
+
|
|
67
|
+
### Standalone (no beets)
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pip install bitrater
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### With beets plugin
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
pip install "bitrater[beets]"
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Then enable the plugin in your beets config (`~/.config/beets/config.yaml`):
|
|
80
|
+
|
|
81
|
+
```yaml
|
|
82
|
+
plugins: bitrater
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### From source (with uv)
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
git clone https://github.com/yamsnjams/bitrater.git
|
|
89
|
+
cd bitrater
|
|
90
|
+
uv sync # standalone
|
|
91
|
+
uv sync --all-extras # with beets + training + dev dependencies
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Quick Start
|
|
95
|
+
|
|
96
|
+
### Standalone CLI
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
# Analyze a single file
|
|
100
|
+
bitrater analyze song.mp3
|
|
101
|
+
|
|
102
|
+
# Analyze a directory
|
|
103
|
+
bitrater analyze /path/to/music/
|
|
104
|
+
|
|
105
|
+
# Verbose output (show warnings)
|
|
106
|
+
bitrater -v analyze /path/to/music/
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Example output:
|
|
110
|
+
```
|
|
111
|
+
[OK] song.mp3: MP3 320kbps (confidence: 95%)
|
|
112
|
+
[TRANSCODE] another.mp3: MP3 128kbps (confidence: 88%)
|
|
113
|
+
[OK] track.flac: LOSSLESS (confidence: 97%)
|
|
114
|
+
[TRANSCODE] fake_lossless.flac: MP3 192kbps (confidence: 91%)
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Beets Plugin
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
# Analyze your library (or a subset via query)
|
|
121
|
+
beet bitrater
|
|
122
|
+
beet bitrater artist:radiohead
|
|
123
|
+
|
|
124
|
+
# Verbose output
|
|
125
|
+
beet bitrater -v
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
The plugin stores results in beets' database as custom fields:
|
|
129
|
+
|
|
130
|
+
| Field | Description |
|
|
131
|
+
|-------|-------------|
|
|
132
|
+
| `original_bitrate` | Estimated true encoding bitrate |
|
|
133
|
+
| `bitrate_confidence` | Confidence score (0.0-1.0) |
|
|
134
|
+
| `is_transcoded` | Whether the file appears to be a transcode |
|
|
135
|
+
| `spectral_quality` | Overall spectral quality score |
|
|
136
|
+
| `format_warnings` | Warning messages from analysis |
|
|
137
|
+
|
|
138
|
+
## Pre-trained Model
|
|
139
|
+
|
|
140
|
+
Bitrater ships with a pre-trained deep learning model that works out of the box. No training is required. See [MODEL_CARD.md](MODEL_CARD.md) for full details on the model architecture, training data, and performance metrics.
|
|
141
|
+
|
|
142
|
+
The bundled model achieves **98.4% accuracy** across all 7 classes on a held-out test set.
|
|
143
|
+
|
|
144
|
+
## Beets Plugin Configuration
|
|
145
|
+
|
|
146
|
+
All options and their defaults:
|
|
147
|
+
|
|
148
|
+
```yaml
|
|
149
|
+
bitrater:
|
|
150
|
+
auto: false # Auto-analyze on import
|
|
151
|
+
min_confidence: 0.8 # Minimum confidence threshold
|
|
152
|
+
warn_transcodes: true # Show transcode warnings
|
|
153
|
+
threads: null # Analysis threads (null = auto)
|
|
154
|
+
on_transcode: ask # Action for transcodes: ask, quarantine, keep, skip
|
|
155
|
+
quarantine_dir: null # Quarantine folder (default: {library}/.quarantine/)
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### Transcode Handling
|
|
159
|
+
|
|
160
|
+
When a counterfeit/transcoded file is detected, `on_transcode` controls the behavior:
|
|
161
|
+
|
|
162
|
+
| Value | Behavior |
|
|
163
|
+
|-------|----------|
|
|
164
|
+
| `ask` | Prompt the user to Keep, Quarantine, or Skip (this is the default setting) |
|
|
165
|
+
| `quarantine` | Automatically move to quarantine folder |
|
|
166
|
+
| `keep` | Log a warning but take no action |
|
|
167
|
+
| `skip` | Remove from library and delete the file |
|
|
168
|
+
|
|
169
|
+
The quarantine folder defaults to `.quarantine/` inside your beets library directory.
|
|
170
|
+
Set `quarantine_dir` to override with a custom path.
|
|
171
|
+
|
|
172
|
+
## How It Works
|
|
173
|
+
|
|
174
|
+
### Spectral Analysis
|
|
175
|
+
|
|
176
|
+
Audio files are analyzed in the frequency domain. MP3 encoding introduces characteristic artifacts:
|
|
177
|
+
|
|
178
|
+
- **Frequency cutoffs**: lower bitrates have lower high-frequency cutoffs (e.g., 128 kbps cuts off around 16 kHz)
|
|
179
|
+
- **Spectral flatness**: lossy compression reduces spectral detail in high frequencies
|
|
180
|
+
- **SFB21 band**: the highest scale factor band is a strong indicator of encoding quality
|
|
181
|
+
|
|
182
|
+
### Deep Learning Classifier
|
|
183
|
+
|
|
184
|
+
Two-stage CNN + BiLSTM architecture (~1.1M total parameters):
|
|
185
|
+
|
|
186
|
+
- **Stage 1**: CNN feature extractor on dual-band spectrograms (64 mel + 64 linear HF bins, 2-second windows)
|
|
187
|
+
- **Stage 2**: BiLSTM with multi-head attention over sequences of 48 CNN features, plus 211 auxiliary features (spectral + global modulation DCT)
|
|
188
|
+
- Focal loss with class weighting, file-level aggregation across all sequences
|
|
189
|
+
- **98.4% overall accuracy** with all classes above 96% F1
|
|
190
|
+
|
|
191
|
+
See [MODEL_CARD.md](MODEL_CARD.md) for complete details.
|
|
192
|
+
|
|
193
|
+
## Development
|
|
194
|
+
|
|
195
|
+
```bash
|
|
196
|
+
# Run tests
|
|
197
|
+
uv run python -m pytest tests/
|
|
198
|
+
|
|
199
|
+
# Run tests with coverage
|
|
200
|
+
uv run python -m pytest tests/ --cov=bitrater --cov=beetsplug
|
|
201
|
+
|
|
202
|
+
# Format and lint
|
|
203
|
+
uv run black bitrater/ beetsplug/ tests/
|
|
204
|
+
uv run ruff check --fix bitrater/ beetsplug/ tests/
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
## License
|
|
208
|
+
|
|
209
|
+
[MIT](LICENSE)
|
bitrater-0.1.1/README.md
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# bitrater
|
|
2
|
+
|
|
3
|
+
Audio quality analysis and bitrate detection for audio files. Detects the true encoding quality of MP3, FLAC, WAV, AAC, and other formats using spectral analysis and deep learning. Identifies transcodes (e.g., a 128 kbps source re-encoded as a 320 kbps MP3 or converted to FLAC) and verifies that lossless files are genuinely lossless.
|
|
4
|
+
|
|
5
|
+
Available as a **standalone CLI tool** or as a **[beets](https://beets.io/) plugin**.
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **7-class bitrate classification**: 128, 192, 256, 320 kbps CBR, V0/V2 VBR presets, and lossless (FLAC/WAV/AIFF)
|
|
10
|
+
- **Lossless verification**: confirms whether lossless files are truly lossless or transcodes from lossy sources
|
|
11
|
+
- **Transcode detection**: identifies files whose stated bitrate doesn't match their true encoding quality
|
|
12
|
+
- **Pre-trained deep learning model**: ships with a CNN+BiLSTM model achieving 98.4% accuracy
|
|
13
|
+
- **Confidence scoring**: every prediction includes a confidence score
|
|
14
|
+
- **Feature caching**: thread-safe NPZ cache avoids redundant spectral analysis
|
|
15
|
+
- **Parallel processing**: multi-threaded analysis via joblib
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
Requires Python 3.10-3.13 and [FFmpeg](https://ffmpeg.org/).
|
|
20
|
+
|
|
21
|
+
### Standalone (no beets)
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install bitrater
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
### With beets plugin
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install "bitrater[beets]"
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Then enable the plugin in your beets config (`~/.config/beets/config.yaml`):
|
|
34
|
+
|
|
35
|
+
```yaml
|
|
36
|
+
plugins: bitrater
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### From source (with uv)
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
git clone https://github.com/yamsnjams/bitrater.git
|
|
43
|
+
cd bitrater
|
|
44
|
+
uv sync # standalone
|
|
45
|
+
uv sync --all-extras # with beets + training + dev dependencies
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Quick Start
|
|
49
|
+
|
|
50
|
+
### Standalone CLI
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# Analyze a single file
|
|
54
|
+
bitrater analyze song.mp3
|
|
55
|
+
|
|
56
|
+
# Analyze a directory
|
|
57
|
+
bitrater analyze /path/to/music/
|
|
58
|
+
|
|
59
|
+
# Verbose output (show warnings)
|
|
60
|
+
bitrater -v analyze /path/to/music/
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Example output:
|
|
64
|
+
```
|
|
65
|
+
[OK] song.mp3: MP3 320kbps (confidence: 95%)
|
|
66
|
+
[TRANSCODE] another.mp3: MP3 128kbps (confidence: 88%)
|
|
67
|
+
[OK] track.flac: LOSSLESS (confidence: 97%)
|
|
68
|
+
[TRANSCODE] fake_lossless.flac: MP3 192kbps (confidence: 91%)
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### Beets Plugin
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
# Analyze your library (or a subset via query)
|
|
75
|
+
beet bitrater
|
|
76
|
+
beet bitrater artist:radiohead
|
|
77
|
+
|
|
78
|
+
# Verbose output
|
|
79
|
+
beet bitrater -v
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
The plugin stores results in beets' database as custom fields:
|
|
83
|
+
|
|
84
|
+
| Field | Description |
|
|
85
|
+
|-------|-------------|
|
|
86
|
+
| `original_bitrate` | Estimated true encoding bitrate |
|
|
87
|
+
| `bitrate_confidence` | Confidence score (0.0-1.0) |
|
|
88
|
+
| `is_transcoded` | Whether the file appears to be a transcode |
|
|
89
|
+
| `spectral_quality` | Overall spectral quality score |
|
|
90
|
+
| `format_warnings` | Warning messages from analysis |
|
|
91
|
+
|
|
92
|
+
## Pre-trained Model
|
|
93
|
+
|
|
94
|
+
Bitrater ships with a pre-trained deep learning model that works out of the box. No training is required. See [MODEL_CARD.md](MODEL_CARD.md) for full details on the model architecture, training data, and performance metrics.
|
|
95
|
+
|
|
96
|
+
The bundled model achieves **98.4% accuracy** across all 7 classes on a held-out test set.
|
|
97
|
+
|
|
98
|
+
## Beets Plugin Configuration
|
|
99
|
+
|
|
100
|
+
All options and their defaults:
|
|
101
|
+
|
|
102
|
+
```yaml
|
|
103
|
+
bitrater:
|
|
104
|
+
auto: false # Auto-analyze on import
|
|
105
|
+
min_confidence: 0.8 # Minimum confidence threshold
|
|
106
|
+
warn_transcodes: true # Show transcode warnings
|
|
107
|
+
threads: null # Analysis threads (null = auto)
|
|
108
|
+
on_transcode: ask # Action for transcodes: ask, quarantine, keep, skip
|
|
109
|
+
quarantine_dir: null # Quarantine folder (default: {library}/.quarantine/)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Transcode Handling
|
|
113
|
+
|
|
114
|
+
When a counterfeit/transcoded file is detected, `on_transcode` controls the behavior:
|
|
115
|
+
|
|
116
|
+
| Value | Behavior |
|
|
117
|
+
|-------|----------|
|
|
118
|
+
| `ask` | Prompt the user to Keep, Quarantine, or Skip (this is the default setting) |
|
|
119
|
+
| `quarantine` | Automatically move to quarantine folder |
|
|
120
|
+
| `keep` | Log a warning but take no action |
|
|
121
|
+
| `skip` | Remove from library and delete the file |
|
|
122
|
+
|
|
123
|
+
The quarantine folder defaults to `.quarantine/` inside your beets library directory.
|
|
124
|
+
Set `quarantine_dir` to override with a custom path.
|
|
125
|
+
|
|
126
|
+
## How It Works
|
|
127
|
+
|
|
128
|
+
### Spectral Analysis
|
|
129
|
+
|
|
130
|
+
Audio files are analyzed in the frequency domain. MP3 encoding introduces characteristic artifacts:
|
|
131
|
+
|
|
132
|
+
- **Frequency cutoffs**: lower bitrates have lower high-frequency cutoffs (e.g., 128 kbps cuts off around 16 kHz)
|
|
133
|
+
- **Spectral flatness**: lossy compression reduces spectral detail in high frequencies
|
|
134
|
+
- **SFB21 band**: the highest scale factor band is a strong indicator of encoding quality
|
|
135
|
+
|
|
136
|
+
### Deep Learning Classifier
|
|
137
|
+
|
|
138
|
+
Two-stage CNN + BiLSTM architecture (~1.1M total parameters):
|
|
139
|
+
|
|
140
|
+
- **Stage 1**: CNN feature extractor on dual-band spectrograms (64 mel + 64 linear HF bins, 2-second windows)
|
|
141
|
+
- **Stage 2**: BiLSTM with multi-head attention over sequences of 48 CNN features, plus 211 auxiliary features (spectral + global modulation DCT)
|
|
142
|
+
- Focal loss with class weighting, file-level aggregation across all sequences
|
|
143
|
+
- **98.4% overall accuracy** with all classes above 96% F1
|
|
144
|
+
|
|
145
|
+
See [MODEL_CARD.md](MODEL_CARD.md) for complete details.
|
|
146
|
+
|
|
147
|
+
## Development
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
# Run tests
|
|
151
|
+
uv run python -m pytest tests/
|
|
152
|
+
|
|
153
|
+
# Run tests with coverage
|
|
154
|
+
uv run python -m pytest tests/ --cov=bitrater --cov=beetsplug
|
|
155
|
+
|
|
156
|
+
# Format and lint
|
|
157
|
+
uv run black bitrater/ beetsplug/ tests/
|
|
158
|
+
uv run ruff check --fix bitrater/ beetsplug/ tests/
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
## License
|
|
162
|
+
|
|
163
|
+
[MIT](LICENSE)
|
|
File without changes
|