openbeats 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openbeats-0.1.0/.gitignore +7 -0
- openbeats-0.1.0/PKG-INFO +98 -0
- openbeats-0.1.0/README.md +74 -0
- openbeats-0.1.0/pyproject.toml +48 -0
- openbeats-0.1.0/src/openbeats/__init__.py +0 -0
- openbeats-0.1.0/src/openbeats/beats_encoder.py +2118 -0
- openbeats-0.1.0/src/openbeats/cli.py +54 -0
- openbeats-0.1.0/src/openbeats/model.py +86 -0
- openbeats-0.1.0/src/openbeats/utils.py +162 -0
- openbeats-0.1.0/tests/test_encoder.py +37 -0
- openbeats-0.1.0/tests/test_integration.py +39 -0
- openbeats-0.1.0/tests/test_model_helpers.py +36 -0
- openbeats-0.1.0/uv.lock +1424 -0
openbeats-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: openbeats
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Inference for the OpenBEATs audio encoder (vendored ESPnet BeatsEncoder)
|
|
5
|
+
Project-URL: Homepage, https://shikhar-s.github.io/OpenBEATs/
|
|
6
|
+
Project-URL: Paper, https://arxiv.org/abs/2507.14129
|
|
7
|
+
Author: Shikhar Bharadwaj
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
Keywords: audio,beats,bioacoustics,embeddings,espnet,openbeats
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Topic :: Multimedia :: Sound/Audio :: Analysis
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Requires-Dist: huggingface-hub
|
|
15
|
+
Requires-Dist: numpy
|
|
16
|
+
Requires-Dist: packaging
|
|
17
|
+
Requires-Dist: pyyaml
|
|
18
|
+
Requires-Dist: soundfile
|
|
19
|
+
Requires-Dist: torch>=2.0
|
|
20
|
+
Requires-Dist: torchaudio>=2.0
|
|
21
|
+
Provides-Extra: adapter
|
|
22
|
+
Requires-Dist: transformers; extra == 'adapter'
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# OpenBEATs inference
|
|
26
|
+
|
|
27
|
+
Run inference with [OpenBEATs](https://shikhar-s.github.io/OpenBEATs/), a
|
|
28
|
+
general-purpose audio encoder pre-trained on speech, music, environmental sound,
|
|
29
|
+
and bioacoustics ([paper](https://arxiv.org/abs/2507.14129)). Given an audio
|
|
30
|
+
file, it produces patch-level embeddings.
|
|
31
|
+
|
|
32
|
+
## Install
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install openbeats
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
This installs the `openbeats-infer` / `openbeats-download` commands. Dependencies
|
|
39
|
+
are kept lean (torch, torchaudio, numpy, packaging, huggingface-hub, pyyaml, soundfile) and
|
|
40
|
+
declared with lower bounds, so an existing torch install is reused rather than
|
|
41
|
+
reinstalled. For a fully isolated CLI that doesn't touch your environment, use
|
|
42
|
+
[`uv tool`](https://docs.astral.sh/uv/) or `pipx`:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
uv tool install openbeats # or: pipx install openbeats
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Usage
|
|
49
|
+
|
|
50
|
+
### CLI — quick prototyping
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
openbeats-infer --checkpoint espnet/OpenBEATS-Large-i1-as20k \
|
|
54
|
+
--audio your_audio.wav --out embeddings.npz
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
`--checkpoint` accepts a Hugging Face repo id (auto-downloaded), a local
|
|
58
|
+
directory, or a checkpoint file. The output `.npz` holds `patch_embeddings` of shape
|
|
59
|
+
`(num_patches, 1024)` (plus `logits`/`probs` for classification checkpoints).
|
|
60
|
+
Options: `--device cuda`, `--max-layer N`, `--chunk-seconds 10` (long audio).
|
|
61
|
+
|
|
62
|
+
### Python — from an audio file
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
from openbeats.model import OpenBeats
|
|
66
|
+
|
|
67
|
+
model = OpenBeats.from_pretrained("espnet/OpenBEATS-Large-i1-as20k", device="cuda")
|
|
68
|
+
out = model.encode_file("your_audio.wav") # or chunk_seconds=10 for long audio
|
|
69
|
+
print(out["patch_embeddings"].shape) # (num_patches, 1024)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Python — from your own waveform
|
|
73
|
+
|
|
74
|
+
Pass a 1-D, 16 kHz waveform with samples in `[-1, 1]` (use `load_audio` to resample other rates):
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
import numpy as np
|
|
78
|
+
from openbeats.utils import load_audio
|
|
79
|
+
|
|
80
|
+
wav, sr = load_audio("your_audio.wav") # any rate -> mono 16 kHz
|
|
81
|
+
out = model.encode(wav, sr) # or pass your own np.ndarray
|
|
82
|
+
print(out["patch_embeddings"].shape) # (num_patches, 1024)
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Checkpoints
|
|
86
|
+
|
|
87
|
+
Browse variants (Base/Large, AudioSet and bioacoustics fine-tunes) in the
|
|
88
|
+
[espnet OpenBEATs collection](https://huggingface.co/collections/espnet/openbeats).
|
|
89
|
+
|
|
90
|
+
## Development
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
uv sync # install with dev deps (pytest)
|
|
94
|
+
uv run pytest # unit tests (no downloads)
|
|
95
|
+
OPENBEATS_INTEGRATION=1 uv run pytest # + end-to-end (downloads from HF)
|
|
96
|
+
uv build # build wheel + sdist into dist/
|
|
97
|
+
uv publish # publish to PyPI
|
|
98
|
+
```
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# OpenBEATs inference
|
|
2
|
+
|
|
3
|
+
Run inference with [OpenBEATs](https://shikhar-s.github.io/OpenBEATs/), a
|
|
4
|
+
general-purpose audio encoder pre-trained on speech, music, environmental sound,
|
|
5
|
+
and bioacoustics ([paper](https://arxiv.org/abs/2507.14129)). Given an audio
|
|
6
|
+
file, it produces patch-level embeddings.
|
|
7
|
+
|
|
8
|
+
## Install
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
pip install openbeats
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
This installs the `openbeats-infer` / `openbeats-download` commands. Dependencies
|
|
15
|
+
are kept lean (torch, torchaudio, numpy, packaging, huggingface-hub, pyyaml, soundfile) and
|
|
16
|
+
declared with lower bounds, so an existing torch install is reused rather than
|
|
17
|
+
reinstalled. For a fully isolated CLI that doesn't touch your environment, use
|
|
18
|
+
[`uv tool`](https://docs.astral.sh/uv/) or `pipx`:
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
uv tool install openbeats # or: pipx install openbeats
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Usage
|
|
25
|
+
|
|
26
|
+
### CLI — quick prototyping
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
openbeats-infer --checkpoint espnet/OpenBEATS-Large-i1-as20k \
|
|
30
|
+
--audio your_audio.wav --out embeddings.npz
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
`--checkpoint` accepts a Hugging Face repo id (auto-downloaded), a local
|
|
34
|
+
directory, or a checkpoint file. The output `.npz` holds `patch_embeddings` of shape
|
|
35
|
+
`(num_patches, 1024)` (plus `logits`/`probs` for classification checkpoints).
|
|
36
|
+
Options: `--device cuda`, `--max-layer N`, `--chunk-seconds 10` (long audio).
|
|
37
|
+
|
|
38
|
+
### Python — from an audio file
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
from openbeats.model import OpenBeats
|
|
42
|
+
|
|
43
|
+
model = OpenBeats.from_pretrained("espnet/OpenBEATS-Large-i1-as20k", device="cuda")
|
|
44
|
+
out = model.encode_file("your_audio.wav") # or chunk_seconds=10 for long audio
|
|
45
|
+
print(out["patch_embeddings"].shape) # (num_patches, 1024)
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### Python — from your own waveform
|
|
49
|
+
|
|
50
|
+
Pass a 1-D, 16 kHz waveform with samples in `[-1, 1]` (use `load_audio` to resample other rates):
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
import numpy as np
|
|
54
|
+
from openbeats.utils import load_audio
|
|
55
|
+
|
|
56
|
+
wav, sr = load_audio("your_audio.wav") # any rate -> mono 16 kHz
|
|
57
|
+
out = model.encode(wav, sr) # or pass your own np.ndarray
|
|
58
|
+
print(out["patch_embeddings"].shape) # (num_patches, 1024)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Checkpoints
|
|
62
|
+
|
|
63
|
+
Browse variants (Base/Large, AudioSet and bioacoustics fine-tunes) in the
|
|
64
|
+
[espnet OpenBEATs collection](https://huggingface.co/collections/espnet/openbeats).
|
|
65
|
+
|
|
66
|
+
## Development
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
uv sync # install with dev deps (pytest)
|
|
70
|
+
uv run pytest # unit tests (no downloads)
|
|
71
|
+
OPENBEATS_INTEGRATION=1 uv run pytest # + end-to-end (downloads from HF)
|
|
72
|
+
uv build # build wheel + sdist into dist/
|
|
73
|
+
uv publish # publish to PyPI
|
|
74
|
+
```
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "openbeats"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Inference for the OpenBEATs audio encoder (vendored ESPnet BeatsEncoder)"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
license = "MIT"
|
|
8
|
+
authors = [{ name = "Shikhar Bharadwaj" }]
|
|
9
|
+
keywords = ["audio", "beats", "openbeats", "embeddings", "bioacoustics", "espnet"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Programming Language :: Python :: 3",
|
|
12
|
+
"Topic :: Multimedia :: Sound/Audio :: Analysis",
|
|
13
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
14
|
+
]
|
|
15
|
+
# Lower bounds only: an existing torch/torchaudio install satisfies these, so pip
|
|
16
|
+
# won't reinstall and clobber a user's (e.g. CUDA-matched) build.
|
|
17
|
+
dependencies = [
|
|
18
|
+
"torch>=2.0",
|
|
19
|
+
"torchaudio>=2.0",
|
|
20
|
+
"numpy",
|
|
21
|
+
"packaging",
|
|
22
|
+
"pyyaml",
|
|
23
|
+
"huggingface-hub",
|
|
24
|
+
"soundfile",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
[project.optional-dependencies]
|
|
28
|
+
# Only for checkpoints using the wav2vec2 conformer adapter / learned positional
|
|
29
|
+
# embeddings (not used by the standard OpenBEATs encoders).
|
|
30
|
+
adapter = ["transformers"]
|
|
31
|
+
|
|
32
|
+
[project.scripts]
|
|
33
|
+
openbeats-infer = "openbeats.cli:infer_main"
|
|
34
|
+
openbeats-download = "openbeats.cli:download_main"
|
|
35
|
+
|
|
36
|
+
[project.urls]
|
|
37
|
+
Homepage = "https://shikhar-s.github.io/OpenBEATs/"
|
|
38
|
+
Paper = "https://arxiv.org/abs/2507.14129"
|
|
39
|
+
|
|
40
|
+
[dependency-groups]
|
|
41
|
+
dev = ["pytest"]
|
|
42
|
+
|
|
43
|
+
[build-system]
|
|
44
|
+
requires = ["hatchling"]
|
|
45
|
+
build-backend = "hatchling.build"
|
|
46
|
+
|
|
47
|
+
[tool.hatch.build.targets.wheel]
|
|
48
|
+
packages = ["src/openbeats"]
|
|
File without changes
|