openbeats 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ .venv/
2
+ __pycache__/
3
+ *.pyc
4
+ checkpoints/
5
+ *.npz
6
+ dist/
7
+ .pytest_cache/
@@ -0,0 +1,98 @@
1
+ Metadata-Version: 2.4
2
+ Name: openbeats
3
+ Version: 0.1.0
4
+ Summary: Inference for the OpenBEATs audio encoder (vendored ESPnet BeatsEncoder)
5
+ Project-URL: Homepage, https://shikhar-s.github.io/OpenBEATs/
6
+ Project-URL: Paper, https://arxiv.org/abs/2507.14129
7
+ Author: Shikhar Bharadwaj
8
+ License-Expression: MIT
9
+ Keywords: audio,beats,bioacoustics,embeddings,espnet,openbeats
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Analysis
12
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
13
+ Requires-Python: >=3.10
14
+ Requires-Dist: huggingface-hub
15
+ Requires-Dist: numpy
16
+ Requires-Dist: packaging
17
+ Requires-Dist: pyyaml
18
+ Requires-Dist: soundfile
19
+ Requires-Dist: torch>=2.0
20
+ Requires-Dist: torchaudio>=2.0
21
+ Provides-Extra: adapter
22
+ Requires-Dist: transformers; extra == 'adapter'
23
+ Description-Content-Type: text/markdown
24
+
25
+ # OpenBEATs inference
26
+
27
+ Run inference with [OpenBEATs](https://shikhar-s.github.io/OpenBEATs/), a
28
+ general-purpose audio encoder pre-trained on speech, music, environmental sound,
29
+ and bioacoustics ([paper](https://arxiv.org/abs/2507.14129)). Given an audio
30
+ file, it produces patch-level embeddings.
31
+
32
+ ## Install
33
+
34
+ ```bash
35
+ pip install openbeats
36
+ ```
37
+
38
+ This installs the `openbeats-infer` / `openbeats-download` commands. Dependencies
39
+ are kept lean (torch, torchaudio, numpy, packaging, huggingface-hub, pyyaml, soundfile) and
40
+ declared with lower bounds, so an existing torch install is reused rather than
41
+ reinstalled. For a fully isolated CLI that doesn't touch your environment, use
42
+ [`uv tool`](https://docs.astral.sh/uv/) or `pipx`:
43
+
44
+ ```bash
45
+ uv tool install openbeats # or: pipx install openbeats
46
+ ```
47
+
48
+ ## Usage
49
+
50
+ ### CLI — quick prototyping
51
+
52
+ ```bash
53
+ openbeats-infer --checkpoint espnet/OpenBEATS-Large-i1-as20k \
54
+ --audio your_audio.wav --out embeddings.npz
55
+ ```
56
+
57
+ `--checkpoint` accepts a Hugging Face repo id (auto-downloaded), a local
58
+ directory, or a checkpoint file. The `.npz` holds `patch_embeddings` of shape
59
+ `(num_patches, 1024)` (plus `logits`/`probs` for classification checkpoints).
60
+ Options: `--device cuda`, `--max-layer N`, `--chunk-seconds 10` (long audio).
61
+
62
+ ### Python — from an audio file
63
+
64
+ ```python
65
+ from openbeats.model import OpenBeats
66
+
67
+ model = OpenBeats.from_pretrained("espnet/OpenBEATS-Large-i1-as20k", device="cuda")
68
+ out = model.encode_file("your_audio.wav") # or chunk_seconds=10 for long audio
69
+ print(out["patch_embeddings"].shape) # (num_patches, 1024)
70
+ ```
71
+
72
+ ### Python — from your own waveform
73
+
74
+ Pass a 1-D 16 kHz waveform in `[-1, 1]` to the `model` created above (`load_audio` reads a file at any sample rate and returns mono 16 kHz):
75
+
76
+ ```python
77
+ import numpy as np
78
+ from openbeats.utils import load_audio
79
+
80
+ wav, sr = load_audio("your_audio.wav") # any rate -> mono 16 kHz
81
+ out = model.encode(wav, sr) # or pass your own np.ndarray
82
+ print(out["patch_embeddings"].shape) # (num_patches, 1024)
83
+ ```
84
+
85
+ ## Checkpoints
86
+
87
+ Browse variants (Base/Large, AudioSet and bioacoustics fine-tunes) in the
88
+ [espnet OpenBEATs collection](https://huggingface.co/collections/espnet/openbeats).
89
+
90
+ ## Development
91
+
92
+ ```bash
93
+ uv sync # install with dev deps (pytest)
94
+ uv run pytest # unit tests (no downloads)
95
+ OPENBEATS_INTEGRATION=1 uv run pytest # + end-to-end (downloads from HF)
96
+ uv build # build wheel + sdist into dist/
97
+ uv publish # publish to PyPI
98
+ ```
@@ -0,0 +1,74 @@
1
+ # OpenBEATs inference
2
+
3
+ Run inference with [OpenBEATs](https://shikhar-s.github.io/OpenBEATs/), a
4
+ general-purpose audio encoder pre-trained on speech, music, environmental sound,
5
+ and bioacoustics ([paper](https://arxiv.org/abs/2507.14129)). Given an audio
6
+ file, it produces patch-level embeddings.
7
+
8
+ ## Install
9
+
10
+ ```bash
11
+ pip install openbeats
12
+ ```
13
+
14
+ This installs the `openbeats-infer` / `openbeats-download` commands. Dependencies
15
+ are kept lean (torch, torchaudio, numpy, packaging, huggingface-hub, pyyaml, soundfile) and
16
+ declared with lower bounds, so an existing torch install is reused rather than
17
+ reinstalled. For a fully isolated CLI that doesn't touch your environment, use
18
+ [`uv tool`](https://docs.astral.sh/uv/) or `pipx`:
19
+
20
+ ```bash
21
+ uv tool install openbeats # or: pipx install openbeats
22
+ ```
23
+
24
+ ## Usage
25
+
26
+ ### CLI — quick prototyping
27
+
28
+ ```bash
29
+ openbeats-infer --checkpoint espnet/OpenBEATS-Large-i1-as20k \
30
+ --audio your_audio.wav --out embeddings.npz
31
+ ```
32
+
33
+ `--checkpoint` accepts a Hugging Face repo id (auto-downloaded), a local
34
+ directory, or a checkpoint file. The `.npz` holds `patch_embeddings` of shape
35
+ `(num_patches, 1024)` (plus `logits`/`probs` for classification checkpoints).
36
+ Options: `--device cuda`, `--max-layer N`, `--chunk-seconds 10` (long audio).
37
+
38
+ ### Python — from an audio file
39
+
40
+ ```python
41
+ from openbeats.model import OpenBeats
42
+
43
+ model = OpenBeats.from_pretrained("espnet/OpenBEATS-Large-i1-as20k", device="cuda")
44
+ out = model.encode_file("your_audio.wav") # or chunk_seconds=10 for long audio
45
+ print(out["patch_embeddings"].shape) # (num_patches, 1024)
46
+ ```
47
+
48
+ ### Python — from your own waveform
49
+
50
+ Pass a 1-D 16 kHz waveform in `[-1, 1]` to the `model` created above (`load_audio` reads a file at any sample rate and returns mono 16 kHz):
51
+
52
+ ```python
53
+ import numpy as np
54
+ from openbeats.utils import load_audio
55
+
56
+ wav, sr = load_audio("your_audio.wav") # any rate -> mono 16 kHz
57
+ out = model.encode(wav, sr) # or pass your own np.ndarray
58
+ print(out["patch_embeddings"].shape) # (num_patches, 1024)
59
+ ```
60
+
61
+ ## Checkpoints
62
+
63
+ Browse variants (Base/Large, AudioSet and bioacoustics fine-tunes) in the
64
+ [espnet OpenBEATs collection](https://huggingface.co/collections/espnet/openbeats).
65
+
66
+ ## Development
67
+
68
+ ```bash
69
+ uv sync # install with dev deps (pytest)
70
+ uv run pytest # unit tests (no downloads)
71
+ OPENBEATS_INTEGRATION=1 uv run pytest # + end-to-end (downloads from HF)
72
+ uv build # build wheel + sdist into dist/
73
+ uv publish # publish to PyPI
74
+ ```
@@ -0,0 +1,48 @@
1
+ [project]
2
+ name = "openbeats"
3
+ version = "0.1.0"
4
+ description = "Inference for the OpenBEATs audio encoder (vendored ESPnet BeatsEncoder)"
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ license = "MIT"
8
+ authors = [{ name = "Shikhar Bharadwaj" }]
9
+ keywords = ["audio", "beats", "openbeats", "embeddings", "bioacoustics", "espnet"]
10
+ classifiers = [
11
+ "Programming Language :: Python :: 3",
12
+ "Topic :: Multimedia :: Sound/Audio :: Analysis",
13
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
14
+ ]
15
+ # Lower bounds only: an existing torch/torchaudio install satisfies these, so pip
16
+ # won't reinstall and clobber a user's (e.g. CUDA-matched) build.
17
+ dependencies = [
18
+ "torch>=2.0",
19
+ "torchaudio>=2.0",
20
+ "numpy",
21
+ "packaging",
22
+ "pyyaml",
23
+ "huggingface-hub",
24
+ "soundfile",
25
+ ]
26
+
27
+ [project.optional-dependencies]
28
+ # Only for checkpoints using the wav2vec2 conformer adapter / learned positional
29
+ # embeddings (not used by the standard OpenBEATs encoders).
30
+ adapter = ["transformers"]
31
+
32
+ [project.scripts]
33
+ openbeats-infer = "openbeats.cli:infer_main"
34
+ openbeats-download = "openbeats.cli:download_main"
35
+
36
+ [project.urls]
37
+ Homepage = "https://shikhar-s.github.io/OpenBEATs/"
38
+ Paper = "https://arxiv.org/abs/2507.14129"
39
+
40
+ [dependency-groups]
41
+ dev = ["pytest"]
42
+
43
+ [build-system]
44
+ requires = ["hatchling"]
45
+ build-backend = "hatchling.build"
46
+
47
+ [tool.hatch.build.targets.wheel]
48
+ packages = ["src/openbeats"]
File without changes