orbit-dsp 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orbit_dsp-1.0.0/PKG-INFO +18 -0
- orbit_dsp-1.0.0/README.md +3 -0
- orbit_dsp-1.0.0/orbit_dsp/__init__.py +0 -0
- orbit_dsp-1.0.0/orbit_dsp/audio_dsp.py +320 -0
- orbit_dsp-1.0.0/orbit_dsp.egg-info/PKG-INFO +18 -0
- orbit_dsp-1.0.0/orbit_dsp.egg-info/SOURCES.txt +10 -0
- orbit_dsp-1.0.0/orbit_dsp.egg-info/dependency_links.txt +1 -0
- orbit_dsp-1.0.0/orbit_dsp.egg-info/entry_points.txt +2 -0
- orbit_dsp-1.0.0/orbit_dsp.egg-info/requires.txt +3 -0
- orbit_dsp-1.0.0/orbit_dsp.egg-info/top_level.txt +1 -0
- orbit_dsp-1.0.0/pyproject.toml +28 -0
- orbit_dsp-1.0.0/setup.cfg +4 -0
orbit_dsp-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: orbit-dsp
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Ultra-fast, CPU-only classical audio feature extraction (BPM, Key, Loudness, Energy, Duration)
|
|
5
|
+
License: ISC
|
|
6
|
+
Classifier: Programming Language :: Python :: 3
|
|
7
|
+
Classifier: License :: OSI Approved :: ISC License (ISCL)
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Classifier: Topic :: Multimedia :: Sound/Audio :: Analysis
|
|
10
|
+
Requires-Python: >=3.8
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
Requires-Dist: librosa>=0.10.0
|
|
13
|
+
Requires-Dist: numpy>=1.20.0
|
|
14
|
+
Requires-Dist: scipy>=1.8.0
|
|
15
|
+
|
|
16
|
+
# orbit_dsp
|
|
17
|
+
|
|
18
|
+
Standalone Python package for ORBIT DSP analysis.
|
|
File without changes
|
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
ORBIT Classical DSP Analysis Script
|
|
4
|
+
|
|
5
|
+
Extracts fast, traditional musical features:
|
|
6
|
+
- BPM (tempo) with confidence score
|
|
7
|
+
- Musical key with confidence score (Krumhansl-Schmuckler algorithm)
|
|
8
|
+
- Energy level (RMS-based)
|
|
9
|
+
- Loudness (dB)
|
|
10
|
+
- Dynamic range (RMS percentile spread)
|
|
11
|
+
|
|
12
|
+
Usage:
|
|
13
|
+
python -m orbit_dsp.audio_dsp <audio_path> [--output json] [--max-length 120] [--stems-dir DIR]
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import sys
|
|
17
|
+
import os
|
|
18
|
+
import json
|
|
19
|
+
import argparse
|
|
20
|
+
import warnings
|
|
21
|
+
|
|
22
|
+
# Silence all warnings so nothing but the JSON report is emitted.
warnings.filterwarnings('ignore')

# Krumhansl-Schmuckler key profiles: twelve weights per mode. Index 0 is the
# tonic and every following index is one semitone higher.
MAJOR_PROFILE = [
    6.35, 2.23, 3.48, 2.33, 4.38, 4.09,
    2.52, 5.19, 2.39, 3.66, 2.29, 2.88,
]
MINOR_PROFILE = [
    6.33, 2.68, 3.52, 5.38, 2.60, 3.53,
    2.54, 4.75, 3.98, 2.69, 3.34, 3.17,
]
# Pitch-class names in chroma-bin order, used to label the detected tonic.
PITCH_CLASSES = [
    'C', 'C#', 'D', 'D#', 'E', 'F',
    'F#', 'G', 'G#', 'A', 'A#', 'B',
]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def check_dependencies():
    """Verify that librosa, numpy and scipy can be imported.

    When at least one import fails, a JSON object naming the missing
    packages (plus a matching ``pip install`` hint) is printed and the
    process exits with status 1. Returns ``None`` when everything imports.
    """
    # (module to import, package name to report), probed in this order.
    probes = (
        ('librosa', 'librosa'),
        ('numpy', 'numpy'),
        ('scipy.stats', 'scipy'),
    )

    missing = []
    for module_name, package_name in probes:
        try:
            __import__(module_name)
        except ImportError:
            missing.append(package_name)

    if not missing:
        return

    report = {
        'error': 'missing_dependencies',
        'message': f'Missing Python packages: {", ".join(missing)}',
        'install': f'pip install {" ".join(missing)}'
    }
    print(json.dumps(report))
    sys.exit(1)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def correlate_with_profile(chroma, profile):
    """Return the Pearson correlation between a chroma vector and a key profile.

    Args:
        chroma: numpy array of chroma values.
        profile: sequence of profile weights with the same length as ``chroma``.

    Returns:
        The correlation coefficient, or ``0`` when either vector has no
        variance (the coefficient would otherwise be a division by zero).
    """
    import numpy as np

    # Centre both vectors on their own mean.
    centered_chroma = chroma - np.mean(chroma)
    centered_profile = np.array(profile) - np.mean(profile)

    covariance = np.sum(centered_chroma * centered_profile)
    spread = np.sqrt(np.sum(centered_chroma ** 2) * np.sum(centered_profile ** 2))

    return covariance / spread if spread != 0 else 0
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _detect_key_from_chroma(chroma_avg):
    """Pick the best-matching key for a 12-bin chroma distribution.

    Every pitch class is tried as tonic against both the major and the minor
    Krumhansl-Schmuckler profile; the candidate with the highest Pearson
    correlation wins (the earliest candidate is kept on ties).

    Args:
        chroma_avg: numpy array with one value per pitch class, in
            ``PITCH_CLASSES`` order.

    Returns:
        Dict with ``value`` (e.g. ``'A minor'``), ``key``, ``mode`` and a
        ``confidence`` in [0, 1] derived from the winning correlation.
    """
    import numpy as np

    # Scale so the strongest bin equals 1; a non-positive peak is left as is.
    peak = np.max(chroma_avg)
    if peak > 0:
        chroma_avg = chroma_avg / peak

    profiles = (('major', MAJOR_PROFILE), ('minor', MINOR_PROFILE))
    winner_key = None
    winner_mode = None
    top_score = -1

    for tonic_index, tonic_name in enumerate(PITCH_CLASSES):
        # Shift the candidate tonic to index 0 so it lines up with the profile.
        shifted = np.roll(chroma_avg, -tonic_index)
        for mode_name, profile in profiles:
            score = correlate_with_profile(shifted, profile)
            if score > top_score:
                top_score = score
                winner_key = tonic_name
                winner_mode = mode_name

    # Map the correlation range [-1, 1] onto a confidence in [0, 1].
    confidence = max(0, min(1, (top_score + 1) / 2))

    return {
        'value': f'{winner_key} {winner_mode}',
        'key': winner_key,
        'mode': winner_mode,
        'confidence': round(confidence, 4)
    }
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def detect_key(y, sr, harmonic_only=False):
    """Estimate the musical key of a signal (Krumhansl-Schmuckler method).

    Args:
        y: audio samples passed to librosa.
        sr: sample rate of ``y``.
        harmonic_only: when true, the signal is first run through
            ``librosa.effects.hpss`` and only its first returned component
            is analysed.

    Returns:
        The result dict produced by ``_detect_key_from_chroma``.
    """
    import librosa
    import numpy as np

    signal = librosa.effects.hpss(y)[0] if harmonic_only else y

    # CQT-based chromagram, collapsed to one average value per pitch class.
    chromagram = librosa.feature.chroma_cqt(y=signal, sr=sr)
    return _detect_key_from_chroma(np.mean(chromagram, axis=1))
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def detect_key_from_stems(other_stem_path, bass_stem_path=None, max_length_seconds=120):
    """Detect the key from Demucs stems, emphasising harmonic content.

    The harmonic component of the "other" stem is analysed; when a bass stem
    is available its harmonic component is added at a reduced weight.

    Args:
        other_stem_path: path to the "other" stem (required).
        bass_stem_path: optional path to the bass stem; ignored when it is
            falsy or does not exist.
        max_length_seconds: upper bound on how much of each stem is analysed.

    Returns:
        The key result dict produced by ``detect_key``.

    Raises:
        FileNotFoundError: if ``other_stem_path`` is empty or does not exist.
    """
    # Validate first so a bad path fails fast, before the audio stack loads.
    if not other_stem_path or not os.path.exists(other_stem_path):
        raise FileNotFoundError(f'Other stem not found: {other_stem_path}')

    import librosa
    import numpy as np

    target_sr = 22050
    max_samples = int(max_length_seconds * target_sr)
    bass_weight = 0.35

    def _load_harmonic(path):
        """Load at most the analysed duration of a stem; return (harmonic, sr)."""
        # `duration` stops decoding early instead of reading the whole stem
        # and discarding the tail afterwards.
        samples, rate = librosa.load(
            path, sr=target_sr, mono=True, duration=max_length_seconds
        )
        samples = samples[:max_samples]
        harmonic, _ = librosa.effects.hpss(samples)
        return harmonic, rate

    other_harm, sr = _load_harmonic(other_stem_path)
    mix = other_harm.astype(np.float32, copy=False)

    if bass_stem_path and os.path.exists(bass_stem_path):
        # Both stems are loaded at target_sr, so no extra resampling is needed.
        bass_harm, _ = _load_harmonic(bass_stem_path)

        # The stems may differ in length; mix only the overlapping part.
        target_len = min(len(mix), len(bass_harm))
        if target_len > 0:
            mix = mix[:target_len] + (bass_weight * bass_harm[:target_len])

    # The mix is already harmonic-only, so skip a second separation pass.
    return detect_key(mix, sr, harmonic_only=False)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def detect_bpm(y, sr):
    """Detect tempo (BPM) with librosa's beat tracker and score its confidence.

    Args:
        y: audio samples passed to librosa.
        sr: sample rate of ``y``.

    Returns:
        Dict with ``value`` (BPM rounded to one decimal) and ``confidence``:
        the mean tempogram strength at the detected tempo divided by the
        strongest mean strength over all tempogram bins.
    """
    import librosa
    import numpy as np

    tempo, _beat_frames = librosa.beat.beat_track(y=y, sr=sr)

    # The tracker may hand back a Python float, a 0-d array or a 1-d array.
    # Going through atleast_1d covers all of them; calling len() on a 0-d
    # array would raise TypeError.
    tempo_values = np.atleast_1d(tempo).ravel()
    tempo = float(tempo_values[0]) if tempo_values.size > 0 else 0.0

    # Confidence comes from the tempogram of the onset-strength envelope.
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    tempogram = librosa.feature.tempogram(onset_envelope=onset_env, sr=sr)

    # Find the tempogram bin whose BPM is closest to the detected tempo.
    tempo_bins = librosa.tempo_frequencies(tempogram.shape[0], sr=sr)
    tempo_idx = np.argmin(np.abs(tempo_bins - tempo))

    if tempogram.shape[1] > 0:
        tempo_strength = np.mean(tempogram[tempo_idx, :])
        max_strength = np.max(np.mean(tempogram, axis=1))
        confidence = tempo_strength / max_strength if max_strength > 0 else 0
    else:
        # No frames to measure: fall back to a neutral score.
        confidence = 0.5

    return {
        'value': round(tempo, 1),
        'confidence': round(float(confidence), 4)
    }
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def calculate_energy(y):
    """Return an RMS-based energy level, capped at 1.0.

    The mean of librosa's frame-wise RMS is scaled linearly so that a mean
    RMS of 0.15 or more maps to 1.0; the result is rounded to four decimals.
    """
    import librosa
    import numpy as np

    frame_rms = librosa.feature.rms(y=y)[0]
    scaled = np.mean(frame_rms) / 0.15
    return round(float(min(1.0, scaled)), 4)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def calculate_loudness(y, sr):
    """Approximate loudness as the RMS level of the whole signal in dB.

    Args:
        y: numpy array of audio samples.
        sr: sample rate; accepted for a uniform signature but not used.

    Returns:
        ``20 * log10(rms)`` rounded to two decimals, or ``-60.0`` when the
        RMS is not positive (for example an all-zero signal).
    """
    import numpy as np

    level = np.sqrt(np.mean(y ** 2))

    # log10 is undefined for a non-positive level; report the fixed floor.
    if not level > 0:
        return round(float(-60.0), 2)

    return round(float(20 * np.log10(level)), 2)
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def calculate_dynamic_range(y):
    """Estimate dynamic range in dB from frame-wise RMS percentiles.

    Returns the ratio between the 95th and 10th percentile of librosa's
    frame RMS, expressed in dB, never below 0 and rounded to three decimals.
    Returns ``0.0`` when there are no RMS frames.
    """
    import librosa
    import numpy as np

    frame_rms = librosa.feature.rms(y=y)[0]
    if len(frame_rms) == 0:
        return 0.0

    # Keep both levels above a tiny floor so the ratio and its log are defined.
    floor = 1e-10
    loud = max(np.percentile(frame_rms, 95), floor)
    quiet = max(np.percentile(frame_rms, 10), floor)

    spread_db = 20 * np.log10(loud / quiet)
    return round(float(max(0.0, spread_db)), 3)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def main():
    """Command-line entry point: analyse one audio file and print JSON.

    On success a single JSON object with BPM, key, energy, loudness, dynamic
    range, duration and bookkeeping fields is printed to stdout. Every
    failure (invalid argument, missing dependency, missing file, processing
    error) is also reported as a JSON object on stdout, followed by exit
    status 1.
    """
    parser = argparse.ArgumentParser(description='Analyze audio for classical DSP features')
    parser.add_argument('audio_path', help='Path to audio file')
    parser.add_argument('--output', choices=['json'], default='json',
                        help='Output format (default: json)')
    parser.add_argument('--max-length', type=int, default=120,
                        help='Max audio length to analyze in seconds (default: 120)')
    parser.add_argument('--stems-dir',
                        help='Directory containing Demucs stems for improved key detection')

    args = parser.parse_args()

    # A non-positive limit must be rejected: a negative value would make the
    # truncation slice drop the END of the signal instead of capping its
    # length, and zero would leave nothing to analyse.
    if args.max_length <= 0:
        print(json.dumps({
            'error': 'invalid_argument',
            'message': f'--max-length must be a positive number of seconds, got {args.max_length}'
        }))
        sys.exit(1)

    check_dependencies()

    if not os.path.exists(args.audio_path):
        print(json.dumps({'error': 'file_not_found', 'message': f'File not found: {args.audio_path}'}))
        sys.exit(1)

    try:
        import librosa
        import numpy as np

        target_sr = 22050
        y, sr = librosa.load(args.audio_path, sr=target_sr, mono=True)
        # Full duration is reported even though only a prefix is analysed.
        duration = len(y) / sr

        max_samples = int(args.max_length * sr)
        if len(y) > max_samples:
            y = y[:max_samples]

        bpm_result = detect_bpm(y, sr)

        # Prefer stem-based key detection when the "other" stem is present;
        # otherwise fall back to the harmonic part of the full mix.
        key_result = None
        key_detection_source = 'mix_hpss'
        if args.stems_dir:
            other_stem = os.path.join(args.stems_dir, 'other.wav')
            bass_stem = os.path.join(args.stems_dir, 'bass.wav')
            if os.path.exists(other_stem):
                key_result = detect_key_from_stems(
                    other_stem,
                    bass_stem_path=bass_stem if os.path.exists(bass_stem) else None,
                    max_length_seconds=args.max_length
                )
                key_detection_source = 'demucs_stems'

        if key_result is None:
            key_result = detect_key(y, sr, harmonic_only=True)

        energy = calculate_energy(y)
        loudness_db = calculate_loudness(y, sr)
        dynamic_range_db = calculate_dynamic_range(y)

        result = {
            'bpm': bpm_result,
            'key': key_result,
            'energy': energy,
            'loudness_db': loudness_db,
            'dynamic_range_db': dynamic_range_db,
            'duration': round(duration, 2),
            'sample_rate': sr,
            'analyzed_length': round(min(duration, args.max_length), 2),
            'key_detection_source': key_detection_source,
        }

        class NumpyEncoder(json.JSONEncoder):
            """JSON encoder that converts numpy scalars to plain Python values."""

            def default(self, obj):
                # np.generic is the base of every numpy scalar, np.bool_ included.
                if isinstance(obj, np.generic):
                    return obj.item()
                return super().default(obj)

        print(json.dumps(result, cls=NumpyEncoder))

    except Exception as e:
        # Top-level boundary: report any failure as JSON rather than a traceback.
        print(json.dumps({
            'error': 'processing_error',
            'message': str(e),
            'type': type(e).__name__
        }))
        sys.exit(1)
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
# Run the CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: orbit-dsp
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Ultra-fast, CPU-only classical audio feature extraction (BPM, Key, Loudness, Energy, Duration)
|
|
5
|
+
License: ISC
|
|
6
|
+
Classifier: Programming Language :: Python :: 3
|
|
7
|
+
Classifier: License :: OSI Approved :: ISC License (ISCL)
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Classifier: Topic :: Multimedia :: Sound/Audio :: Analysis
|
|
10
|
+
Requires-Python: >=3.8
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
Requires-Dist: librosa>=0.10.0
|
|
13
|
+
Requires-Dist: numpy>=1.20.0
|
|
14
|
+
Requires-Dist: scipy>=1.8.0
|
|
15
|
+
|
|
16
|
+
# orbit_dsp
|
|
17
|
+
|
|
18
|
+
Standalone Python package for ORBIT DSP analysis.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
orbit_dsp/__init__.py
|
|
4
|
+
orbit_dsp/audio_dsp.py
|
|
5
|
+
orbit_dsp.egg-info/PKG-INFO
|
|
6
|
+
orbit_dsp.egg-info/SOURCES.txt
|
|
7
|
+
orbit_dsp.egg-info/dependency_links.txt
|
|
8
|
+
orbit_dsp.egg-info/entry_points.txt
|
|
9
|
+
orbit_dsp.egg-info/requires.txt
|
|
10
|
+
orbit_dsp.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
orbit_dsp
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "orbit-dsp"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "Ultra-fast, CPU-only classical audio feature extraction (BPM, Key, Loudness, Energy, Duration)"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
license = { text = "ISC" }
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Programming Language :: Python :: 3",
|
|
14
|
+
"License :: OSI Approved :: ISC License (ISCL)",
|
|
15
|
+
"Operating System :: OS Independent",
|
|
16
|
+
"Topic :: Multimedia :: Sound/Audio :: Analysis"
|
|
17
|
+
]
|
|
18
|
+
dependencies = [
|
|
19
|
+
"librosa>=0.10.0",
|
|
20
|
+
"numpy>=1.20.0",
|
|
21
|
+
"scipy>=1.8.0"
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
[tool.setuptools]
|
|
25
|
+
packages = ["orbit_dsp"]
|
|
26
|
+
|
|
27
|
+
[project.scripts]
|
|
28
|
+
orbit-dsp = "orbit_dsp.audio_dsp:main"
|