muze 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -1
- data/README.md +5 -0
- data/Rakefile +3 -0
- data/ext/muze/muze_ext.c +129 -12
- data/lib/muze/beat/beat_track.rb +93 -11
- data/lib/muze/core/audio.rb +129 -0
- data/lib/muze/core/cache.rb +38 -0
- data/lib/muze/core/dct.rb +24 -21
- data/lib/muze/core/frames.rb +31 -0
- data/lib/muze/core/matrix.rb +23 -0
- data/lib/muze/core/resample.rb +111 -19
- data/lib/muze/core/stft.rb +312 -52
- data/lib/muze/core/windows.rb +113 -17
- data/lib/muze/display/specshow.rb +307 -41
- data/lib/muze/effects/harmonic_percussive.rb +83 -18
- data/lib/muze/effects/streaming.rb +101 -0
- data/lib/muze/effects/time_stretch.rb +353 -36
- data/lib/muze/feature/aggregation.rb +49 -0
- data/lib/muze/feature/chroma.rb +43 -15
- data/lib/muze/feature/context.rb +81 -0
- data/lib/muze/feature/mfcc.rb +78 -38
- data/lib/muze/feature/spectral.rb +258 -39
- data/lib/muze/filters/chroma_filter.rb +21 -2
- data/lib/muze/filters/mel.rb +47 -1
- data/lib/muze/io/audio_loader/ffmpeg_backend.rb +179 -15
- data/lib/muze/io/audio_loader/wavify_backend.rb +118 -11
- data/lib/muze/io/audio_loader.rb +178 -48
- data/lib/muze/io/audio_writer.rb +48 -0
- data/lib/muze/native.rb +91 -8
- data/lib/muze/onset/onset_detect.rb +114 -23
- data/lib/muze/version.rb +1 -1
- data/lib/muze.rb +237 -60
- metadata +11 -21
- data/benchmarks/baseline.json +0 -24
- data/benchmarks/native_vs_ruby.rb +0 -23
- data/benchmarks/quality_metrics.rb +0 -265
- data/benchmarks/quality_thresholds.md +0 -28
- data/benchmarks/support/fixture_library.rb +0 -107
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 01b14e94bc84cd5ca2437c37404f0344a084af15f185465687776997c135442a
|
|
4
|
+
data.tar.gz: '09d93556256984b96e283bcd3be6bd5c6d404130056f6822b9bf42757a43c394'
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 451d64a3392a7baa148808ccee159a03c2d20234b0bc39db5bdf92535f399467fed6eb8360498148a925c652f0d43afb6e05cc070aa7548a2e71373490e727cb
|
|
7
|
+
data.tar.gz: 2743e731b94df7d830f4deb497099d211828ad41d85436a5afcb4e25864bec32865040db402763fa10a413004c574a5c5b7a942cc7f54091bb5c0dd6ce8ced4b
|
data/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,23 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project are documented in this file.
|
|
4
4
|
|
|
5
|
+
## [1.0.0] - 2026-05-25
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- Added broader audio I/O: WAV writing, source-rate preserving loads, `Pathname` and WAV `IO` inputs, stereo output, configurable downmixing, file metadata via `Muze.info`, and chunked reads via `Muze.load_stream`.
|
|
10
|
+
- Added streamed and long-file-friendly DSP APIs including `Muze.stft_stream`, `Muze.time_stretch_stream`, `Muze.pitch_shift_stream`, and `Muze.hpss_stream`.
|
|
11
|
+
- Added more STFT and conversion controls: explicit pad modes, trailing frame padding, periodic windows, custom windows, non-power-of-two even FFT lengths, frame/time/sample helpers, FFT frequencies, and configurable dB conversion.
|
|
12
|
+
- Added richer feature extraction: Slaney mel filters, mel frequencies, chroma tuning and octave weighting, MFCC liftering and input modes, delta edge modes, additional spectral descriptors, polynomial spectral features, tonnetz, beat-synchronous aggregation, and shared feature extraction via `Muze.feature_context` / `Muze.feature_stack`.
|
|
13
|
+
- Added rhythm controls for onset strength, onset detection, tempo estimation, normalized tempograms, fixed-BPM beat tracking, tempo frequency helpers, and beat metadata output.
|
|
14
|
+
- Added effects improvements: multi-channel processing, HPSS masks and margins, WSOLA/OLA time stretching, phase locking, pitch-shift controls, frame-energy trimming with interval units, and preemphasis/deemphasis.
|
|
15
|
+
- Added lightweight visualization upgrades: `specshow` axes, dimensions, color maps, value bounds, fragment output, image rendering, waveform envelope rendering, stereo waveform layouts, and onset envelope rendering.
|
|
16
|
+
|
|
17
|
+
### Changed
|
|
18
|
+
|
|
19
|
+
- The optional native extension is now built during gem installation when supported, while retaining the pure Ruby fallback.
|
|
20
|
+
- Audio loading now applies FFmpeg seek/duration controls before decoding and uses safer FFmpeg process handling.
|
|
21
|
+
|
|
5
22
|
## [0.1.0] - 2026-03-07
|
|
6
23
|
|
|
7
|
-
- Initial release.
|
|
24
|
+
- Initial release.
|
data/README.md
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# Muze
|
|
2
2
|
|
|
3
|
+
[Gem Version](https://rubygems.org/gems/muze)
|
|
4
|
+
[CI](https://github.com/ydah/muze/actions/workflows/ci.yml)
|
|
5
|
+
[Ruby](https://www.ruby-lang.org/)
|
|
6
|
+
[License](./LICENSE.txt)
|
|
7
|
+
|
|
3
8
|
Muze is a Ruby audio feature extraction library that provides a full pipeline from audio loading to spectral analysis, feature extraction, rhythm analysis, effects, and lightweight visualization.
|
|
4
9
|
|
|
5
10
|

|
data/Rakefile
CHANGED
|
@@ -7,6 +7,9 @@ require_relative "benchmarks/quality_metrics"
|
|
|
7
7
|
|
|
8
8
|
RSpec::Core::RakeTask.new(:spec)
|
|
9
9
|
|
|
10
|
+
CLEAN.include("ext/muze/*.o", "ext/muze/mkmf.log")
|
|
11
|
+
CLOBBER.include("ext/muze/Makefile", "ext/muze/*.bundle", "ext/muze/*.dSYM")
|
|
12
|
+
|
|
10
13
|
directory "ext/muze"
|
|
11
14
|
|
|
12
15
|
desc "Compile optional C extension"
|
data/ext/muze/muze_ext.c
CHANGED
|
@@ -1,16 +1,30 @@
|
|
|
1
1
|
#include "ruby.h"
|
|
2
|
+
#include "ruby/thread.h"
|
|
3
|
+
#include <string.h>
|
|
2
4
|
|
|
3
5
|
static VALUE mMuze;
|
|
4
6
|
static VALUE mNative;
|
|
5
7
|
|
|
8
|
+
static VALUE muze_parameter_error(void) {
|
|
9
|
+
ID id = rb_intern("ParameterError");
|
|
10
|
+
if (rb_const_defined(mMuze, id)) {
|
|
11
|
+
return rb_const_get(mMuze, id);
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
return rb_eArgError;
|
|
15
|
+
}
|
|
16
|
+
|
|
6
17
|
static VALUE native_frame_slices(VALUE self, VALUE rb_signal, VALUE rb_frame_length, VALUE rb_hop_length) {
|
|
7
|
-
|
|
18
|
+
if (!RB_TYPE_P(rb_signal, T_ARRAY)) {
|
|
19
|
+
rb_raise(muze_parameter_error(), "signal must be an Array");
|
|
20
|
+
}
|
|
21
|
+
|
|
8
22
|
const long signal_length = RARRAY_LEN(rb_signal);
|
|
9
23
|
const long frame_length = NUM2LONG(rb_frame_length);
|
|
10
24
|
const long hop_length = NUM2LONG(rb_hop_length);
|
|
11
25
|
|
|
12
26
|
if (frame_length <= 0 || hop_length <= 0) {
|
|
13
|
-
rb_raise(
|
|
27
|
+
rb_raise(muze_parameter_error(), "frame_length and hop_length must be positive");
|
|
14
28
|
}
|
|
15
29
|
|
|
16
30
|
if (signal_length <= frame_length) {
|
|
@@ -41,16 +55,66 @@ static VALUE native_frame_slices(VALUE self, VALUE rb_signal, VALUE rb_frame_len
|
|
|
41
55
|
return frames;
|
|
42
56
|
}
|
|
43
57
|
|
|
44
|
-
static
|
|
45
|
-
const double
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
58
|
+
/* Exchanges the elements at positions `left` and `right` of `values`. */
static void swap_double(double *values, long left, long right) {
  double *first = values + left;
  double *second = values + right;
  const double held = *first;

  *first = *second;
  *second = held;
}
|
|
63
|
+
|
|
64
|
+
/*
 * Partitions values[left..right] (inclusive) around values[pivot_index].
 *
 * The pivot is parked at `right`, every element strictly smaller than the
 * pivot is moved to the front of the range, and the pivot is then placed
 * directly after them. Returns the pivot's final index.
 */
static long partition_double(double *values, long left, long right, long pivot_index) {
  const double pivot = values[pivot_index];
  long boundary = left;
  long cursor;

  swap_double(values, pivot_index, right);

  for (cursor = left; cursor < right; cursor++) {
    if (!(values[cursor] < pivot)) continue;

    swap_double(values, boundary, cursor);
    boundary++;
  }

  swap_double(values, right, boundary);
  return boundary;
}
|
|
80
|
+
|
|
81
|
+
/*
 * Returns the element that would sit at index `target` if the first `count`
 * entries of `values` were sorted ascending. Reorders `values` in place.
 *
 * The pivot is always the midpoint of the current range. The caller is
 * expected to pass count > 0 and 0 <= target < count; no bounds are checked
 * here.
 */
static double quickselect_double(double *values, long count, long target) {
  long low = 0;
  long high = count - 1;

  while (low != high) {
    const long middle = low + ((high - low) / 2);
    const long settled = partition_double(values, low, high, middle);

    if (settled == target) return values[target];

    if (target < settled) {
      high = settled - 1;
    } else {
      low = settled + 1;
    }
  }

  return values[low];
}
|
|
99
|
+
|
|
100
|
+
struct median_args {
|
|
101
|
+
double *values;
|
|
102
|
+
long count;
|
|
103
|
+
long target;
|
|
104
|
+
double result;
|
|
105
|
+
};
|
|
106
|
+
|
|
107
|
+
static void *median_without_gvl(void *ptr) {
|
|
108
|
+
struct median_args *args = (struct median_args *)ptr;
|
|
109
|
+
args->result = quickselect_double(args->values, args->count, args->target);
|
|
110
|
+
return NULL;
|
|
50
111
|
}
|
|
51
112
|
|
|
52
113
|
static VALUE native_median1d(VALUE self, VALUE rb_values) {
|
|
53
|
-
|
|
114
|
+
if (!RB_TYPE_P(rb_values, T_ARRAY)) {
|
|
115
|
+
rb_raise(muze_parameter_error(), "values must be an Array");
|
|
116
|
+
}
|
|
117
|
+
|
|
54
118
|
const long count = RARRAY_LEN(rb_values);
|
|
55
119
|
if (count == 0) return DBL2NUM(0.0);
|
|
56
120
|
|
|
@@ -60,10 +124,62 @@ static VALUE native_median1d(VALUE self, VALUE rb_values) {
|
|
|
60
124
|
values[i] = NUM2DBL(rb_ary_entry(rb_values, i));
|
|
61
125
|
}
|
|
62
126
|
|
|
63
|
-
|
|
64
|
-
|
|
127
|
+
struct median_args args = { values, count, count / 2, 0.0 };
|
|
128
|
+
rb_thread_call_without_gvl(median_without_gvl, &args, NULL, NULL);
|
|
65
129
|
xfree(values);
|
|
66
|
-
return DBL2NUM(
|
|
130
|
+
return DBL2NUM(args.result);
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
/*
 * Inserts `value` into the ascending buffer `values` holding `*length`
 * entries and increments `*length`.
 *
 * The new entry goes after any existing entries that compare <= value. The
 * buffer must have room for one more element; capacity is not checked.
 */
static void insert_sorted_double(double *values, long *length, double value) {
  const long used = *length;
  long slot;

  for (slot = 0; slot < used; slot++) {
    if (!(values[slot] <= value)) break;
  }

  /* Open a gap at `slot` by shifting the tail one position to the right. */
  memmove(&values[slot + 1], &values[slot], sizeof(double) * (used - slot));
  values[slot] = value;
  *length = used + 1;
}
|
|
140
|
+
|
|
141
|
+
/*
 * Removes the first entry equal to `value` from the buffer `values` holding
 * `*length` entries and decrements `*length`. Does nothing when no entry
 * compares equal.
 */
static void remove_sorted_double(double *values, long *length, double value) {
  const long used = *length;
  long hit = 0;

  /* Entries smaller than `value` are also unequal to it, so one scan suffices. */
  while (hit < used && values[hit] != value) hit++;
  if (hit >= used) return;

  /* Close the gap by shifting the tail one position to the left. */
  memmove(&values[hit], &values[hit + 1], sizeof(double) * (used - hit - 1));
  *length = used - 1;
}
|
|
150
|
+
|
|
151
|
+
/*
 * Muze::Native.median_filter1d(values, half) -> Array of Float
 *
 * Sliding-window median. The window for output position i covers the input
 * indices [i - half, i + half], truncated at both ends of the array. For a
 * window with an even number of entries the upper of the two middle entries
 * is returned (window[length / 2]).
 *
 * Raises Muze::ParameterError (ArgumentError when that constant is missing)
 * when `values` is not an Array or `half` is negative. Elements that cannot
 * be converted to double raise from NUM2DBL.
 */
static VALUE native_median_filter1d(VALUE self, VALUE rb_values, VALUE rb_half) {
  if (!RB_TYPE_P(rb_values, T_ARRAY)) {
    rb_raise(muze_parameter_error(), "values must be an Array");
  }

  const long count = RARRAY_LEN(rb_values);
  const long half = NUM2LONG(rb_half);
  if (half < 0) {
    rb_raise(muze_parameter_error(), "half must be non-negative");
  }

  VALUE output = rb_ary_new2(count);
  if (count == 0) return output;

  /*
   * Both scratch buffers are allocated with ALLOCV_N so they are released
   * even when NUM2DBL or rb_ary_push raises; a raw ALLOC_N buffer would leak
   * on that path.
   */
  volatile VALUE samples_store = 0;
  volatile VALUE window_store = 0;
  double *samples = ALLOCV_N(double, samples_store, count);
  double *window = ALLOCV_N(double, window_store, count);
  long window_length = 0;

  /* Convert every element once, before any window bookkeeping starts. */
  for (long i = 0; i < count; i++) {
    samples[i] = NUM2DBL(rb_ary_entry(rb_values, i));
  }

  /*
   * Prime the window with the elements left of the first entering index.
   * Without this, samples[0..half-1] never take part in any median and the
   * removals below would target values that were never inserted.
   */
  const long primed = half < count ? half : count;
  for (long i = 0; i < primed; i++) {
    insert_sorted_double(window, &window_length, samples[i]);
  }

  for (long index = 0; index < count; index++) {
    /* Drop the sample that just slid out on the left. */
    if (index > half) {
      remove_sorted_double(window, &window_length, samples[index - half - 1]);
    }

    /* `half < count - index` is `index + half < count` without overflow. */
    if (half < count - index) {
      insert_sorted_double(window, &window_length, samples[index + half]);
    }

    rb_ary_push(output, DBL2NUM(window_length == 0 ? 0.0 : window[window_length / 2]));
  }

  ALLOCV_END(window_store);
  ALLOCV_END(samples_store);
  return output;
}
|
|
68
184
|
|
|
69
185
|
void Init_muze_ext(void) {
|
|
@@ -72,4 +188,5 @@ void Init_muze_ext(void) {
|
|
|
72
188
|
|
|
73
189
|
rb_define_singleton_method(mNative, "frame_slices", native_frame_slices, 3);
|
|
74
190
|
rb_define_singleton_method(mNative, "median1d", native_median1d, 1);
|
|
191
|
+
rb_define_singleton_method(mNative, "median_filter1d", native_median_filter1d, 2);
|
|
75
192
|
}
|
data/lib/muze/beat/beat_track.rb
CHANGED
|
@@ -12,15 +12,36 @@ module Muze
|
|
|
12
12
|
# @param start_bpm [Float]
|
|
13
13
|
# @param tightness [Integer]
|
|
14
14
|
# @return [Array(Float, Array<Integer>)] estimated tempo and beat frames
|
|
15
|
-
def beat_track(y: nil, sr: 22_050, onset_envelope: nil, hop_length: 512, start_bpm: 120.0, tightness: 100)
|
|
15
|
+
def beat_track(y: nil, sr: 22_050, onset_envelope: nil, hop_length: 512, start_bpm: 120.0, tightness: 100, min_bpm: 30.0, max_bpm: 240.0, bpm: nil, fill_missing: true, return_metadata: false)
|
|
16
|
+
validate_beat_params!(
|
|
17
|
+
sr:,
|
|
18
|
+
hop_length:,
|
|
19
|
+
start_bpm:,
|
|
20
|
+
tightness:,
|
|
21
|
+
min_bpm:,
|
|
22
|
+
max_bpm:,
|
|
23
|
+
bpm:,
|
|
24
|
+
fill_missing:,
|
|
25
|
+
return_metadata:
|
|
26
|
+
)
|
|
27
|
+
|
|
16
28
|
envelope = if onset_envelope
|
|
17
29
|
onset_envelope.is_a?(Numo::NArray) ? onset_envelope.to_a : Array(onset_envelope)
|
|
18
30
|
else
|
|
19
31
|
Muze::Onset.onset_strength(y:, sr:, hop_length:).to_a
|
|
20
32
|
end
|
|
33
|
+
validate_finite_array!(envelope, "onset_envelope")
|
|
34
|
+
|
|
35
|
+
if envelope.empty? || envelope.max.to_f <= 1.0e-12
|
|
36
|
+
result = { tempo: nil, beats: [], confidence: 0.0 }
|
|
37
|
+
return return_metadata ? result : [nil, []]
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
tempo = bpm || estimate_tempo(envelope, sr:, hop_length:, start_bpm:, min_bpm:, max_bpm:, tightness:)
|
|
41
|
+
beats = track_beats(envelope, tempo:, sr:, hop_length:, tightness:, fill_missing:)
|
|
42
|
+
confidence = beat_confidence(envelope, beats)
|
|
43
|
+
return { tempo:, beats:, confidence: } if return_metadata
|
|
21
44
|
|
|
22
|
-
tempo = estimate_tempo(envelope, sr:, hop_length:, start_bpm:)
|
|
23
|
-
beats = track_beats(envelope, tempo:, sr:, hop_length:, tightness:)
|
|
24
45
|
[tempo, beats]
|
|
25
46
|
end
|
|
26
47
|
|
|
@@ -30,25 +51,40 @@ module Muze
|
|
|
30
51
|
# @param hop_length [Integer]
|
|
31
52
|
# @param win_length [Integer]
|
|
32
53
|
# @return [Numo::SFloat]
|
|
33
|
-
def tempogram(y: nil, onset_envelope: nil, sr: 22_050, hop_length: 512, win_length: 384)
|
|
34
|
-
|
|
54
|
+
def tempogram(y: nil, onset_envelope: nil, sr: 22_050, hop_length: 512, win_length: 384, normalize: false)
|
|
55
|
+
validate_positive_integer!(sr, "sr")
|
|
56
|
+
validate_positive_integer!(hop_length, "hop_length")
|
|
57
|
+
Muze::Feature.tempogram(y:, onset_envelope:, sr:, hop_length:, win_length:, normalize:)
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# Tempo in BPM associated with each tempogram lag bin.
#
# Lag 0 has no defined tempo and is reported as 0.0; every other lag maps to
# 60 * sr / (hop_length * lag).
#
# @param sr [Integer] sample rate, must be a positive integer
# @param hop_length [Integer] hop size, must be a positive integer
# @param win_length [Integer] number of lag bins, must be positive
# @return [Numo::SFloat] one BPM value per lag bin
# @raise [Muze::ParameterError] when a parameter fails validation
def tempo_frequencies(sr: 22_050, hop_length: 512, win_length: 384)
  validate_positive_integer!(sr, "sr")
  validate_positive_integer!(hop_length, "hop_length")
  raise Muze::ParameterError, "win_length must be positive" unless win_length.positive?

  bpm_by_lag = Array.new(win_length) do |lag|
    if lag.zero?
      0.0
    else
      60.0 * sr / (hop_length * lag)
    end
  end

  Numo::SFloat.cast(bpm_by_lag)
end
|
|
36
69
|
|
|
37
|
-
def estimate_tempo(envelope, sr:, hop_length:, start_bpm:)
|
|
70
|
+
def estimate_tempo(envelope, sr:, hop_length:, start_bpm:, min_bpm:, max_bpm:, tightness:)
|
|
38
71
|
return start_bpm if envelope.length < 4
|
|
39
72
|
|
|
40
|
-
min_bpm
|
|
41
|
-
max_bpm
|
|
73
|
+
raise Muze::ParameterError, "min_bpm must be positive" unless min_bpm.positive?
|
|
74
|
+
raise Muze::ParameterError, "max_bpm must be greater than min_bpm" unless max_bpm > min_bpm
|
|
75
|
+
|
|
42
76
|
min_lag = [(sr * 60.0 / (hop_length * max_bpm)).round, 1].max
|
|
43
77
|
max_lag = [(sr * 60.0 / (hop_length * min_bpm)).round, envelope.length - 1].min
|
|
44
78
|
return start_bpm if min_lag >= max_lag
|
|
45
79
|
|
|
80
|
+
prior_lag = sr * 60.0 / (hop_length * start_bpm)
|
|
46
81
|
best_lag = min_lag
|
|
47
82
|
best_score = -Float::INFINITY
|
|
48
83
|
|
|
49
84
|
(min_lag..max_lag).each do |lag|
|
|
50
85
|
score = 0.0
|
|
51
86
|
(lag...envelope.length).each { |index| score += envelope[index] * envelope[index - lag] }
|
|
87
|
+
score -= normalized_tightness(tightness) * ((lag - prior_lag).abs / prior_lag) * score.abs
|
|
52
88
|
next unless score > best_score
|
|
53
89
|
|
|
54
90
|
best_score = score
|
|
@@ -59,7 +95,7 @@ module Muze
|
|
|
59
95
|
end
|
|
60
96
|
private_class_method :estimate_tempo
|
|
61
97
|
|
|
62
|
-
def track_beats(envelope, tempo:, sr:, hop_length:, tightness:)
|
|
98
|
+
def track_beats(envelope, tempo:, sr:, hop_length:, tightness:, fill_missing:)
|
|
63
99
|
interval = [(60.0 * sr / (tempo * hop_length)).round, 1].max
|
|
64
100
|
peaks = Muze::Onset.onset_detect(onset_envelope: envelope, backtrack: false)
|
|
65
101
|
return [] if peaks.empty?
|
|
@@ -69,7 +105,8 @@ module Muze
|
|
|
69
105
|
|
|
70
106
|
while target < envelope.length
|
|
71
107
|
candidates = peaks.select { |peak| (peak - target).abs <= search_radius(interval, tightness) }
|
|
72
|
-
|
|
108
|
+
candidate = select_beat_candidate(candidates, target:, interval:, envelope:, tightness:)
|
|
109
|
+
beats << (candidate || target) if fill_missing || candidate
|
|
73
110
|
target += interval
|
|
74
111
|
end
|
|
75
112
|
|
|
@@ -85,7 +122,7 @@ module Muze
|
|
|
85
122
|
private_class_method :search_radius
|
|
86
123
|
|
|
87
124
|
def select_beat_candidate(candidates, target:, interval:, envelope:, tightness:)
|
|
88
|
-
return
|
|
125
|
+
return nil unless candidates.any?
|
|
89
126
|
|
|
90
127
|
penalty_weight = 1.0 + (4.0 * normalized_tightness(tightness))
|
|
91
128
|
candidates.max_by do |candidate|
|
|
@@ -96,6 +133,16 @@ module Muze
|
|
|
96
133
|
end
|
|
97
134
|
private_class_method :select_beat_candidate
|
|
98
135
|
|
|
136
|
+
# Mean onset strength at the beat positions, relative to the envelope maximum.
#
# @param envelope [Array<Numeric>] onset strength values
# @param beats [Array<Integer>] indices into +envelope+
# @return [Float] 0.0 when there are no beats or the envelope maximum is not
#   above zero
def beat_confidence(envelope, beats)
  strongest = beats.empty? ? 0.0 : envelope.max.to_f
  return 0.0 if strongest <= 0.0

  strength_at_beats = beats.sum { |frame| envelope[frame].to_f }
  strength_at_beats / (beats.length * strongest)
end
|
|
144
|
+
private_class_method :beat_confidence
|
|
145
|
+
|
|
99
146
|
def normalized_tightness(tightness)
|
|
100
147
|
value = tightness.to_f
|
|
101
148
|
return 0.0 if value <= 0.0
|
|
@@ -103,5 +150,40 @@ module Muze
|
|
|
103
150
|
[value / 100.0, 4.0].min / 4.0
|
|
104
151
|
end
|
|
105
152
|
private_class_method :normalized_tightness
|
|
153
|
+
|
|
154
|
+
# Validates the keyword arguments forwarded by beat_track, in this order:
# sr, hop_length, start_bpm, tightness, min_bpm, max_bpm, bpm, then the two
# boolean flags. The first failing check raises.
#
# @raise [Muze::ParameterError] when any argument is invalid
def validate_beat_params!(sr:, hop_length:, start_bpm:, tightness:, min_bpm:, max_bpm:, bpm:, fill_missing:, return_metadata:)
  validate_positive_integer!(sr, "sr")
  validate_positive_integer!(hop_length, "hop_length")
  validate_positive_number!(start_bpm, "start_bpm")

  finite_tightness = tightness.respond_to?(:finite?) && tightness.finite?
  raise Muze::ParameterError, "tightness must be finite" unless finite_tightness

  validate_positive_number!(min_bpm, "min_bpm")
  validate_positive_number!(max_bpm, "max_bpm")
  raise Muze::ParameterError, "max_bpm must be greater than min_bpm" unless max_bpm > min_bpm

  # bpm is optional; it is only checked when a truthy value was supplied.
  validate_positive_number!(bpm, "bpm") if bpm

  { "fill_missing" => fill_missing, "return_metadata" => return_metadata }.each do |label, flag|
    next if [true, false].include?(flag)

    raise Muze::ParameterError, "#{label} must be true or false"
  end
end
|
|
166
|
+
private_class_method :validate_beat_params!
|
|
167
|
+
|
|
168
|
+
# Ensures +value+ is an Integer greater than zero.
#
# @param value [Object] candidate value
# @param label [String] parameter name used in the error message
# @return [nil]
# @raise [Muze::ParameterError] when +value+ is not a positive Integer
def validate_positive_integer!(value, label)
  acceptable = value.is_a?(Integer) && value.positive?
  raise Muze::ParameterError, "#{label} must be a positive integer" unless acceptable
end
|
|
173
|
+
private_class_method :validate_positive_integer!
|
|
174
|
+
|
|
175
|
+
# Ensures +value+ responds to #finite?, is finite, and is greater than zero.
#
# @param value [Object] candidate value
# @param label [String] parameter name used in the error message
# @return [nil]
# @raise [Muze::ParameterError] when +value+ fails any of the checks
def validate_positive_number!(value, label)
  acceptable = value.respond_to?(:finite?) && value.finite? && value.positive?
  raise Muze::ParameterError, "#{label} must be positive" unless acceptable
end
|
|
180
|
+
private_class_method :validate_positive_number!
|
|
181
|
+
|
|
182
|
+
# Ensures every element of +values+ responds to #finite? and is finite.
# An empty collection passes.
#
# @param values [Enumerable] elements to check
# @param label [String] parameter name used in the error message
# @return [nil]
# @raise [Muze::ParameterError] when any element is non-numeric or non-finite
def validate_finite_array!(values, label)
  has_offender = values.any? { |value| !(value.respond_to?(:finite?) && value.finite?) }
  raise Muze::ParameterError, "#{label} must contain only finite numeric values" if has_offender
end
|
|
187
|
+
private_class_method :validate_finite_array!
|
|
106
188
|
end
|
|
107
189
|
end
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Muze
  module Core
    # Small audio-array helpers shared by public convenience APIs.
    #
    # Inputs are cast to Numo::SFloat and must be one- or two-dimensional.
    # For two-dimensional input the first axis is indexed as frames and the
    # second as channels.
    module Audio
      module_function

      # Casts +y+ to Numo::SFloat and checks rank, emptiness, finiteness and
      # channel count, in that order.
      #
      # @param y [Array, Numo::NArray] audio samples
      # @param allow_empty [Boolean] accept an empty signal
      # @return [Numo::SFloat] the cast signal
      # @raise [Muze::ParameterError] when +y+ is nil, cannot be cast, or
      #   fails a check
      def validate_audio!(y, allow_empty: false)
        raise Muze::ParameterError, "audio must not be nil" if y.nil?

        samples = Numo::SFloat.cast(y)
        raise Muze::ParameterError, "audio must be one- or two-dimensional" unless [1, 2].include?(samples.ndim)
        raise Muze::ParameterError, "audio must not be empty" unless allow_empty || !samples.empty?
        raise Muze::ParameterError, "audio must contain only finite numeric values" unless finite_values?(samples)

        missing_channels = samples.ndim == 2 && samples.shape[1].zero?
        raise Muze::ParameterError, "audio channel count must be positive" if missing_channels

        samples
      rescue NoMethodError, TypeError, ArgumentError => e
        raise Muze::ParameterError, "audio must be Array<Float> or Numo::NArray: #{e.message}"
      end

      # Non-raising variant of {validate_audio!}.
      #
      # @return [Boolean] false when validation raises Muze::ParameterError
      def valid_audio?(y, allow_empty: false)
        validate_audio!(y, allow_empty: allow_empty)
        true
      rescue Muze::ParameterError
        false
      end

      # Scales the signal so its largest absolute sample equals +peak+.
      #
      # Empty and all-zero signals are returned unscaled. With
      # +axis: :channels+ and two-dimensional input, each channel is scaled
      # independently.
      #
      # @param y [Array, Numo::NArray] audio samples
      # @param peak [Numeric] target absolute peak, must be positive
      # @param axis [nil, Symbol] nil or :channels
      # @return [Numo::SFloat]
      # @raise [Muze::ParameterError] on invalid +peak+, +axis+ or audio
      def normalize(y, peak: 1.0, axis: nil)
        raise Muze::ParameterError, "peak must be positive" unless peak.respond_to?(:positive?) && peak.positive?
        raise Muze::ParameterError, "axis must be nil or :channels" unless axis.nil? || axis == :channels

        samples = validate_audio!(y, allow_empty: true)
        return samples if samples.empty?
        return normalize_channels(samples, peak: peak) if axis == :channels && samples.ndim == 2

        loudest = samples.abs.max.to_f
        return samples if loudest <= 0.0

        gain = peak.to_f / loudest
        (samples * gain).cast_to(Numo::SFloat)
      end

      # Builds a new signal by concatenating the given intervals of +y+.
      #
      # +intervals+ is a list of [start, end] pairs; a single bare pair is
      # also accepted. Positions are interpreted according to +units+ and
      # clamped to the signal bounds when slicing.
      #
      # @param y [Array, Numo::NArray] audio samples
      # @param intervals [#to_a] [start, end] pairs
      # @param units [Symbol] :samples, :frames or :time
      # @param sr [Numeric, nil] sample rate, required for :time
      # @param hop_length [Numeric] hop size used for :frames
      # @return [Numo::SFloat]
      # @raise [Muze::ParameterError] on malformed intervals or units
      def remix(y, intervals, units: :samples, sr: nil, hop_length: 512)
        samples = validate_audio!(y, allow_empty: true)

        pairs = intervals.respond_to?(:to_a) ? intervals.to_a : Array(intervals)
        # A bare [start, end] pair is promoted to a one-element list of pairs.
        pairs = [pairs] if pairs.length == 2 && !pairs.first.respond_to?(:to_a)

        bounds = pairs.map do |pair|
          well_formed = pair.respond_to?(:to_a) && pair.to_a.length == 2
          raise Muze::ParameterError, "intervals must contain [start, end] pairs" unless well_formed

          first_sample = convert_position(pair.to_a[0], units: units, sr: sr, hop_length: hop_length)
          last_sample = convert_position(pair.to_a[1], units: units, sr: sr, hop_length: hop_length)
          raise Muze::ParameterError, "interval end must be >= start" if last_sample < first_sample

          [first_sample, last_sample]
        end

        pieces = bounds.map { |first_sample, last_sample| slice_samples(samples, first_sample, last_sample) }
        channel_count = samples.ndim == 2 ? samples.shape[1] : nil
        concatenate_chunks(pieces, channel_count: channel_count)
      end

      # True when every element of +signal+ is a finite number.
      def finite_values?(signal)
        signal.to_a.flatten.none? { |value| !(value.respond_to?(:finite?) && value.finite?) }
      end
      private_class_method :finite_values?

      # Scales each channel (second axis) of a two-dimensional signal to
      # +peak+ on its own; channels whose peak is not above zero are copied
      # unchanged.
      def normalize_channels(signal, peak:)
        frame_count, channel_count = signal.shape
        scaled = Numo::SFloat.zeros(frame_count, channel_count)

        (0...channel_count).each do |index|
          column = signal[true, index]
          loudest = column.abs.max.to_f
          scaled[true, index] = if loudest <= 0.0
                                  column
                                else
                                  column * (peak.to_f / loudest)
                                end
        end

        scaled
      end
      private_class_method :normalize_channels

      # Converts +position+ to a sample index according to +units+.
      def convert_position(position, units:, sr:, hop_length:)
        case units
        when :samples
          position.to_i
        when :frames
          raise Muze::ParameterError, "hop_length must be positive" unless hop_length.positive?

          (position.to_i * hop_length).to_i
        when :time
          raise Muze::ParameterError, "sr must be positive for time units" unless sr&.positive?

          (position.to_f * sr).round
        else
          raise Muze::ParameterError, "units must be :samples, :frames, or :time"
        end
      end
      private_class_method :convert_position

      # Returns samples start_sample...end_sample with both bounds clamped to
      # the signal; an empty array of matching rank when the range is empty.
      def slice_samples(signal, start_sample, end_sample)
        limit = signal.shape[0]
        from = [[start_sample, 0].max, limit].min
        upto = [[end_sample, 0].max, limit].min
        multichannel = signal.ndim == 2

        if upto <= from
          return multichannel ? Numo::SFloat.zeros(0, signal.shape[1]) : Numo::SFloat[]
        end

        multichannel ? signal[from...upto, true] : signal[from...upto]
      end
      private_class_method :slice_samples

      # Joins +chunks+ along the first axis. +channel_count+ is nil for
      # one-dimensional audio.
      def concatenate_chunks(chunks, channel_count:)
        if chunks.empty?
          return channel_count ? Numo::SFloat.zeros(0, channel_count) : Numo::SFloat[]
        end
        return Numo::SFloat.cast(chunks.flat_map(&:to_a)) unless channel_count

        total = chunks.sum { |chunk| chunk.shape[0] }
        joined = Numo::SFloat.zeros(total, channel_count)
        cursor = 0

        chunks.each do |chunk|
          rows = chunk.shape[0]
          next if rows.zero?

          joined[cursor...(cursor + rows), true] = chunk
          cursor += rows
        end

        joined
      end
      private_class_method :concatenate_chunks
    end
  end
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Muze
  module Core
    # Tiny bounded LRU cache for generated DSP lookup tables.
    #
    # Entries live in a Hash, whose insertion order doubles as the recency
    # order: the first key is the least recently used one. Every operation
    # runs under a single Mutex.
    class BoundedCache
      # @param max_size [Integer] maximum number of entries, must be positive
      # @raise [Muze::ParameterError] when +max_size+ is not a positive Integer
      def initialize(max_size:)
        acceptable = max_size.is_a?(Integer) && max_size.positive?
        raise Muze::ParameterError, "max_size must be positive" unless acceptable

        @max_size = max_size
        @entries = {}
        @mutex = Mutex.new
      end

      # Returns the cached value for +key+, or stores and returns the block's
      # result on a miss. The block runs while the mutex is held.
      #
      # @param key [Object] cache key
      # @yieldreturn [Object] value to cache when +key+ is absent
      # @return [Object]
      def fetch(key)
        @mutex.synchronize do
          if @entries.key?(key)
            # Delete and re-insert so the key becomes the most recent entry.
            @entries[key] = @entries.delete(key)
          else
            fresh = yield
            @entries.shift until @entries.size < @max_size
            @entries[key] = fresh
          end
        end
      end

      # Removes every entry.
      def clear
        @mutex.synchronize { @entries.clear }
      end

      # @return [Integer] number of cached entries
      def size
        @mutex.synchronize { @entries.size }
      end
    end
  end
end
|
data/lib/muze/core/dct.rb
CHANGED
|
@@ -4,6 +4,7 @@ module Muze
|
|
|
4
4
|
module Core
|
|
5
5
|
# DCT utilities.
|
|
6
6
|
module DCT
|
|
7
|
+
BASIS_CACHE = Muze::Core::BoundedCache.new(max_size: 64)
|
|
7
8
|
module_function
|
|
8
9
|
|
|
9
10
|
# @param x [Numo::NArray]
|
|
@@ -22,34 +23,36 @@ module Muze
|
|
|
22
23
|
|
|
23
24
|
rows, cols = matrix.shape
|
|
24
25
|
target_length = n || rows
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
cols.times do |col|
|
|
28
|
-
signal = matrix[true, col].to_a
|
|
29
|
-
transformed = dct_vector(signal, target_length, norm:)
|
|
30
|
-
target_length.times { |idx| result[idx, col] = transformed[idx] }
|
|
31
|
-
end
|
|
26
|
+
working = adjust_rows(matrix, target_length)
|
|
27
|
+
result = basis_matrix(rows: target_length, cols: target_length, norm:).dot(working).cast_to(Numo::SFloat)
|
|
32
28
|
|
|
33
29
|
axis == 1 ? result.transpose : result
|
|
34
30
|
end
|
|
35
31
|
|
|
36
|
-
def
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
else
|
|
40
|
-
signal + Array.new(n - signal.length, 0.0)
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
Array.new(n) do |k|
|
|
44
|
-
sum = 0.0
|
|
45
|
-
n.times do |idx|
|
|
46
|
-
sum += padded[idx] * Math.cos(Math::PI * (idx + 0.5) * k / n)
|
|
47
|
-
end
|
|
32
|
+
# Returns +matrix+ with exactly +target_length+ rows: the leading rows when
# the matrix has at least that many, otherwise the matrix followed by
# zero-filled rows.
#
# @param matrix [Numo::NArray] two-dimensional input
# @param target_length [Integer] desired number of rows
# @return [Numo::NArray]
def adjust_rows(matrix, target_length)
  row_count, col_count = matrix.shape

  if row_count < target_length
    padded = Numo::SFloat.zeros(target_length, col_count)
    padded[0...row_count, true] = matrix
    padded
  else
    matrix[0...target_length, true]
  end
end
|
|
40
|
+
private_class_method :adjust_rows
|
|
41
|
+
|
|
42
|
+
# Returns the cosine basis matrix for the given shape and normalization,
# building it on first use and serving it from BASIS_CACHE afterwards.
#
# Entry (row, col) is cos(pi * (col + 0.5) * row / cols) passed through
# normalize_dct.
#
# @param rows [Integer] number of output coefficients
# @param cols [Integer] number of input samples
# @param norm [Symbol, nil] normalization mode forwarded to normalize_dct
# @return [Numo::SFloat] matrix of shape [rows, cols]
def basis_matrix(rows:, cols:, norm:)
  BASIS_CACHE.fetch([rows, cols, norm]) do
    Numo::SFloat.zeros(rows, cols).tap do |basis|
      rows.times do |coefficient|
        cols.times do |sample|
          cosine = Math.cos(Math::PI * (sample + 0.5) * coefficient / cols)
          basis[coefficient, sample] = normalize_dct(cosine, coefficient, cols, norm)
        end
      end
    end
  end
end
|
|
52
|
-
private_class_method :
|
|
55
|
+
private_class_method :basis_matrix
|
|
53
56
|
|
|
54
57
|
def normalize_dct(value, index, length, norm)
|
|
55
58
|
return value * 2.0 unless norm == :ortho
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Muze
  module Core
    # Shared fixed-size frame slicing for analysis and effects code.
    module Frames
      module_function

      # Splits +signal+ into frames of +frame_length+ samples taken every
      # +hop_length+ samples.
      #
      # A signal no longer than one frame yields a single zero-padded frame.
      # Without +pad_end+ the work is delegated to Muze::Native.frame_slices.
      # With +pad_end+ a trailing partial frame is kept and zero-padded.
      #
      # @param signal [Array, Numo::NArray] one-dimensional samples
      # @param frame_length [Integer] samples per frame, must be positive
      # @param hop_length [Integer] step between frame starts, must be positive
      # @param pad_end [Boolean] keep and zero-pad the trailing partial frame
      # @return [Array<Array>] frames
      # @raise [Muze::ParameterError] when a length is not positive
      def slice(signal, frame_length:, hop_length:, pad_end: false)
        unless frame_length.positive? && hop_length.positive?
          raise Muze::ParameterError, "frame_length and hop_length must be positive"
        end

        samples = signal.is_a?(Numo::NArray) ? signal.to_a : Array(signal)
        return [pad_frame(samples, frame_length)] if samples.length <= frame_length
        return Muze::Native.frame_slices(samples, frame_length, hop_length) unless pad_end

        last_start = samples.length - frame_length
        total_frames = (last_start.to_f / hop_length).ceil + 1

        (0...total_frames).map do |frame_index|
          pad_frame(samples[frame_index * hop_length, frame_length] || [], frame_length)
        end
      end

      # Right-pads +frame+ with 0.0 up to +frame_length+; frames that are
      # already long enough are returned as-is.
      def pad_frame(frame, frame_length)
        shortfall = frame_length - frame.length
        return frame unless shortfall.positive?

        frame + Array.new(shortfall, 0.0)
      end
      private_class_method :pad_frame
    end
  end
end
|