muze 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -1
- data/README.md +5 -0
- data/Rakefile +3 -0
- data/ext/muze/muze_ext.c +129 -12
- data/lib/muze/beat/beat_track.rb +93 -11
- data/lib/muze/core/audio.rb +129 -0
- data/lib/muze/core/cache.rb +38 -0
- data/lib/muze/core/dct.rb +24 -21
- data/lib/muze/core/frames.rb +31 -0
- data/lib/muze/core/matrix.rb +23 -0
- data/lib/muze/core/resample.rb +111 -19
- data/lib/muze/core/stft.rb +312 -52
- data/lib/muze/core/windows.rb +113 -17
- data/lib/muze/display/specshow.rb +307 -41
- data/lib/muze/effects/harmonic_percussive.rb +83 -18
- data/lib/muze/effects/streaming.rb +101 -0
- data/lib/muze/effects/time_stretch.rb +353 -36
- data/lib/muze/feature/aggregation.rb +49 -0
- data/lib/muze/feature/chroma.rb +43 -15
- data/lib/muze/feature/context.rb +81 -0
- data/lib/muze/feature/mfcc.rb +78 -38
- data/lib/muze/feature/spectral.rb +258 -39
- data/lib/muze/filters/chroma_filter.rb +21 -2
- data/lib/muze/filters/mel.rb +47 -1
- data/lib/muze/io/audio_loader/ffmpeg_backend.rb +179 -15
- data/lib/muze/io/audio_loader/wavify_backend.rb +118 -11
- data/lib/muze/io/audio_loader.rb +178 -48
- data/lib/muze/io/audio_writer.rb +48 -0
- data/lib/muze/native.rb +91 -8
- data/lib/muze/onset/onset_detect.rb +114 -23
- data/lib/muze/version.rb +1 -1
- data/lib/muze.rb +237 -60
- metadata +11 -21
- data/benchmarks/baseline.json +0 -24
- data/benchmarks/native_vs_ruby.rb +0 -23
- data/benchmarks/quality_metrics.rb +0 -265
- data/benchmarks/quality_thresholds.md +0 -28
- data/benchmarks/support/fixture_library.rb +0 -107
data/lib/muze/feature/mfcc.rb
CHANGED
|
@@ -13,11 +13,27 @@ module Muze
|
|
|
13
13
|
# @param n_mels [Integer]
|
|
14
14
|
# @param fmin [Float]
|
|
15
15
|
# @param fmax [Float, nil]
|
|
16
|
+
# @param power [Float]
|
|
17
|
+
# @param center [Boolean]
|
|
18
|
+
# @param window [Symbol]
|
|
19
|
+
# @param pad_mode [Symbol]
|
|
20
|
+
# @param norm [Symbol, nil]
|
|
16
21
|
# @return [Numo::SFloat]
|
|
17
|
-
def melspectrogram(y: nil, sr: 22_050, s: nil, n_fft: 2048, hop_length: 512, n_mels: 128, fmin: 0.0, fmax: nil)
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
22
|
+
def melspectrogram(y: nil, sr: 22_050, s: nil, n_fft: 2048, hop_length: 512, n_mels: 128, fmin: 0.0, fmax: nil, power: 2.0, center: true, window: :hann, pad_mode: :reflect, norm: nil, s_kind: :power)
|
|
23
|
+
raise Muze::ParameterError, "power must be positive" unless power.positive?
|
|
24
|
+
raise Muze::ParameterError, "s_kind must be :power or :magnitude" unless %i[power magnitude].include?(s_kind)
|
|
25
|
+
|
|
26
|
+
spectrum = if s
|
|
27
|
+
provided = Numo::SFloat.cast(s)
|
|
28
|
+
validate_finite_array!(provided.to_a.flatten, "s")
|
|
29
|
+
raise Muze::ParameterError, "spectrogram input must be non-negative" if provided.to_a.flatten.any?(&:negative?)
|
|
30
|
+
|
|
31
|
+
s_kind == :magnitude ? (provided**power).cast_to(Numo::SFloat) : provided
|
|
32
|
+
else
|
|
33
|
+
spectrogram(y, n_fft:, hop_length:, power:, center:, window:, pad_mode:)
|
|
34
|
+
end
|
|
35
|
+
filter_bank = Muze::Filters.mel(sr:, n_fft:, n_mels:, fmin:, fmax:, norm:)
|
|
36
|
+
Muze::Core::Matrix.multiply(filter_bank, spectrum)
|
|
21
37
|
end
|
|
22
38
|
|
|
23
39
|
# @param y [Numo::SFloat, Array<Float>, nil]
|
|
@@ -29,19 +45,31 @@ module Muze
|
|
|
29
45
|
# @param n_mels [Integer]
|
|
30
46
|
# @param fmin [Float]
|
|
31
47
|
# @param fmax [Float, nil]
|
|
48
|
+
# @param dct_type [Integer]
|
|
49
|
+
# @param lifter [Integer]
|
|
50
|
+
# @param norm [Symbol, nil]
|
|
32
51
|
# @return [Numo::SFloat]
|
|
33
|
-
def mfcc(y: nil, sr: 22_050, s: nil, n_mfcc: 20, n_fft: 2048, hop_length: 512, n_mels: 128, fmin: 0.0, fmax: nil)
|
|
52
|
+
def mfcc(y: nil, sr: 22_050, s: nil, n_mfcc: 20, n_fft: 2048, hop_length: 512, n_mels: 128, fmin: 0.0, fmax: nil, dct_type: 2, lifter: 0, norm: :ortho, s_kind: :mel_power)
|
|
34
53
|
raise Muze::ParameterError, "n_mfcc must be positive" unless n_mfcc.positive?
|
|
54
|
+
raise Muze::ParameterError, "lifter must be >= 0" if lifter.negative?
|
|
55
|
+
raise Muze::ParameterError, "s_kind must be :mel_power or :log_mel" unless %i[mel_power log_mel].include?(s_kind)
|
|
35
56
|
|
|
36
57
|
mel_spec = if s
|
|
37
|
-
Numo::SFloat.cast(s)
|
|
58
|
+
provided = Numo::SFloat.cast(s)
|
|
59
|
+
validate_finite_array!(provided.to_a.flatten, "s")
|
|
60
|
+
if s_kind == :mel_power && provided.to_a.flatten.any?(&:negative?)
|
|
61
|
+
raise Muze::ParameterError, "mel power spectrogram must be non-negative"
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
provided
|
|
38
65
|
else
|
|
39
66
|
melspectrogram(y:, sr:, n_fft:, hop_length:, n_mels:, fmin:, fmax:)
|
|
40
67
|
end
|
|
41
68
|
|
|
42
|
-
log_mel = Muze.power_to_db(mel_spec)
|
|
43
|
-
dct = Muze::Core::DCT.dct(log_mel, axis: 0, norm:
|
|
44
|
-
dct[0...n_mfcc, true].cast_to(Numo::SFloat)
|
|
69
|
+
log_mel = s_kind == :log_mel ? mel_spec : Muze.power_to_db(mel_spec)
|
|
70
|
+
dct = Muze::Core::DCT.dct(log_mel, type: dct_type, axis: 0, norm:)
|
|
71
|
+
coeffs = dct[0...n_mfcc, true].cast_to(Numo::SFloat)
|
|
72
|
+
apply_lifter(coeffs, lifter:)
|
|
45
73
|
end
|
|
46
74
|
|
|
47
75
|
# @param data [Numo::SFloat]
|
|
@@ -52,23 +80,25 @@ module Muze
|
|
|
52
80
|
def delta(data, order: 1, width: 9, mode: :interp)
|
|
53
81
|
raise Muze::ParameterError, "order must be >= 1" unless order >= 1
|
|
54
82
|
raise Muze::ParameterError, "width must be odd and >= 3" unless width.odd? && width >= 3
|
|
55
|
-
raise Muze::ParameterError, "mode must be :interp" unless
|
|
83
|
+
raise Muze::ParameterError, "mode must be :interp, :nearest, :mirror, or :constant" unless %i[interp nearest mirror constant].include?(mode)
|
|
56
84
|
|
|
57
85
|
result = Numo::SFloat.cast(data)
|
|
58
|
-
|
|
86
|
+
original_ndim = result.ndim
|
|
87
|
+
order.times { result = finite_difference(result, width, mode:) }
|
|
88
|
+
result = result[true, 0] if original_ndim == 1 && result.ndim == 2
|
|
59
89
|
result
|
|
60
90
|
end
|
|
61
91
|
|
|
62
|
-
def
|
|
92
|
+
def spectrogram(y, n_fft:, hop_length:, power:, center:, window:, pad_mode:)
|
|
63
93
|
raise Muze::ParameterError, "y must be provided when s is nil" if y.nil?
|
|
64
94
|
|
|
65
|
-
stft_matrix = Muze.stft(y, n_fft:, hop_length:)
|
|
95
|
+
stft_matrix = Muze.stft(y, n_fft:, hop_length:, center:, window:, pad_mode:)
|
|
66
96
|
magnitude, = Muze.magphase(stft_matrix)
|
|
67
|
-
(magnitude**
|
|
97
|
+
(magnitude**power).cast_to(Numo::SFloat)
|
|
68
98
|
end
|
|
69
|
-
private_class_method :
|
|
99
|
+
private_class_method :spectrogram
|
|
70
100
|
|
|
71
|
-
def finite_difference(data, width)
|
|
101
|
+
def finite_difference(data, width, mode:)
|
|
72
102
|
matrix = Numo::SFloat.cast(data)
|
|
73
103
|
matrix = matrix.expand_dims(1) if matrix.ndim == 1
|
|
74
104
|
|
|
@@ -81,40 +111,50 @@ module Muze
|
|
|
81
111
|
cols.times do |col|
|
|
82
112
|
numerator = 0.0
|
|
83
113
|
(1..half).each do |offset|
|
|
84
|
-
left =
|
|
85
|
-
right =
|
|
86
|
-
numerator += offset * (
|
|
114
|
+
left = sample_with_mode(matrix, row, col - offset, cols, mode:)
|
|
115
|
+
right = sample_with_mode(matrix, row, col + offset, cols, mode:)
|
|
116
|
+
numerator += offset * (right - left)
|
|
87
117
|
end
|
|
88
118
|
output[row, col] = numerator / denominator
|
|
89
119
|
end
|
|
90
120
|
end
|
|
91
121
|
|
|
92
|
-
|
|
122
|
+
output
|
|
93
123
|
end
|
|
94
124
|
private_class_method :finite_difference
|
|
95
125
|
|
|
96
|
-
def
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
126
|
+
def sample_with_mode(matrix, row, col, cols, mode:)
|
|
127
|
+
return matrix[row, col] if col.between?(0, cols - 1)
|
|
128
|
+
|
|
129
|
+
case mode
|
|
130
|
+
when :constant
|
|
131
|
+
0.0
|
|
132
|
+
when :mirror
|
|
133
|
+
matrix[row, mirror_index(col, cols)]
|
|
134
|
+
else
|
|
135
|
+
matrix[row, [[col, 0].max, cols - 1].min]
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
private_class_method :sample_with_mode
|
|
101
139
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
raise Muze::ParameterError, "Matrix dimensions do not align" unless left_cols == right_rows
|
|
140
|
+
def mirror_index(index, length)
|
|
141
|
+
return 0 if length <= 1
|
|
105
142
|
|
|
106
|
-
|
|
143
|
+
period = (length - 1) * 2
|
|
144
|
+
mirrored = index % period
|
|
145
|
+
mirrored >= length ? period - mirrored : mirrored
|
|
146
|
+
end
|
|
147
|
+
private_class_method :mirror_index
|
|
107
148
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
sum = 0.0
|
|
111
|
-
left_cols.times { |idx| sum += left_matrix[row, idx] * right_matrix[idx, col] }
|
|
112
|
-
output[row, col] = sum
|
|
113
|
-
end
|
|
114
|
-
end
|
|
149
|
+
def apply_lifter(coeffs, lifter:)
|
|
150
|
+
return coeffs if lifter.zero?
|
|
115
151
|
|
|
116
|
-
|
|
152
|
+
rows, = coeffs.shape
|
|
153
|
+
rows.times do |index|
|
|
154
|
+
coeffs[index, true] = coeffs[index, true] * (1.0 + ((lifter / 2.0) * Math.sin(Math::PI * (index + 1) / lifter)))
|
|
155
|
+
end
|
|
156
|
+
coeffs
|
|
117
157
|
end
|
|
118
|
-
private_class_method :
|
|
158
|
+
private_class_method :apply_lifter
|
|
119
159
|
end
|
|
120
160
|
end
|
|
data/lib/muze/feature/spectral.rb
CHANGED
|
@@ -10,8 +10,8 @@ module Muze
|
|
|
10
10
|
# @param n_fft [Integer]
|
|
11
11
|
# @param hop_length [Integer]
|
|
12
12
|
# @return [Numo::SFloat] shape: [1, frames]
|
|
13
|
-
def spectral_centroid(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512)
|
|
14
|
-
magnitude, frequencies = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:)
|
|
13
|
+
def spectral_centroid(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude)
|
|
14
|
+
magnitude, frequencies = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
|
|
15
15
|
_, frames = magnitude.shape
|
|
16
16
|
output = Numo::SFloat.zeros(1, frames)
|
|
17
17
|
|
|
@@ -38,9 +38,11 @@ module Muze
|
|
|
38
38
|
# @param hop_length [Integer]
|
|
39
39
|
# @param p [Integer]
|
|
40
40
|
# @return [Numo::SFloat] shape: [1, frames]
|
|
41
|
-
def spectral_bandwidth(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, p: 2)
|
|
42
|
-
|
|
43
|
-
|
|
41
|
+
def spectral_bandwidth(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, p: 2, center: true, pad_mode: :reflect, s_kind: :magnitude)
|
|
42
|
+
raise Muze::ParameterError, "p must be positive" unless p.positive?
|
|
43
|
+
|
|
44
|
+
magnitude, frequencies = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
|
|
45
|
+
centroids = spectral_centroid(y:, s: magnitude, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind: :magnitude)
|
|
44
46
|
_, frames = magnitude.shape
|
|
45
47
|
output = Numo::SFloat.zeros(1, frames)
|
|
46
48
|
|
|
@@ -71,8 +73,10 @@ module Muze
|
|
|
71
73
|
# @param hop_length [Integer]
|
|
72
74
|
# @param roll_percent [Float]
|
|
73
75
|
# @return [Numo::SFloat] shape: [1, frames]
|
|
74
|
-
def spectral_rolloff(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, roll_percent: 0.85)
|
|
75
|
-
|
|
76
|
+
def spectral_rolloff(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, roll_percent: 0.85, center: true, pad_mode: :reflect, s_kind: :magnitude)
|
|
77
|
+
raise Muze::ParameterError, "roll_percent must satisfy 0 < roll_percent < 1" unless roll_percent.positive? && roll_percent < 1.0
|
|
78
|
+
|
|
79
|
+
magnitude, frequencies = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
|
|
76
80
|
_, frames = magnitude.shape
|
|
77
81
|
output = Numo::SFloat.zeros(1, frames)
|
|
78
82
|
|
|
@@ -100,8 +104,10 @@ module Muze
|
|
|
100
104
|
# @param s [Numo::SFloat, nil]
|
|
101
105
|
# @param amin [Float]
|
|
102
106
|
# @return [Numo::SFloat] shape: [1, frames]
|
|
103
|
-
def spectral_flatness(y: nil, s: nil, n_fft: 2048, hop_length: 512, amin: 1.0e-10)
|
|
104
|
-
|
|
107
|
+
def spectral_flatness(y: nil, s: nil, n_fft: 2048, hop_length: 512, amin: 1.0e-10, center: true, pad_mode: :reflect, s_kind: :magnitude)
|
|
108
|
+
raise Muze::ParameterError, "amin must be positive" unless amin.positive?
|
|
109
|
+
|
|
110
|
+
magnitude, = prepare_magnitude(y:, s:, sr: 22_050, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
|
|
105
111
|
_, frames = magnitude.shape
|
|
106
112
|
output = Numo::SFloat.zeros(1, frames)
|
|
107
113
|
|
|
@@ -120,10 +126,14 @@ module Muze
|
|
|
120
126
|
# @param n_bands [Integer]
|
|
121
127
|
# @param quantile [Float]
|
|
122
128
|
# @return [Numo::SFloat] shape: [n_bands + 1, frames]
|
|
123
|
-
def spectral_contrast(y: nil, s: nil, n_fft: 2048, hop_length: 512, n_bands: 6, quantile: 0.02)
|
|
124
|
-
|
|
129
|
+
def spectral_contrast(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, n_bands: 6, quantile: 0.02, fmin: 200.0, center: true, pad_mode: :reflect, s_kind: :magnitude)
|
|
130
|
+
raise Muze::ParameterError, "n_bands must be positive" unless n_bands.positive?
|
|
131
|
+
raise Muze::ParameterError, "quantile must satisfy 0 < quantile < 0.5" unless quantile.positive? && quantile < 0.5
|
|
132
|
+
raise Muze::ParameterError, "fmin must be positive" unless fmin.positive?
|
|
133
|
+
|
|
134
|
+
magnitude, frequencies = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
|
|
125
135
|
bins, frames = magnitude.shape
|
|
126
|
-
edges =
|
|
136
|
+
edges = spectral_contrast_edges(frequencies, n_bands:, fmin:, sr:)
|
|
127
137
|
output = Numo::SFloat.zeros(n_bands + 1, frames)
|
|
128
138
|
|
|
129
139
|
(n_bands + 1).times do |band|
|
|
@@ -145,20 +155,139 @@ module Muze
|
|
|
145
155
|
output
|
|
146
156
|
end
|
|
147
157
|
|
|
158
|
+
# @return [Array<Integer>]
|
|
159
|
+
def spectral_contrast_edges(frequencies, n_bands:, fmin:, sr:)
|
|
160
|
+
nyquist = sr / 2.0
|
|
161
|
+
hz_edges = [0.0, fmin]
|
|
162
|
+
n_bands.times { |band| hz_edges << [fmin * (2.0**(band + 1)), nyquist].min }
|
|
163
|
+
hz_edges << nyquist
|
|
164
|
+
hz_edges.map do |hz|
|
|
165
|
+
index = frequencies.each_index.min_by { |idx| (frequencies[idx] - hz).abs }
|
|
166
|
+
[[index, 0].max, frequencies.length - 1].min
|
|
167
|
+
end.each_cons(2).with_object([0]) do |(left, right), edges|
|
|
168
|
+
edges << [right, left + 1].max
|
|
169
|
+
end
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
# @return [Numo::SFloat] shape: [1, frames]
|
|
173
|
+
def spectral_flux(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude)
|
|
174
|
+
magnitude, = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
|
|
175
|
+
_, frames = magnitude.shape
|
|
176
|
+
output = Numo::SFloat.zeros(1, frames)
|
|
177
|
+
|
|
178
|
+
(1...frames).each do |frame_index|
|
|
179
|
+
diff = magnitude[true, frame_index] - magnitude[true, frame_index - 1]
|
|
180
|
+
output[0, frame_index] = Math.sqrt((diff * diff).sum)
|
|
181
|
+
end
|
|
182
|
+
output
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
# @return [Numo::SFloat] shape: [1, frames]
|
|
186
|
+
def spectral_entropy(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude)
|
|
187
|
+
magnitude, = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
|
|
188
|
+
_, frames = magnitude.shape
|
|
189
|
+
output = Numo::SFloat.zeros(1, frames)
|
|
190
|
+
|
|
191
|
+
frames.times do |frame_index|
|
|
192
|
+
spectrum = magnitude[true, frame_index]
|
|
193
|
+
total = spectrum.sum
|
|
194
|
+
next if total <= 0.0
|
|
195
|
+
|
|
196
|
+
probs = spectrum / total
|
|
197
|
+
entropy = probs.to_a.sum { |value| value.positive? ? -(value * Math.log2(value)) : 0.0 }
|
|
198
|
+
output[0, frame_index] = entropy / Math.log2([spectrum.size, 2].max)
|
|
199
|
+
end
|
|
200
|
+
output
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
# @return [Numo::SFloat] shape: [1, frames]
|
|
204
|
+
def spectral_crest(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude)
|
|
205
|
+
magnitude, = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
|
|
206
|
+
_, frames = magnitude.shape
|
|
207
|
+
output = Numo::SFloat.zeros(1, frames)
|
|
208
|
+
|
|
209
|
+
frames.times do |frame_index|
|
|
210
|
+
spectrum = magnitude[true, frame_index]
|
|
211
|
+
mean = spectrum.mean
|
|
212
|
+
output[0, frame_index] = mean <= 0.0 ? 0.0 : spectrum.max / mean
|
|
213
|
+
end
|
|
214
|
+
output
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
# @return [Numo::SFloat] shape: [1, frames]
|
|
218
|
+
def spectral_slope(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude)
|
|
219
|
+
magnitude, frequencies = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
|
|
220
|
+
mean_frequency = frequencies.sum / frequencies.length.to_f
|
|
221
|
+
frequency_variance = frequencies.sum { |frequency| (frequency - mean_frequency)**2 }
|
|
222
|
+
_, frames = magnitude.shape
|
|
223
|
+
output = Numo::SFloat.zeros(1, frames)
|
|
224
|
+
|
|
225
|
+
frames.times do |frame_index|
|
|
226
|
+
spectrum = magnitude[true, frame_index].to_a
|
|
227
|
+
mean_spectrum = spectrum.sum / spectrum.length.to_f
|
|
228
|
+
covariance = frequencies.each_with_index.sum { |frequency, idx| (frequency - mean_frequency) * (spectrum[idx] - mean_spectrum) }
|
|
229
|
+
output[0, frame_index] = frequency_variance.zero? ? 0.0 : covariance / frequency_variance
|
|
230
|
+
end
|
|
231
|
+
output
|
|
232
|
+
end
|
|
233
|
+
|
|
234
|
+
# @return [Numo::SFloat] shape: [1, frames]
|
|
235
|
+
def spectral_decrease(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude)
|
|
236
|
+
magnitude, = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
|
|
237
|
+
bins, frames = magnitude.shape
|
|
238
|
+
output = Numo::SFloat.zeros(1, frames)
|
|
239
|
+
|
|
240
|
+
frames.times do |frame_index|
|
|
241
|
+
first = magnitude[0, frame_index]
|
|
242
|
+
denominator = 0.0
|
|
243
|
+
numerator = 0.0
|
|
244
|
+
(1...bins).each do |bin|
|
|
245
|
+
value = magnitude[bin, frame_index]
|
|
246
|
+
numerator += (value - first) / bin
|
|
247
|
+
denominator += value
|
|
248
|
+
end
|
|
249
|
+
output[0, frame_index] = denominator <= 0.0 ? 0.0 : numerator / denominator
|
|
250
|
+
end
|
|
251
|
+
output
|
|
252
|
+
end
|
|
253
|
+
|
|
254
|
+
# @return [Numo::SFloat] shape: [order + 1, frames]
|
|
255
|
+
def poly_features(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, order: 1, frequency: nil, center: true, pad_mode: :reflect, s_kind: :magnitude)
|
|
256
|
+
raise Muze::ParameterError, "order must be >= 0" unless order.is_a?(Integer) && order >= 0
|
|
257
|
+
|
|
258
|
+
magnitude, frequencies = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
|
|
259
|
+
bins, frames = magnitude.shape
|
|
260
|
+
x_values = frequency ? Numo::SFloat.cast(frequency).to_a.flatten : frequencies
|
|
261
|
+
raise Muze::ParameterError, "frequency length must match spectrum bins" unless x_values.length == bins
|
|
262
|
+
|
|
263
|
+
x_values = normalize_frequency_axis(x_values)
|
|
264
|
+
output = Numo::SFloat.zeros(order + 1, frames)
|
|
265
|
+
frames.times do |frame_index|
|
|
266
|
+
coefficients = polynomial_coefficients(x_values, magnitude[true, frame_index].to_a, order)
|
|
267
|
+
coefficients.each_with_index { |value, index| output[index, frame_index] = value }
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
output
|
|
271
|
+
end
|
|
272
|
+
|
|
148
273
|
# @param y [Numo::SFloat, Array<Float>]
|
|
149
274
|
# @param frame_length [Integer]
|
|
150
275
|
# @param hop_length [Integer]
|
|
151
276
|
# @return [Numo::SFloat] shape: [1, frames]
|
|
152
|
-
def zero_crossing_rate(y, frame_length: 2048, hop_length: 512)
|
|
153
|
-
|
|
154
|
-
|
|
277
|
+
def zero_crossing_rate(y, frame_length: 2048, hop_length: 512, threshold: 0.0, center: false)
|
|
278
|
+
raise Muze::ParameterError, "threshold must be >= 0" if threshold.negative?
|
|
279
|
+
|
|
280
|
+
signal = mono_signal_to_a(y, "y")
|
|
281
|
+
signal = Array.new(frame_length / 2, 0.0) + signal + Array.new(frame_length / 2, 0.0) if center
|
|
282
|
+
frames = Muze::Core::Frames.slice(signal, frame_length:, hop_length:)
|
|
155
283
|
values = frames.map do |frame|
|
|
156
284
|
crossings = 0
|
|
157
|
-
|
|
285
|
+
signs = frame.map { |value| value.abs <= threshold ? 0.0 : value }
|
|
286
|
+
(1...signs.length).each { |idx| crossings += 1 if (signs[idx - 1] >= 0) != (signs[idx] >= 0) }
|
|
158
287
|
crossings.to_f / frame_length
|
|
159
288
|
end
|
|
160
289
|
|
|
161
|
-
Numo::SFloat[values]
|
|
290
|
+
Numo::SFloat[values].reshape(1, values.length)
|
|
162
291
|
end
|
|
163
292
|
|
|
164
293
|
# @param y [Numo::SFloat, Array<Float>, nil]
|
|
@@ -166,9 +295,10 @@ module Muze
|
|
|
166
295
|
# @param frame_length [Integer]
|
|
167
296
|
# @param hop_length [Integer]
|
|
168
297
|
# @return [Numo::SFloat] shape: [1, frames]
|
|
169
|
-
def rms(y: nil, s: nil, frame_length: 2048, hop_length: 512)
|
|
298
|
+
def rms(y: nil, s: nil, frame_length: 2048, hop_length: 512, center: false)
|
|
170
299
|
if s
|
|
171
300
|
matrix = Numo::SFloat.cast(s)
|
|
301
|
+
validate_finite_array!(matrix.to_a.flatten, "s")
|
|
172
302
|
matrix = matrix.expand_dims(1) if matrix.ndim == 1
|
|
173
303
|
_, frames = matrix.shape
|
|
174
304
|
values = Array.new(frames) do |frame_index|
|
|
@@ -176,16 +306,17 @@ module Muze
|
|
|
176
306
|
Math.sqrt((frame**2).sum / frame.size)
|
|
177
307
|
end
|
|
178
308
|
|
|
179
|
-
return Numo::SFloat[values]
|
|
309
|
+
return Numo::SFloat[values].reshape(1, values.length)
|
|
180
310
|
end
|
|
181
311
|
|
|
182
|
-
signal =
|
|
183
|
-
|
|
312
|
+
signal = mono_signal_to_a(y, "y")
|
|
313
|
+
signal = Array.new(frame_length / 2, 0.0) + signal + Array.new(frame_length / 2, 0.0) if center
|
|
314
|
+
frames = Muze::Core::Frames.slice(signal, frame_length:, hop_length:)
|
|
184
315
|
values = frames.map do |frame|
|
|
185
316
|
Math.sqrt(frame.sum { |value| value * value } / frame.length)
|
|
186
317
|
end
|
|
187
318
|
|
|
188
|
-
Numo::SFloat[values]
|
|
319
|
+
Numo::SFloat[values].reshape(1, values.length)
|
|
189
320
|
end
|
|
190
321
|
|
|
191
322
|
# @param y [Numo::SFloat, Array<Float>, nil]
|
|
@@ -194,12 +325,18 @@ module Muze
|
|
|
194
325
|
# @param hop_length [Integer]
|
|
195
326
|
# @param win_length [Integer]
|
|
196
327
|
# @return [Numo::SFloat]
|
|
197
|
-
def tempogram(y: nil, onset_envelope: nil, sr: 22_050, hop_length: 512, win_length: 384)
|
|
328
|
+
def tempogram(y: nil, onset_envelope: nil, sr: 22_050, hop_length: 512, win_length: 384, normalize: false)
|
|
329
|
+
raise Muze::ParameterError, "sr must be a positive integer" unless sr.is_a?(Integer) && sr.positive?
|
|
330
|
+
raise Muze::ParameterError, "hop_length must be a positive integer" unless hop_length.is_a?(Integer) && hop_length.positive?
|
|
331
|
+
raise Muze::ParameterError, "win_length must be a positive integer" unless win_length.is_a?(Integer) && win_length.positive?
|
|
332
|
+
raise Muze::ParameterError, "normalize must be true or false" unless [true, false].include?(normalize)
|
|
333
|
+
|
|
198
334
|
envelope = if onset_envelope
|
|
199
335
|
onset_envelope.is_a?(Numo::NArray) ? onset_envelope.to_a : Array(onset_envelope)
|
|
200
336
|
else
|
|
201
337
|
onset_env_from_signal(y, sr:, hop_length:)
|
|
202
338
|
end
|
|
339
|
+
validate_finite_array!(envelope, "onset_envelope")
|
|
203
340
|
|
|
204
341
|
frames = envelope.length
|
|
205
342
|
tempogram = Numo::SFloat.zeros(win_length, frames)
|
|
@@ -214,41 +351,41 @@ module Muze
|
|
|
214
351
|
(lag...segment.length).each do |offset|
|
|
215
352
|
value += segment[offset] * segment[offset - lag]
|
|
216
353
|
end
|
|
217
|
-
tempogram[lag, frame_index] = value
|
|
354
|
+
tempogram[lag, frame_index] = normalize ? normalized_autocorrelation(segment, lag, value) : value
|
|
218
355
|
end
|
|
219
356
|
end
|
|
220
357
|
|
|
221
358
|
tempogram
|
|
222
359
|
end
|
|
223
360
|
|
|
224
|
-
def prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:)
|
|
361
|
+
def prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center: true, pad_mode: :reflect, s_kind: :magnitude)
|
|
362
|
+
raise Muze::ParameterError, "s_kind must be :magnitude or :power" unless %i[magnitude power].include?(s_kind)
|
|
363
|
+
|
|
225
364
|
spectrum = if s
|
|
226
|
-
Numo::SFloat.cast(s)
|
|
365
|
+
provided = Numo::SFloat.cast(s)
|
|
366
|
+
validate_finite_array!(provided.to_a.flatten, "s")
|
|
367
|
+
if s_kind == :power
|
|
368
|
+
raise Muze::ParameterError, "power spectrogram must be non-negative" if provided.to_a.flatten.any?(&:negative?)
|
|
369
|
+
|
|
370
|
+
Numo::NMath.sqrt(provided).cast_to(Numo::SFloat)
|
|
371
|
+
else
|
|
372
|
+
provided
|
|
373
|
+
end
|
|
227
374
|
else
|
|
228
|
-
stft_matrix = Muze.stft(y, n_fft:, hop_length:)
|
|
375
|
+
stft_matrix = Muze.stft(y, n_fft:, hop_length:, center:, pad_mode:)
|
|
229
376
|
magnitude, = Muze.magphase(stft_matrix)
|
|
230
377
|
magnitude
|
|
231
378
|
end
|
|
232
379
|
|
|
233
380
|
spectrum = spectrum.expand_dims(1) if spectrum.ndim == 1
|
|
381
|
+
validate_finite_array!(spectrum.to_a.flatten, "spectrum")
|
|
234
382
|
bins, = spectrum.shape
|
|
235
383
|
fft_size = n_fft || ((bins - 1) * 2)
|
|
236
|
-
frequencies =
|
|
384
|
+
frequencies = Muze.fft_frequencies(sr:, n_fft: fft_size).to_a[0...bins]
|
|
237
385
|
[spectrum, frequencies]
|
|
238
386
|
end
|
|
239
387
|
private_class_method :prepare_magnitude
|
|
240
388
|
|
|
241
|
-
def frame_signal(signal, frame_length, hop_length)
|
|
242
|
-
return [signal + Array.new(frame_length - signal.length, 0.0)] if signal.length <= frame_length
|
|
243
|
-
|
|
244
|
-
frame_count = ((signal.length - frame_length) / hop_length) + 1
|
|
245
|
-
Array.new(frame_count) do |index|
|
|
246
|
-
start = index * hop_length
|
|
247
|
-
signal[start, frame_length]
|
|
248
|
-
end
|
|
249
|
-
end
|
|
250
|
-
private_class_method :frame_signal
|
|
251
|
-
|
|
252
389
|
def onset_env_from_signal(y, sr:, hop_length:)
|
|
253
390
|
mel_spec = melspectrogram(y:, sr:, n_fft: 1024, hop_length:, n_mels: 40)
|
|
254
391
|
_, frames = mel_spec.shape
|
|
@@ -262,5 +399,87 @@ module Muze
|
|
|
262
399
|
onset
|
|
263
400
|
end
|
|
264
401
|
private_class_method :onset_env_from_signal
|
|
402
|
+
|
|
403
|
+
def normalized_autocorrelation(segment, lag, value)
|
|
404
|
+
left_energy = 0.0
|
|
405
|
+
right_energy = 0.0
|
|
406
|
+
(lag...segment.length).each do |offset|
|
|
407
|
+
left = segment[offset]
|
|
408
|
+
right = segment[offset - lag]
|
|
409
|
+
left_energy += left * left
|
|
410
|
+
right_energy += right * right
|
|
411
|
+
end
|
|
412
|
+
denominator = Math.sqrt(left_energy * right_energy)
|
|
413
|
+
denominator <= 1.0e-12 ? 0.0 : value / denominator
|
|
414
|
+
end
|
|
415
|
+
private_class_method :normalized_autocorrelation
|
|
416
|
+
|
|
417
|
+
def normalize_frequency_axis(values)
|
|
418
|
+
min = values.min
|
|
419
|
+
max = values.max
|
|
420
|
+
return Array.new(values.length, 0.0) if (max - min).abs <= 1.0e-12
|
|
421
|
+
|
|
422
|
+
values.map { |value| (2.0 * (value - min) / (max - min)) - 1.0 }
|
|
423
|
+
end
|
|
424
|
+
private_class_method :normalize_frequency_axis
|
|
425
|
+
|
|
426
|
+
def polynomial_coefficients(x_values, y_values, order)
|
|
427
|
+
size = order + 1
|
|
428
|
+
normal = Array.new(size) { Array.new(size, 0.0) }
|
|
429
|
+
rhs = Array.new(size, 0.0)
|
|
430
|
+
|
|
431
|
+
x_values.each_with_index do |x_value, index|
|
|
432
|
+
powers = Array.new((2 * order) + 1, 1.0)
|
|
433
|
+
(1...powers.length).each { |power| powers[power] = powers[power - 1] * x_value }
|
|
434
|
+
size.times do |row|
|
|
435
|
+
rhs[row] += y_values[index] * powers[row]
|
|
436
|
+
size.times { |col| normal[row][col] += powers[row + col] }
|
|
437
|
+
end
|
|
438
|
+
end
|
|
439
|
+
|
|
440
|
+
solve_linear_system(normal, rhs)
|
|
441
|
+
end
|
|
442
|
+
private_class_method :polynomial_coefficients
|
|
443
|
+
|
|
444
|
+
def solve_linear_system(matrix, rhs)
|
|
445
|
+
size = rhs.length
|
|
446
|
+
size.times do |pivot|
|
|
447
|
+
best = (pivot...size).max_by { |row| matrix[row][pivot].abs }
|
|
448
|
+
return Array.new(size, 0.0) if matrix[best][pivot].abs <= 1.0e-12
|
|
449
|
+
|
|
450
|
+
matrix[pivot], matrix[best] = matrix[best], matrix[pivot]
|
|
451
|
+
rhs[pivot], rhs[best] = rhs[best], rhs[pivot]
|
|
452
|
+
|
|
453
|
+
divisor = matrix[pivot][pivot]
|
|
454
|
+
pivot.upto(size - 1) { |col| matrix[pivot][col] /= divisor }
|
|
455
|
+
rhs[pivot] /= divisor
|
|
456
|
+
|
|
457
|
+
size.times do |row|
|
|
458
|
+
next if row == pivot
|
|
459
|
+
|
|
460
|
+
factor = matrix[row][pivot]
|
|
461
|
+
pivot.upto(size - 1) { |col| matrix[row][col] -= factor * matrix[pivot][col] }
|
|
462
|
+
rhs[row] -= factor * rhs[pivot]
|
|
463
|
+
end
|
|
464
|
+
end
|
|
465
|
+
|
|
466
|
+
rhs
|
|
467
|
+
end
|
|
468
|
+
private_class_method :solve_linear_system
|
|
469
|
+
|
|
470
|
+
def validate_finite_array!(values, label)
|
|
471
|
+
return if values.all? { |value| value.respond_to?(:finite?) && value.finite? }
|
|
472
|
+
|
|
473
|
+
raise Muze::ParameterError, "#{label} must contain only finite numeric values"
|
|
474
|
+
end
|
|
475
|
+
private_class_method :validate_finite_array!
|
|
476
|
+
|
|
477
|
+
def mono_signal_to_a(value, label)
|
|
478
|
+
signal = Muze::Core::Audio.validate_audio!(value, allow_empty: true)
|
|
479
|
+
raise Muze::ParameterError, "#{label} must be mono audio" if signal.ndim == 2
|
|
480
|
+
|
|
481
|
+
signal.to_a
|
|
482
|
+
end
|
|
483
|
+
private_class_method :mono_signal_to_a
|
|
265
484
|
end
|
|
266
485
|
end
|
|
data/lib/muze/filters/chroma_filter.rb
CHANGED
|
@@ -3,19 +3,29 @@
|
|
|
3
3
|
module Muze
|
|
4
4
|
module Filters
|
|
5
5
|
module_function
|
|
6
|
+
CHROMA_CACHE = Muze::Core::BoundedCache.new(max_size: 64)
|
|
6
7
|
|
|
7
8
|
# @param sr [Integer]
|
|
8
9
|
# @param n_fft [Integer]
|
|
9
10
|
# @param n_chroma [Integer]
|
|
10
11
|
# @param tuning [Float]
|
|
11
12
|
# @return [Numo::SFloat] shape: [n_chroma, 1 + n_fft/2]
|
|
12
|
-
def chroma(sr:, n_fft:, n_chroma: 12, tuning: 0.0)
|
|
13
|
+
def chroma(sr:, n_fft:, n_chroma: 12, tuning: 0.0, ctroct: nil, octwidth: nil)
|
|
14
|
+
key = [sr, n_fft, n_chroma, tuning, ctroct, octwidth]
|
|
15
|
+
CHROMA_CACHE.fetch(key) { build_chroma(sr:, n_fft:, n_chroma:, tuning:, ctroct:, octwidth:) }.dup
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def build_chroma(sr:, n_fft:, n_chroma:, tuning:, ctroct:, octwidth:)
|
|
13
19
|
raise Muze::ParameterError, "sr must be positive" unless sr.positive?
|
|
14
20
|
raise Muze::ParameterError, "n_fft must be positive" unless n_fft.positive?
|
|
15
21
|
raise Muze::ParameterError, "n_chroma must be positive" unless n_chroma.positive?
|
|
22
|
+
raise Muze::ParameterError, "tuning must be finite" unless tuning.respond_to?(:finite?) && tuning.finite?
|
|
23
|
+
raise Muze::ParameterError, "ctroct must be finite" if ctroct && !(ctroct.respond_to?(:finite?) && ctroct.finite?)
|
|
24
|
+
raise Muze::ParameterError, "octwidth must be positive" if octwidth && !(octwidth.respond_to?(:positive?) && octwidth.positive?)
|
|
16
25
|
|
|
17
26
|
bins = (n_fft / 2) + 1
|
|
18
27
|
matrix = Numo::SFloat.zeros(n_chroma, bins)
|
|
28
|
+
center_octave = ctroct || 5.0
|
|
19
29
|
|
|
20
30
|
bins.times do |bin|
|
|
21
31
|
frequency = (bin * sr.to_f) / n_fft
|
|
@@ -26,12 +36,13 @@ module Muze
|
|
|
26
36
|
|
|
27
37
|
n_chroma.times do |chroma_index|
|
|
28
38
|
distance = circular_distance(chroma_index, chroma_position, n_chroma)
|
|
29
|
-
matrix[chroma_index, bin] = Math.exp(-(distance**2) / 2.0)
|
|
39
|
+
matrix[chroma_index, bin] = Math.exp(-(distance**2) / 2.0) * octave_weight(frequency, center_octave:, octwidth:)
|
|
30
40
|
end
|
|
31
41
|
end
|
|
32
42
|
|
|
33
43
|
normalize_columns(matrix)
|
|
34
44
|
end
|
|
45
|
+
private_class_method :build_chroma
|
|
35
46
|
|
|
36
47
|
def circular_distance(a, b, modulo)
|
|
37
48
|
direct = (a - b).abs
|
|
@@ -39,6 +50,14 @@ module Muze
|
|
|
39
50
|
end
|
|
40
51
|
private_class_method :circular_distance
|
|
41
52
|
|
|
53
|
+
def octave_weight(frequency, center_octave:, octwidth:)
|
|
54
|
+
return 1.0 unless octwidth
|
|
55
|
+
|
|
56
|
+
octave = Math.log2(frequency / 16.351597831287414)
|
|
57
|
+
Math.exp(-0.5 * (((octave - center_octave) / octwidth)**2))
|
|
58
|
+
end
|
|
59
|
+
private_class_method :octave_weight
|
|
60
|
+
|
|
42
61
|
def normalize_columns(matrix)
|
|
43
62
|
cols = matrix.shape[1]
|
|
44
63
|
cols.times do |col|
|