muze 0.1.0 → 1.0.0

This diff shows the differences between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -1
  3. data/README.md +5 -0
  4. data/Rakefile +3 -0
  5. data/ext/muze/muze_ext.c +129 -12
  6. data/lib/muze/beat/beat_track.rb +93 -11
  7. data/lib/muze/core/audio.rb +129 -0
  8. data/lib/muze/core/cache.rb +38 -0
  9. data/lib/muze/core/dct.rb +24 -21
  10. data/lib/muze/core/frames.rb +31 -0
  11. data/lib/muze/core/matrix.rb +23 -0
  12. data/lib/muze/core/resample.rb +111 -19
  13. data/lib/muze/core/stft.rb +312 -52
  14. data/lib/muze/core/windows.rb +113 -17
  15. data/lib/muze/display/specshow.rb +307 -41
  16. data/lib/muze/effects/harmonic_percussive.rb +83 -18
  17. data/lib/muze/effects/streaming.rb +101 -0
  18. data/lib/muze/effects/time_stretch.rb +353 -36
  19. data/lib/muze/feature/aggregation.rb +49 -0
  20. data/lib/muze/feature/chroma.rb +43 -15
  21. data/lib/muze/feature/context.rb +81 -0
  22. data/lib/muze/feature/mfcc.rb +78 -38
  23. data/lib/muze/feature/spectral.rb +258 -39
  24. data/lib/muze/filters/chroma_filter.rb +21 -2
  25. data/lib/muze/filters/mel.rb +47 -1
  26. data/lib/muze/io/audio_loader/ffmpeg_backend.rb +179 -15
  27. data/lib/muze/io/audio_loader/wavify_backend.rb +118 -11
  28. data/lib/muze/io/audio_loader.rb +178 -48
  29. data/lib/muze/io/audio_writer.rb +48 -0
  30. data/lib/muze/native.rb +91 -8
  31. data/lib/muze/onset/onset_detect.rb +114 -23
  32. data/lib/muze/version.rb +1 -1
  33. data/lib/muze.rb +237 -60
  34. metadata +11 -21
  35. data/benchmarks/baseline.json +0 -24
  36. data/benchmarks/native_vs_ruby.rb +0 -23
  37. data/benchmarks/quality_metrics.rb +0 -265
  38. data/benchmarks/quality_thresholds.md +0 -28
  39. data/benchmarks/support/fixture_library.rb +0 -107
@@ -13,11 +13,27 @@ module Muze
13
13
  # @param n_mels [Integer]
14
14
  # @param fmin [Float]
15
15
  # @param fmax [Float, nil]
16
+ # @param power [Float]
17
+ # @param center [Boolean]
18
+ # @param window [Symbol]
19
+ # @param pad_mode [Symbol]
20
+ # @param norm [Symbol, nil]
16
21
  # @return [Numo::SFloat]
17
- def melspectrogram(y: nil, sr: 22_050, s: nil, n_fft: 2048, hop_length: 512, n_mels: 128, fmin: 0.0, fmax: nil)
18
- power_spectrum = s ? Numo::SFloat.cast(s) : power_spectrogram(y, n_fft:, hop_length:)
19
- filter_bank = Muze::Filters.mel(sr:, n_fft:, n_mels:, fmin:, fmax:)
20
- matrix_multiply(filter_bank, power_spectrum)
22
+ def melspectrogram(y: nil, sr: 22_050, s: nil, n_fft: 2048, hop_length: 512, n_mels: 128, fmin: 0.0, fmax: nil, power: 2.0, center: true, window: :hann, pad_mode: :reflect, norm: nil, s_kind: :power)
23
+ raise Muze::ParameterError, "power must be positive" unless power.positive?
24
+ raise Muze::ParameterError, "s_kind must be :power or :magnitude" unless %i[power magnitude].include?(s_kind)
25
+
26
+ spectrum = if s
27
+ provided = Numo::SFloat.cast(s)
28
+ validate_finite_array!(provided.to_a.flatten, "s")
29
+ raise Muze::ParameterError, "spectrogram input must be non-negative" if provided.to_a.flatten.any?(&:negative?)
30
+
31
+ s_kind == :magnitude ? (provided**power).cast_to(Numo::SFloat) : provided
32
+ else
33
+ spectrogram(y, n_fft:, hop_length:, power:, center:, window:, pad_mode:)
34
+ end
35
+ filter_bank = Muze::Filters.mel(sr:, n_fft:, n_mels:, fmin:, fmax:, norm:)
36
+ Muze::Core::Matrix.multiply(filter_bank, spectrum)
21
37
  end
22
38
 
23
39
  # @param y [Numo::SFloat, Array<Float>, nil]
@@ -29,19 +45,31 @@ module Muze
29
45
  # @param n_mels [Integer]
30
46
  # @param fmin [Float]
31
47
  # @param fmax [Float, nil]
48
+ # @param dct_type [Integer]
49
+ # @param lifter [Integer]
50
+ # @param norm [Symbol, nil]
32
51
  # @return [Numo::SFloat]
33
- def mfcc(y: nil, sr: 22_050, s: nil, n_mfcc: 20, n_fft: 2048, hop_length: 512, n_mels: 128, fmin: 0.0, fmax: nil)
52
+ def mfcc(y: nil, sr: 22_050, s: nil, n_mfcc: 20, n_fft: 2048, hop_length: 512, n_mels: 128, fmin: 0.0, fmax: nil, dct_type: 2, lifter: 0, norm: :ortho, s_kind: :mel_power)
34
53
  raise Muze::ParameterError, "n_mfcc must be positive" unless n_mfcc.positive?
54
+ raise Muze::ParameterError, "lifter must be >= 0" if lifter.negative?
55
+ raise Muze::ParameterError, "s_kind must be :mel_power or :log_mel" unless %i[mel_power log_mel].include?(s_kind)
35
56
 
36
57
  mel_spec = if s
37
- Numo::SFloat.cast(s)
58
+ provided = Numo::SFloat.cast(s)
59
+ validate_finite_array!(provided.to_a.flatten, "s")
60
+ if s_kind == :mel_power && provided.to_a.flatten.any?(&:negative?)
61
+ raise Muze::ParameterError, "mel power spectrogram must be non-negative"
62
+ end
63
+
64
+ provided
38
65
  else
39
66
  melspectrogram(y:, sr:, n_fft:, hop_length:, n_mels:, fmin:, fmax:)
40
67
  end
41
68
 
42
- log_mel = Muze.power_to_db(mel_spec)
43
- dct = Muze::Core::DCT.dct(log_mel, axis: 0, norm: :ortho)
44
- dct[0...n_mfcc, true].cast_to(Numo::SFloat)
69
+ log_mel = s_kind == :log_mel ? mel_spec : Muze.power_to_db(mel_spec)
70
+ dct = Muze::Core::DCT.dct(log_mel, type: dct_type, axis: 0, norm:)
71
+ coeffs = dct[0...n_mfcc, true].cast_to(Numo::SFloat)
72
+ apply_lifter(coeffs, lifter:)
45
73
  end
46
74
 
47
75
  # @param data [Numo::SFloat]
@@ -52,23 +80,25 @@ module Muze
52
80
  def delta(data, order: 1, width: 9, mode: :interp)
53
81
  raise Muze::ParameterError, "order must be >= 1" unless order >= 1
54
82
  raise Muze::ParameterError, "width must be odd and >= 3" unless width.odd? && width >= 3
55
- raise Muze::ParameterError, "mode must be :interp" unless mode == :interp
83
+ raise Muze::ParameterError, "mode must be :interp, :nearest, :mirror, or :constant" unless %i[interp nearest mirror constant].include?(mode)
56
84
 
57
85
  result = Numo::SFloat.cast(data)
58
- order.times { result = finite_difference(result, width) }
86
+ original_ndim = result.ndim
87
+ order.times { result = finite_difference(result, width, mode:) }
88
+ result = result[true, 0] if original_ndim == 1 && result.ndim == 2
59
89
  result
60
90
  end
61
91
 
62
- def power_spectrogram(y, n_fft:, hop_length:)
92
+ def spectrogram(y, n_fft:, hop_length:, power:, center:, window:, pad_mode:)
63
93
  raise Muze::ParameterError, "y must be provided when s is nil" if y.nil?
64
94
 
65
- stft_matrix = Muze.stft(y, n_fft:, hop_length:)
95
+ stft_matrix = Muze.stft(y, n_fft:, hop_length:, center:, window:, pad_mode:)
66
96
  magnitude, = Muze.magphase(stft_matrix)
67
- (magnitude**2).cast_to(Numo::SFloat)
97
+ (magnitude**power).cast_to(Numo::SFloat)
68
98
  end
69
- private_class_method :power_spectrogram
99
+ private_class_method :spectrogram
70
100
 
71
- def finite_difference(data, width)
101
+ def finite_difference(data, width, mode:)
72
102
  matrix = Numo::SFloat.cast(data)
73
103
  matrix = matrix.expand_dims(1) if matrix.ndim == 1
74
104
 
@@ -81,40 +111,50 @@ module Muze
81
111
  cols.times do |col|
82
112
  numerator = 0.0
83
113
  (1..half).each do |offset|
84
- left = [[col - offset, 0].max, cols - 1].min
85
- right = [[col + offset, 0].max, cols - 1].min
86
- numerator += offset * (matrix[row, right] - matrix[row, left])
114
+ left = sample_with_mode(matrix, row, col - offset, cols, mode:)
115
+ right = sample_with_mode(matrix, row, col + offset, cols, mode:)
116
+ numerator += offset * (right - left)
87
117
  end
88
118
  output[row, col] = numerator / denominator
89
119
  end
90
120
  end
91
121
 
92
- data.ndim == 1 ? output[true, 0] : output
122
+ output
93
123
  end
94
124
  private_class_method :finite_difference
95
125
 
96
- def matrix_multiply(left, right)
97
- left_matrix = Numo::SFloat.cast(left)
98
- right_matrix = Numo::SFloat.cast(right)
99
- left_matrix = left_matrix.expand_dims(1) if left_matrix.ndim == 1
100
- right_matrix = right_matrix.expand_dims(1) if right_matrix.ndim == 1
126
+ def sample_with_mode(matrix, row, col, cols, mode:)
127
+ return matrix[row, col] if col.between?(0, cols - 1)
128
+
129
+ case mode
130
+ when :constant
131
+ 0.0
132
+ when :mirror
133
+ matrix[row, mirror_index(col, cols)]
134
+ else
135
+ matrix[row, [[col, 0].max, cols - 1].min]
136
+ end
137
+ end
138
+ private_class_method :sample_with_mode
101
139
 
102
- left_rows, left_cols = left_matrix.shape
103
- right_rows, right_cols = right_matrix.shape
104
- raise Muze::ParameterError, "Matrix dimensions do not align" unless left_cols == right_rows
140
+ def mirror_index(index, length)
141
+ return 0 if length <= 1
105
142
 
106
- output = Numo::SFloat.zeros(left_rows, right_cols)
143
+ period = (length - 1) * 2
144
+ mirrored = index % period
145
+ mirrored >= length ? period - mirrored : mirrored
146
+ end
147
+ private_class_method :mirror_index
107
148
 
108
- left_rows.times do |row|
109
- right_cols.times do |col|
110
- sum = 0.0
111
- left_cols.times { |idx| sum += left_matrix[row, idx] * right_matrix[idx, col] }
112
- output[row, col] = sum
113
- end
114
- end
149
+ def apply_lifter(coeffs, lifter:)
150
+ return coeffs if lifter.zero?
115
151
 
116
- output
152
+ rows, = coeffs.shape
153
+ rows.times do |index|
154
+ coeffs[index, true] = coeffs[index, true] * (1.0 + ((lifter / 2.0) * Math.sin(Math::PI * (index + 1) / lifter)))
155
+ end
156
+ coeffs
117
157
  end
118
- private_class_method :matrix_multiply
158
+ private_class_method :apply_lifter
119
159
  end
120
160
  end
@@ -10,8 +10,8 @@ module Muze
10
10
  # @param n_fft [Integer]
11
11
  # @param hop_length [Integer]
12
12
  # @return [Numo::SFloat] shape: [1, frames]
13
- def spectral_centroid(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512)
14
- magnitude, frequencies = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:)
13
+ def spectral_centroid(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude)
14
+ magnitude, frequencies = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
15
15
  _, frames = magnitude.shape
16
16
  output = Numo::SFloat.zeros(1, frames)
17
17
 
@@ -38,9 +38,11 @@ module Muze
38
38
  # @param hop_length [Integer]
39
39
  # @param p [Integer]
40
40
  # @return [Numo::SFloat] shape: [1, frames]
41
- def spectral_bandwidth(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, p: 2)
42
- magnitude, frequencies = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:)
43
- centroids = spectral_centroid(y:, s: magnitude, sr:, n_fft:, hop_length:)
41
+ def spectral_bandwidth(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, p: 2, center: true, pad_mode: :reflect, s_kind: :magnitude)
42
+ raise Muze::ParameterError, "p must be positive" unless p.positive?
43
+
44
+ magnitude, frequencies = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
45
+ centroids = spectral_centroid(y:, s: magnitude, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind: :magnitude)
44
46
  _, frames = magnitude.shape
45
47
  output = Numo::SFloat.zeros(1, frames)
46
48
 
@@ -71,8 +73,10 @@ module Muze
71
73
  # @param hop_length [Integer]
72
74
  # @param roll_percent [Float]
73
75
  # @return [Numo::SFloat] shape: [1, frames]
74
- def spectral_rolloff(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, roll_percent: 0.85)
75
- magnitude, frequencies = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:)
76
+ def spectral_rolloff(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, roll_percent: 0.85, center: true, pad_mode: :reflect, s_kind: :magnitude)
77
+ raise Muze::ParameterError, "roll_percent must satisfy 0 < roll_percent < 1" unless roll_percent.positive? && roll_percent < 1.0
78
+
79
+ magnitude, frequencies = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
76
80
  _, frames = magnitude.shape
77
81
  output = Numo::SFloat.zeros(1, frames)
78
82
 
@@ -100,8 +104,10 @@ module Muze
100
104
  # @param s [Numo::SFloat, nil]
101
105
  # @param amin [Float]
102
106
  # @return [Numo::SFloat] shape: [1, frames]
103
- def spectral_flatness(y: nil, s: nil, n_fft: 2048, hop_length: 512, amin: 1.0e-10)
104
- magnitude, = prepare_magnitude(y:, s:, sr: 22_050, n_fft:, hop_length:)
107
+ def spectral_flatness(y: nil, s: nil, n_fft: 2048, hop_length: 512, amin: 1.0e-10, center: true, pad_mode: :reflect, s_kind: :magnitude)
108
+ raise Muze::ParameterError, "amin must be positive" unless amin.positive?
109
+
110
+ magnitude, = prepare_magnitude(y:, s:, sr: 22_050, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
105
111
  _, frames = magnitude.shape
106
112
  output = Numo::SFloat.zeros(1, frames)
107
113
 
@@ -120,10 +126,14 @@ module Muze
120
126
  # @param n_bands [Integer]
121
127
  # @param quantile [Float]
122
128
  # @return [Numo::SFloat] shape: [n_bands + 1, frames]
123
- def spectral_contrast(y: nil, s: nil, n_fft: 2048, hop_length: 512, n_bands: 6, quantile: 0.02)
124
- magnitude, = prepare_magnitude(y:, s:, sr: 22_050, n_fft:, hop_length:)
129
+ def spectral_contrast(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, n_bands: 6, quantile: 0.02, fmin: 200.0, center: true, pad_mode: :reflect, s_kind: :magnitude)
130
+ raise Muze::ParameterError, "n_bands must be positive" unless n_bands.positive?
131
+ raise Muze::ParameterError, "quantile must satisfy 0 < quantile < 0.5" unless quantile.positive? && quantile < 0.5
132
+ raise Muze::ParameterError, "fmin must be positive" unless fmin.positive?
133
+
134
+ magnitude, frequencies = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
125
135
  bins, frames = magnitude.shape
126
- edges = Array.new(n_bands + 2) { |idx| ((bins - 1) * idx / (n_bands + 1).to_f).round }
136
+ edges = spectral_contrast_edges(frequencies, n_bands:, fmin:, sr:)
127
137
  output = Numo::SFloat.zeros(n_bands + 1, frames)
128
138
 
129
139
  (n_bands + 1).times do |band|
@@ -145,20 +155,139 @@ module Muze
145
155
  output
146
156
  end
147
157
 
158
+ # @return [Array<Integer>]
159
+ def spectral_contrast_edges(frequencies, n_bands:, fmin:, sr:)
160
+ nyquist = sr / 2.0
161
+ hz_edges = [0.0, fmin]
162
+ n_bands.times { |band| hz_edges << [fmin * (2.0**(band + 1)), nyquist].min }
163
+ hz_edges << nyquist
164
+ hz_edges.map do |hz|
165
+ index = frequencies.each_index.min_by { |idx| (frequencies[idx] - hz).abs }
166
+ [[index, 0].max, frequencies.length - 1].min
167
+ end.each_cons(2).with_object([0]) do |(left, right), edges|
168
+ edges << [right, left + 1].max
169
+ end
170
+ end
171
+
172
+ # @return [Numo::SFloat] shape: [1, frames]
173
+ def spectral_flux(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude)
174
+ magnitude, = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
175
+ _, frames = magnitude.shape
176
+ output = Numo::SFloat.zeros(1, frames)
177
+
178
+ (1...frames).each do |frame_index|
179
+ diff = magnitude[true, frame_index] - magnitude[true, frame_index - 1]
180
+ output[0, frame_index] = Math.sqrt((diff * diff).sum)
181
+ end
182
+ output
183
+ end
184
+
185
+ # @return [Numo::SFloat] shape: [1, frames]
186
+ def spectral_entropy(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude)
187
+ magnitude, = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
188
+ _, frames = magnitude.shape
189
+ output = Numo::SFloat.zeros(1, frames)
190
+
191
+ frames.times do |frame_index|
192
+ spectrum = magnitude[true, frame_index]
193
+ total = spectrum.sum
194
+ next if total <= 0.0
195
+
196
+ probs = spectrum / total
197
+ entropy = probs.to_a.sum { |value| value.positive? ? -(value * Math.log2(value)) : 0.0 }
198
+ output[0, frame_index] = entropy / Math.log2([spectrum.size, 2].max)
199
+ end
200
+ output
201
+ end
202
+
203
+ # @return [Numo::SFloat] shape: [1, frames]
204
+ def spectral_crest(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude)
205
+ magnitude, = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
206
+ _, frames = magnitude.shape
207
+ output = Numo::SFloat.zeros(1, frames)
208
+
209
+ frames.times do |frame_index|
210
+ spectrum = magnitude[true, frame_index]
211
+ mean = spectrum.mean
212
+ output[0, frame_index] = mean <= 0.0 ? 0.0 : spectrum.max / mean
213
+ end
214
+ output
215
+ end
216
+
217
+ # @return [Numo::SFloat] shape: [1, frames]
218
+ def spectral_slope(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude)
219
+ magnitude, frequencies = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
220
+ mean_frequency = frequencies.sum / frequencies.length.to_f
221
+ frequency_variance = frequencies.sum { |frequency| (frequency - mean_frequency)**2 }
222
+ _, frames = magnitude.shape
223
+ output = Numo::SFloat.zeros(1, frames)
224
+
225
+ frames.times do |frame_index|
226
+ spectrum = magnitude[true, frame_index].to_a
227
+ mean_spectrum = spectrum.sum / spectrum.length.to_f
228
+ covariance = frequencies.each_with_index.sum { |frequency, idx| (frequency - mean_frequency) * (spectrum[idx] - mean_spectrum) }
229
+ output[0, frame_index] = frequency_variance.zero? ? 0.0 : covariance / frequency_variance
230
+ end
231
+ output
232
+ end
233
+
234
+ # @return [Numo::SFloat] shape: [1, frames]
235
+ def spectral_decrease(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude)
236
+ magnitude, = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
237
+ bins, frames = magnitude.shape
238
+ output = Numo::SFloat.zeros(1, frames)
239
+
240
+ frames.times do |frame_index|
241
+ first = magnitude[0, frame_index]
242
+ denominator = 0.0
243
+ numerator = 0.0
244
+ (1...bins).each do |bin|
245
+ value = magnitude[bin, frame_index]
246
+ numerator += (value - first) / bin
247
+ denominator += value
248
+ end
249
+ output[0, frame_index] = denominator <= 0.0 ? 0.0 : numerator / denominator
250
+ end
251
+ output
252
+ end
253
+
254
+ # @return [Numo::SFloat] shape: [order + 1, frames]
255
+ def poly_features(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, order: 1, frequency: nil, center: true, pad_mode: :reflect, s_kind: :magnitude)
256
+ raise Muze::ParameterError, "order must be >= 0" unless order.is_a?(Integer) && order >= 0
257
+
258
+ magnitude, frequencies = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
259
+ bins, frames = magnitude.shape
260
+ x_values = frequency ? Numo::SFloat.cast(frequency).to_a.flatten : frequencies
261
+ raise Muze::ParameterError, "frequency length must match spectrum bins" unless x_values.length == bins
262
+
263
+ x_values = normalize_frequency_axis(x_values)
264
+ output = Numo::SFloat.zeros(order + 1, frames)
265
+ frames.times do |frame_index|
266
+ coefficients = polynomial_coefficients(x_values, magnitude[true, frame_index].to_a, order)
267
+ coefficients.each_with_index { |value, index| output[index, frame_index] = value }
268
+ end
269
+
270
+ output
271
+ end
272
+
148
273
  # @param y [Numo::SFloat, Array<Float>]
149
274
  # @param frame_length [Integer]
150
275
  # @param hop_length [Integer]
151
276
  # @return [Numo::SFloat] shape: [1, frames]
152
- def zero_crossing_rate(y, frame_length: 2048, hop_length: 512)
153
- signal = y.is_a?(Numo::NArray) ? y.to_a : Array(y)
154
- frames = frame_signal(signal, frame_length, hop_length)
277
+ def zero_crossing_rate(y, frame_length: 2048, hop_length: 512, threshold: 0.0, center: false)
278
+ raise Muze::ParameterError, "threshold must be >= 0" if threshold.negative?
279
+
280
+ signal = mono_signal_to_a(y, "y")
281
+ signal = Array.new(frame_length / 2, 0.0) + signal + Array.new(frame_length / 2, 0.0) if center
282
+ frames = Muze::Core::Frames.slice(signal, frame_length:, hop_length:)
155
283
  values = frames.map do |frame|
156
284
  crossings = 0
157
- (1...frame.length).each { |idx| crossings += 1 if (frame[idx - 1] >= 0) != (frame[idx] >= 0) }
285
+ signs = frame.map { |value| value.abs <= threshold ? 0.0 : value }
286
+ (1...signs.length).each { |idx| crossings += 1 if (signs[idx - 1] >= 0) != (signs[idx] >= 0) }
158
287
  crossings.to_f / frame_length
159
288
  end
160
289
 
161
- Numo::SFloat[values]
290
+ Numo::SFloat[values].reshape(1, values.length)
162
291
  end
163
292
 
164
293
  # @param y [Numo::SFloat, Array<Float>, nil]
@@ -166,9 +295,10 @@ module Muze
166
295
  # @param frame_length [Integer]
167
296
  # @param hop_length [Integer]
168
297
  # @return [Numo::SFloat] shape: [1, frames]
169
- def rms(y: nil, s: nil, frame_length: 2048, hop_length: 512)
298
+ def rms(y: nil, s: nil, frame_length: 2048, hop_length: 512, center: false)
170
299
  if s
171
300
  matrix = Numo::SFloat.cast(s)
301
+ validate_finite_array!(matrix.to_a.flatten, "s")
172
302
  matrix = matrix.expand_dims(1) if matrix.ndim == 1
173
303
  _, frames = matrix.shape
174
304
  values = Array.new(frames) do |frame_index|
@@ -176,16 +306,17 @@ module Muze
176
306
  Math.sqrt((frame**2).sum / frame.size)
177
307
  end
178
308
 
179
- return Numo::SFloat[values]
309
+ return Numo::SFloat[values].reshape(1, values.length)
180
310
  end
181
311
 
182
- signal = y.is_a?(Numo::NArray) ? y.to_a : Array(y)
183
- frames = frame_signal(signal, frame_length, hop_length)
312
+ signal = mono_signal_to_a(y, "y")
313
+ signal = Array.new(frame_length / 2, 0.0) + signal + Array.new(frame_length / 2, 0.0) if center
314
+ frames = Muze::Core::Frames.slice(signal, frame_length:, hop_length:)
184
315
  values = frames.map do |frame|
185
316
  Math.sqrt(frame.sum { |value| value * value } / frame.length)
186
317
  end
187
318
 
188
- Numo::SFloat[values]
319
+ Numo::SFloat[values].reshape(1, values.length)
189
320
  end
190
321
 
191
322
  # @param y [Numo::SFloat, Array<Float>, nil]
@@ -194,12 +325,18 @@ module Muze
194
325
  # @param hop_length [Integer]
195
326
  # @param win_length [Integer]
196
327
  # @return [Numo::SFloat]
197
- def tempogram(y: nil, onset_envelope: nil, sr: 22_050, hop_length: 512, win_length: 384)
328
+ def tempogram(y: nil, onset_envelope: nil, sr: 22_050, hop_length: 512, win_length: 384, normalize: false)
329
+ raise Muze::ParameterError, "sr must be a positive integer" unless sr.is_a?(Integer) && sr.positive?
330
+ raise Muze::ParameterError, "hop_length must be a positive integer" unless hop_length.is_a?(Integer) && hop_length.positive?
331
+ raise Muze::ParameterError, "win_length must be a positive integer" unless win_length.is_a?(Integer) && win_length.positive?
332
+ raise Muze::ParameterError, "normalize must be true or false" unless [true, false].include?(normalize)
333
+
198
334
  envelope = if onset_envelope
199
335
  onset_envelope.is_a?(Numo::NArray) ? onset_envelope.to_a : Array(onset_envelope)
200
336
  else
201
337
  onset_env_from_signal(y, sr:, hop_length:)
202
338
  end
339
+ validate_finite_array!(envelope, "onset_envelope")
203
340
 
204
341
  frames = envelope.length
205
342
  tempogram = Numo::SFloat.zeros(win_length, frames)
@@ -214,41 +351,41 @@ module Muze
214
351
  (lag...segment.length).each do |offset|
215
352
  value += segment[offset] * segment[offset - lag]
216
353
  end
217
- tempogram[lag, frame_index] = value
354
+ tempogram[lag, frame_index] = normalize ? normalized_autocorrelation(segment, lag, value) : value
218
355
  end
219
356
  end
220
357
 
221
358
  tempogram
222
359
  end
223
360
 
224
- def prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:)
361
+ def prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center: true, pad_mode: :reflect, s_kind: :magnitude)
362
+ raise Muze::ParameterError, "s_kind must be :magnitude or :power" unless %i[magnitude power].include?(s_kind)
363
+
225
364
  spectrum = if s
226
- Numo::SFloat.cast(s)
365
+ provided = Numo::SFloat.cast(s)
366
+ validate_finite_array!(provided.to_a.flatten, "s")
367
+ if s_kind == :power
368
+ raise Muze::ParameterError, "power spectrogram must be non-negative" if provided.to_a.flatten.any?(&:negative?)
369
+
370
+ Numo::NMath.sqrt(provided).cast_to(Numo::SFloat)
371
+ else
372
+ provided
373
+ end
227
374
  else
228
- stft_matrix = Muze.stft(y, n_fft:, hop_length:)
375
+ stft_matrix = Muze.stft(y, n_fft:, hop_length:, center:, pad_mode:)
229
376
  magnitude, = Muze.magphase(stft_matrix)
230
377
  magnitude
231
378
  end
232
379
 
233
380
  spectrum = spectrum.expand_dims(1) if spectrum.ndim == 1
381
+ validate_finite_array!(spectrum.to_a.flatten, "spectrum")
234
382
  bins, = spectrum.shape
235
383
  fft_size = n_fft || ((bins - 1) * 2)
236
- frequencies = Array.new(bins) { |index| index * sr.to_f / fft_size }
384
+ frequencies = Muze.fft_frequencies(sr:, n_fft: fft_size).to_a[0...bins]
237
385
  [spectrum, frequencies]
238
386
  end
239
387
  private_class_method :prepare_magnitude
240
388
 
241
- def frame_signal(signal, frame_length, hop_length)
242
- return [signal + Array.new(frame_length - signal.length, 0.0)] if signal.length <= frame_length
243
-
244
- frame_count = ((signal.length - frame_length) / hop_length) + 1
245
- Array.new(frame_count) do |index|
246
- start = index * hop_length
247
- signal[start, frame_length]
248
- end
249
- end
250
- private_class_method :frame_signal
251
-
252
389
  def onset_env_from_signal(y, sr:, hop_length:)
253
390
  mel_spec = melspectrogram(y:, sr:, n_fft: 1024, hop_length:, n_mels: 40)
254
391
  _, frames = mel_spec.shape
@@ -262,5 +399,87 @@ module Muze
262
399
  onset
263
400
  end
264
401
  private_class_method :onset_env_from_signal
402
+
403
+ def normalized_autocorrelation(segment, lag, value)
404
+ left_energy = 0.0
405
+ right_energy = 0.0
406
+ (lag...segment.length).each do |offset|
407
+ left = segment[offset]
408
+ right = segment[offset - lag]
409
+ left_energy += left * left
410
+ right_energy += right * right
411
+ end
412
+ denominator = Math.sqrt(left_energy * right_energy)
413
+ denominator <= 1.0e-12 ? 0.0 : value / denominator
414
+ end
415
+ private_class_method :normalized_autocorrelation
416
+
417
+ def normalize_frequency_axis(values)
418
+ min = values.min
419
+ max = values.max
420
+ return Array.new(values.length, 0.0) if (max - min).abs <= 1.0e-12
421
+
422
+ values.map { |value| (2.0 * (value - min) / (max - min)) - 1.0 }
423
+ end
424
+ private_class_method :normalize_frequency_axis
425
+
426
+ def polynomial_coefficients(x_values, y_values, order)
427
+ size = order + 1
428
+ normal = Array.new(size) { Array.new(size, 0.0) }
429
+ rhs = Array.new(size, 0.0)
430
+
431
+ x_values.each_with_index do |x_value, index|
432
+ powers = Array.new((2 * order) + 1, 1.0)
433
+ (1...powers.length).each { |power| powers[power] = powers[power - 1] * x_value }
434
+ size.times do |row|
435
+ rhs[row] += y_values[index] * powers[row]
436
+ size.times { |col| normal[row][col] += powers[row + col] }
437
+ end
438
+ end
439
+
440
+ solve_linear_system(normal, rhs)
441
+ end
442
+ private_class_method :polynomial_coefficients
443
+
444
+ def solve_linear_system(matrix, rhs)
445
+ size = rhs.length
446
+ size.times do |pivot|
447
+ best = (pivot...size).max_by { |row| matrix[row][pivot].abs }
448
+ return Array.new(size, 0.0) if matrix[best][pivot].abs <= 1.0e-12
449
+
450
+ matrix[pivot], matrix[best] = matrix[best], matrix[pivot]
451
+ rhs[pivot], rhs[best] = rhs[best], rhs[pivot]
452
+
453
+ divisor = matrix[pivot][pivot]
454
+ pivot.upto(size - 1) { |col| matrix[pivot][col] /= divisor }
455
+ rhs[pivot] /= divisor
456
+
457
+ size.times do |row|
458
+ next if row == pivot
459
+
460
+ factor = matrix[row][pivot]
461
+ pivot.upto(size - 1) { |col| matrix[row][col] -= factor * matrix[pivot][col] }
462
+ rhs[row] -= factor * rhs[pivot]
463
+ end
464
+ end
465
+
466
+ rhs
467
+ end
468
+ private_class_method :solve_linear_system
469
+
470
+ def validate_finite_array!(values, label)
471
+ return if values.all? { |value| value.respond_to?(:finite?) && value.finite? }
472
+
473
+ raise Muze::ParameterError, "#{label} must contain only finite numeric values"
474
+ end
475
+ private_class_method :validate_finite_array!
476
+
477
+ def mono_signal_to_a(value, label)
478
+ signal = Muze::Core::Audio.validate_audio!(value, allow_empty: true)
479
+ raise Muze::ParameterError, "#{label} must be mono audio" if signal.ndim == 2
480
+
481
+ signal.to_a
482
+ end
483
+ private_class_method :mono_signal_to_a
265
484
  end
266
485
  end
@@ -3,19 +3,29 @@
3
3
  module Muze
4
4
  module Filters
5
5
  module_function
6
+ CHROMA_CACHE = Muze::Core::BoundedCache.new(max_size: 64)
6
7
 
7
8
  # @param sr [Integer]
8
9
  # @param n_fft [Integer]
9
10
  # @param n_chroma [Integer]
10
11
  # @param tuning [Float]
11
12
  # @return [Numo::SFloat] shape: [n_chroma, 1 + n_fft/2]
12
- def chroma(sr:, n_fft:, n_chroma: 12, tuning: 0.0)
13
+ def chroma(sr:, n_fft:, n_chroma: 12, tuning: 0.0, ctroct: nil, octwidth: nil)
14
+ key = [sr, n_fft, n_chroma, tuning, ctroct, octwidth]
15
+ CHROMA_CACHE.fetch(key) { build_chroma(sr:, n_fft:, n_chroma:, tuning:, ctroct:, octwidth:) }.dup
16
+ end
17
+
18
+ def build_chroma(sr:, n_fft:, n_chroma:, tuning:, ctroct:, octwidth:)
13
19
  raise Muze::ParameterError, "sr must be positive" unless sr.positive?
14
20
  raise Muze::ParameterError, "n_fft must be positive" unless n_fft.positive?
15
21
  raise Muze::ParameterError, "n_chroma must be positive" unless n_chroma.positive?
22
+ raise Muze::ParameterError, "tuning must be finite" unless tuning.respond_to?(:finite?) && tuning.finite?
23
+ raise Muze::ParameterError, "ctroct must be finite" if ctroct && !(ctroct.respond_to?(:finite?) && ctroct.finite?)
24
+ raise Muze::ParameterError, "octwidth must be positive" if octwidth && !(octwidth.respond_to?(:positive?) && octwidth.positive?)
16
25
 
17
26
  bins = (n_fft / 2) + 1
18
27
  matrix = Numo::SFloat.zeros(n_chroma, bins)
28
+ center_octave = ctroct || 5.0
19
29
 
20
30
  bins.times do |bin|
21
31
  frequency = (bin * sr.to_f) / n_fft
@@ -26,12 +36,13 @@ module Muze
26
36
 
27
37
  n_chroma.times do |chroma_index|
28
38
  distance = circular_distance(chroma_index, chroma_position, n_chroma)
29
- matrix[chroma_index, bin] = Math.exp(-(distance**2) / 2.0)
39
+ matrix[chroma_index, bin] = Math.exp(-(distance**2) / 2.0) * octave_weight(frequency, center_octave:, octwidth:)
30
40
  end
31
41
  end
32
42
 
33
43
  normalize_columns(matrix)
34
44
  end
45
+ private_class_method :build_chroma
35
46
 
36
47
  def circular_distance(a, b, modulo)
37
48
  direct = (a - b).abs
@@ -39,6 +50,14 @@ module Muze
39
50
  end
40
51
  private_class_method :circular_distance
41
52
 
53
+ def octave_weight(frequency, center_octave:, octwidth:)
54
+ return 1.0 unless octwidth
55
+
56
+ octave = Math.log2(frequency / 16.351597831287414)
57
+ Math.exp(-0.5 * (((octave - center_octave) / octwidth)**2))
58
+ end
59
+ private_class_method :octave_weight
60
+
42
61
  def normalize_columns(matrix)
43
62
  cols = matrix.shape[1]
44
63
  cols.times do |col|