muze 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +4 -0
- data/CHANGELOG.md +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +93 -0
- data/Rakefile +30 -0
- data/benchmarks/baseline.json +24 -0
- data/benchmarks/native_vs_ruby.rb +23 -0
- data/benchmarks/quality_metrics.rb +265 -0
- data/benchmarks/quality_thresholds.md +28 -0
- data/benchmarks/support/fixture_library.rb +107 -0
- data/examples/beat_tracking.rb +26 -0
- data/examples/chroma_svg.rb +33 -0
- data/examples/feature_report.rb +37 -0
- data/examples/hpss_demo.rb +46 -0
- data/examples/load_and_specshow.rb +30 -0
- data/ext/muze/extconf.rb +6 -0
- data/ext/muze/muze_ext.c +75 -0
- data/lib/muze/beat/beat_track.rb +107 -0
- data/lib/muze/core/dct.rb +63 -0
- data/lib/muze/core/resample.rb +122 -0
- data/lib/muze/core/stft.rb +231 -0
- data/lib/muze/core/windows.rb +69 -0
- data/lib/muze/display/specshow.rb +100 -0
- data/lib/muze/effects/harmonic_percussive.rb +62 -0
- data/lib/muze/effects/time_stretch.rb +171 -0
- data/lib/muze/errors.rb +18 -0
- data/lib/muze/feature/chroma.rb +68 -0
- data/lib/muze/feature/mfcc.rb +120 -0
- data/lib/muze/feature/spectral.rb +266 -0
- data/lib/muze/filters/chroma_filter.rb +54 -0
- data/lib/muze/filters/mel.rb +91 -0
- data/lib/muze/io/audio_loader/ffmpeg_backend.rb +127 -0
- data/lib/muze/io/audio_loader/wavify_backend.rb +52 -0
- data/lib/muze/io/audio_loader.rb +117 -0
- data/lib/muze/native.rb +45 -0
- data/lib/muze/onset/onset_detect.rb +97 -0
- data/lib/muze/version.rb +5 -0
- data/lib/muze.rb +251 -0
- metadata +132 -0
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Muze
|
|
4
|
+
module Feature
|
|
5
|
+
module_function
|
|
6
|
+
|
|
7
|
+
# Computes a chromagram by projecting a magnitude spectrogram onto the
# chroma filter bank and normalizing every frame.
#
# When +s+ is given it is used as the spectrogram and +y+ is ignored;
# otherwise the magnitude of Muze.stft(y) is used.
#
# @param y [Numo::SFloat, Array<Float>, nil] waveform, required when +s+ is nil
# @param sr [Integer]
# @param s [Numo::SFloat, nil] precomputed spectrogram, shape: [bins, frames]
# @param n_chroma [Integer]
# @param n_fft [Integer] replaced by 2 * (bins - 1) when +s+ has a bin count
#   other than (n_fft / 2) + 1
# @param hop_length [Integer]
# @param norm [Integer, nil] per-frame norm; nil disables normalization
# @return [Numo::SFloat] shape: [n_chroma, frames]
# @raise [Muze::ParameterError] if both +y+ and +s+ are missing
def chroma_stft(y: nil, sr: 22_050, s: nil, n_chroma: 12, n_fft: 2048, hop_length: 512, norm: 2)
  raise Muze::ParameterError, "y must be provided when s is nil" unless s || y

  spectrum = if s
               Numo::SFloat.cast(s)
             else
               magnitude, = Muze.magphase(Muze.stft(y, n_fft:, hop_length:))
               magnitude
             end
  spectrum = spectrum.expand_dims(1) if spectrum.ndim == 1

  # The filter bank has (n_fft / 2) + 1 columns, so a supplied spectrogram
  # with another bin count needs the FFT size derived from its own shape.
  bins = spectrum.shape[0]
  n_fft = (bins - 1) * 2 if s && bins > 1 && (n_fft / 2) + 1 != bins

  filter_bank = Muze::Filters.chroma(sr:, n_fft:, n_chroma:)
  normalize(matrix_multiply(filter_bank, spectrum), norm:)
end
|
|
29
|
+
|
|
30
|
+
# Multiplies two matrices with a plain triple loop.
#
# Both operands are cast to Numo::SFloat and 1-D operands are promoted to a
# single column, matching the other Muze::Feature.matrix_multiply definition
# so the result does not depend on which file was loaded last.
#
# @param left [Numo::SFloat, Array] shape: [rows, inner]
# @param right [Numo::SFloat, Array] shape: [inner, cols]
# @return [Numo::SFloat] shape: [rows, cols]
# @raise [Muze::ParameterError] if the inner dimensions differ
def matrix_multiply(left, right)
  left_matrix = Numo::SFloat.cast(left)
  right_matrix = Numo::SFloat.cast(right)
  left_matrix = left_matrix.expand_dims(1) if left_matrix.ndim == 1
  right_matrix = right_matrix.expand_dims(1) if right_matrix.ndim == 1

  left_rows, left_cols = left_matrix.shape
  right_rows, right_cols = right_matrix.shape
  raise Muze::ParameterError, "Matrix dimensions do not align" unless left_cols == right_rows

  output = Numo::SFloat.zeros(left_rows, right_cols)
  left_rows.times do |row|
    right_cols.times do |col|
      # Accumulate in a Ruby Float before storing into the output element.
      sum = 0.0
      left_cols.times { |idx| sum += left_matrix[row, idx] * right_matrix[idx, col] }
      output[row, col] = sum
    end
  end

  output
end
|
|
46
|
+
private_class_method :matrix_multiply
|
|
47
|
+
|
|
48
|
+
# Scales every frame (column) of +chroma+ in place to unit norm.
#
# A +norm+ of 1 divides by the sum of absolute values; any other non-nil
# value divides by the Euclidean length. Frames whose norm is at most
# 1.0e-12 are left untouched.
#
# @param chroma [Numo::SFloat] shape: [n_chroma, frames]; modified in place
# @param norm [Integer, nil] nil returns +chroma+ unchanged
# @return [Numo::SFloat] the same +chroma+ object
def normalize(chroma, norm:)
  return chroma if norm.nil?

  (0...chroma.shape[1]).each do |column_index|
    column = chroma[true, column_index]
    scale = norm == 1 ? column.abs.sum : Math.sqrt((column**2).sum)
    chroma[true, column_index] = column / scale unless scale <= 1.0e-12
  end

  chroma
end
|
|
66
|
+
private_class_method :normalize
|
|
67
|
+
end
|
|
68
|
+
end
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Muze
|
|
4
|
+
# Feature extraction methods.
|
|
5
|
+
module Feature
|
|
6
|
+
module_function
|
|
7
|
+
|
|
8
|
+
# Projects a power spectrogram onto a mel filter bank.
#
# Uses +s+ as the power spectrogram when given; otherwise derives it from
# +y+ through power_spectrogram.
#
# @param y [Numo::SFloat, Array<Float>, nil] waveform, used when +s+ is nil
# @param sr [Integer]
# @param s [Numo::SFloat, nil] precomputed power spectrogram
# @param n_fft [Integer]
# @param hop_length [Integer]
# @param n_mels [Integer]
# @param fmin [Float]
# @param fmax [Float, nil]
# @return [Numo::SFloat]
def melspectrogram(y: nil, sr: 22_050, s: nil, n_fft: 2048, hop_length: 512, n_mels: 128, fmin: 0.0, fmax: nil)
  power = if s
            Numo::SFloat.cast(s)
          else
            power_spectrogram(y, n_fft:, hop_length:)
          end
  mel_basis = Muze::Filters.mel(sr:, n_fft:, n_mels:, fmin:, fmax:)

  matrix_multiply(mel_basis, power)
end
|
|
22
|
+
|
|
23
|
+
# Computes MFCCs: mel spectrogram -> Muze.power_to_db -> orthonormal DCT
# along axis 0, keeping the first +n_mfcc+ rows.
#
# When +s+ is given it is used as the mel spectrogram and +y+ is ignored.
#
# @param y [Numo::SFloat, Array<Float>, nil]
# @param sr [Integer]
# @param s [Numo::SFloat, nil]
# @param n_mfcc [Integer]
# @param n_fft [Integer]
# @param hop_length [Integer]
# @param n_mels [Integer]
# @param fmin [Float]
# @param fmax [Float, nil]
# @return [Numo::SFloat]
# @raise [Muze::ParameterError] if +n_mfcc+ is not positive
def mfcc(y: nil, sr: 22_050, s: nil, n_mfcc: 20, n_fft: 2048, hop_length: 512, n_mels: 128, fmin: 0.0, fmax: nil)
  raise Muze::ParameterError, "n_mfcc must be positive" unless n_mfcc.positive?

  mel_power = s ? Numo::SFloat.cast(s) : melspectrogram(y:, sr:, n_fft:, hop_length:, n_mels:, fmin:, fmax:)
  decibels = Muze.power_to_db(mel_power)
  coefficients = Muze::Core::DCT.dct(decibels, axis: 0, norm: :ortho)

  coefficients[0...n_mfcc, true].cast_to(Numo::SFloat)
end
|
|
46
|
+
|
|
47
|
+
# Applies finite_difference to +data+ +order+ times in succession.
#
# @param data [Numo::SFloat]
# @param order [Integer] number of successive differences, at least 1
# @param width [Integer] odd window size, at least 3
# @param mode [Symbol] only :interp is accepted
# @return [Numo::SFloat]
# @raise [Muze::ParameterError] if +order+, +width+ or +mode+ is invalid
def delta(data, order: 1, width: 9, mode: :interp)
  raise Muze::ParameterError, "order must be >= 1" unless order >= 1
  raise Muze::ParameterError, "width must be odd and >= 3" if width.even? || width < 3
  raise Muze::ParameterError, "mode must be :interp" unless mode == :interp

  order.times.inject(Numo::SFloat.cast(data)) do |current, _pass|
    finite_difference(current, width)
  end
end
|
|
61
|
+
|
|
62
|
+
# Returns the squared STFT magnitude of +y+ as a Numo::SFloat.
#
# @param y [Numo::SFloat, Array<Float>]
# @param n_fft [Integer]
# @param hop_length [Integer]
# @return [Numo::SFloat]
# @raise [Muze::ParameterError] if +y+ is nil
def power_spectrogram(y, n_fft:, hop_length:)
  raise Muze::ParameterError, "y must be provided when s is nil" if y.nil?

  magnitude, _phase = Muze.magphase(Muze.stft(y, n_fft:, hop_length:))
  squared = magnitude**2
  squared.cast_to(Numo::SFloat)
end
|
|
69
|
+
private_class_method :power_spectrogram
|
|
70
|
+
|
|
71
|
+
# Estimates a first-order difference along the last axis using a symmetric
# window of +width+ samples; indices past either edge are clamped to the
# nearest valid column.
#
# A 1-D input is treated as a single row, so the difference runs along the
# sequence itself and a 1-D result of the same length is returned.
# (Promoting it to a single column would leave only one sample per row and
# always produce zeros.)
#
# @param data [Numo::SFloat, Array] 1-D sequence or matrix of shape [rows, cols]
# @param width [Integer] window size; width / 2 neighbours are used per side
# @return [Numo::SFloat] same shape as the input
def finite_difference(data, width)
  matrix = Numo::SFloat.cast(data)
  vector_input = matrix.ndim == 1
  matrix = matrix.expand_dims(0) if vector_input

  rows, cols = matrix.shape
  half = width / 2
  denominator = (1..half).sum { |idx| 2.0 * (idx * idx) }
  output = Numo::SFloat.zeros(rows, cols)
  last = cols - 1

  rows.times do |row|
    cols.times do |col|
      numerator = 0.0
      (1..half).each do |offset|
        left = (col - offset).clamp(0, last)
        right = (col + offset).clamp(0, last)
        numerator += offset * (matrix[row, right] - matrix[row, left])
      end
      output[row, col] = numerator / denominator
    end
  end

  vector_input ? output[0, true] : output
end
|
|
94
|
+
private_class_method :finite_difference
|
|
95
|
+
|
|
96
|
+
# Multiplies two matrices element by element in plain Ruby.
#
# Operands are cast to Numo::SFloat and a 1-D operand is promoted to a
# single column before the shapes are checked.
#
# @param left [Numo::SFloat, Array] shape: [rows, inner]
# @param right [Numo::SFloat, Array] shape: [inner, cols]
# @return [Numo::SFloat] shape: [rows, cols]
# @raise [Muze::ParameterError] if the inner dimensions differ
def matrix_multiply(left, right)
  lhs, rhs = [left, right].map do |operand|
    converted = Numo::SFloat.cast(operand)
    converted.ndim == 1 ? converted.expand_dims(1) : converted
  end

  out_rows, inner = lhs.shape
  rhs_rows, out_cols = rhs.shape
  raise Muze::ParameterError, "Matrix dimensions do not align" if inner != rhs_rows

  product = Numo::SFloat.zeros(out_rows, out_cols)
  (0...out_rows).each do |i|
    (0...out_cols).each do |j|
      # Plain left-to-right accumulation in a Ruby Float.
      product[i, j] = (0...inner).inject(0.0) { |acc, k| acc + (lhs[i, k] * rhs[k, j]) }
    end
  end

  product
end
|
|
118
|
+
private_class_method :matrix_multiply
|
|
119
|
+
end
|
|
120
|
+
end
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Muze
|
|
4
|
+
module Feature
|
|
5
|
+
module_function
|
|
6
|
+
|
|
7
|
+
# Computes the magnitude-weighted mean frequency of every frame.
# Frames whose magnitudes sum to zero or less yield 0.0.
#
# @param y [Numo::SFloat, Array<Float>, nil]
# @param s [Numo::SFloat, nil]
# @param sr [Integer]
# @param n_fft [Integer]
# @param hop_length [Integer]
# @return [Numo::SFloat] shape: [1, frames]
def spectral_centroid(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512)
  magnitude, frequencies = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:)
  frame_total = magnitude.shape[1]
  centroids = Numo::SFloat.zeros(1, frame_total)

  (0...frame_total).each do |t|
    column = magnitude[true, t]
    energy = column.sum
    centroids[0, t] = if energy <= 0.0
                        0.0
                      else
                        weighted = frequencies.each_with_index.inject(0.0) do |acc, (freq, bin)|
                          acc + (freq * column[bin])
                        end
                        weighted / energy
                      end
  end

  centroids
end
|
|
33
|
+
|
|
34
|
+
# Computes the order-+p+ spread of each frame's spectrum around its
# spectral centroid: (sum(mag * |f - centroid|**p) / sum(mag))**(1 / p).
# Frames whose magnitudes sum to zero or less yield 0.0.
#
# @param y [Numo::SFloat, Array<Float>, nil]
# @param s [Numo::SFloat, nil]
# @param sr [Integer]
# @param n_fft [Integer]
# @param hop_length [Integer]
# @param p [Integer]
# @return [Numo::SFloat] shape: [1, frames]
def spectral_bandwidth(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, p: 2)
  magnitude, frequencies = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:)
  centroids = spectral_centroid(y:, s: magnitude, sr:, n_fft:, hop_length:)
  frame_total = magnitude.shape[1]
  bandwidths = Numo::SFloat.zeros(1, frame_total)

  (0...frame_total).each do |t|
    column = magnitude[true, t]
    energy = column.sum
    if energy <= 0.0
      bandwidths[0, t] = 0.0
      next
    end

    center = centroids[0, t]
    moment = frequencies.each_with_index.inject(0.0) do |acc, (freq, bin)|
      acc + (column[bin] * ((freq - center).abs**p))
    end
    bandwidths[0, t] = (moment / energy)**(1.0 / p)
  end

  bandwidths
end
|
|
66
|
+
|
|
67
|
+
# Finds, for every frame, the lowest bin frequency at which the running sum
# of magnitudes reaches +roll_percent+ of the frame total. Falls back to the
# highest bin frequency when the threshold is never reached.
#
# @param y [Numo::SFloat, Array<Float>, nil]
# @param s [Numo::SFloat, nil]
# @param sr [Integer]
# @param n_fft [Integer]
# @param hop_length [Integer]
# @param roll_percent [Float]
# @return [Numo::SFloat] shape: [1, frames]
def spectral_rolloff(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, roll_percent: 0.85)
  magnitude, frequencies = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:)
  frame_total = magnitude.shape[1]
  rolloffs = Numo::SFloat.zeros(1, frame_total)

  (0...frame_total).each do |t|
    column = magnitude[true, t]
    threshold = column.sum * roll_percent

    running = 0.0
    hit = (0...frequencies.length).find do |bin|
      running += column[bin]
      running >= threshold
    end

    rolloffs[0, t] = hit ? frequencies[hit] : frequencies.last
  end

  rolloffs
end
|
|
98
|
+
|
|
99
|
+
# Computes the ratio of geometric mean to arithmetic mean of every frame's
# magnitudes, after flooring each magnitude at +amin+.
#
# @param y [Numo::SFloat, Array<Float>, nil]
# @param s [Numo::SFloat, nil]
# @param n_fft [Integer]
# @param hop_length [Integer]
# @param amin [Float] lower bound applied to every magnitude
# @return [Numo::SFloat] shape: [1, frames]
def spectral_flatness(y: nil, s: nil, n_fft: 2048, hop_length: 512, amin: 1.0e-10)
  magnitude, _frequencies = prepare_magnitude(y:, s:, sr: 22_050, n_fft:, hop_length:)
  frame_total = magnitude.shape[1]
  flatness = Numo::SFloat.zeros(1, frame_total)

  (0...frame_total).each do |t|
    floored = magnitude[true, t].to_a.map { |value| [value, amin].max }
    mean_log = floored.sum { |value| Math.log(value) } / floored.length
    arithmetic_mean = floored.sum(0.0) / floored.length
    flatness[0, t] = Math.exp(mean_log) / arithmetic_mean
  end

  flatness
end
|
|
117
|
+
|
|
118
|
+
# Computes a peak-to-valley ratio in decibels for n_bands + 1 bands whose
# edges are spaced evenly over the bin index range.
#
# Within each band the sorted magnitudes are sampled at the +quantile+ and
# 1 - +quantile+ positions; both samples are floored at 1.0e-10.
#
# @param y [Numo::SFloat, Array<Float>, nil]
# @param s [Numo::SFloat, nil]
# @param n_fft [Integer]
# @param hop_length [Integer]
# @param n_bands [Integer]
# @param quantile [Float]
# @return [Numo::SFloat] shape: [n_bands + 1, frames]
def spectral_contrast(y: nil, s: nil, n_fft: 2048, hop_length: 512, n_bands: 6, quantile: 0.02)
  magnitude, _frequencies = prepare_magnitude(y:, s:, sr: 22_050, n_fft:, hop_length:)
  bin_total, frame_total = magnitude.shape
  band_total = n_bands + 1
  edges = (0..band_total).map { |idx| ((bin_total - 1) * idx / band_total.to_f).round }
  contrast = Numo::SFloat.zeros(band_total, frame_total)

  edges.each_cons(2).with_index do |(lower, next_edge), band|
    # Every band covers at least one bin.
    upper = [next_edge, lower + 1].max

    (0...frame_total).each do |t|
      sorted = magnitude[lower...upper, t].to_a.sort
      next if sorted.empty?

      top = sorted.length - 1
      valley_at = [(sorted.length * quantile).floor, top].min
      peak_at = [(sorted.length * (1.0 - quantile)).floor, top].min
      valley = [sorted[valley_at], 1.0e-10].max
      peak = [sorted[peak_at], 1.0e-10].max
      contrast[band, t] = 10.0 * Math.log10(peak / valley)
    end
  end

  contrast
end
|
|
147
|
+
|
|
148
|
+
# Counts sign changes between consecutive samples of every frame and
# divides the count by +frame_length+. A sample counts as non-negative
# when it is >= 0.
#
# @param y [Numo::SFloat, Array<Float>]
# @param frame_length [Integer]
# @param hop_length [Integer]
# @return [Numo::SFloat] shape: [1, frames]
def zero_crossing_rate(y, frame_length: 2048, hop_length: 512)
  samples = y.is_a?(Numo::NArray) ? y.to_a : Array(y)

  rates = frame_signal(samples, frame_length, hop_length).map do |frame|
    sign_changes = frame.each_cons(2).count { |previous, current| (previous >= 0) != (current >= 0) }
    sign_changes.to_f / frame_length
  end

  Numo::SFloat[rates]
end
|
|
163
|
+
|
|
164
|
+
# Computes the root-mean-square value of every frame.
#
# When +s+ is given each of its columns is one frame; otherwise +y+ is split
# with frame_signal.
#
# @param y [Numo::SFloat, Array<Numeric>, nil] waveform, required when +s+ is nil
# @param s [Numo::SFloat, nil] matrix whose columns are frames
# @param frame_length [Integer]
# @param hop_length [Integer]
# @return [Numo::SFloat] shape: [1, frames]
# @raise [Muze::ParameterError] if both +y+ and +s+ are missing
def rms(y: nil, s: nil, frame_length: 2048, hop_length: 512)
  raise Muze::ParameterError, "y must be provided when s is nil" unless s || y

  if s
    matrix = Numo::SFloat.cast(s)
    matrix = matrix.expand_dims(1) if matrix.ndim == 1
    values = Array.new(matrix.shape[1]) do |frame_index|
      frame = matrix[true, frame_index]
      Math.sqrt((frame**2).sum / frame.size)
    end

    return Numo::SFloat[values]
  end

  signal = y.is_a?(Numo::NArray) ? y.to_a : Array(y)
  values = frame_signal(signal, frame_length, hop_length).map do |frame|
    # Start the sum at 0.0 so integer samples are not truncated by integer division.
    Math.sqrt(frame.sum(0.0) { |value| value * value } / frame.length)
  end

  Numo::SFloat[values]
end
|
|
190
|
+
|
|
191
|
+
# Builds a matrix of local autocorrelations of the onset envelope.
#
# For every frame the trailing window of at most +win_length+ envelope
# values ending at that frame is correlated with itself at each lag that
# fits inside the window; rows for lags that do not fit stay zero.
#
# @param y [Numo::SFloat, Array<Float>, nil] waveform, used when +onset_envelope+ is nil
# @param onset_envelope [Numo::SFloat, Array<Float>, nil]
# @param sr [Integer]
# @param hop_length [Integer]
# @param win_length [Integer]
# @return [Numo::SFloat] shape: [win_length, frames]
def tempogram(y: nil, onset_envelope: nil, sr: 22_050, hop_length: 512, win_length: 384)
  envelope = if onset_envelope
               onset_envelope.is_a?(Numo::NArray) ? onset_envelope.to_a : Array(onset_envelope)
             else
               onset_env_from_signal(y, sr:, hop_length:)
             end

  frame_total = envelope.length
  result = Numo::SFloat.zeros(win_length, frame_total)

  (0...frame_total).each do |t|
    first = [0, t - win_length + 1].max
    window = envelope[first..t]
    lag_total = [win_length, window.length].min

    (0...lag_total).each do |lag|
      result[lag, t] = (lag...window.length).inject(0.0) do |acc, offset|
        acc + (window[offset] * window[offset - lag])
      end
    end
  end

  result
end
|
|
223
|
+
|
|
224
|
+
# Resolves the magnitude spectrogram and the frequency of every bin.
#
# Uses +s+ when given, otherwise the magnitude of Muze.stft(y). A 1-D
# spectrum is promoted to a single frame. When a supplied +s+ has a bin
# count other than (n_fft / 2) + 1, the FFT size is taken as 2 * (bins - 1)
# so the returned frequencies match the spectrogram actually passed in.
#
# @param y [Numo::SFloat, Array<Float>, nil] waveform, required when +s+ is nil
# @param s [Numo::SFloat, nil] precomputed spectrogram, shape: [bins, frames]
# @param sr [Integer]
# @param n_fft [Integer, nil]
# @param hop_length [Integer]
# @return [Array(Numo::SFloat, Array<Float>)] spectrum and one frequency per bin
# @raise [Muze::ParameterError] if both +y+ and +s+ are missing
def prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:)
  raise Muze::ParameterError, "y must be provided when s is nil" unless s || y

  spectrum = if s
               Numo::SFloat.cast(s)
             else
               magnitude, = Muze.magphase(Muze.stft(y, n_fft:, hop_length:))
               magnitude
             end
  spectrum = spectrum.expand_dims(1) if spectrum.ndim == 1

  bins, = spectrum.shape
  inferred = (bins - 1) * 2
  fft_size = n_fft || inferred
  # Trust the shape of a caller-supplied spectrogram over the n_fft default.
  fft_size = inferred if s && inferred.positive? && (fft_size / 2) + 1 != bins

  frequencies = Array.new(bins) { |index| index * sr.to_f / fft_size }
  [spectrum, frequencies]
end
|
|
239
|
+
private_class_method :prepare_magnitude
|
|
240
|
+
|
|
241
|
+
# Splits +signal+ into frames of +frame_length+ samples taken every
# +hop_length+ samples.
#
# A signal no longer than one frame is returned as a single frame padded
# with 0.0 at the end. For longer signals only complete frames are produced;
# trailing samples that do not fill a frame are dropped.
#
# @param signal [Array<Numeric>]
# @param frame_length [Integer]
# @param hop_length [Integer]
# @return [Array<Array<Numeric>>]
# @raise [Muze::ParameterError] if +frame_length+ or +hop_length+ is not positive
def frame_signal(signal, frame_length, hop_length)
  raise Muze::ParameterError, "frame_length must be positive" unless frame_length.positive?
  raise Muze::ParameterError, "hop_length must be positive" unless hop_length.positive?

  return [signal + Array.new(frame_length - signal.length, 0.0)] if signal.length <= frame_length

  frame_count = ((signal.length - frame_length) / hop_length) + 1
  Array.new(frame_count) { |index| signal[index * hop_length, frame_length] }
end
|
|
250
|
+
private_class_method :frame_signal
|
|
251
|
+
|
|
252
|
+
# Derives an onset envelope from +y+: a 40-band mel spectrogram (n_fft 1024)
# is differenced frame to frame and the positive part of each difference is
# summed over the bands. The first frame is always 0.0.
#
# @param y [Numo::SFloat, Array<Float>]
# @param sr [Integer]
# @param hop_length [Integer]
# @return [Array<Float>] one value per frame
def onset_env_from_signal(y, sr:, hop_length:)
  mel_spec = melspectrogram(y:, sr:, n_fft: 1024, hop_length:, n_mels: 40)
  frame_total = mel_spec.shape[1]

  Array.new(frame_total) do |t|
    next 0.0 if t.zero?

    rise = mel_spec[true, t] - mel_spec[true, t - 1]
    rise.clip(0.0, Float::INFINITY).sum
  end
end
|
|
264
|
+
private_class_method :onset_env_from_signal
|
|
265
|
+
end
|
|
266
|
+
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Muze
|
|
4
|
+
module Filters
|
|
5
|
+
module_function
|
|
6
|
+
|
|
7
|
+
# Builds a chroma filter bank that maps FFT bins to pitch classes.
#
# Every bin with a positive centre frequency is converted to a MIDI pitch
# (A440 = 69), scaled to +n_chroma+ bins per octave and wrapped to one
# octave. Each chroma row receives a Gaussian weight of its circular
# distance to that position, and each column is then scaled to sum to one.
# The zero-frequency bin stays all zero.
#
# @param sr [Integer]
# @param n_fft [Integer]
# @param n_chroma [Integer] number of chroma bins per octave
# @param tuning [Float] offset subtracted from the MIDI pitch, in semitones
# @return [Numo::SFloat] shape: [n_chroma, 1 + n_fft/2]
# @raise [Muze::ParameterError] if +sr+, +n_fft+ or +n_chroma+ is not positive
def chroma(sr:, n_fft:, n_chroma: 12, tuning: 0.0)
  raise Muze::ParameterError, "sr must be positive" unless sr.positive?
  raise Muze::ParameterError, "n_fft must be positive" unless n_fft.positive?
  raise Muze::ParameterError, "n_chroma must be positive" unless n_chroma.positive?

  bins = (n_fft / 2) + 1
  matrix = Numo::SFloat.zeros(n_chroma, bins)
  # MIDI pitch is in semitones (12 per octave); rescale so one octave spans
  # exactly n_chroma bins. The factor is 1.0 for the default of 12.
  bins_per_semitone = n_chroma / 12.0

  bins.times do |bin|
    frequency = (bin * sr.to_f) / n_fft
    next if frequency <= 0.0

    midi = 69.0 + (12.0 * Math.log2(frequency / 440.0)) - tuning
    chroma_position = (midi * bins_per_semitone) % n_chroma

    n_chroma.times do |chroma_index|
      distance = circular_distance(chroma_index, chroma_position, n_chroma)
      matrix[chroma_index, bin] = Math.exp(-(distance**2) / 2.0)
    end
  end

  normalize_columns(matrix)
end
|
|
35
|
+
|
|
36
|
+
# Returns the shorter of the two ways around a circle of size +modulo+
# between positions +a+ and +b+.
#
# @param a [Numeric]
# @param b [Numeric]
# @param modulo [Numeric]
# @return [Numeric]
def circular_distance(a, b, modulo)
  gap = (a - b).abs
  wrapped = modulo - gap
  wrapped < gap ? wrapped : gap
end
|
|
40
|
+
private_class_method :circular_distance
|
|
41
|
+
|
|
42
|
+
# Divides every column of +matrix+ by its sum, in place. Columns whose sum
# is zero or negative are left unchanged.
#
# @param matrix [Numo::SFloat] modified in place
# @return [Numo::SFloat] the same +matrix+ object
def normalize_columns(matrix)
  (0...matrix.shape[1]).each do |column_index|
    total = matrix[true, column_index].sum
    matrix[true, column_index] = matrix[true, column_index] / total unless total <= 0.0
  end

  matrix
end
|
|
52
|
+
private_class_method :normalize_columns
|
|
53
|
+
end
|
|
54
|
+
end
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Muze
|
|
4
|
+
# Filterbank generation utilities.
|
|
5
|
+
module Filters
|
|
6
|
+
module_function
|
|
7
|
+
|
|
8
|
+
# Builds a bank of triangular mel filters over the FFT bins.
#
# n_mels + 2 points are spaced evenly on the mel scale between +fmin+ and
# +fmax+, converted back to Hz and floored to FFT bin indices. Each filter
# rises linearly from its left bin to its centre bin and falls linearly to
# its right bin. A filter whose three bins are not strictly increasing is
# left as an all-zero row.
#
# @param sr [Integer]
# @param n_fft [Integer]
# @param n_mels [Integer]
# @param fmin [Float]
# @param fmax [Float, nil] defaults to sr / 2.0
# @param htk [Boolean] selects the conversion used by hz_to_mel / mel_to_hz
# @return [Numo::SFloat] shape: [n_mels, 1 + n_fft/2]
# @raise [Muze::ParameterError] if +sr+, +n_fft+ or +n_mels+ is not positive,
#   or if +fmax+ is not greater than +fmin+
def mel(sr: 22_050, n_fft: 2048, n_mels: 128, fmin: 0.0, fmax: nil, htk: false)
  raise Muze::ParameterError, "sr must be positive" unless sr.positive?
  raise Muze::ParameterError, "n_fft must be positive" unless n_fft.positive?
  raise Muze::ParameterError, "n_mels must be positive" unless n_mels.positive?

  fmax ||= sr / 2.0
  # An empty or inverted band would otherwise produce an all-zero filter bank.
  raise Muze::ParameterError, "fmax must be greater than fmin" unless fmax > fmin

  mel_min = hz_to_mel(fmin, htk:)
  mel_max = hz_to_mel(fmax, htk:)
  mel_points = Array.new(n_mels + 2) do |idx|
    mel_min + ((mel_max - mel_min) * idx / (n_mels + 1).to_f)
  end
  fft_bins = mel_points.map { |mel_value| ((n_fft + 1) * mel_to_hz(mel_value, htk:) / sr).floor }

  last_bin = n_fft / 2
  matrix = Numo::SFloat.zeros(n_mels, last_bin + 1)

  n_mels.times do |mel_index|
    left, center, right = fft_bins[mel_index, 3]
    next if center <= left || right <= center

    # Rising slope: 0 at the left bin, approaching 1 at the centre bin.
    (left...center).each do |bin|
      next unless bin.between?(0, last_bin)

      matrix[mel_index, bin] = (bin - left).to_f / (center - left)
    end

    # Falling slope: 1 at the centre bin, approaching 0 at the right bin.
    (center...right).each do |bin|
      next unless bin.between?(0, last_bin)

      matrix[mel_index, bin] = (right - bin).to_f / (right - center)
    end
  end

  matrix
end
|
|
54
|
+
|
|
55
|
+
# Converts a frequency in Hz to the mel scale.
#
# With +htk+ the formula 2595 * log10(1 + hz / 700) is used. Otherwise the
# scale is linear (200/3 Hz per mel) below 1000 Hz and logarithmic above,
# with a step of log(6.4) / 27 per mel.
#
# @param hz [Float]
# @param htk [Boolean]
# @return [Float]
def hz_to_mel(hz, htk: false)
  if htk
    2595.0 * Math.log10(1.0 + (hz / 700.0))
  else
    hz_per_mel = 200.0 / 3.0
    knee_hz = 1000.0
    return hz / hz_per_mel if hz < knee_hz

    knee_mel = knee_hz / hz_per_mel
    log_step = Math.log(6.4) / 27.0
    knee_mel + (Math.log(hz / knee_hz) / log_step)
  end
end
|
|
72
|
+
|
|
73
|
+
# Converts a mel-scale value back to a frequency in Hz.
#
# With +htk+ the formula 700 * (10**(mel / 2595) - 1) is used. Otherwise the
# scale is linear (200/3 Hz per mel) below the mel value of 1000 Hz and
# exponential above, with a step of log(6.4) / 27 per mel.
#
# @param mel_value [Float]
# @param htk [Boolean]
# @return [Float]
def mel_to_hz(mel_value, htk: false)
  if htk
    700.0 * ((10.0**(mel_value / 2595.0)) - 1.0)
  else
    hz_per_mel = 200.0 / 3.0
    knee_hz = 1000.0
    knee_mel = knee_hz / hz_per_mel
    return mel_value * hz_per_mel if mel_value < knee_mel

    log_step = Math.log(6.4) / 27.0
    knee_hz * Math.exp(log_step * (mel_value - knee_mel))
  end
end
|
|
90
|
+
end
|
|
91
|
+
end
|