muze 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +4 -0
- data/CHANGELOG.md +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +93 -0
- data/Rakefile +30 -0
- data/benchmarks/baseline.json +24 -0
- data/benchmarks/native_vs_ruby.rb +23 -0
- data/benchmarks/quality_metrics.rb +265 -0
- data/benchmarks/quality_thresholds.md +28 -0
- data/benchmarks/support/fixture_library.rb +107 -0
- data/examples/beat_tracking.rb +26 -0
- data/examples/chroma_svg.rb +33 -0
- data/examples/feature_report.rb +37 -0
- data/examples/hpss_demo.rb +46 -0
- data/examples/load_and_specshow.rb +30 -0
- data/ext/muze/extconf.rb +6 -0
- data/ext/muze/muze_ext.c +75 -0
- data/lib/muze/beat/beat_track.rb +107 -0
- data/lib/muze/core/dct.rb +63 -0
- data/lib/muze/core/resample.rb +122 -0
- data/lib/muze/core/stft.rb +231 -0
- data/lib/muze/core/windows.rb +69 -0
- data/lib/muze/display/specshow.rb +100 -0
- data/lib/muze/effects/harmonic_percussive.rb +62 -0
- data/lib/muze/effects/time_stretch.rb +171 -0
- data/lib/muze/errors.rb +18 -0
- data/lib/muze/feature/chroma.rb +68 -0
- data/lib/muze/feature/mfcc.rb +120 -0
- data/lib/muze/feature/spectral.rb +266 -0
- data/lib/muze/filters/chroma_filter.rb +54 -0
- data/lib/muze/filters/mel.rb +91 -0
- data/lib/muze/io/audio_loader/ffmpeg_backend.rb +127 -0
- data/lib/muze/io/audio_loader/wavify_backend.rb +52 -0
- data/lib/muze/io/audio_loader.rb +117 -0
- data/lib/muze/native.rb +45 -0
- data/lib/muze/onset/onset_detect.rb +97 -0
- data/lib/muze/version.rb +5 -0
- data/lib/muze.rb +251 -0
- metadata +132 -0
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Muze
|
|
4
|
+
module Core
|
|
5
|
+
# Short-time Fourier transform and related utilities.
|
|
6
|
+
module STFT
|
|
7
|
+
EPSILON = 1.0e-12
|
|
8
|
+
module_function
|
|
9
|
+
|
|
10
|
+
# @param y [Numo::SFloat, Array<Float>] waveform signal
|
|
11
|
+
# @param n_fft [Integer]
|
|
12
|
+
# @param hop_length [Integer]
|
|
13
|
+
# @param win_length [Integer, nil]
|
|
14
|
+
# @param window [Symbol]
|
|
15
|
+
# @param center [Boolean]
|
|
16
|
+
# @param pad_mode [Symbol]
|
|
17
|
+
# @return [Numo::DComplex] shape: [1 + n_fft/2, frames]
|
|
18
|
+
# Computes the one-sided short-time Fourier transform of a waveform.
#
# `pad_mode` is accepted but never consulted: when `center` is true the signal
# is always extended by n_fft / 2 samples through `reflect_pad`. A window
# shorter than n_fft is placed in the middle of each frame and the remaining
# samples of the FFT buffer stay zero. Parameter checks are delegated to
# `validate_stft_params!`.
def stft(y, n_fft: 2048, hop_length: 512, win_length: nil, window: :hann, center: true, pad_mode: :reflect)
  _ = pad_mode
  win_length = n_fft unless win_length
  validate_stft_params!(n_fft:, hop_length:, win_length:)

  samples = y.is_a?(Numo::NArray) ? y.to_a : Array(y)
  samples = reflect_pad(samples, n_fft / 2) if center
  # Guarantee at least one sample so framing always has input to work on.
  samples = [0.0] if samples.empty?

  frames = frame_signal(samples, n_fft, hop_length)
  taper = Muze::Core::Windows.resolve(window, win_length).to_a
  offset = (n_fft - win_length) / 2
  bin_count = (n_fft / 2) + 1
  spectrogram = Numo::DComplex.zeros(bin_count, frames.length)

  frames.each_with_index do |frame, column|
    buffer = Array.new(n_fft, 0.0)
    win_length.times { |tap| buffer[tap + offset] = frame[tap + offset] * taper[tap] }

    spectrum = fft_complex(buffer.map { |sample| Complex(sample, 0.0) })
    # Only the non-negative frequencies are stored.
    (0...bin_count).each { |bin| spectrogram[bin, column] = spectrum[bin] }
  end

  spectrogram
end
|
|
47
|
+
|
|
48
|
+
# @param stft_matrix [Numo::DComplex]
|
|
49
|
+
# @param hop_length [Integer]
|
|
50
|
+
# @param win_length [Integer, nil]
|
|
51
|
+
# @param window [Symbol]
|
|
52
|
+
# @param center [Boolean]
|
|
53
|
+
# @param length [Integer, nil]
|
|
54
|
+
# @return [Numo::SFloat]
|
|
55
|
+
# Reconstructs a waveform from a one-sided STFT by windowed overlap-add.
#
# n_fft is inferred from the number of frequency bins. Each output sample is
# divided by the accumulated squared window wherever that sum exceeds EPSILON.
# With `center` the n_fft / 2 samples of padding are removed from both ends,
# and `length`, when given, truncates or zero-pads the result.
def istft(stft_matrix, hop_length: 512, win_length: nil, window: :hann, center: true, length: nil)
  bin_count, frame_total = stft_matrix.shape
  n_fft = (bin_count - 1) * 2
  win_length = n_fft unless win_length
  validate_stft_params!(n_fft:, hop_length:, win_length:)

  total = n_fft + (hop_length * [frame_total - 1, 0].max)
  accumulated = Array.new(total, 0.0)
  envelope = Array.new(total, 0.0)
  taper = Muze::Core::Windows.resolve(window, win_length).to_a
  offset = (n_fft - win_length) / 2

  (0...frame_total).each do |column|
    positive = (0...bin_count).map { |bin| stft_matrix[bin, column] }
    # Restore the negative frequencies from conjugate symmetry; the first and
    # last bins are not duplicated.
    spectrum = positive + positive[1...-1].reverse.map(&:conj)
    samples = ifft_complex(spectrum).map(&:real)
    base = (column * hop_length) + offset

    win_length.times do |tap|
      target = base + tap
      break if target >= total

      weight = taper[tap]
      accumulated[target] += samples[tap + offset] * weight
      envelope[target] += weight * weight
    end
  end

  restored = accumulated.each_with_index.map do |value, position|
    weight_sum = envelope[position]
    weight_sum > EPSILON ? (value / weight_sum) : value
  end

  if center
    pad = n_fft / 2
    restored = restored[pad...(restored.length - pad)] || []
  end

  restored = adjust_length(restored, length) if length
  Numo::SFloat.cast(restored)
end
|
|
97
|
+
|
|
98
|
+
# @param stft_matrix [Numo::DComplex]
|
|
99
|
+
# @return [Array<Numo::SFloat, Numo::DComplex>]
|
|
100
|
+
# Splits a complex spectrogram into its magnitude and a phase factor.
# The phase factor is the input divided by (magnitude + EPSILON), which keeps
# the division defined for zero-magnitude bins.
def magphase(stft_matrix)
  magnitude = stft_matrix.abs.cast_to(Numo::SFloat)
  [magnitude, stft_matrix / (magnitude + EPSILON)]
end
|
|
105
|
+
|
|
106
|
+
# @param s [Numo::NArray]
|
|
107
|
+
# @param ref [Float, Symbol, Proc]
|
|
108
|
+
# @param amin [Float]
|
|
109
|
+
# @param top_db [Float, nil]
|
|
110
|
+
# @return [Numo::SFloat]
|
|
111
|
+
# Converts an amplitude spectrogram to decibels (20 * log10 scale).
# Complex input is reduced to its magnitude first; anything else is cast to
# single precision as-is.
def amplitude_to_db(s, ref: 1.0, amin: 1.0e-5, top_db: 80.0)
  magnitude =
    if s.is_a?(Numo::DComplex)
      s.abs.cast_to(Numo::SFloat)
    else
      Numo::SFloat.cast(s)
    end

  log_scale(magnitude, ref:, amin:, top_db:, multiplier: 20.0)
end
|
|
115
|
+
|
|
116
|
+
# @param s [Numo::NArray]
|
|
117
|
+
# @param ref [Float, Symbol, Proc]
|
|
118
|
+
# @param amin [Float]
|
|
119
|
+
# @param top_db [Float, nil]
|
|
120
|
+
# @return [Numo::SFloat]
|
|
121
|
+
# Converts a power spectrogram to decibels (10 * log10 scale).
def power_to_db(s, ref: 1.0, amin: 1.0e-10, top_db: 80.0)
  log_scale(Numo::SFloat.cast(s), ref:, amin:, top_db:, multiplier: 10.0)
end
|
|
125
|
+
|
|
126
|
+
# @param s_db [Numo::NArray]
|
|
127
|
+
# @param ref [Float]
|
|
128
|
+
# @return [Numo::SFloat]
|
|
129
|
+
# Inverts the 20 * log10 scaling: ref * 10 ** (s_db / 20), evaluated through
# exp so it applies element-wise to the whole array.
def db_to_amplitude(s_db, ref: 1.0)
  exponent = (Numo::SFloat.cast(s_db) / 20.0) * Math.log(10.0)
  Numo::SFloat.cast(ref.to_f * Numo::NMath.exp(exponent))
end
|
|
132
|
+
|
|
133
|
+
# @param s_db [Numo::NArray]
|
|
134
|
+
# @param ref [Float]
|
|
135
|
+
# @return [Numo::SFloat]
|
|
136
|
+
# Inverts the 10 * log10 scaling: ref * 10 ** (s_db / 10), evaluated through
# exp so it applies element-wise to the whole array.
def db_to_power(s_db, ref: 1.0)
  exponent = (Numo::SFloat.cast(s_db) / 10.0) * Math.log(10.0)
  Numo::SFloat.cast(ref.to_f * Numo::NMath.exp(exponent))
end
|
|
139
|
+
|
|
140
|
+
# Forces `signal` to exactly `length` samples: longer input is cut at the end,
# shorter input is extended with trailing 0.0 values.
#
# @param signal [Array<Float>]
# @param length [Integer]
# @return [Array<Float>] a new array; the argument is not modified
def adjust_length(signal, length)
  shortfall = length - signal.length
  return signal + Array.new(shortfall, 0.0) if shortfall.positive?

  signal[0, length]
end
|
|
145
|
+
private_class_method :adjust_length
|
|
146
|
+
|
|
147
|
+
# Shared decibel conversion behind amplitude_to_db and power_to_db.
#
# Values are floored at `amin`, converted with `multiplier * log10`, and
# expressed relative to the resolved reference. Unless `top_db` is nil, the
# result is additionally floored at (maximum - top_db).
#
# @param values [Numo::SFloat]
# @param ref [Float, Symbol, Proc]
# @param amin [Float]
# @param top_db [Float, nil]
# @param multiplier [Float]
# @return [Numo::SFloat]
def log_scale(values, ref:, amin:, top_db:, multiplier:)
  floored = values.clip(amin, Float::INFINITY)
  offset = multiplier * Math.log10(reference_value(ref, floored, amin))
  decibels = (multiplier * Numo::NMath.log10(floored)) - offset
  decibels = decibels.clip(decibels.max - top_db, Float::INFINITY) unless top_db.nil?

  decibels.cast_to(Numo::SFloat)
end
|
|
158
|
+
private_class_method :log_scale
|
|
159
|
+
|
|
160
|
+
# Resolves the `ref` option of the dB converters to a number.
#
# :max uses the largest entry of `values`, a Proc is called with `values`,
# and anything else is converted with #to_f. The result never drops below
# `amin`.
#
# @param ref [Float, Symbol, Proc]
# @param values [#max]
# @param amin [Float]
# @return [Numeric]
def reference_value(ref, values, amin)
  resolved =
    if ref.equal?(:max)
      values.max
    elsif ref.is_a?(Proc)
      ref.call(values)
    else
      ref.to_f
    end

  [resolved, amin].max
end
|
|
170
|
+
private_class_method :reference_value
|
|
171
|
+
|
|
172
|
+
# Rejects STFT parameter combinations the transform cannot handle.
# Checks run in a fixed order and only the first violation is reported.
#
# @param n_fft [Integer]
# @param hop_length [Integer]
# @param win_length [Integer]
# @return [nil]
# @raise [Muze::ParameterError] when a parameter is out of range
def validate_stft_params!(n_fft:, hop_length:, win_length:)
  complaint =
    if n_fft <= 0
      "n_fft must be positive"
    elsif !power_of_two?(n_fft)
      "n_fft must be a power of two"
    elsif hop_length <= 0
      "hop_length must be positive"
    elsif hop_length > n_fft
      "hop_length must be <= n_fft"
    elsif !win_length.between?(1, n_fft)
      "win_length must be between 1 and n_fft"
    end

  raise Muze::ParameterError, complaint if complaint
end
|
|
179
|
+
private_class_method :validate_stft_params!
|
|
180
|
+
|
|
181
|
+
# Tells whether `value` is a positive integer power of two (1, 2, 4, ...).
#
# The bit trick `value & (value - 1)` clears the lowest set bit, so it is zero
# exactly when at most one bit is set. Zero passes that test too, hence the
# explicit positivity check.
#
# @param value [Integer]
# @return [Boolean]
def power_of_two?(value)
  value.positive? && (value & (value - 1)).zero?
end
|
|
184
|
+
private_class_method :power_of_two?
|
|
185
|
+
|
|
186
|
+
# Cuts `signal` into analysis frames by delegating to Muze::Native.frame_slices.
# NOTE(review): the shape of the trailing frame is decided by that helper, which
# is not visible here.
def frame_signal(signal, n_fft, hop_length) = Muze::Native.frame_slices(signal, n_fft, hop_length)
|
|
189
|
+
private_class_method :frame_signal
|
|
190
|
+
|
|
191
|
+
# Mirror-pads `signal` with `pad` samples on each side, reflecting around the
# first and last samples without repeating them.
#
# Out-of-range positions are folded back into the signal with period
# 2 * (length - 1), so the reflection keeps bouncing between the two edges
# when `pad` is as long as, or longer than, the signal. Slicing the signal
# directly (the previous approach) returned a short leading pad and no
# trailing pad at all in that case.
#
# @param signal [Array<Float>]
# @param pad [Integer] samples to add on each side
# @return [Array<Float>] `signal` itself when pad <= 0 or fewer than two
#   samples are available; otherwise a new array of length + 2 * pad
def reflect_pad(signal, pad)
  return signal if pad <= 0 || signal.length <= 1

  last = signal.length - 1
  period = 2 * last
  mirrored = lambda do |position|
    folded = position % period
    signal[folded > last ? period - folded : folded]
  end

  front = (-pad...0).map(&mirrored)
  back = ((last + 1)..(last + pad)).map(&mirrored)
  front + signal + back
end
|
|
198
|
+
private_class_method :reflect_pad
|
|
199
|
+
|
|
200
|
+
# Recursive radix-2 decimation-in-time FFT.
#
# The input is split into even- and odd-indexed halves, each half is
# transformed recursively, and the halves are combined with the usual
# butterfly: even + w * odd fills the first half of the output and
# even - w * odd the second.
#
# @param values [Array<Complex>]
# @return [Array<Complex>] `values` itself when it has at most one element
# @raise [Muze::ParameterError] when the length is not a power of two
def fft_complex(values)
  size = values.length
  return values if size <= 1

  raise Muze::ParameterError, "FFT length must be a power of two" unless power_of_two?(size)

  evens, odds = values.each_slice(2).to_a.transpose.map { |half| fft_complex(half) }
  midpoint = size / 2
  leading = Array.new(midpoint)
  trailing = Array.new(midpoint)

  midpoint.times do |k|
    rotated = Complex.polar(1.0, -2.0 * Math::PI * k / size) * odds[k]
    leading[k] = evens[k] + rotated
    trailing[k] = evens[k] - rotated
  end

  leading + trailing
end
|
|
220
|
+
private_class_method :fft_complex
|
|
221
|
+
|
|
222
|
+
# Inverse FFT built on the forward transform:
# ifft(x) = conj(fft(conj(x))) / N.
#
# @param values [Array<Complex>]
# @return [Array<Complex>]
def ifft_complex(values)
  count = values.length.to_f
  fft_complex(values.map(&:conj)).map { |bin| bin.conj / count }
end
|
|
228
|
+
private_class_method :ifft_complex
|
|
229
|
+
end
|
|
230
|
+
end
|
|
231
|
+
end
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Muze
|
|
4
|
+
module Core
|
|
5
|
+
# Window function generators for short-time analysis.
|
|
6
|
+
module Windows
|
|
7
|
+
module_function
|
|
8
|
+
|
|
9
|
+
# @param n [Integer]
|
|
10
|
+
# @return [Numo::SFloat]
|
|
11
|
+
# Builds an n-point Hann window. A one-point window is [1.0].
# Raises Muze::ParameterError when n is not positive.
def hann(n)
  raise Muze::ParameterError, "window length must be positive" if n <= 0
  return Numo::SFloat[1.0] if n == 1

  build_window(n) do |index, span|
    angle = (2.0 * Math::PI * index) / span
    0.5 * (1.0 - Math.cos(angle))
  end
end
|
|
17
|
+
|
|
18
|
+
# @param n [Integer]
|
|
19
|
+
# @return [Numo::SFloat]
|
|
20
|
+
# Builds an n-point Hamming window (coefficients 0.54 / 0.46). A one-point
# window is [1.0]. Raises Muze::ParameterError when n is not positive.
def hamming(n)
  raise Muze::ParameterError, "window length must be positive" if n <= 0
  return Numo::SFloat[1.0] if n == 1

  build_window(n) do |index, span|
    angle = (2.0 * Math::PI * index) / span
    0.54 - (0.46 * Math.cos(angle))
  end
end
|
|
26
|
+
|
|
27
|
+
# @param n [Integer]
|
|
28
|
+
# @return [Numo::SFloat]
|
|
29
|
+
# Builds an n-point Blackman window (coefficients 0.42 / 0.5 / 0.08). A
# one-point window is [1.0]. Raises Muze::ParameterError when n is not positive.
def blackman(n)
  raise Muze::ParameterError, "window length must be positive" if n <= 0
  return Numo::SFloat[1.0] if n == 1

  build_window(n) do |index, span|
    angle = (2.0 * Math::PI * index) / span
    0.42 - (0.5 * Math.cos(angle)) + (0.08 * Math.cos(2.0 * angle))
  end
end
|
|
38
|
+
|
|
39
|
+
# @param n [Integer]
|
|
40
|
+
# @return [Numo::SFloat]
|
|
41
|
+
# Builds an n-point rectangular window (all ones).
# Raises Muze::ParameterError when n is not positive.
def ones(n)
  if n <= 0
    raise Muze::ParameterError, "window length must be positive"
  end

  Numo::SFloat.ones(n)
end
|
|
46
|
+
|
|
47
|
+
# @param name [Symbol]
|
|
48
|
+
# @param n [Integer]
|
|
49
|
+
# @return [Numo::SFloat]
|
|
50
|
+
# Looks up a window generator by name and builds an n-point window with it.
# :ones, :boxcar and :rect all select the rectangular window.
# Raises Muze::ParameterError for any other unknown name.
def resolve(name, n)
  generator = {
    hann: :hann,
    hamming: :hamming,
    blackman: :blackman,
    ones: :ones,
    boxcar: :ones,
    rect: :ones
  }[name]
  raise Muze::ParameterError, "Unsupported window: #{name}" unless generator

  send(generator, n)
end
|
|
60
|
+
|
|
61
|
+
# Evaluates a window formula at every sample position.
#
# The block receives the sample index and (length - 1) and returns the window
# value for that index; results are coerced with #to_f and packed into a
# single-precision array.
#
# @param length [Integer]
# @yieldparam k [Integer] sample index
# @yieldparam denominator [Integer] length - 1
# @return [Numo::SFloat]
def build_window(length)
  span = length - 1
  samples = Array.new(length) do |position|
    yield(position, span).to_f
  end

  Numo::SFloat.cast(samples)
end
|
|
66
|
+
private_class_method :build_window
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Muze
|
|
4
|
+
module Display
|
|
5
|
+
module_function
|
|
6
|
+
|
|
7
|
+
# @param data [Numo::NArray]
|
|
8
|
+
# @param sr [Integer]
|
|
9
|
+
# @param hop_length [Integer]
|
|
10
|
+
# @param x_axis [Symbol]
|
|
11
|
+
# @param y_axis [Symbol]
|
|
12
|
+
# @param output [String, nil]
|
|
13
|
+
# @return [String] SVG content
|
|
14
|
+
# Renders a matrix as an 800x400 SVG heat map and returns the SVG text.
#
# Values are min/max normalised and coloured by `heat_color`. Row 0 is drawn
# at the bottom of the image. `sr` and `hop_length` are accepted but do not
# influence the drawing; the axis options are only validated. When `output`
# is given, the SVG is also written to that path.
def specshow(data, sr: 22_050, hop_length: 512, x_axis: :time, y_axis: :linear, output: nil)
  _ = [sr, hop_length]
  validate_axis!(x_axis:, y_axis:)

  grid = Numo::SFloat.cast(data)
  # A vector is shown as a single column.
  grid = grid.expand_dims(1) if grid.ndim == 1
  row_count, col_count = grid.shape

  canvas_width = 800.0
  canvas_height = 400.0
  cell_w = canvas_width / [col_count, 1].max
  cell_h = canvas_height / [row_count, 1].max
  lowest = grid.min
  # Guard against a zero span when every value is identical.
  span = [grid.max - lowest, 1.0e-12].max

  cells = (0...row_count).flat_map do |row|
    top = (row_count - row - 1) * cell_h
    (0...col_count).map do |col|
      fill = heat_color((grid[row, col] - lowest) / span)
      left = col * cell_w
      "<rect x='#{left.round(3)}' y='#{top.round(3)}' width='#{cell_w.round(3)}' height='#{cell_h.round(3)}' fill='#{fill}' />"
    end
  end

  header = "<svg xmlns='http://www.w3.org/2000/svg' width='#{canvas_width.to_i}' height='#{canvas_height.to_i}' viewBox='0 0 #{canvas_width.to_i} #{canvas_height.to_i}'>"
  background = "<rect width='100%' height='100%' fill='#0b132b' />"
  svg = "#{header}#{background}#{cells.join}</svg>"

  File.write(output, svg) if output
  svg
end
|
|
51
|
+
|
|
52
|
+
# @param y [Numo::SFloat, Array<Float>]
|
|
53
|
+
# @param sr [Integer]
|
|
54
|
+
# @param output [String, nil]
|
|
55
|
+
# @return [String] SVG content
|
|
56
|
+
# Renders a waveform as an 800x240 SVG polyline and returns the SVG text.
#
# One point is emitted per horizontal pixel. The sample for pixel x is taken
# at floor(x * step), clamped to the last sample, where step is
# max(samples / 800, 1). `sr` is accepted but unused. When `output` is given,
# the SVG is also written to that path.
def waveshow(y, sr: 22_050, output: nil)
  _ = sr
  samples = y.is_a?(Numo::NArray) ? y.to_a : Array(y)
  canvas_width = 800.0
  canvas_height = 240.0
  baseline = canvas_height / 2.0
  stride = [samples.length.to_f / canvas_width, 1.0].max
  final_index = samples.length - 1

  coordinates = (0...canvas_width.to_i).map do |x|
    # An empty signal yields index -1, which reads nil and is drawn as silence.
    amplitude = samples[[(x * stride).floor, final_index].min] || 0.0
    height = baseline - (amplitude * baseline * 0.9)
    "#{x},#{height.round(2)}"
  end

  header = "<svg xmlns='http://www.w3.org/2000/svg' width='#{canvas_width.to_i}' height='#{canvas_height.to_i}' viewBox='0 0 #{canvas_width.to_i} #{canvas_height.to_i}'>"
  background = "<rect width='100%' height='100%' fill='#111827' />"
  trace = "<polyline fill='none' stroke='#22d3ee' stroke-width='1.5' points='#{coordinates.join(' ')}' />"
  svg = "#{header}#{background}#{trace}</svg>"

  File.write(output, svg) if output
  svg
end
|
|
84
|
+
|
|
85
|
+
# Checks the axis options of specshow against the supported names.
# The x axis is checked first, so it wins when both values are invalid.
#
# @param x_axis [Symbol] :time or :frames
# @param y_axis [Symbol] :linear, :log, :mel or :hz
# @return [nil]
# @raise [Muze::ParameterError] on an unsupported axis name
def validate_axis!(x_axis:, y_axis:)
  case x_axis
  when :time, :frames then nil
  else raise Muze::ParameterError, "unsupported x_axis"
  end

  case y_axis
  when :linear, :log, :mel, :hz then nil
  else raise Muze::ParameterError, "unsupported y_axis"
  end
end
|
|
89
|
+
private_class_method :validate_axis!
|
|
90
|
+
|
|
91
|
+
# Maps a normalised value to an "rgb(r,g,b)" string.
#
# The value is limited to 0..1. Red rises with it, blue falls with it, and
# green peaks at 0.5 and drops to zero at both ends.
#
# @param value [Numeric]
# @return [String]
def heat_color(value)
  level = [[value, 0.0].max, 1.0].min
  red = (255 * level).to_i
  green = (255 * (1.0 - (level - 0.5).abs * 2.0)).to_i
  blue = (255 * (1.0 - level)).to_i

  "rgb(#{red},#{[green, 0].max},#{blue})"
end
|
|
98
|
+
private_class_method :heat_color
|
|
99
|
+
end
|
|
100
|
+
end
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Muze
|
|
4
|
+
module Effects
|
|
5
|
+
module_function
|
|
6
|
+
|
|
7
|
+
# @param y [Numo::SFloat, Array<Float>]
|
|
8
|
+
# @param kernel_size [Integer]
|
|
9
|
+
# @param power [Float]
|
|
10
|
+
# @param margin [Float]
|
|
11
|
+
# @param n_fft [Integer]
|
|
12
|
+
# @param hop_length [Integer]
|
|
13
|
+
# @return [Array(Numo::SFloat, Numo::SFloat)] harmonic and percussive waveforms
|
|
14
|
+
# Separates a waveform into harmonic and percussive components.
#
# The magnitude spectrogram is median-filtered along axis 1 for the harmonic
# estimate and along axis 0 for the percussive one. Both estimates are raised
# to `power` and turned into soft masks, with `margin` weighting the competing
# component. The masked spectrograms are inverted to the input length.
def hpss(y, kernel_size: 31, power: 2.0, margin: 1.0, n_fft: 2048, hop_length: 512)
  spectrum = Muze.stft(y, n_fft:, hop_length:)
  magnitude, = Muze.magphase(spectrum)

  harmonic_energy = median_filter(magnitude, kernel_size, axis: 1)**power
  percussive_energy = median_filter(magnitude, kernel_size, axis: 0)**power

  # The tiny constant keeps the division defined where both energies are zero.
  harmonic_mask = harmonic_energy / (harmonic_energy + (margin * percussive_energy) + 1.0e-12)
  percussive_mask = percussive_energy / (percussive_energy + (margin * harmonic_energy) + 1.0e-12)

  sample_count = (y.is_a?(Numo::NArray) ? y : Numo::SFloat.cast(y)).size
  [harmonic_mask, percussive_mask].map do |mask|
    Muze.istft(spectrum * mask, hop_length:, length: sample_count)
  end
end
|
|
35
|
+
|
|
36
|
+
# Applies a one-dimensional sliding median to a 2-D matrix.
#
# With axis == 1 the window runs along the columns of each row; any other
# value makes it run along the rows of each column. The window reaches
# kernel_size / 2 cells to either side and is truncated at the edges. The
# median itself is computed by Muze::Native.median1d.
#
# @param matrix [Numo::NArray]
# @param kernel_size [Integer]
# @param axis [Integer]
# @return [Numo::SFloat] same shape as `matrix`
def median_filter(matrix, kernel_size, axis:)
  reach = kernel_size / 2
  row_count, col_count = matrix.shape
  filtered = Numo::SFloat.zeros(row_count, col_count)

  row_count.times do |row|
    col_count.times do |col|
      neighbourhood =
        if axis == 1
          ([col - reach, 0].max..[col + reach, col_count - 1].min).map { |index| matrix[row, index] }
        else
          ([row - reach, 0].max..[row + reach, row_count - 1].min).map { |index| matrix[index, col] }
        end

      filtered[row, col] = Muze::Native.median1d(neighbourhood)
    end
  end

  filtered
end
|
|
60
|
+
private_class_method :median_filter
|
|
61
|
+
end
|
|
62
|
+
end
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Muze
|
|
4
|
+
module Effects
|
|
5
|
+
module_function
|
|
6
|
+
|
|
7
|
+
# Keep fast path for short clips where phase vocoder overhead dominates.
|
|
8
|
+
MIN_PHASE_VOCODER_SAMPLES = 32_768
|
|
9
|
+
|
|
10
|
+
# @param y [Numo::SFloat, Array<Float>]
|
|
11
|
+
# @param rate [Float]
|
|
12
|
+
# @return [Numo::SFloat]
|
|
13
|
+
# Changes the duration of a signal to about (length / rate) samples.
#
# Empty input and rate == 1.0 return the signal cast to single precision.
# Clips shorter than MIN_PHASE_VOCODER_SAMPLES are stretched by linear
# interpolation; longer ones go through STFT, phase vocoder and inverse STFT.
# Raises Muze::ParameterError unless `rate` is positive.
def time_stretch(y, rate: 1.0)
  raise Muze::ParameterError, "rate must be positive" unless rate.positive?

  signal = Numo::SFloat.cast(y.is_a?(Numo::NArray) ? y : Array(y))
  return signal if signal.empty? || rate == 1.0
  return linear_time_stretch(signal.to_a, rate) if signal.size < MIN_PHASE_VOCODER_SAMPLES

  n_fft = phase_vocoder_fft_size(signal.size)
  hop_length = [n_fft / 4, 1].max

  analysis = Muze::Core::STFT.stft(signal, n_fft:, hop_length:, center: true)
  synthesis = phase_vocoder(analysis, rate:, hop_length:, n_fft:)
  wanted = [(signal.size / rate).round, 1].max

  Muze::Core::STFT.istft(synthesis, hop_length:, center: true, length: wanted)
end
|
|
29
|
+
|
|
30
|
+
# @param y [Numo::SFloat, Array<Float>]
|
|
31
|
+
# @param sr [Integer]
|
|
32
|
+
# @param n_steps [Float]
|
|
33
|
+
# @return [Numo::SFloat]
|
|
34
|
+
# Shifts the pitch by `n_steps` semitones while keeping the sample count.
#
# The signal is time-stretched by 2 ** (-n_steps / 12) and then resampled
# back to the original number of samples. `sr` is accepted but unused.
# n_steps == 0 returns the input (cast to Numo::SFloat when it was not
# already an NArray).
def pitch_shift(y, sr: 22_050, n_steps: 0)
  _ = sr
  signal = y.is_a?(Numo::NArray) ? y : Numo::SFloat.cast(y)
  return signal if n_steps.zero?

  sample_count = signal.size
  stretched = time_stretch(signal, rate: 2.0**(-n_steps.to_f / 12.0))
  # Long signals get sinc resampling; short ones use linear resampling.
  quality = sample_count >= MIN_PHASE_VOCODER_SAMPLES ? :sinc : :linear
  restored = resample_for_pitch_shift(stretched, target_size: sample_count, preferred_res_type: quality)

  Numo::SFloat.cast(restored[0...sample_count])
end
|
|
45
|
+
|
|
46
|
+
# @param y [Numo::SFloat, Array<Float>]
|
|
47
|
+
# @param top_db [Float]
|
|
48
|
+
# @param frame_length [Integer]
|
|
49
|
+
# @param hop_length [Integer]
|
|
50
|
+
# @return [Array(Numo::SFloat, Array<Integer>)] trimmed signal and [start, end]
|
|
51
|
+
# Removes leading and trailing samples quieter than `top_db` below the peak.
#
# The test is per sample on absolute amplitude; `frame_length` and
# `hop_length` are accepted but unused. Returns the trimmed signal together
# with the [start, end) sample range that was kept, or an empty signal and
# [0, 0] when nothing reaches the threshold.
def trim(y, top_db: 60, frame_length: 2048, hop_length: 512)
  _ = [frame_length, hop_length]
  signal = y.is_a?(Numo::NArray) ? y : Numo::SFloat.cast(y)
  envelope = signal.abs
  peak = [envelope.max, 1.0e-12].max
  cutoff = peak * (10.0**(-top_db / 20.0))

  audible = envelope.to_a.each_index.select { |position| envelope[position] >= cutoff }
  return [Numo::SFloat[], [0, 0]] if audible.empty?

  first = audible.first
  stop = audible.last + 1
  [signal[first...stop], [first, stop]]
end
|
|
63
|
+
|
|
64
|
+
# @param signal_length [Integer]
|
|
65
|
+
# @return [Integer]
|
|
66
|
+
# Picks the FFT size for the phase vocoder: the largest power of two that
# does not exceed min(signal_length, 2048), but never less than 32.
def phase_vocoder_fft_size(signal_length)
  limit = [signal_length, 2048].min
  size = 1
  size <<= 1 until (size << 1) > limit

  [size, 32].max
end
|
|
72
|
+
private_class_method :phase_vocoder_fft_size
|
|
73
|
+
|
|
74
|
+
# @param stft_matrix [Numo::DComplex]
|
|
75
|
+
# @param rate [Float]
|
|
76
|
+
# @param hop_length [Integer]
|
|
77
|
+
# @param n_fft [Integer]
|
|
78
|
+
# @return [Numo::DComplex]
|
|
79
|
+
# Resamples an STFT along time by `rate` while keeping phase coherent.
#
# Output frames are read at fractional positions 0, rate, 2 * rate, ... up to
# the last input frame. The first output frame copies input frame 0. Every
# later frame interpolates magnitude between the two neighbouring input
# frames and advances a per-bin phase accumulator by the expected hop advance
# plus the wrapped deviation measured between those two frames.
def phase_vocoder(stft_matrix, rate:, hop_length:, n_fft:)
  bin_count, frame_total = stft_matrix.shape
  return stft_matrix if frame_total <= 1

  last_frame = frame_total - 1
  positions = []
  cursor = 0.0
  # Positions are accumulated by repeated addition of `rate`.
  loop do
    break unless cursor <= last_frame

    positions << cursor
    cursor += rate
  end

  stretched = Numo::DComplex.zeros(bin_count, positions.length)
  expected_advance = (0...bin_count).map { |bin| (2.0 * Math::PI * hop_length * bin) / n_fft }
  running_phase = (0...bin_count).map { |bin| phase_of(stft_matrix[bin, 0]) }

  (0...bin_count).each { |bin| stretched[bin, 0] = stft_matrix[bin, 0] }

  positions.each_with_index.drop(1).each do |position, column|
    left_frame = position.floor
    right_frame = [left_frame + 1, last_frame].min
    blend = position - left_frame

    (0...bin_count).each do |bin|
      left = stft_matrix[bin, left_frame]
      right = stft_matrix[bin, right_frame]
      magnitude = ((1.0 - blend) * left.abs) + (blend * right.abs)

      deviation = wrap_phase(phase_of(right) - phase_of(left) - expected_advance[bin])
      running_phase[bin] += expected_advance[bin] + deviation

      stretched[bin, column] = Complex.polar(magnitude, running_phase[bin])
    end
  end

  stretched
end
|
|
120
|
+
private_class_method :phase_vocoder
|
|
121
|
+
|
|
122
|
+
# @param complex_number [Complex]
|
|
123
|
+
# @return [Float]
|
|
124
|
+
# Returns the argument (angle in radians) of a complex number via atan2.
def phase_of(complex_number)
  real_part, imaginary_part = complex_number.rectangular
  Math.atan2(imaginary_part, real_part)
end
|
|
127
|
+
private_class_method :phase_of
|
|
128
|
+
|
|
129
|
+
# @param phase [Float]
|
|
130
|
+
# @return [Float]
|
|
131
|
+
# Folds an angle into the interval [-PI, PI) by shifting, taking the
# remainder modulo 2 * PI, and shifting back.
def wrap_phase(phase)
  full_turn = 2.0 * Math::PI
  (phase + Math::PI).modulo(full_turn) - Math::PI
end
|
|
134
|
+
private_class_method :wrap_phase
|
|
135
|
+
|
|
136
|
+
# @param signal [Array<Float>]
|
|
137
|
+
# @param rate [Float]
|
|
138
|
+
# @return [Numo::SFloat]
|
|
139
|
+
# Stretches a signal by linear interpolation.
#
# Output sample i is read from source position i * rate, blending the two
# neighbouring input samples. The right neighbour is clamped to the last
# sample. The output has round(length / rate) samples, at least one.
def linear_time_stretch(signal, rate)
  last = signal.length - 1
  count = [(signal.length / rate).round, 1].max

  resampled = Array.new(count) do |index|
    position = index * rate
    lower = position.floor
    upper = [lower + 1, last].min
    weight = position - lower
    ((1.0 - weight) * signal[lower]) + (weight * signal[upper])
  end

  Numo::SFloat.cast(resampled)
end
|
|
153
|
+
private_class_method :linear_time_stretch
|
|
154
|
+
|
|
155
|
+
# Prefer sinc-quality resampling, then fall back to linear on failure.
|
|
156
|
+
# @param stretched [Numo::SFloat]
|
|
157
|
+
# @param target_size [Integer]
|
|
158
|
+
# @param preferred_res_type [Symbol]
|
|
159
|
+
# @return [Numo::SFloat]
|
|
160
|
+
# Resamples a time-stretched signal to `target_size` samples.
#
# The current and target sample counts are passed to the resampler as the
# original and target rates. When `preferred_res_type` is :sinc, sinc
# resampling is tried first and any StandardError (which includes
# Muze::Error) triggers a linear fallback. For every other value the linear
# resampler is called once and its errors propagate unchanged. The fallback
# is scoped to the sinc attempt so a failing linear call is never repeated.
#
# @param stretched [Numo::SFloat]
# @param target_size [Integer]
# @param preferred_res_type [Symbol]
# @return [Numo::SFloat]
def resample_for_pitch_shift(stretched, target_size:, preferred_res_type:)
  resample = lambda do |res_type|
    Muze::Core::Resample.resample(stretched, orig_sr: stretched.size, target_sr: target_size, res_type:)
  end
  return resample.call(:linear) unless preferred_res_type == :sinc

  begin
    resample.call(:sinc)
  rescue StandardError
    # Best effort: degrade to linear quality rather than fail the pitch shift.
    resample.call(:linear)
  end
end
|
|
169
|
+
private_class_method :resample_for_pitch_shift
|
|
170
|
+
end
|
|
171
|
+
end
|
data/lib/muze/errors.rb
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Muze
  # Root of the Muze exception hierarchy; rescue this to catch any Muze failure.
  class Error < StandardError
  end

  # Signals that an audio file could not be loaded.
  class AudioLoadError < Error
  end

  # Signals that an audio format is not supported.
  class UnsupportedFormatError < Error
  end

  # Signals that a method received invalid parameters.
  class ParameterError < Error
  end

  # Signals that an optional runtime dependency is not available.
  class DependencyError < Error
  end
end
|