muze 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -1
- data/README.md +5 -0
- data/Rakefile +3 -0
- data/ext/muze/muze_ext.c +129 -12
- data/lib/muze/beat/beat_track.rb +93 -11
- data/lib/muze/core/audio.rb +129 -0
- data/lib/muze/core/cache.rb +38 -0
- data/lib/muze/core/dct.rb +24 -21
- data/lib/muze/core/frames.rb +31 -0
- data/lib/muze/core/matrix.rb +23 -0
- data/lib/muze/core/resample.rb +111 -19
- data/lib/muze/core/stft.rb +312 -52
- data/lib/muze/core/windows.rb +113 -17
- data/lib/muze/display/specshow.rb +307 -41
- data/lib/muze/effects/harmonic_percussive.rb +83 -18
- data/lib/muze/effects/streaming.rb +101 -0
- data/lib/muze/effects/time_stretch.rb +353 -36
- data/lib/muze/feature/aggregation.rb +49 -0
- data/lib/muze/feature/chroma.rb +43 -15
- data/lib/muze/feature/context.rb +81 -0
- data/lib/muze/feature/mfcc.rb +78 -38
- data/lib/muze/feature/spectral.rb +258 -39
- data/lib/muze/filters/chroma_filter.rb +21 -2
- data/lib/muze/filters/mel.rb +47 -1
- data/lib/muze/io/audio_loader/ffmpeg_backend.rb +179 -15
- data/lib/muze/io/audio_loader/wavify_backend.rb +118 -11
- data/lib/muze/io/audio_loader.rb +178 -48
- data/lib/muze/io/audio_writer.rb +48 -0
- data/lib/muze/native.rb +91 -8
- data/lib/muze/onset/onset_detect.rb +114 -23
- data/lib/muze/version.rb +1 -1
- data/lib/muze.rb +237 -60
- metadata +11 -21
- data/benchmarks/baseline.json +0 -24
- data/benchmarks/native_vs_ruby.rb +0 -23
- data/benchmarks/quality_metrics.rb +0 -265
- data/benchmarks/quality_thresholds.md +0 -28
- data/benchmarks/support/fixture_library.rb +0 -107
|
@@ -11,41 +11,46 @@ module Muze
|
|
|
11
11
|
# @param y_axis [Symbol]
|
|
12
12
|
# @param output [String, nil]
|
|
13
13
|
# @return [String] SVG content
|
|
14
|
-
def specshow(data, sr: 22_050, hop_length: 512, x_axis: :time, y_axis: :linear, output: nil)
|
|
15
|
-
_ = [sr, hop_length]
|
|
14
|
+
def specshow(data, sr: 22_050, hop_length: 512, x_axis: :time, y_axis: :linear, output: nil, width: 800, height: 400, cmap: :heat, vmin: nil, vmax: nil, fragment: false, render: :auto)
|
|
16
15
|
validate_axis!(x_axis:, y_axis:)
|
|
16
|
+
raise Muze::ParameterError, "width and height must be positive" unless width.positive? && height.positive?
|
|
17
|
+
raise Muze::ParameterError, "sr and hop_length must be positive" unless sr.positive? && hop_length.positive?
|
|
18
|
+
raise Muze::ParameterError, "render must be :auto, :rects, or :image" unless %i[auto rects image].include?(render)
|
|
17
19
|
|
|
18
20
|
matrix = Numo::SFloat.cast(data)
|
|
19
21
|
matrix = matrix.expand_dims(1) if matrix.ndim == 1
|
|
22
|
+
validate_matrix!(matrix)
|
|
23
|
+
validate_color_bounds!(vmin:, vmax:)
|
|
24
|
+
render = image_render?(matrix, render:) ? :image : :rects
|
|
25
|
+
matrix = downsample_matrix(matrix, max_cells: 12_000) if render == :rects
|
|
20
26
|
rows, cols = matrix.shape
|
|
21
27
|
|
|
22
|
-
width =
|
|
23
|
-
height =
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
min = matrix.min
|
|
27
|
-
max = matrix.max
|
|
28
|
+
width = width.to_f
|
|
29
|
+
height = height.to_f
|
|
30
|
+
min = vmin || matrix.min
|
|
31
|
+
max = vmax || matrix.max
|
|
28
32
|
range = [max - min, 1.0e-12].max
|
|
29
33
|
|
|
30
|
-
|
|
31
|
-
rows.times do |row|
|
|
32
|
-
cols.times do |col|
|
|
33
|
-
normalized = (matrix[row, col] - min) / range
|
|
34
|
-
color = heat_color(normalized)
|
|
35
|
-
x = col * cell_width
|
|
36
|
-
y = (rows - row - 1) * cell_height
|
|
37
|
-
rects << "<rect x='#{x.round(3)}' y='#{y.round(3)}' width='#{cell_width.round(3)}' height='#{cell_height.round(3)}' fill='#{color}' />"
|
|
38
|
-
end
|
|
39
|
-
end
|
|
34
|
+
content = render == :image ? image_element(matrix, width:, height:, min:, range:, cmap:) : rect_elements(matrix, rows:, cols:, width:, height:, min:, range:, cmap:, y_axis:, x_axis:, sr:, hop_length:)
|
|
40
35
|
|
|
41
|
-
|
|
36
|
+
body = [
|
|
37
|
+
"<g data-x-axis='#{x_axis}' data-y-axis='#{y_axis}' data-sr='#{sr}' data-hop-length='#{hop_length}' data-render='#{render}'>",
|
|
38
|
+
content,
|
|
39
|
+
"</g>"
|
|
40
|
+
].join
|
|
41
|
+
|
|
42
|
+
svg = if fragment
|
|
43
|
+
body
|
|
44
|
+
else
|
|
45
|
+
[
|
|
42
46
|
"<svg xmlns='http://www.w3.org/2000/svg' width='#{width.to_i}' height='#{height.to_i}' viewBox='0 0 #{width.to_i} #{height.to_i}'>",
|
|
43
47
|
"<rect width='100%' height='100%' fill='#0b132b' />",
|
|
44
|
-
|
|
48
|
+
body,
|
|
45
49
|
"</svg>"
|
|
46
|
-
|
|
50
|
+
].join
|
|
51
|
+
end
|
|
47
52
|
|
|
48
|
-
|
|
53
|
+
write_output(output, svg) if output
|
|
49
54
|
svg
|
|
50
55
|
end
|
|
51
56
|
|
|
@@ -53,32 +58,68 @@ module Muze
|
|
|
53
58
|
# @param sr [Integer]
|
|
54
59
|
# @param output [String, nil]
|
|
55
60
|
# @return [String] SVG content
|
|
56
|
-
def waveshow(y, sr: 22_050, output: nil)
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
+
def waveshow(y, sr: 22_050, output: nil, width: 800, height: 240, normalize: true, channels: :overlay)
|
|
62
|
+
raise Muze::ParameterError, "width and height must be positive" unless width.positive? && height.positive?
|
|
63
|
+
raise Muze::ParameterError, "sr must be positive" unless sr.positive?
|
|
64
|
+
raise Muze::ParameterError, "normalize must be true or false" unless [true, false].include?(normalize)
|
|
65
|
+
raise Muze::ParameterError, "channels must be :overlay or :split" unless %i[overlay split].include?(channels)
|
|
66
|
+
|
|
67
|
+
signal = Muze::Core::Audio.validate_audio!(y, allow_empty: true).to_a
|
|
68
|
+
channel_data = signal.first.is_a?(Array) ? transpose_channels(signal) : [signal]
|
|
69
|
+
channel_data = channel_data.map { |channel| normalize ? normalize_wave(channel) : channel }
|
|
70
|
+
width = width.to_f
|
|
71
|
+
height = height.to_f
|
|
61
72
|
middle = height / 2.0
|
|
62
|
-
|
|
73
|
+
paths = channel_data.each_with_index.map do |channel, index|
|
|
74
|
+
top, lane_height = channel_lane(index, channel_data.length, height, channels:)
|
|
75
|
+
envelope_path(channel, width:, top:, height: lane_height)
|
|
76
|
+
end
|
|
63
77
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
78
|
+
svg = [
|
|
79
|
+
"<svg xmlns='http://www.w3.org/2000/svg' width='#{width.to_i}' height='#{height.to_i}' viewBox='0 0 #{width.to_i} #{height.to_i}'>",
|
|
80
|
+
"<rect width='100%' height='100%' fill='#111827' />",
|
|
81
|
+
"<g data-sr='#{sr}' data-channels='#{channels}' transform='translate(0 #{middle * 0.0})'>",
|
|
82
|
+
paths.join,
|
|
83
|
+
"</g>",
|
|
84
|
+
"</svg>"
|
|
85
|
+
].join
|
|
86
|
+
|
|
87
|
+
write_output(output, svg) if output
|
|
88
|
+
svg
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
# @return [String] SVG content
|
|
92
|
+
def onsetshow(onset_envelope, sr: 22_050, hop_length: 512, output: nil, width: 800, height: 160, normalize: true)
|
|
93
|
+
raise Muze::ParameterError, "width and height must be positive" unless width.positive? && height.positive?
|
|
94
|
+
raise Muze::ParameterError, "sr and hop_length must be positive" unless sr.positive? && hop_length.positive?
|
|
95
|
+
raise Muze::ParameterError, "normalize must be true or false" unless [true, false].include?(normalize)
|
|
96
|
+
|
|
97
|
+
envelope = Numo::SFloat.cast(onset_envelope).to_a.flatten
|
|
98
|
+
raise Muze::ParameterError, "onset envelope must contain only finite values" unless envelope.all? { |value| value.respond_to?(:finite?) && value.finite? }
|
|
99
|
+
|
|
100
|
+
peak = envelope.map(&:abs).max.to_f
|
|
101
|
+
values = normalize && peak.positive? ? envelope.map { |value| value / peak } : envelope
|
|
102
|
+
width = width.to_f
|
|
103
|
+
height = height.to_f
|
|
104
|
+
bar_width = width / [values.length, 1].max
|
|
105
|
+
bars = values.each_with_index.map do |value, index|
|
|
106
|
+
scaled = [[value, 0.0].max, 1.0].min
|
|
107
|
+
bar_height = scaled * height
|
|
108
|
+
x = index * bar_width
|
|
109
|
+
y = height - bar_height
|
|
110
|
+
"<rect x='#{x.round(3)}' y='#{y.round(3)}' width='#{[bar_width, 0.1].max.round(3)}' height='#{bar_height.round(3)}' fill='#22d3ee' />"
|
|
72
111
|
end
|
|
73
112
|
|
|
74
113
|
svg = [
|
|
75
114
|
"<svg xmlns='http://www.w3.org/2000/svg' width='#{width.to_i}' height='#{height.to_i}' viewBox='0 0 #{width.to_i} #{height.to_i}'>",
|
|
76
115
|
"<rect width='100%' height='100%' fill='#111827' />",
|
|
77
|
-
"<
|
|
116
|
+
"<g data-kind='onset' data-sr='#{sr}' data-hop-length='#{hop_length}'>",
|
|
117
|
+
bars.join,
|
|
118
|
+
"</g>",
|
|
78
119
|
"</svg>"
|
|
79
120
|
].join
|
|
80
121
|
|
|
81
|
-
|
|
122
|
+
write_output(output, svg) if output
|
|
82
123
|
svg
|
|
83
124
|
end
|
|
84
125
|
|
|
@@ -88,13 +129,238 @@ module Muze
|
|
|
88
129
|
end
|
|
89
130
|
private_class_method :validate_axis!
|
|
90
131
|
|
|
91
|
-
def
|
|
132
|
+
def validate_matrix!(matrix)
|
|
133
|
+
return if matrix.to_a.flatten.all? { |value| value.respond_to?(:finite?) && value.finite? }
|
|
134
|
+
|
|
135
|
+
raise Muze::ParameterError, "data must contain only finite numeric values"
|
|
136
|
+
end
|
|
137
|
+
private_class_method :validate_matrix!
|
|
138
|
+
|
|
139
|
+
def validate_color_bounds!(vmin:, vmax:)
|
|
140
|
+
[[:vmin, vmin], [:vmax, vmax]].each do |name, value|
|
|
141
|
+
next if value.nil? || (value.respond_to?(:finite?) && value.finite?)
|
|
142
|
+
|
|
143
|
+
raise Muze::ParameterError, "#{name} must be finite"
|
|
144
|
+
end
|
|
145
|
+
return unless vmin && vmax && vmin > vmax
|
|
146
|
+
|
|
147
|
+
raise Muze::ParameterError, "vmin must be <= vmax"
|
|
148
|
+
end
|
|
149
|
+
private_class_method :validate_color_bounds!
|
|
150
|
+
|
|
151
|
+
# Maps a normalized intensity to an SVG `rgb(r,g,b)` color string.
#
# The colormap lookup is delegated to color_tuple_for so the list of
# supported colormaps is defined in exactly one place.
#
# @param value [Numeric] normalized intensity
# @param cmap [Symbol] colormap name accepted by color_tuple_for
# @return [String] color in `rgb(r,g,b)` form
# @raise [Muze::ParameterError] when color_tuple_for rejects the colormap
def color_for(value, cmap:)
  rgb_string(color_tuple_for(value, cmap:))
end
|
|
160
|
+
private_class_method :color_for
|
|
161
|
+
|
|
162
|
+
def color_tuple_for(value, cmap:)
|
|
163
|
+
case cmap
|
|
164
|
+
when :heat then heat_rgb(value)
|
|
165
|
+
when :gray, :grey then gray_rgb(value)
|
|
166
|
+
when :magma then magma_rgb(value)
|
|
167
|
+
else
|
|
168
|
+
raise Muze::ParameterError, "unsupported cmap"
|
|
169
|
+
end
|
|
170
|
+
end
|
|
171
|
+
private_class_method :color_tuple_for
|
|
172
|
+
|
|
173
|
+
def rgb_string(tuple)
|
|
174
|
+
format("rgb(%<r>d,%<g>d,%<b>d)", r: tuple[0], g: tuple[1], b: tuple[2])
|
|
175
|
+
end
|
|
176
|
+
private_class_method :rgb_string
|
|
177
|
+
|
|
178
|
+
def heat_rgb(value)
|
|
92
179
|
clamped = [[value, 0.0].max, 1.0].min
|
|
93
180
|
r = (255 * clamped).to_i
|
|
94
181
|
g = (255 * (1.0 - (clamped - 0.5).abs * 2.0)).to_i
|
|
95
182
|
b = (255 * (1.0 - clamped)).to_i
|
|
96
|
-
|
|
183
|
+
[r, [g, 0].max, b]
|
|
184
|
+
end
|
|
185
|
+
private_class_method :heat_rgb
|
|
186
|
+
|
|
187
|
+
def gray_rgb(value)
|
|
188
|
+
level = (255 * [[value, 0.0].max, 1.0].min).to_i
|
|
189
|
+
[level, level, level]
|
|
190
|
+
end
|
|
191
|
+
private_class_method :gray_rgb
|
|
192
|
+
|
|
193
|
+
def magma_rgb(value)
|
|
194
|
+
clamped = [[value, 0.0].max, 1.0].min
|
|
195
|
+
r = (252 * clamped).to_i
|
|
196
|
+
g = (80 * (clamped**1.5)).to_i
|
|
197
|
+
b = (120 * (1.0 - clamped) + 40).to_i
|
|
198
|
+
[r, g, b]
|
|
199
|
+
end
|
|
200
|
+
private_class_method :magma_rgb
|
|
201
|
+
|
|
202
|
+
# Renders the matrix as one SVG `<rect>` per cell.
#
# Cell edges come from x_position / y_position. They depend only on the
# column (respectively row) index, so they are computed once up front
# instead of once per cell.
#
# @param matrix [#[]] two-dimensional data indexed as matrix[row, col]
# @param rows [Integer] number of rows to draw
# @param cols [Integer] number of columns to draw
# @param width [Numeric] drawing width passed to x_position
# @param height [Numeric] drawing height passed to y_position
# @param min [Numeric] value mapped to the low end of the colormap
# @param range [Numeric] divisor used to normalize `value - min`
# @param cmap [Symbol] colormap name passed to color_for
# @param y_axis [Symbol] vertical axis mode passed to y_position
# @param x_axis [Symbol] horizontal axis mode passed to x_position
# @param sr [Numeric] sample rate passed to the position helpers
# @param hop_length [Numeric] hop length passed to x_position
# @return [String] concatenated `<rect>` elements
def rect_elements(matrix, rows:, cols:, width:, height:, min:, range:, cmap:, y_axis:, x_axis:, sr:, hop_length:)
  x_edges = Array.new(cols + 1) { |col| x_position(col, cols, width, hop_length:, sr:, x_axis:) }
  y_edges = Array.new(rows + 1) { |row| y_position(row, rows, height, y_axis:, sr:) }

  # [x, width] per column, already rounded for output; widths never drop below 0.1.
  columns = x_edges.each_cons(2).map do |left, right|
    [left.round(3), [right - left, 0.1].max.round(3)]
  end

  rects = []
  rows.times do |row|
    y_top = y_edges[row + 1]
    y = y_top.round(3)
    cell_height = [y_edges[row] - y_top, 0.1].max.round(3)

    columns.each_with_index do |(x, rect_width), col|
      color = color_for((matrix[row, col] - min) / range, cmap:)
      rects << "<rect x='#{x}' y='#{y}' width='#{rect_width}' height='#{cell_height}' fill='#{color}' />"
    end
  end
  rects.join
end
|
|
219
|
+
private_class_method :rect_elements
|
|
220
|
+
|
|
221
|
+
def image_render?(matrix, render:)
|
|
222
|
+
return true if render == :image
|
|
223
|
+
return false if render == :rects
|
|
224
|
+
|
|
225
|
+
matrix.shape.reduce(:*) > 12_000
|
|
226
|
+
end
|
|
227
|
+
private_class_method :image_render?
|
|
228
|
+
|
|
229
|
+
# Renders the matrix as a single SVG `<image>` holding a base64 BMP data URI.
#
# The image is stretched over the full drawing area
# (`preserveAspectRatio='none'`).
#
# @param matrix [Object] data forwarded unchanged to bmp_bytes
# @param width [Numeric] drawing width; truncated with to_i for the attribute
# @param height [Numeric] drawing height; truncated with to_i for the attribute
# @param min [Numeric] forwarded to bmp_bytes
# @param range [Numeric] forwarded to bmp_bytes
# @param cmap [Symbol] forwarded to bmp_bytes
# @return [String] the `<image>` element
def image_element(matrix, width:, height:, min:, range:, cmap:)
  encoded = base64_encode(bmp_bytes(matrix, min:, range:, cmap:))
  href = "data:image/bmp;base64,#{encoded}"
  "<image x='0' y='0' width='#{width.to_i}' height='#{height.to_i}' preserveAspectRatio='none' href='#{href}' />"
end
|
|
234
|
+
private_class_method :image_element
|
|
235
|
+
|
|
236
|
+
# Encodes the matrix as an uncompressed 24-bit BMP file.
#
# The header declares a positive height, which makes the pixel array
# bottom-up: the first scanline in the file is the bottom row of the
# picture. Matrix row 0 is therefore written first, so it lands at the
# bottom of the image, matching rect_elements where row 0 is the lowest strip.
#
# @param matrix [#shape, #[]] two-dimensional data indexed as matrix[row, col]
# @param min [Numeric] value mapped to the low end of the colormap
# @param range [Numeric] divisor used to normalize `value - min`
# @param cmap [Symbol] colormap name passed to color_tuple_for
# @return [String] binary BMP file contents (14-byte file header,
#   40-byte info header, then the pixel array)
def bmp_bytes(matrix, min:, range:, cmap:)
  rows, cols = matrix.shape
  # Every scanline is padded to a multiple of four bytes.
  row_stride = ((cols * 3) + 3) & ~3
  pixel_bytes = row_stride * rows
  padding = [0] * (row_stride - (cols * 3))

  file_header = ["BM", 54 + pixel_bytes, 0, 54].pack("a2 V V V")
  info_header = [40, cols, rows, 1, 24, 0, pixel_bytes, 2835, 2835, 0, 0].pack("V V V v v V V V V V V")

  scanlines = Array.new(rows) do |row|
    samples = Array.new(cols) do |col|
      r, g, b = color_tuple_for((matrix[row, col] - min) / range, cmap:)
      [b, g, r] # BMP stores each pixel as blue, green, red
    end
    (samples.flatten + padding).pack("C*")
  end

  (file_header + info_header + scanlines.join).b
end
|
|
257
|
+
private_class_method :bmp_bytes
|
|
258
|
+
|
|
259
|
+
BASE64_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
|
|
260
|
+
|
|
261
|
+
# Base64-encodes a byte string (standard alphabet, `=` padding, no line breaks).
#
# Uses the built-in `m0` pack directive rather than a hand-written encoder.
#
# @param bytes [String] raw bytes to encode
# @return [String] Base64 text
def base64_encode(bytes)
  [bytes].pack("m0")
end
|
|
278
|
+
private_class_method :base64_encode
|
|
279
|
+
|
|
280
|
+
def x_position(col, cols, width, hop_length:, sr:, x_axis:)
|
|
281
|
+
return col * width / [cols, 1].max if x_axis == :frames
|
|
282
|
+
|
|
283
|
+
total_time = [cols * hop_length.to_f / sr, 1.0e-12].max
|
|
284
|
+
(col * hop_length.to_f / sr) * width / total_time
|
|
285
|
+
end
|
|
286
|
+
private_class_method :x_position
|
|
287
|
+
|
|
288
|
+
# Converts a row edge index into a vertical pixel coordinate.
#
# The row is mapped to a fraction of the axis and measured from the bottom
# of the drawing, so a fraction of 0 yields `height` and a fraction of 1
# yields 0.
#
# @param row [Integer] row edge index
# @param rows [Integer] total number of rows; values below 1 are treated as 1
# @param height [Numeric] drawing height
# @param y_axis [Symbol] :linear, :hz, :mel, or :log
# @param sr [Numeric] sample rate; only the :mel mapping uses it
# @return [Float] y coordinate
# @raise [Muze::ParameterError] for any other y_axis, which previously
#   surfaced as a NoMethodError on nil
def y_position(row, rows, height, y_axis:, sr:)
  row_count = [rows, 1].max
  normalized = case y_axis
               when :linear, :hz
                 row.to_f / row_count
               when :mel
                 nyquist = sr / 2.0
                 Muze::Filters.hz_to_mel(row * nyquist / row_count) / Muze::Filters.hz_to_mel(nyquist)
               when :log
                 Math.log10(1.0 + (9.0 * row / row_count.to_f))
               else
                 raise Muze::ParameterError, "unsupported y_axis: #{y_axis.inspect}"
               end
  height - (normalized * height)
end
|
|
300
|
+
private_class_method :y_position
|
|
301
|
+
|
|
302
|
+
def downsample_matrix(matrix, max_cells:)
|
|
303
|
+
rows, cols = matrix.shape
|
|
304
|
+
return matrix if rows * cols <= max_cells
|
|
305
|
+
|
|
306
|
+
col_step = [(rows * cols / max_cells.to_f).ceil, 1].max
|
|
307
|
+
selected_cols = (0...cols).step(col_step).to_a
|
|
308
|
+
output = Numo::SFloat.zeros(rows, selected_cols.length)
|
|
309
|
+
selected_cols.each_with_index { |col, index| output[true, index] = matrix[true, col] }
|
|
310
|
+
output
|
|
311
|
+
end
|
|
312
|
+
private_class_method :downsample_matrix
|
|
313
|
+
|
|
314
|
+
# Regroups frame-major samples into one array per channel.
#
# @param samples [Array<Array<Numeric>>] one inner array per frame, each
#   holding one value per channel
# @return [Array<Array<Numeric>>] one inner array per channel
def transpose_channels(samples)
  samples.transpose
end
|
|
318
|
+
private_class_method :transpose_channels
|
|
319
|
+
|
|
320
|
+
# Scales a channel so its largest absolute sample becomes 1.0.
#
# The peak is coerced to Float so that Integer samples are not subjected to
# integer division.
#
# @param channel [Array<Numeric>] samples of one channel
# @return [Array<Numeric>] scaled samples, or the input unchanged when it
#   is empty or its peak is zero
def normalize_wave(channel)
  peak = channel.map(&:abs).max.to_f # nil.to_f is 0.0, which covers the empty channel
  return channel if peak <= 0.0

  channel.map { |value| value / peak }
end
|
|
326
|
+
private_class_method :normalize_wave
|
|
327
|
+
|
|
328
|
+
def channel_lane(index, count, height, channels:)
|
|
329
|
+
return [0.0, height] if channels == :overlay
|
|
330
|
+
|
|
331
|
+
lane_height = height / count
|
|
332
|
+
[index * lane_height, lane_height]
|
|
333
|
+
end
|
|
334
|
+
private_class_method :channel_lane
|
|
335
|
+
|
|
336
|
+
def envelope_path(channel, width:, top:, height:)
|
|
337
|
+
middle = top + (height / 2.0)
|
|
338
|
+
step = [channel.length.to_f / width, 1.0].max
|
|
339
|
+
segments = []
|
|
340
|
+
|
|
341
|
+
x = 0
|
|
342
|
+
while x < width
|
|
343
|
+
start_index = (x * step).floor
|
|
344
|
+
end_index = [((x + 1) * step).ceil, channel.length].min
|
|
345
|
+
window = channel[start_index...end_index] || [0.0]
|
|
346
|
+
min = window.min || 0.0
|
|
347
|
+
max = window.max || 0.0
|
|
348
|
+
y_min = middle - (min * height * 0.45)
|
|
349
|
+
y_max = middle - (max * height * 0.45)
|
|
350
|
+
segments << "M #{x.round(2)} #{y_min.round(2)} L #{x.round(2)} #{y_max.round(2)}"
|
|
351
|
+
x += 1
|
|
352
|
+
end
|
|
353
|
+
|
|
354
|
+
"<path d='#{segments.join(' ')}' fill='none' stroke='#22d3ee' stroke-width='1.2' />"
|
|
355
|
+
end
|
|
356
|
+
private_class_method :envelope_path
|
|
357
|
+
|
|
358
|
+
def write_output(output, svg)
|
|
359
|
+
path = output.respond_to?(:to_path) ? output.to_path : output
|
|
360
|
+
File.write(path, svg)
|
|
361
|
+
rescue SystemCallError => e
|
|
362
|
+
raise Muze::Error, "Failed to write SVG output #{path}: #{e.message}"
|
|
97
363
|
end
|
|
98
|
-
private_class_method :
|
|
364
|
+
private_class_method :write_output
|
|
99
365
|
end
|
|
100
366
|
end
|
|
@@ -11,8 +11,20 @@ module Muze
|
|
|
11
11
|
# @param n_fft [Integer]
|
|
12
12
|
# @param hop_length [Integer]
|
|
13
13
|
# @return [Array(Numo::SFloat, Numo::SFloat)] harmonic and percussive waveforms
|
|
14
|
-
def hpss(y, kernel_size: 31, power: 2.0, margin: 1.0, n_fft: 2048, hop_length: 512)
|
|
15
|
-
|
|
14
|
+
def hpss(y, kernel_size: 31, power: 2.0, margin: 1.0, n_fft: 2048, hop_length: 512, return_masks: false)
|
|
15
|
+
validate_hpss_params!(kernel_size:, power:, margin:)
|
|
16
|
+
raise Muze::ParameterError, "return_masks must be true or false" unless [true, false].include?(return_masks)
|
|
17
|
+
|
|
18
|
+
signal = Muze::Core::Audio.validate_audio!(y, allow_empty: true)
|
|
19
|
+
return hpss_channels(signal, kernel_size:, power:, margin:, n_fft:, hop_length:, return_masks:) if signal.ndim == 2
|
|
20
|
+
|
|
21
|
+
hpss_mono(signal, kernel_size:, power:, margin:, n_fft:, hop_length:, return_masks:)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def hpss_mono(signal, kernel_size:, power:, margin:, n_fft:, hop_length:, return_masks:)
|
|
25
|
+
signal = Numo::SFloat.cast(signal)
|
|
26
|
+
|
|
27
|
+
stft_matrix = Muze.stft(signal, n_fft:, hop_length:)
|
|
16
28
|
magnitude, = Muze.magphase(stft_matrix)
|
|
17
29
|
|
|
18
30
|
harmonic_median = median_filter(magnitude, kernel_size, axis: 1)
|
|
@@ -20,43 +32,96 @@ module Muze
|
|
|
20
32
|
|
|
21
33
|
harmonic_weight = harmonic_median**power
|
|
22
34
|
percussive_weight = percussive_median**power
|
|
35
|
+
harmonic_margin, percussive_margin = Array(margin)
|
|
36
|
+
harmonic_margin ||= margin
|
|
37
|
+
percussive_margin ||= harmonic_margin
|
|
23
38
|
|
|
24
|
-
harmonic_mask = harmonic_weight
|
|
25
|
-
percussive_mask = percussive_weight
|
|
39
|
+
harmonic_mask = soft_mask(harmonic_weight, harmonic_weight + (harmonic_margin * percussive_weight), power: 1.0)
|
|
40
|
+
percussive_mask = soft_mask(percussive_weight, percussive_weight + (percussive_margin * harmonic_weight), power: 1.0)
|
|
26
41
|
|
|
27
42
|
harmonic_stft = stft_matrix * harmonic_mask
|
|
28
43
|
percussive_stft = stft_matrix * percussive_mask
|
|
29
44
|
|
|
30
|
-
signal = y.is_a?(Numo::NArray) ? y : Numo::SFloat.cast(y)
|
|
31
45
|
harmonic = Muze.istft(harmonic_stft, hop_length:, length: signal.size)
|
|
32
46
|
percussive = Muze.istft(percussive_stft, hop_length:, length: signal.size)
|
|
47
|
+
return [harmonic, percussive, harmonic_mask, percussive_mask] if return_masks
|
|
48
|
+
|
|
49
|
+
[harmonic, percussive]
|
|
50
|
+
end
|
|
51
|
+
private_class_method :hpss_mono
|
|
52
|
+
|
|
53
|
+
def hpss_channels(signal, kernel_size:, power:, margin:, n_fft:, hop_length:, return_masks:)
|
|
54
|
+
frames, channels = signal.shape
|
|
55
|
+
harmonic = Numo::SFloat.zeros(frames, channels)
|
|
56
|
+
percussive = Numo::SFloat.zeros(frames, channels)
|
|
57
|
+
harmonic_masks = []
|
|
58
|
+
percussive_masks = []
|
|
59
|
+
|
|
60
|
+
channels.times do |channel|
|
|
61
|
+
result = hpss_mono(
|
|
62
|
+
signal[true, channel],
|
|
63
|
+
kernel_size:,
|
|
64
|
+
power:,
|
|
65
|
+
margin:,
|
|
66
|
+
n_fft:,
|
|
67
|
+
hop_length:,
|
|
68
|
+
return_masks: true
|
|
69
|
+
)
|
|
70
|
+
harmonic[true, channel] = result[0]
|
|
71
|
+
percussive[true, channel] = result[1]
|
|
72
|
+
harmonic_masks << result[2]
|
|
73
|
+
percussive_masks << result[3]
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
return [harmonic, percussive, harmonic_masks, percussive_masks] if return_masks
|
|
77
|
+
|
|
33
78
|
[harmonic, percussive]
|
|
34
79
|
end
|
|
80
|
+
private_class_method :hpss_channels
|
|
81
|
+
|
|
82
|
+
# Validates the HPSS tuning parameters.
#
# @param kernel_size [Integer] must be a positive odd Integer
# @param power [Numeric] must be positive
# @param margin [Numeric, Array<Numeric>] one positive number, or a pair of
#   positive numbers `[harmonic_margin, percussive_margin]`
# @return [nil]
# @raise [Muze::ParameterError] when any parameter is invalid, including a
#   nil or non-numeric `power`
def validate_hpss_params!(kernel_size:, power:, margin:)
  unless kernel_size.is_a?(Integer) && kernel_size.positive? && kernel_size.odd?
    raise Muze::ParameterError, "kernel_size must be a positive odd integer"
  end
  # Guarded like the margin check below so bad input raises ParameterError, not NoMethodError.
  raise Muze::ParameterError, "power must be positive" unless power.respond_to?(:positive?) && power.positive?

  margins = Array(margin)
  unless [1, 2].include?(margins.length)
    raise Muze::ParameterError, "margin must be positive or [harmonic_margin, percussive_margin]"
  end
  return if margins.all? { |value| value.respond_to?(:positive?) && value.positive? }

  raise Muze::ParameterError, "margin must be positive"
end
|
|
92
|
+
private_class_method :validate_hpss_params!
|
|
93
|
+
|
|
94
|
+
def soft_mask(numerator, denominator, power:)
|
|
95
|
+
powered_numerator = numerator**power
|
|
96
|
+
powered_denominator = denominator**power
|
|
97
|
+
powered_numerator / (powered_denominator + 1.0e-12)
|
|
98
|
+
end
|
|
99
|
+
private_class_method :soft_mask
|
|
35
100
|
|
|
36
101
|
def median_filter(matrix, kernel_size, axis:)
|
|
37
102
|
half = kernel_size / 2
|
|
38
103
|
rows, cols = matrix.shape
|
|
39
104
|
output = Numo::SFloat.zeros(rows, cols)
|
|
40
105
|
|
|
41
|
-
|
|
106
|
+
if axis == 1
|
|
107
|
+
rows.times do |row|
|
|
108
|
+
values = cols.times.map { |col| matrix[row, col] }
|
|
109
|
+
sliding_median(values, half).each_with_index { |value, col| output[row, col] = value }
|
|
110
|
+
end
|
|
111
|
+
else
|
|
42
112
|
cols.times do |col|
|
|
43
|
-
values = []
|
|
44
|
-
|
|
45
|
-
start_col = [col - half, 0].max
|
|
46
|
-
end_col = [col + half, cols - 1].min
|
|
47
|
-
(start_col..end_col).each { |index| values << matrix[row, index] }
|
|
48
|
-
else
|
|
49
|
-
start_row = [row - half, 0].max
|
|
50
|
-
end_row = [row + half, rows - 1].min
|
|
51
|
-
(start_row..end_row).each { |index| values << matrix[index, col] }
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
output[row, col] = Muze::Native.median1d(values)
|
|
113
|
+
values = rows.times.map { |row| matrix[row, col] }
|
|
114
|
+
sliding_median(values, half).each_with_index { |value, row| output[row, col] = value }
|
|
55
115
|
end
|
|
56
116
|
end
|
|
57
117
|
|
|
58
118
|
output
|
|
59
119
|
end
|
|
60
120
|
private_class_method :median_filter
|
|
121
|
+
|
|
122
|
+
def sliding_median(values, half)
|
|
123
|
+
Muze::Native.median_filter1d(values, half)
|
|
124
|
+
end
|
|
125
|
+
private_class_method :sliding_median
|
|
61
126
|
end
|
|
62
127
|
end
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Muze
  # Chunk-by-chunk variants of the time-stretch, pitch-shift and HPSS effects.
  #
  # Each chunk is processed together with the last `overlap` frames of the
  # previous working buffer; the part of the result that corresponds to
  # those carried-over frames is dropped before the result is yielded.
  module Effects
    module_function

    # Time-stretches a sequence of audio chunks.
    #
    # @param chunks [#each] audio chunks, each accepted by Muze::Core::Audio.validate_audio!
    # @param rate [Numeric] stretch rate, checked by validate_positive_number!
    # @param n_fft [Integer, nil] forwarded to time_stretch
    # @param hop_length [Integer, nil] forwarded to time_stretch
    # @param method [Symbol] forwarded to time_stretch
    # @param phase_lock [Boolean] forwarded to time_stretch
    # @param force_phase_vocoder [Boolean] forwarded to time_stretch
    # @param overlap [Integer] number of trailing frames carried into the next chunk
    # @yield [stretched] the stretched audio for each non-empty chunk
    # @return [Enumerator, nil] an Enumerator when no block is given, otherwise nil
    def time_stretch_stream(chunks, rate: 1.0, n_fft: nil, hop_length: nil, method: :phase_vocoder, phase_lock: false, force_phase_vocoder: false, overlap: 2048)
      return enum_for(__method__, chunks, rate:, n_fft:, hop_length:, method:, phase_lock:, force_phase_vocoder:, overlap:) unless block_given?

      validate_positive_number!(rate, "rate")
      validate_stream_overlap!(overlap)
      stream_effect_chunks(chunks, overlap:) do |working, prefix_frames|
        stretched = time_stretch(working, rate:, n_fft:, hop_length:, method:, phase_lock:, force_phase_vocoder:)
        # The carried-over prefix is rescaled by the rate. Float division keeps
        # the rounding meaningful when rate is an Integer.
        drop = [(prefix_frames / rate.to_f).round, audio_frame_count(stretched)].min
        yield drop_audio_frames(stretched, drop)
      end
      nil
    end

    # Pitch-shifts a sequence of audio chunks.
    #
    # @param chunks [#each] audio chunks, each accepted by Muze::Core::Audio.validate_audio!
    # @param sr [Integer] forwarded to pitch_shift
    # @param n_steps [Numeric] forwarded to pitch_shift
    # @param bins_per_octave [Integer] forwarded to pitch_shift
    # @param res_type [Symbol] forwarded to pitch_shift
    # @param normalize [Boolean] forwarded to pitch_shift
    # @param clip [Object, nil] forwarded to pitch_shift
    # @param overlap [Integer] number of trailing frames carried into the next chunk
    # @yield [shifted] the shifted audio for each non-empty chunk
    # @return [Enumerator, nil] an Enumerator when no block is given, otherwise nil
    def pitch_shift_stream(chunks, sr: 22_050, n_steps: 0, bins_per_octave: 12, res_type: :auto, normalize: false, clip: nil, overlap: 2048)
      return enum_for(__method__, chunks, sr:, n_steps:, bins_per_octave:, res_type:, normalize:, clip:, overlap:) unless block_given?

      validate_stream_overlap!(overlap)
      stream_effect_chunks(chunks, overlap:) do |working, prefix_frames|
        shifted = pitch_shift(working, sr:, n_steps:, bins_per_octave:, res_type:, normalize:, clip:)
        yield drop_audio_frames(shifted, prefix_frames)
      end
      nil
    end

    # Runs harmonic/percussive separation over a sequence of audio chunks.
    #
    # @param chunks [#each] audio chunks, each accepted by Muze::Core::Audio.validate_audio!
    # @param kernel_size [Integer] forwarded to hpss
    # @param power [Numeric] forwarded to hpss
    # @param margin [Numeric, Array<Numeric>] forwarded to hpss
    # @param n_fft [Integer] forwarded to hpss
    # @param hop_length [Integer] forwarded to hpss
    # @param overlap [Integer] number of trailing frames carried into the next chunk; defaults to n_fft
    # @yield [harmonic, percussive] the two components for each non-empty chunk
    # @return [Enumerator, nil] an Enumerator when no block is given, otherwise nil
    def hpss_stream(chunks, kernel_size: 31, power: 2.0, margin: 1.0, n_fft: 2048, hop_length: 512, overlap: n_fft)
      return enum_for(__method__, chunks, kernel_size:, power:, margin:, n_fft:, hop_length:, overlap:) unless block_given?

      validate_stream_overlap!(overlap)
      stream_effect_chunks(chunks, overlap:) do |working, prefix_frames|
        harmonic, percussive = hpss(working, kernel_size:, power:, margin:, n_fft:, hop_length:)
        yield drop_audio_frames(harmonic, prefix_frames), drop_audio_frames(percussive, prefix_frames)
      end
      nil
    end

    # Drives a streaming effect: validates each chunk, prepends the tail kept
    # from the previous working buffer, and yields the buffer together with
    # the number of prepended frames. Empty chunks are skipped.
    #
    # @param chunks [#each] audio chunks
    # @param overlap [Integer] number of trailing frames to keep between chunks
    # @yield [working, prefix_frames]
    def stream_effect_chunks(chunks, overlap:)
      tail = nil
      chunks.each do |chunk|
        signal = Muze::Core::Audio.validate_audio!(chunk, allow_empty: true)
        next if signal.empty?

        working = tail ? concat_audio(tail, signal) : signal
        prefix_frames = tail ? audio_frame_count(tail) : 0
        yield working, prefix_frames
        tail = overlap.positive? ? take_audio_tail(working, overlap) : nil
      end
    end
    private_class_method :stream_effect_chunks

    # @param signal [#ndim, #shape, #size] audio buffer
    # @return [Integer] the first dimension of a two-dimensional buffer,
    #   otherwise the total element count
    def audio_frame_count(signal)
      signal.ndim == 2 ? signal.shape[0] : signal.size
    end
    private_class_method :audio_frame_count

    # Joins two audio buffers along the frame axis.
    #
    # @param left [Object, nil] leading buffer
    # @param right [Object] trailing buffer
    # @return [Object] `right` when `left` is nil or empty, `left` when `right`
    #   is empty, otherwise a new Numo::SFloat holding both
    # @raise [Muze::ParameterError] when exactly one buffer is two-dimensional
    #   or their second dimensions differ
    def concat_audio(left, right)
      return right if left.nil? || left.empty?
      return left if right.empty?

      if left.ndim == 2 || right.ndim == 2
        raise Muze::ParameterError, "chunk channel counts must match" unless left.ndim == 2 && right.ndim == 2 && left.shape[1] == right.shape[1]

        left_frames = left.shape[0]
        total_frames = left_frames + right.shape[0]
        output = Numo::SFloat.zeros(total_frames, left.shape[1])
        output[0...left_frames, true] = left
        output[left_frames...total_frames, true] = right
        return output
      end

      Numo::SFloat.cast(left.to_a + right.to_a)
    end
    private_class_method :concat_audio

    # @param signal [Object] audio buffer
    # @param count [Integer] number of trailing frames wanted
    # @return [Object] the last `count` frames, or the whole buffer when it is shorter
    def take_audio_tail(signal, count)
      start = [audio_frame_count(signal) - count, 0].max
      drop_audio_frames(signal, start)
    end
    private_class_method :take_audio_tail

    # @param signal [Object] audio buffer
    # @param count [Integer] number of leading frames to remove; clamped to
    #   the range 0..frame count
    # @return [Object] the buffer from frame `count` onwards
    def drop_audio_frames(signal, count)
      frames = audio_frame_count(signal)
      start = [[count, 0].max, frames].min
      return signal[start...frames, true] if signal.ndim == 2

      signal[start...frames]
    end
    private_class_method :drop_audio_frames

    # @param overlap [Object] candidate overlap value
    # @return [nil]
    # @raise [Muze::ParameterError] unless overlap is a non-negative Integer
    def validate_stream_overlap!(overlap)
      return if overlap.is_a?(Integer) && overlap >= 0

      raise Muze::ParameterError, "overlap must be a non-negative integer"
    end
    private_class_method :validate_stream_overlap!
  end
end
|