muze 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -1
  3. data/README.md +5 -0
  4. data/Rakefile +3 -0
  5. data/ext/muze/muze_ext.c +129 -12
  6. data/lib/muze/beat/beat_track.rb +93 -11
  7. data/lib/muze/core/audio.rb +129 -0
  8. data/lib/muze/core/cache.rb +38 -0
  9. data/lib/muze/core/dct.rb +24 -21
  10. data/lib/muze/core/frames.rb +31 -0
  11. data/lib/muze/core/matrix.rb +23 -0
  12. data/lib/muze/core/resample.rb +111 -19
  13. data/lib/muze/core/stft.rb +312 -52
  14. data/lib/muze/core/windows.rb +113 -17
  15. data/lib/muze/display/specshow.rb +307 -41
  16. data/lib/muze/effects/harmonic_percussive.rb +83 -18
  17. data/lib/muze/effects/streaming.rb +101 -0
  18. data/lib/muze/effects/time_stretch.rb +353 -36
  19. data/lib/muze/feature/aggregation.rb +49 -0
  20. data/lib/muze/feature/chroma.rb +43 -15
  21. data/lib/muze/feature/context.rb +81 -0
  22. data/lib/muze/feature/mfcc.rb +78 -38
  23. data/lib/muze/feature/spectral.rb +258 -39
  24. data/lib/muze/filters/chroma_filter.rb +21 -2
  25. data/lib/muze/filters/mel.rb +47 -1
  26. data/lib/muze/io/audio_loader/ffmpeg_backend.rb +179 -15
  27. data/lib/muze/io/audio_loader/wavify_backend.rb +118 -11
  28. data/lib/muze/io/audio_loader.rb +178 -48
  29. data/lib/muze/io/audio_writer.rb +48 -0
  30. data/lib/muze/native.rb +91 -8
  31. data/lib/muze/onset/onset_detect.rb +114 -23
  32. data/lib/muze/version.rb +1 -1
  33. data/lib/muze.rb +237 -60
  34. metadata +11 -21
  35. data/benchmarks/baseline.json +0 -24
  36. data/benchmarks/native_vs_ruby.rb +0 -23
  37. data/benchmarks/quality_metrics.rb +0 -265
  38. data/benchmarks/quality_thresholds.md +0 -28
  39. data/benchmarks/support/fixture_library.rb +0 -107
@@ -11,41 +11,46 @@ module Muze
11
11
  # @param y_axis [Symbol]
12
12
  # @param output [String, nil]
13
13
  # @return [String] SVG content
14
- def specshow(data, sr: 22_050, hop_length: 512, x_axis: :time, y_axis: :linear, output: nil)
15
- _ = [sr, hop_length]
14
+ def specshow(data, sr: 22_050, hop_length: 512, x_axis: :time, y_axis: :linear, output: nil, width: 800, height: 400, cmap: :heat, vmin: nil, vmax: nil, fragment: false, render: :auto)
16
15
  validate_axis!(x_axis:, y_axis:)
16
+ raise Muze::ParameterError, "width and height must be positive" unless width.positive? && height.positive?
17
+ raise Muze::ParameterError, "sr and hop_length must be positive" unless sr.positive? && hop_length.positive?
18
+ raise Muze::ParameterError, "render must be :auto, :rects, or :image" unless %i[auto rects image].include?(render)
17
19
 
18
20
  matrix = Numo::SFloat.cast(data)
19
21
  matrix = matrix.expand_dims(1) if matrix.ndim == 1
22
+ validate_matrix!(matrix)
23
+ validate_color_bounds!(vmin:, vmax:)
24
+ render = image_render?(matrix, render:) ? :image : :rects
25
+ matrix = downsample_matrix(matrix, max_cells: 12_000) if render == :rects
20
26
  rows, cols = matrix.shape
21
27
 
22
- width = 800.0
23
- height = 400.0
24
- cell_width = width / [cols, 1].max
25
- cell_height = height / [rows, 1].max
26
- min = matrix.min
27
- max = matrix.max
28
+ width = width.to_f
29
+ height = height.to_f
30
+ min = vmin || matrix.min
31
+ max = vmax || matrix.max
28
32
  range = [max - min, 1.0e-12].max
29
33
 
30
- rects = []
31
- rows.times do |row|
32
- cols.times do |col|
33
- normalized = (matrix[row, col] - min) / range
34
- color = heat_color(normalized)
35
- x = col * cell_width
36
- y = (rows - row - 1) * cell_height
37
- rects << "<rect x='#{x.round(3)}' y='#{y.round(3)}' width='#{cell_width.round(3)}' height='#{cell_height.round(3)}' fill='#{color}' />"
38
- end
39
- end
34
+ content = render == :image ? image_element(matrix, width:, height:, min:, range:, cmap:) : rect_elements(matrix, rows:, cols:, width:, height:, min:, range:, cmap:, y_axis:, x_axis:, sr:, hop_length:)
40
35
 
41
- svg = [
36
+ body = [
37
+ "<g data-x-axis='#{x_axis}' data-y-axis='#{y_axis}' data-sr='#{sr}' data-hop-length='#{hop_length}' data-render='#{render}'>",
38
+ content,
39
+ "</g>"
40
+ ].join
41
+
42
+ svg = if fragment
43
+ body
44
+ else
45
+ [
42
46
  "<svg xmlns='http://www.w3.org/2000/svg' width='#{width.to_i}' height='#{height.to_i}' viewBox='0 0 #{width.to_i} #{height.to_i}'>",
43
47
  "<rect width='100%' height='100%' fill='#0b132b' />",
44
- rects.join,
48
+ body,
45
49
  "</svg>"
46
- ].join
50
+ ].join
51
+ end
47
52
 
48
- File.write(output, svg) if output
53
+ write_output(output, svg) if output
49
54
  svg
50
55
  end
51
56
 
@@ -53,32 +58,68 @@ module Muze
53
58
  # @param sr [Integer]
54
59
  # @param output [String, nil]
55
60
  # @return [String] SVG content
56
- def waveshow(y, sr: 22_050, output: nil)
57
- _ = sr
58
- signal = y.is_a?(Numo::NArray) ? y.to_a : Array(y)
59
- width = 800.0
60
- height = 240.0
61
+ def waveshow(y, sr: 22_050, output: nil, width: 800, height: 240, normalize: true, channels: :overlay)
62
+ raise Muze::ParameterError, "width and height must be positive" unless width.positive? && height.positive?
63
+ raise Muze::ParameterError, "sr must be positive" unless sr.positive?
64
+ raise Muze::ParameterError, "normalize must be true or false" unless [true, false].include?(normalize)
65
+ raise Muze::ParameterError, "channels must be :overlay or :split" unless %i[overlay split].include?(channels)
66
+
67
+ signal = Muze::Core::Audio.validate_audio!(y, allow_empty: true).to_a
68
+ channel_data = signal.first.is_a?(Array) ? transpose_channels(signal) : [signal]
69
+ channel_data = channel_data.map { |channel| normalize ? normalize_wave(channel) : channel }
70
+ width = width.to_f
71
+ height = height.to_f
61
72
  middle = height / 2.0
62
- step = [signal.length.to_f / width, 1.0].max
73
+ paths = channel_data.each_with_index.map do |channel, index|
74
+ top, lane_height = channel_lane(index, channel_data.length, height, channels:)
75
+ envelope_path(channel, width:, top:, height: lane_height)
76
+ end
63
77
 
64
- points = []
65
- x = 0
66
- while x < width
67
- sample_index = [((x * step).floor), signal.length - 1].min
68
- value = signal[sample_index] || 0.0
69
- y_pos = middle - (value * middle * 0.9)
70
- points << "#{x.round(2)},#{y_pos.round(2)}"
71
- x += 1
78
+ svg = [
79
+ "<svg xmlns='http://www.w3.org/2000/svg' width='#{width.to_i}' height='#{height.to_i}' viewBox='0 0 #{width.to_i} #{height.to_i}'>",
80
+ "<rect width='100%' height='100%' fill='#111827' />",
81
+ "<g data-sr='#{sr}' data-channels='#{channels}' transform='translate(0 #{middle * 0.0})'>",
82
+ paths.join,
83
+ "</g>",
84
+ "</svg>"
85
+ ].join
86
+
87
+ write_output(output, svg) if output
88
+ svg
89
+ end
90
+
91
+ # @return [String] SVG content
92
+ def onsetshow(onset_envelope, sr: 22_050, hop_length: 512, output: nil, width: 800, height: 160, normalize: true)
93
+ raise Muze::ParameterError, "width and height must be positive" unless width.positive? && height.positive?
94
+ raise Muze::ParameterError, "sr and hop_length must be positive" unless sr.positive? && hop_length.positive?
95
+ raise Muze::ParameterError, "normalize must be true or false" unless [true, false].include?(normalize)
96
+
97
+ envelope = Numo::SFloat.cast(onset_envelope).to_a.flatten
98
+ raise Muze::ParameterError, "onset envelope must contain only finite values" unless envelope.all? { |value| value.respond_to?(:finite?) && value.finite? }
99
+
100
+ peak = envelope.map(&:abs).max.to_f
101
+ values = normalize && peak.positive? ? envelope.map { |value| value / peak } : envelope
102
+ width = width.to_f
103
+ height = height.to_f
104
+ bar_width = width / [values.length, 1].max
105
+ bars = values.each_with_index.map do |value, index|
106
+ scaled = [[value, 0.0].max, 1.0].min
107
+ bar_height = scaled * height
108
+ x = index * bar_width
109
+ y = height - bar_height
110
+ "<rect x='#{x.round(3)}' y='#{y.round(3)}' width='#{[bar_width, 0.1].max.round(3)}' height='#{bar_height.round(3)}' fill='#22d3ee' />"
72
111
  end
73
112
 
74
113
  svg = [
75
114
  "<svg xmlns='http://www.w3.org/2000/svg' width='#{width.to_i}' height='#{height.to_i}' viewBox='0 0 #{width.to_i} #{height.to_i}'>",
76
115
  "<rect width='100%' height='100%' fill='#111827' />",
77
- "<polyline fill='none' stroke='#22d3ee' stroke-width='1.5' points='#{points.join(' ')}' />",
116
+ "<g data-kind='onset' data-sr='#{sr}' data-hop-length='#{hop_length}'>",
117
+ bars.join,
118
+ "</g>",
78
119
  "</svg>"
79
120
  ].join
80
121
 
81
- File.write(output, svg) if output
122
+ write_output(output, svg) if output
82
123
  svg
83
124
  end
84
125
 
@@ -88,13 +129,238 @@ module Muze
88
129
  end
89
130
  private_class_method :validate_axis!
90
131
 
91
- def heat_color(value)
132
# Ensures every cell of the spectrogram matrix is a finite number.
#
# @param matrix [#to_a] matrix-like object convertible to (nested) arrays
# @return [nil]
# @raise [Muze::ParameterError] when any cell is non-numeric, NaN or infinite
def validate_matrix!(matrix)
  invalid = matrix.to_a.flatten.any? do |cell|
    !(cell.respond_to?(:finite?) && cell.finite?)
  end
  raise Muze::ParameterError, "data must contain only finite numeric values" if invalid
end
137
+ private_class_method :validate_matrix!
138
+
139
# Validates the optional colour-scale limits.
#
# @param vmin [Numeric, nil] lower colour bound; nil means "derive from data"
# @param vmax [Numeric, nil] upper colour bound; nil means "derive from data"
# @return [nil]
# @raise [Muze::ParameterError] when a bound is not finite or vmin exceeds vmax
def validate_color_bounds!(vmin:, vmax:)
  { vmin: vmin, vmax: vmax }.each do |label, bound|
    acceptable = bound.nil? || (bound.respond_to?(:finite?) && bound.finite?)
    raise Muze::ParameterError, "#{label} must be finite" unless acceptable
  end

  raise Muze::ParameterError, "vmin must be <= vmax" if vmin && vmax && vmin > vmax
end
149
+ private_class_method :validate_color_bounds!
150
+
151
# Maps a normalized value to an SVG `rgb(r,g,b)` colour string.
#
# Delegates the colormap lookup to `color_tuple_for` so the list of supported
# colormaps lives in exactly one place.
#
# @param value [Numeric] normalized intensity (the colormap helpers clamp it)
# @param cmap [Symbol] colormap name accepted by `color_tuple_for`
# @return [String] colour in `rgb(r,g,b)` form
# @raise [Muze::ParameterError] when the colormap is not supported
def color_for(value, cmap:)
  rgb_string(color_tuple_for(value, cmap:))
end
160
+ private_class_method :color_for
161
+
162
# Resolves a colormap name and returns the colour as an `[r, g, b]` triple.
#
# @param value [Numeric] normalized intensity handed to the colormap helper
# @param cmap [Symbol] one of :heat, :gray, :grey or :magma
# @return [Array<Integer>] red, green and blue components
# @raise [Muze::ParameterError] when the colormap is not supported
def color_tuple_for(value, cmap:)
  return heat_rgb(value) if cmap == :heat
  return gray_rgb(value) if %i[gray grey].include?(cmap)
  return magma_rgb(value) if cmap == :magma

  raise Muze::ParameterError, "unsupported cmap"
end
171
+ private_class_method :color_tuple_for
172
+
173
# Formats an `[r, g, b]` triple as an SVG colour string.
#
# @param tuple [Array<Numeric>] red, green and blue components
# @return [String] colour in `rgb(r,g,b)` form
def rgb_string(tuple)
  red = tuple[0]
  green = tuple[1]
  blue = tuple[2]
  format("rgb(%d,%d,%d)", red, green, blue)
end
176
+ private_class_method :rgb_string
177
+
178
+ def heat_rgb(value)
92
179
  clamped = [[value, 0.0].max, 1.0].min
93
180
  r = (255 * clamped).to_i
94
181
  g = (255 * (1.0 - (clamped - 0.5).abs * 2.0)).to_i
95
182
  b = (255 * (1.0 - clamped)).to_i
96
- format("rgb(%<r>d,%<g>d,%<b>d)", r:, g: [g, 0].max, b:)
183
+ [r, [g, 0].max, b]
184
+ end
185
+ private_class_method :heat_rgb
186
+
187
# Grayscale colormap: the same intensity on all three channels.
#
# @param value [Numeric] normalized intensity; clamped to 0.0..1.0
# @return [Array<Integer>] `[level, level, level]` with level in 0..255
def gray_rgb(value)
  floored = [value, 0.0].max
  bounded = [floored, 1.0].min
  shade = (255 * bounded).to_i
  Array.new(3, shade)
end
191
+ private_class_method :gray_rgb
192
+
193
# Magma-like colormap approximated with simple per-channel curves.
#
# @param value [Numeric] normalized intensity; clamped to 0.0..1.0
# @return [Array<Integer>] red, green and blue components
def magma_rgb(value)
  level = [[value, 0.0].max, 1.0].min
  [
    (252 * level).to_i,
    (80 * (level**1.5)).to_i,
    (120 * (1.0 - level) + 40).to_i
  ]
end
200
+ private_class_method :magma_rgb
201
+
202
# Renders the matrix as one SVG `<rect>` per cell.
#
# Column geometry (x offset and width) depends only on the column index, so
# it is computed once up front instead of once per cell.
#
# @param matrix [#[]] 2-D matrix indexed as `matrix[row, col]`
# @param rows [Integer] number of matrix rows
# @param cols [Integer] number of matrix columns
# @param width [Float] drawing width
# @param height [Float] drawing height
# @param min [Numeric] value mapped to the bottom of the colour scale
# @param range [Numeric] span of the colour scale (non-zero)
# @param cmap [Symbol] colormap name passed to `color_for`
# @param y_axis [Symbol] vertical axis scaling passed to `y_position`
# @param x_axis [Symbol] horizontal axis scaling passed to `x_position`
# @param sr [Numeric] sample rate
# @param hop_length [Numeric] hop length in samples
# @return [String] concatenated `<rect>` elements
def rect_elements(matrix, rows:, cols:, width:, height:, min:, range:, cmap:, y_axis:, x_axis:, sr:, hop_length:)
  columns = Array.new(cols) do |col|
    left = x_position(col, cols, width, hop_length:, sr:, x_axis:)
    right = x_position(col + 1, cols, width, hop_length:, sr:, x_axis:)
    # Keep a minimum width so zero-width cells stay visible.
    [left.round(3), [right - left, 0.1].max.round(3)]
  end

  rects = []
  rows.times do |row|
    y_top = y_position(row + 1, rows, height, y_axis:, sr:)
    y_bottom = y_position(row, rows, height, y_axis:, sr:)
    y_attr = y_top.round(3)
    height_attr = [y_bottom - y_top, 0.1].max.round(3)

    columns.each_with_index do |(x_attr, width_attr), col|
      color = color_for((matrix[row, col] - min) / range, cmap:)
      rects << "<rect x='#{x_attr}' y='#{y_attr}' width='#{width_attr}' height='#{height_attr}' fill='#{color}' />"
    end
  end
  rects.join
end
219
+ private_class_method :rect_elements
220
+
221
# Decides whether the spectrogram is embedded as a bitmap image.
#
# @param matrix [#shape] matrix whose shape gives the cell count
# @param render [Symbol] :image, :rects, or anything else for automatic choice
# @return [Boolean] true for image rendering; in automatic mode, true once
#   the matrix has more than 12_000 cells
def image_render?(matrix, render:)
  case render
  when :image then true
  when :rects then false
  else matrix.shape.reduce(:*) > 12_000
  end
end
227
+ private_class_method :image_render?
228
+
229
# Renders the matrix as a single SVG `<image>` with an inline BMP data URI.
#
# @param matrix [Object] matrix forwarded to `bmp_bytes`
# @param width [Numeric] drawing width (truncated to an integer attribute)
# @param height [Numeric] drawing height (truncated to an integer attribute)
# @param min [Numeric] value mapped to the bottom of the colour scale
# @param range [Numeric] span of the colour scale
# @param cmap [Symbol] colormap name
# @return [String] the `<image>` element
def image_element(matrix, width:, height:, min:, range:, cmap:)
  encoded = base64_encode(bmp_bytes(matrix, min:, range:, cmap:))
  href = "data:image/bmp;base64,#{encoded}"
  # preserveAspectRatio='none' stretches the bitmap to fill the drawing area.
  "<image x='0' y='0' width='#{width.to_i}' height='#{height.to_i}' preserveAspectRatio='none' href='#{href}' />"
end
234
+ private_class_method :image_element
235
+
236
# Encodes the matrix as an uncompressed 24-bit BMP.
#
# BMP files with a positive height store scanlines bottom-up, so matrix row 0
# is written first and ends up at the bottom of the picture. This matches the
# rect renderer, which also draws row 0 at the bottom.
#
# @param matrix [#shape, #[]] 2-D matrix indexed as `matrix[row, col]`
# @param min [Numeric] value mapped to the bottom of the colour scale
# @param range [Numeric] span of the colour scale (non-zero)
# @param cmap [Symbol] colormap name passed to `color_tuple_for`
# @return [String] binary BMP file contents
def bmp_bytes(matrix, min:, range:, cmap:)
  rows, cols = matrix.shape
  # Each scanline is padded to a multiple of four bytes.
  row_stride = ((cols * 3) + 3) & ~3
  pixel_bytes = row_stride * rows
  file_size = 54 + pixel_bytes

  # 14-byte file header followed by the 40-byte BITMAPINFOHEADER.
  header = String.new("BM", encoding: Encoding::BINARY)
  header << [file_size, 0, 54].pack("V V V")
  header << [40, cols, rows, 1, 24, 0, pixel_bytes, 2835, 2835, 0, 0].pack("V V V v v V V V V V V")

  padding = "\x00".b * (row_stride - (cols * 3))
  # String.new yields a binary (ASCII-8BIT) buffer, the right encoding for raw bytes.
  pixels = String.new(capacity: pixel_bytes)

  rows.times do |row|
    cols.times do |col|
      r, g, b = color_tuple_for((matrix[row, col] - min) / range, cmap:)
      # BMP pixels are stored in blue-green-red order.
      pixels << [b, g, r].pack("C3")
    end
    pixels << padding
  end

  header << pixels
end
257
+ private_class_method :bmp_bytes
258
+
259
+ BASE64_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
260
+
261
# Base64-encodes raw bytes (standard alphabet, `=` padding, no line breaks).
#
# Uses the core `Array#pack` "m0" directive rather than a hand-written
# encoder; the output is the same.
#
# @param bytes [String] raw bytes to encode
# @return [String] Base64 text
def base64_encode(bytes)
  [bytes].pack("m0")
end
278
+ private_class_method :base64_encode
279
+
280
# Horizontal pixel offset of the left edge of a column.
#
# @param col [Integer] column index (may equal `cols` for the right edge)
# @param cols [Integer] total number of columns
# @param width [Numeric] drawing width
# @param hop_length [Numeric] hop length in samples
# @param sr [Numeric] sample rate
# @param x_axis [Symbol] :frames for frame-index spacing; any other value
#   places columns by elapsed time
# @return [Numeric] x coordinate
def x_position(col, cols, width, hop_length:, sr:, x_axis:)
  if x_axis == :frames
    frame_total = [cols, 1].max
    col * width / frame_total
  else
    hop = hop_length.to_f
    # Guard against a zero duration when there are no columns.
    duration = [cols * hop / sr, 1.0e-12].max
    elapsed = col * hop / sr
    elapsed * width / duration
  end
end
286
+ private_class_method :x_position
287
+
288
# Vertical pixel coordinate of the lower edge of a row (row 0 is the bottom).
#
# @param row [Integer] row index (may equal `rows` for the top edge)
# @param rows [Integer] total number of rows
# @param height [Numeric] drawing height
# @param y_axis [Symbol] :linear, :hz, :mel or :log
# @param sr [Numeric] sample rate (used by the :mel scale)
# @return [Numeric] y coordinate, measured from the top of the drawing
# @raise [Muze::ParameterError] when `y_axis` is not a supported scale
def y_position(row, rows, height, y_axis:, sr:)
  divisor = [rows, 1].max
  normalized =
    case y_axis
    when :linear, :hz
      row.to_f / divisor
    when :mel
      hz = row * (sr / 2.0) / divisor
      Muze::Filters.hz_to_mel(hz) / Muze::Filters.hz_to_mel(sr / 2.0)
    when :log
      # log10(1 + 9t) maps t in 0..1 onto 0..1.
      Math.log10(1.0 + (9.0 * row / divisor.to_f))
    else
      # Without this branch `normalized` would be nil and fail below with NoMethodError.
      raise Muze::ParameterError, "unsupported y_axis: #{y_axis.inspect}"
    end

  height - (normalized * height)
end
300
+ private_class_method :y_position
301
+
302
# Thins out columns so the matrix holds roughly `max_cells` cells at most.
#
# Only columns are dropped (every `stride`-th one is kept); rows are left
# untouched.
#
# @param matrix [Numo::NArray] 2-D matrix
# @param max_cells [Integer] target upper bound for rows * columns
# @return [Numo::NArray] the original matrix when already small enough,
#   otherwise a new Numo::SFloat holding the kept columns
def downsample_matrix(matrix, max_cells:)
  row_count, col_count = matrix.shape
  cell_count = row_count * col_count
  return matrix unless cell_count > max_cells

  stride = [(cell_count / max_cells.to_f).ceil, 1].max
  kept_columns = (0...col_count).select { |col| (col % stride).zero? }
  reduced = Numo::SFloat.zeros(row_count, kept_columns.length)
  kept_columns.each_with_index do |source_col, target_col|
    reduced[true, target_col] = matrix[true, source_col]
  end
  reduced
end
312
+ private_class_method :downsample_matrix
313
+
314
# Converts frame-major samples (`[[l, r], [l, r], ...]`) to channel-major
# arrays (`[[l, l, ...], [r, r, ...]]`).
#
# @param samples [Array<Array<Numeric>>] one inner array per frame
# @return [Array<Array<Numeric>>] one inner array per channel; the channel
#   count is taken from the first frame
def transpose_channels(samples)
  width = samples.first.length
  (0...width).map do |channel_index|
    samples.map { |frame| frame[channel_index] }
  end
end
318
+ private_class_method :transpose_channels
319
+
320
# Scales a channel so its largest absolute sample becomes 1.
#
# @param channel [Array<Numeric>] samples of one channel
# @return [Array<Numeric>] scaled samples; the input itself when it is empty
#   or silent
def normalize_wave(channel)
  loudest = channel.map(&:abs).max
  return channel if loudest.nil? || loudest <= 0.0

  channel.map { |sample| sample / loudest }
end
326
+ private_class_method :normalize_wave
327
+
328
# Vertical band in which one channel's waveform is drawn.
#
# @param index [Integer] zero-based channel index
# @param count [Integer] number of channels
# @param height [Numeric] total drawing height
# @param channels [Symbol] :overlay draws every channel over the full height;
#   any other value stacks the channels in equal lanes
# @return [Array(Numeric, Numeric)] top offset and lane height
def channel_lane(index, count, height, channels:)
  if channels == :overlay
    [0.0, height]
  else
    per_channel = height / count
    [index * per_channel, per_channel]
  end
end
334
+ private_class_method :channel_lane
335
+
336
# Draws one channel as a min/max envelope: for each pixel column a vertical
# stroke spans the smallest and largest sample that fall into that column.
#
# @param channel [Array<Numeric>] samples of one channel
# @param width [Numeric] drawing width; one stroke per whole pixel column
# @param top [Numeric] top offset of the channel's lane
# @param height [Numeric] height of the channel's lane
# @return [String] an SVG `<path>` element
def envelope_path(channel, width:, top:, height:)
  center = top + (height / 2.0)
  samples_per_column = [channel.length.to_f / width, 1.0].max

  strokes = 0.step.take_while { |column| column < width }.map do |column|
    first = (column * samples_per_column).floor
    last = [((column + 1) * samples_per_column).ceil, channel.length].min
    # Columns past the end of the signal are drawn as silence.
    bucket = channel[first...last] || [0.0]
    low = bucket.min || 0.0
    high = bucket.max || 0.0
    y_low = center - (low * height * 0.45)
    y_high = center - (high * height * 0.45)
    "M #{column} #{y_low.round(2)} L #{column} #{y_high.round(2)}"
  end

  "<path d='#{strokes.join(' ')}' fill='none' stroke='#22d3ee' stroke-width='1.2' />"
end
356
+ private_class_method :envelope_path
357
+
358
# Writes the SVG document to disk.
#
# @param output [String, #to_path] destination; path-like objects are
#   converted with `to_path`
# @param svg [String] SVG content
# @return [Integer] number of bytes written
# @raise [Muze::Error] when the operating system rejects the write
def write_output(output, svg)
  path = output
  path = output.to_path if output.respond_to?(:to_path)
  File.write(path, svg)
rescue SystemCallError => e
  raise Muze::Error, "Failed to write SVG output #{path}: #{e.message}"
end
98
- private_class_method :heat_color
364
+ private_class_method :write_output
99
365
  end
100
366
  end
@@ -11,8 +11,20 @@ module Muze
11
11
  # @param n_fft [Integer]
12
12
  # @param hop_length [Integer]
13
13
  # @return [Array(Numo::SFloat, Numo::SFloat)] harmonic and percussive waveforms
14
- def hpss(y, kernel_size: 31, power: 2.0, margin: 1.0, n_fft: 2048, hop_length: 512)
15
- stft_matrix = Muze.stft(y, n_fft:, hop_length:)
14
# Harmonic/percussive source separation.
#
# Validates the parameters and the audio, then dispatches to the
# multi-channel or mono implementation depending on the signal's rank.
#
# @param y [Object] audio accepted by `Muze::Core::Audio.validate_audio!`
# @param kernel_size [Integer] median filter length (positive and odd)
# @param power [Numeric] exponent applied to the median-filtered magnitudes
# @param margin [Numeric, Array<Numeric>] one margin, or
#   `[harmonic_margin, percussive_margin]`
# @param n_fft [Integer]
# @param hop_length [Integer]
# @param return_masks [Boolean] also return the harmonic and percussive masks
# @return [Array] harmonic and percussive waveforms, followed by the masks
#   when `return_masks` is true
# @raise [Muze::ParameterError] when a parameter is invalid
def hpss(y, kernel_size: 31, power: 2.0, margin: 1.0, n_fft: 2048, hop_length: 512, return_masks: false)
  validate_hpss_params!(kernel_size:, power:, margin:)
  unless [true, false].include?(return_masks)
    raise Muze::ParameterError, "return_masks must be true or false"
  end

  signal = Muze::Core::Audio.validate_audio!(y, allow_empty: true)
  options = { kernel_size:, power:, margin:, n_fft:, hop_length:, return_masks: }

  if signal.ndim == 2
    hpss_channels(signal, **options)
  else
    hpss_mono(signal, **options)
  end
end
23
+
24
+ def hpss_mono(signal, kernel_size:, power:, margin:, n_fft:, hop_length:, return_masks:)
25
+ signal = Numo::SFloat.cast(signal)
26
+
27
+ stft_matrix = Muze.stft(signal, n_fft:, hop_length:)
16
28
  magnitude, = Muze.magphase(stft_matrix)
17
29
 
18
30
  harmonic_median = median_filter(magnitude, kernel_size, axis: 1)
@@ -20,43 +32,96 @@ module Muze
20
32
 
21
33
  harmonic_weight = harmonic_median**power
22
34
  percussive_weight = percussive_median**power
35
+ harmonic_margin, percussive_margin = Array(margin)
36
+ harmonic_margin ||= margin
37
+ percussive_margin ||= harmonic_margin
23
38
 
24
- harmonic_mask = harmonic_weight / (harmonic_weight + (margin * percussive_weight) + 1.0e-12)
25
- percussive_mask = percussive_weight / (percussive_weight + (margin * harmonic_weight) + 1.0e-12)
39
+ harmonic_mask = soft_mask(harmonic_weight, harmonic_weight + (harmonic_margin * percussive_weight), power: 1.0)
40
+ percussive_mask = soft_mask(percussive_weight, percussive_weight + (percussive_margin * harmonic_weight), power: 1.0)
26
41
 
27
42
  harmonic_stft = stft_matrix * harmonic_mask
28
43
  percussive_stft = stft_matrix * percussive_mask
29
44
 
30
- signal = y.is_a?(Numo::NArray) ? y : Numo::SFloat.cast(y)
31
45
  harmonic = Muze.istft(harmonic_stft, hop_length:, length: signal.size)
32
46
  percussive = Muze.istft(percussive_stft, hop_length:, length: signal.size)
47
+ return [harmonic, percussive, harmonic_mask, percussive_mask] if return_masks
48
+
49
+ [harmonic, percussive]
50
+ end
51
+ private_class_method :hpss_mono
52
+
53
# Runs mono HPSS on every channel of a `(frames, channels)` signal.
#
# @param signal [Numo::NArray] 2-D audio, one column per channel
# @param kernel_size [Integer]
# @param power [Numeric]
# @param margin [Numeric, Array<Numeric>]
# @param n_fft [Integer]
# @param hop_length [Integer]
# @param return_masks [Boolean] also return the per-channel masks
# @return [Array] harmonic and percussive signals shaped like the input,
#   followed by arrays of per-channel harmonic and percussive masks when
#   `return_masks` is truthy
def hpss_channels(signal, kernel_size:, power:, margin:, n_fft:, hop_length:, return_masks:)
  frame_count, channel_count = signal.shape
  harmonic = Numo::SFloat.zeros(frame_count, channel_count)
  percussive = Numo::SFloat.zeros(frame_count, channel_count)

  # Masks are always requested from the mono routine and dropped at the end
  # if the caller did not ask for them.
  mask_pairs = Array.new(channel_count) do |index|
    channel_harmonic, channel_percussive, harmonic_mask, percussive_mask = hpss_mono(
      signal[true, index],
      kernel_size:, power:, margin:, n_fft:, hop_length:,
      return_masks: true
    )
    harmonic[true, index] = channel_harmonic
    percussive[true, index] = channel_percussive
    [harmonic_mask, percussive_mask]
  end

  return [harmonic, percussive] unless return_masks

  [harmonic, percussive, mask_pairs.map(&:first), mask_pairs.map(&:last)]
end
80
+ private_class_method :hpss_channels
81
+
82
# Validates the HPSS tuning parameters.
#
# @param kernel_size [Integer] must be a positive odd integer
# @param power [Numeric] must be positive
# @param margin [Numeric, Array<Numeric>] one positive margin, or a pair
#   `[harmonic_margin, percussive_margin]` of positive margins
# @return [nil]
# @raise [Muze::ParameterError] when any parameter is invalid, including
#   non-numeric values
def validate_hpss_params!(kernel_size:, power:, margin:)
  unless kernel_size.is_a?(Integer) && kernel_size.positive? && kernel_size.odd?
    raise Muze::ParameterError, "kernel_size must be a positive odd integer"
  end
  # respond_to? guard: nil or a String should give a ParameterError, not NoMethodError.
  unless power.respond_to?(:positive?) && power.positive?
    raise Muze::ParameterError, "power must be positive"
  end

  margins = Array(margin)
  unless [1, 2].include?(margins.length)
    raise Muze::ParameterError, "margin must be positive or [harmonic_margin, percussive_margin]"
  end
  return if margins.all? { |value| value.respond_to?(:positive?) && value.positive? }

  raise Muze::ParameterError, "margin must be positive"
end
92
+ private_class_method :validate_hpss_params!
93
+
94
# Soft mask `numerator^power / (denominator^power + epsilon)`.
#
# @param numerator [Numeric, Numo::NArray] energy of the wanted component
# @param denominator [Numeric, Numo::NArray] reference energy
# @param power [Numeric] exponent applied to both operands
# @return [Numeric, Numo::NArray] mask values; the small epsilon avoids
#   division by zero
def soft_mask(numerator, denominator, power:)
  (numerator**power) / ((denominator**power) + 1.0e-12)
end
99
+ private_class_method :soft_mask
35
100
 
36
101
  def median_filter(matrix, kernel_size, axis:)
37
102
  half = kernel_size / 2
38
103
  rows, cols = matrix.shape
39
104
  output = Numo::SFloat.zeros(rows, cols)
40
105
 
41
- rows.times do |row|
106
+ if axis == 1
107
+ rows.times do |row|
108
+ values = cols.times.map { |col| matrix[row, col] }
109
+ sliding_median(values, half).each_with_index { |value, col| output[row, col] = value }
110
+ end
111
+ else
42
112
  cols.times do |col|
43
- values = []
44
- if axis == 1
45
- start_col = [col - half, 0].max
46
- end_col = [col + half, cols - 1].min
47
- (start_col..end_col).each { |index| values << matrix[row, index] }
48
- else
49
- start_row = [row - half, 0].max
50
- end_row = [row + half, rows - 1].min
51
- (start_row..end_row).each { |index| values << matrix[index, col] }
52
- end
53
-
54
- output[row, col] = Muze::Native.median1d(values)
113
+ values = rows.times.map { |row| matrix[row, col] }
114
+ sliding_median(values, half).each_with_index { |value, row| output[row, col] = value }
55
115
  end
56
116
  end
57
117
 
58
118
  output
59
119
  end
60
120
  private_class_method :median_filter
121
+
122
# Median-filters a sequence by delegating to `Muze::Native.median_filter1d`.
#
# @param values [Array<Numeric>] input sequence
# @param half [Integer] value passed through as the filter's second argument
#   (callers pass `kernel_size / 2`)
# @return [Object] whatever the native filter returns
def sliding_median(values, half) = Muze::Native.median_filter1d(values, half)
125
+ private_class_method :sliding_median
61
126
  end
62
127
  end
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
module Muze
  # Chunk-wise ("streaming") variants of the audio effects. Each method takes
  # an enumerable of audio chunks, prepends the tail of the previously
  # processed audio as context, runs the regular effect and yields the
  # result with the context part removed.
  module Effects
    module_function

    # Time-stretches a stream of audio chunks.
    #
    # @param chunks [#each] audio chunks accepted by `Muze::Core::Audio.validate_audio!`
    # @param rate [Numeric] stretch rate (positive)
    # @param overlap [Integer] frames of previous audio reused as context
    # @yield [Object] stretched audio for each non-empty chunk
    # @return [Enumerator, nil] an enumerator when no block is given
    # @raise [Muze::ParameterError] when `overlap` is not a non-negative integer
    def time_stretch_stream(chunks, rate: 1.0, n_fft: nil, hop_length: nil, method: :phase_vocoder, phase_lock: false, force_phase_vocoder: false, overlap: 2048)
      return enum_for(__method__, chunks, rate:, n_fft:, hop_length:, method:, phase_lock:, force_phase_vocoder:, overlap:) unless block_given?

      validate_positive_number!(rate, "rate")
      validate_stream_overlap!(overlap)
      stream_effect_chunks(chunks, overlap:) do |working, prefix_frames|
        stretched = time_stretch(working, rate:, n_fft:, hop_length:, method:, phase_lock:, force_phase_vocoder:)
        # Divide as floats: with an Integer rate, `prefix_frames / rate` would
        # truncate before rounding and leave the drop count one frame short.
        drop = [(prefix_frames / rate.to_f).round, audio_frame_count(stretched)].min
        yield drop_audio_frames(stretched, drop)
      end
      nil
    end

    # Pitch-shifts a stream of audio chunks.
    #
    # @param chunks [#each] audio chunks
    # @param overlap [Integer] frames of previous audio reused as context
    # @yield [Object] shifted audio for each non-empty chunk
    # @return [Enumerator, nil] an enumerator when no block is given
    # @raise [Muze::ParameterError] when `overlap` is not a non-negative integer
    def pitch_shift_stream(chunks, sr: 22_050, n_steps: 0, bins_per_octave: 12, res_type: :auto, normalize: false, clip: nil, overlap: 2048)
      return enum_for(__method__, chunks, sr:, n_steps:, bins_per_octave:, res_type:, normalize:, clip:, overlap:) unless block_given?

      validate_stream_overlap!(overlap)
      stream_effect_chunks(chunks, overlap:) do |working, prefix_frames|
        shifted = pitch_shift(working, sr:, n_steps:, bins_per_octave:, res_type:, normalize:, clip:)
        yield drop_audio_frames(shifted, prefix_frames)
      end
      nil
    end

    # Harmonic/percussive separation over a stream of audio chunks.
    #
    # @param chunks [#each] audio chunks
    # @param overlap [Integer] frames of previous audio reused as context
    #   (defaults to `n_fft`)
    # @yield [Object, Object] harmonic and percussive audio for each non-empty chunk
    # @return [Enumerator, nil] an enumerator when no block is given
    # @raise [Muze::ParameterError] when `overlap` is not a non-negative integer
    def hpss_stream(chunks, kernel_size: 31, power: 2.0, margin: 1.0, n_fft: 2048, hop_length: 512, overlap: n_fft)
      return enum_for(__method__, chunks, kernel_size:, power:, margin:, n_fft:, hop_length:, overlap:) unless block_given?

      validate_stream_overlap!(overlap)
      stream_effect_chunks(chunks, overlap:) do |working, prefix_frames|
        harmonic, percussive = hpss(working, kernel_size:, power:, margin:, n_fft:, hop_length:)
        yield drop_audio_frames(harmonic, prefix_frames), drop_audio_frames(percussive, prefix_frames)
      end
      nil
    end

    # Walks the chunks, yielding each one prefixed with the retained tail of
    # the previous working buffer together with the length of that prefix.
    # Empty chunks are skipped.
    def stream_effect_chunks(chunks, overlap:)
      tail = nil
      chunks.each do |chunk|
        signal = Muze::Core::Audio.validate_audio!(chunk, allow_empty: true)
        next if signal.empty?

        working = tail ? concat_audio(tail, signal) : signal
        prefix_frames = tail ? audio_frame_count(tail) : 0
        yield working, prefix_frames
        tail = overlap.positive? ? take_audio_tail(working, overlap) : nil
      end
    end
    private_class_method :stream_effect_chunks

    # Number of frames: the first dimension for 2-D audio, the element count otherwise.
    def audio_frame_count(signal)
      signal.ndim == 2 ? signal.shape[0] : signal.size
    end
    private_class_method :audio_frame_count

    # Joins two audio buffers along the frame axis.
    #
    # @raise [Muze::ParameterError] when one buffer is 2-D and the other is
    #   not, or their channel counts differ
    def concat_audio(left, right)
      return right if left.nil? || left.empty?
      return left if right.empty?

      if left.ndim == 2 || right.ndim == 2
        raise Muze::ParameterError, "chunk channel counts must match" unless left.ndim == 2 && right.ndim == 2 && left.shape[1] == right.shape[1]

        output = Numo::SFloat.zeros(left.shape[0] + right.shape[0], left.shape[1])
        output[0...left.shape[0], true] = left
        output[left.shape[0]...(left.shape[0] + right.shape[0]), true] = right
        return output
      end

      Numo::SFloat.cast(left.to_a + right.to_a)
    end
    private_class_method :concat_audio

    # Last `count` frames of the signal (the whole signal when it is shorter).
    def take_audio_tail(signal, count)
      frames = audio_frame_count(signal)
      start = [frames - count, 0].max
      drop_audio_frames(signal, start)
    end
    private_class_method :take_audio_tail

    # Signal without its first `count` frames; `count` is clamped to 0..frames.
    def drop_audio_frames(signal, count)
      frames = audio_frame_count(signal)
      start = [[count, 0].max, frames].min
      return signal[start...frames, true] if signal.ndim == 2

      signal[start...frames]
    end
    private_class_method :drop_audio_frames

    # @raise [Muze::ParameterError] unless `overlap` is a non-negative Integer
    def validate_stream_overlap!(overlap)
      return if overlap.is_a?(Integer) && overlap >= 0

      raise Muze::ParameterError, "overlap must be a non-negative integer"
    end
    private_class_method :validate_stream_overlap!
  end
end