muze 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -1
  3. data/README.md +5 -0
  4. data/Rakefile +3 -0
  5. data/ext/muze/muze_ext.c +129 -12
  6. data/lib/muze/beat/beat_track.rb +93 -11
  7. data/lib/muze/core/audio.rb +129 -0
  8. data/lib/muze/core/cache.rb +38 -0
  9. data/lib/muze/core/dct.rb +24 -21
  10. data/lib/muze/core/frames.rb +31 -0
  11. data/lib/muze/core/matrix.rb +23 -0
  12. data/lib/muze/core/resample.rb +111 -19
  13. data/lib/muze/core/stft.rb +312 -52
  14. data/lib/muze/core/windows.rb +113 -17
  15. data/lib/muze/display/specshow.rb +307 -41
  16. data/lib/muze/effects/harmonic_percussive.rb +83 -18
  17. data/lib/muze/effects/streaming.rb +101 -0
  18. data/lib/muze/effects/time_stretch.rb +353 -36
  19. data/lib/muze/feature/aggregation.rb +49 -0
  20. data/lib/muze/feature/chroma.rb +43 -15
  21. data/lib/muze/feature/context.rb +81 -0
  22. data/lib/muze/feature/mfcc.rb +78 -38
  23. data/lib/muze/feature/spectral.rb +258 -39
  24. data/lib/muze/filters/chroma_filter.rb +21 -2
  25. data/lib/muze/filters/mel.rb +47 -1
  26. data/lib/muze/io/audio_loader/ffmpeg_backend.rb +179 -15
  27. data/lib/muze/io/audio_loader/wavify_backend.rb +118 -11
  28. data/lib/muze/io/audio_loader.rb +178 -48
  29. data/lib/muze/io/audio_writer.rb +48 -0
  30. data/lib/muze/native.rb +91 -8
  31. data/lib/muze/onset/onset_detect.rb +114 -23
  32. data/lib/muze/version.rb +1 -1
  33. data/lib/muze.rb +237 -60
  34. metadata +11 -21
  35. data/benchmarks/baseline.json +0 -24
  36. data/benchmarks/native_vs_ruby.rb +0 -23
  37. data/benchmarks/quality_metrics.rb +0 -265
  38. data/benchmarks/quality_thresholds.md +0 -28
  39. data/benchmarks/support/fixture_library.rb +0 -107
data/lib/muze/io/audio_loader.rb CHANGED
@@ -9,31 +9,40 @@ module Muze
9
9
  module AudioLoader
10
10
  module_function
11
11
 
12
- SUPPORTED_FORMATS = %w[wav flac mp3 ogg].freeze
13
-
14
- # @param path [String]
15
- # @param sr [Integer] destination sample rate
16
- # @param mono [Boolean]
12
+ BACKENDS = [
13
+ WavifyBackend,
14
+ FFMPEGBackend
15
+ ].freeze
16
+ SUPPORTED_FORMATS = BACKENDS.flat_map { |backend| backend::SUPPORTED_EXTENSIONS }.map { |ext| ext.delete_prefix(".") }.sort.freeze
17
+
18
+ # @param path [String, Pathname, IO]
19
+ # @param sr [Integer, nil] destination sample rate; nil preserves source rate
20
+ # @param mono [Boolean, Symbol]
17
21
  # @param offset [Float] seconds from start
18
22
  # @param duration [Float, nil] duration in seconds
23
+ # @param dtype [Class, Symbol]
24
+ # @param normalize [Boolean]
25
+ # @param format [Symbol, String, nil]
26
+ # @param weights [Array<Float>, nil]
27
+ # @param max_bytes [Integer, nil]
19
28
  # @return [Array(Numo::SFloat, Integer)] waveform and sample rate
20
- def load(path, sr: 22_050, mono: true, offset: 0.0, duration: nil)
21
- validate_args!(sr:, offset:, duration:)
22
- raise Muze::AudioLoadError, "File not found: #{path}" unless File.exist?(path)
23
-
24
- backend = select_backend(path)
25
- raw_samples, source_sr, _channels = backend.read(path)
26
- sliced = slice_by_time(raw_samples, source_sr, offset:, duration:)
27
-
28
- signal = if mono
29
- downmix_to_mono(sliced)
30
- else
31
- sliced
32
- end
33
-
34
- resampled = resample(signal, source_sr, sr)
35
- [Numo::SFloat.cast(resampled), sr]
36
- rescue Muze::AudioLoadError
29
+ def load(path, sr: 22_050, mono: true, offset: 0.0, duration: nil, dtype: Numo::SFloat, normalize: false, format: nil, weights: nil, max_bytes: nil)
30
+ source = resolve_source(path, format:)
31
+ validate_args!(sr:, mono:, offset:, duration:, dtype:, normalize:, weights:, max_bytes:)
32
+ validate_source_size!(source, max_bytes:)
33
+
34
+ backend = select_backend(source)
35
+ raw_samples, source_sr, _channels = backend.read(source.fetch(:input), offset:, duration:)
36
+ sliced = backend.applies_time_window? ? raw_samples : slice_by_time(raw_samples, source_sr, offset:, duration:)
37
+
38
+ signal = downmix(sliced, mono:, weights:)
39
+ target_sr = sr || source_sr
40
+
41
+ resampled = resample(signal, source_sr, target_sr)
42
+ output = cast_signal(resampled, dtype)
43
+ output = normalize_signal(output) if normalize
44
+ [output, target_sr]
45
+ rescue Muze::AudioLoadError, Muze::ParameterError
37
46
  raise
38
47
  rescue Muze::UnsupportedFormatError, Muze::DependencyError => e
39
48
  raise Muze::AudioLoadError, e.message
@@ -41,27 +50,110 @@ module Muze
41
50
  raise Muze::AudioLoadError, "Failed to load #{path}: #{e.message}"
42
51
  end
43
52
 
44
- def validate_args!(sr:, offset:, duration:)
45
- raise Muze::ParameterError, "sr must be positive" unless sr.is_a?(Integer) && sr.positive?
53
+ # @param path [String, Pathname, IO]
54
+ # @param chunk_frames [Integer]
55
+ # @yieldparam chunk [Numo::SFloat, Numo::DFloat]
56
+ # @yieldparam sample_rate [Integer]
57
+ # @return [Enumerator, nil]
58
+ def load_stream(path, sr: nil, mono: true, offset: 0.0, duration: nil, dtype: Numo::SFloat, format: nil, weights: nil, max_bytes: nil, chunk_frames: 16_384)
59
+ return enum_for(__method__, path, sr:, mono:, offset:, duration:, dtype:, format:, weights:, max_bytes:, chunk_frames:) unless block_given?
60
+
61
+ source = resolve_source(path, format:)
62
+ validate_args!(sr:, mono:, offset:, duration:, dtype:, normalize: false, weights:, max_bytes:)
63
+ validate_stream_args!(chunk_frames:)
64
+ validate_source_size!(source, max_bytes:)
65
+
66
+ backend = select_backend(source)
67
+ backend.read_stream(source.fetch(:input), chunk_frames:, offset:, duration:) do |raw_samples, source_sr, _channels|
68
+ target_sr = sr || source_sr
69
+ signal = downmix(raw_samples, mono:, weights:)
70
+ resampled = resample(signal, source_sr, target_sr)
71
+ yield cast_signal(resampled, dtype), target_sr unless resampled.empty?
72
+ end
73
+ nil
74
+ rescue Muze::AudioLoadError, Muze::ParameterError
75
+ raise
76
+ rescue Muze::UnsupportedFormatError, Muze::DependencyError => e
77
+ raise Muze::AudioLoadError, e.message
78
+ rescue StandardError => e
79
+ raise Muze::AudioLoadError, "Failed to stream #{path}: #{e.message}"
80
+ end
81
+
82
+ # @param path [String, Pathname, IO]
83
+ # @return [Hash]
84
+ def info(path, format: nil)
85
+ source = resolve_source(path, format:)
86
+
87
+ select_backend(source).info(source.fetch(:input))
88
+ rescue Muze::AudioLoadError
89
+ raise
90
+ rescue Muze::UnsupportedFormatError, Muze::DependencyError => e
91
+ raise Muze::AudioLoadError, e.message
92
+ rescue StandardError => e
93
+ raise Muze::AudioLoadError, "Failed to inspect #{path}: #{e.message}"
94
+ end
95
+
96
+ def resolve_source(path, format:)
97
+ resolved = path.respond_to?(:to_path) ? path.to_path : path
98
+ if resolved.is_a?(String)
99
+ raise Muze::AudioLoadError, "File not found: #{resolved}" unless File.exist?(resolved)
100
+
101
+ return { input: resolved, extension: normalized_extension(format || File.extname(resolved)), path?: true }
102
+ end
103
+
104
+ return { input: resolved, extension: normalized_extension(format || :wav), path?: false } if resolved.respond_to?(:read)
105
+
106
+ raise Muze::AudioLoadError, "Audio source must be a String, Pathname, or IO-like object"
107
+ end
108
+ private_class_method :resolve_source
109
+
110
+ def normalized_extension(format)
111
+ extension = format.to_s
112
+ extension = ".#{extension}" unless extension.start_with?(".")
113
+ extension.downcase
114
+ end
115
+ private_class_method :normalized_extension
116
+
117
+ def validate_args!(sr:, mono:, offset:, duration:, dtype:, normalize:, weights:, max_bytes:)
118
+ raise Muze::ParameterError, "sr must be positive or nil" unless sr.nil? || (sr.is_a?(Integer) && sr.positive?)
119
+ raise Muze::ParameterError, "mono must be true, false, :mean, :left, :right, or :weighted" unless [true, false, :mean, :left, :right, :weighted].include?(mono)
46
120
  raise Muze::ParameterError, "offset must be >= 0" if offset.negative?
121
+ raise Muze::ParameterError, "normalize must be true or false" unless [true, false].include?(normalize)
122
+ raise Muze::ParameterError, "weights must be an Array when mono is :weighted" if mono == :weighted && !weights.is_a?(Array)
123
+ raise Muze::ParameterError, "max_bytes must be positive" if max_bytes && (!max_bytes.is_a?(Integer) || max_bytes <= 0)
124
+ dtype_class(dtype)
47
125
  return if duration.nil? || duration.positive?
48
126
 
49
127
  raise Muze::ParameterError, "duration must be positive"
50
128
  end
51
129
  private_class_method :validate_args!
52
130
 
53
- def select_backend(path)
54
- extension = File.extname(path).downcase
131
+ def validate_stream_args!(chunk_frames:)
132
+ return if chunk_frames.is_a?(Integer) && chunk_frames.positive?
55
133
 
56
- if WavifyBackend.supported_extension?(extension)
57
- WavifyBackend
58
- elsif FFMPEGBackend.supported_extension?(extension)
59
- raise Muze::DependencyError, FFMPEGBackend.installation_message(extension) unless FFMPEGBackend.available?
134
+ raise Muze::ParameterError, "chunk_frames must be a positive integer"
135
+ end
136
+ private_class_method :validate_stream_args!
60
137
 
61
- FFMPEGBackend
62
- else
63
- raise Muze::UnsupportedFormatError, unsupported_format_message(extension)
138
+ def validate_source_size!(source, max_bytes:)
139
+ return unless max_bytes && source.fetch(:path?)
140
+
141
+ size = File.size(source.fetch(:input))
142
+ raise Muze::AudioLoadError, "Audio file is too large (#{size} bytes > #{max_bytes} bytes)" if size > max_bytes
143
+ end
144
+ private_class_method :validate_source_size!
145
+
146
+ def select_backend(source)
147
+ extension = source.fetch(:extension)
148
+ backend = BACKENDS.find { |candidate| candidate.supported_extension?(extension) }
149
+
150
+ raise Muze::UnsupportedFormatError, unsupported_format_message(extension) unless backend
151
+ if !source.fetch(:path?) && backend != WavifyBackend
152
+ raise Muze::UnsupportedFormatError, "IO/StringIO loading is currently supported for WAV input only"
64
153
  end
154
+ raise Muze::DependencyError, backend.installation_message(extension) unless backend.available?
155
+
156
+ backend
65
157
  end
66
158
  private_class_method :select_backend
67
159
 
@@ -85,6 +177,31 @@ module Muze
85
177
  end
86
178
  private_class_method :slice_by_time
87
179
 
180
+ def downmix(samples, mono:, weights:)
181
+ return samples if mono == false
182
+ return downmix_to_mono(samples) if mono == true || mono == :mean
183
+ return downmix_weighted(samples, weights) if mono == :weighted
184
+ return samples unless samples.first.is_a?(Array)
185
+
186
+ channel_index = mono == :left ? 0 : samples.first.length - 1
187
+ samples.map { |frame| frame.fetch(channel_index) }
188
+ end
189
+ private_class_method :downmix
190
+
191
+ def downmix_weighted(samples, weights)
192
+ return samples if samples.empty?
193
+ return samples unless samples.first.is_a?(Array)
194
+ raise Muze::ParameterError, "weights length must match channel count" unless weights.length == samples.first.length
195
+
196
+ weight_sum = weights.sum(0.0)
197
+ raise Muze::ParameterError, "weights must not sum to zero" if weight_sum.abs <= 1.0e-12
198
+
199
+ samples.map do |frame|
200
+ frame.each_with_index.sum { |sample, index| sample * weights[index] } / weight_sum
201
+ end
202
+ end
203
+ private_class_method :downmix_weighted
204
+
88
205
  def downmix_to_mono(samples)
89
206
  return samples if samples.empty?
90
207
  return samples unless samples.first.is_a?(Array)
@@ -94,24 +211,37 @@ module Muze
94
211
  private_class_method :downmix_to_mono
95
212
 
96
213
  def resample(samples, source_sr, target_sr)
97
- if samples.empty?
98
- []
99
- elsif samples.first.is_a?(Array)
100
- channel_count = samples.first.length
101
- channels = Array.new(channel_count) { [] }
102
- samples.each { |frame| frame.each_with_index { |sample, index| channels[index] << sample } }
103
-
104
- resampled_channels = channels.map do |channel_data|
105
- Muze::Core::Resample.resample(channel_data, orig_sr: source_sr, target_sr: target_sr, res_type: :sinc).to_a
106
- end
107
-
108
- target_length = resampled_channels.first.length
109
- Array.new(target_length) { |idx| resampled_channels.map { |channel| channel[idx] } }
214
+ return [] if samples.empty?
215
+
216
+ Muze::Core::Resample.resample(samples, orig_sr: source_sr, target_sr: target_sr, res_type: :sinc)
217
+ end
218
+ private_class_method :resample
219
+
220
+ def cast_signal(signal, dtype)
221
+ dtype_class(dtype).cast(signal)
222
+ end
223
+ private_class_method :cast_signal
224
+
225
+ def dtype_class(dtype)
226
+ case dtype
227
+ when :sfloat, :float32 then Numo::SFloat
228
+ when :dfloat, :float64 then Numo::DFloat
110
229
  else
111
- Muze::Core::Resample.resample(samples, orig_sr: source_sr, target_sr: target_sr, res_type: :sinc).to_a
230
+ return Numo::SFloat if dtype == Numo::SFloat
231
+ return Numo::DFloat if dtype == Numo::DFloat
232
+
233
+ raise Muze::ParameterError, "dtype must be :sfloat, :float32, :dfloat, :float64, Numo::SFloat, or Numo::DFloat"
112
234
  end
113
235
  end
114
- private_class_method :resample
236
+ private_class_method :dtype_class
237
+
238
+ def normalize_signal(signal)
239
+ peak = signal.abs.max
240
+ return signal if peak <= 0.0
241
+
242
+ (signal / peak).cast_to(signal.class)
243
+ end
244
+ private_class_method :normalize_signal
115
245
  end
116
246
  end
117
247
  end
data/lib/muze/io/audio_writer.rb ADDED
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "wavify/core/format"
4
+ require "wavify/core/sample_buffer"
5
+ require "wavify/codecs/base"
6
+ require "wavify/codecs/wav"
7
+
8
+ module Muze
9
+ module IO
10
+ # WAV writer for lightweight effects/analysis result inspection.
11
+ module AudioWriter
12
+ module_function
13
+
14
+ def write(path, y, sr:, normalize: false, format: :wav)
15
+ raise Muze::ParameterError, "sr must be positive" unless sr.is_a?(Integer) && sr.positive?
16
+ raise Muze::ParameterError, "normalize must be true or false" unless [true, false].include?(normalize)
17
+
18
+ format_label = format.to_s.downcase.to_sym
19
+ raise Muze::UnsupportedFormatError, "only WAV output is supported" unless %i[wav wave].include?(format_label)
20
+
21
+ signal = Muze::Core::Audio.validate_audio!(y, allow_empty: true)
22
+ signal = Muze::Core::Audio.normalize(signal) if normalize
23
+ channels = signal.ndim == 2 ? signal.shape[1] : 1
24
+ samples = flatten_samples(signal)
25
+ sample_format = Wavify::Core::Format.new(channels:, sample_rate: sr, bit_depth: 32, sample_format: :float)
26
+ buffer = Wavify::Core::SampleBuffer.new(samples, sample_format)
27
+ Wavify::Codecs::Wav.write(output_target(path), buffer)
28
+ path
29
+ rescue Muze::Error
30
+ raise
31
+ rescue SystemCallError, Wavify::Error => e
32
+ raise Muze::AudioLoadError, "Failed to write WAV output #{path}: #{e.message}"
33
+ end
34
+
35
+ def flatten_samples(signal)
36
+ return signal.to_a if signal.ndim == 1
37
+
38
+ signal.to_a.flat_map { |frame| frame.respond_to?(:to_a) ? frame.to_a : Array(frame) }
39
+ end
40
+ private_class_method :flatten_samples
41
+
42
+ def output_target(path)
43
+ path.respond_to?(:to_path) ? path.to_path : path
44
+ end
45
+ private_class_method :output_target
46
+ end
47
+ end
48
+ end
data/lib/muze/native.rb CHANGED
@@ -5,12 +5,16 @@ module Muze
5
5
  module Native
6
6
  module_function
7
7
 
8
- begin
9
- require "muze/muze_ext"
10
- EXTENSION_LOADED = true
11
- rescue LoadError
12
- EXTENSION_LOADED = false
13
- end
8
+ EXTENSION_LOADED = if ENV.fetch("MUZE_DISABLE_NATIVE", "0") == "1"
9
+ false
10
+ else
11
+ begin
12
+ require "muze/muze_ext"
13
+ true
14
+ rescue LoadError
15
+ false
16
+ end
17
+ end
14
18
 
15
19
  # @return [Boolean]
16
20
  def extension_loaded?
@@ -23,6 +27,11 @@ module Muze
23
27
  # @param hop_length [Integer]
24
28
  # @return [Array<Array<Float>>]
25
29
  def frame_slices(signal, frame_length, hop_length)
30
+ raise Muze::ParameterError, "signal must be an Array" unless signal.is_a?(Array)
31
+ unless frame_length.is_a?(Integer) && hop_length.is_a?(Integer) && frame_length.positive? && hop_length.positive?
32
+ raise Muze::ParameterError, "frame_length and hop_length must be positive"
33
+ end
34
+
26
35
  if signal.length <= frame_length
27
36
  return [signal + Array.new(frame_length - signal.length, 0.0)]
28
37
  end
@@ -37,9 +46,83 @@ module Muze
37
46
  # @param values [Array<Float>]
38
47
  # @return [Float]
39
48
  def median1d(values)
40
- sorted = values.sort
41
- sorted[sorted.length / 2] || 0.0
49
+ raise Muze::ParameterError, "values must be an Array" unless values.is_a?(Array)
50
+ return 0.0 if values.empty?
51
+
52
+ copy = values.map(&:to_f)
53
+ quickselect!(copy, copy.length / 2)
54
+ end
55
+
56
+ # @param values [Array<Float>]
57
+ # @param half [Integer]
58
+ # @return [Array<Float>]
59
+ def median_filter1d(values, half)
60
+ raise Muze::ParameterError, "values must be an Array" unless values.is_a?(Array)
61
+ raise Muze::ParameterError, "half must be non-negative" unless half.is_a?(Integer) && half >= 0
62
+ return [] if values.empty?
63
+
64
+ window = []
65
+ output = Array.new(values.length, 0.0)
66
+ values.length.times do |index|
67
+ remove_sorted_value(window, values[index - half - 1]) if index > half
68
+ entering = index + half
69
+ insert_sorted_value(window, values[entering]) if entering < values.length
70
+ output[index] = window[window.length / 2].to_f
71
+ end
72
+
73
+ output
74
+ end
75
+
76
+ def quickselect!(values, target)
77
+ left = 0
78
+ right = values.length - 1
79
+
80
+ loop do
81
+ return values[left] if left == right
82
+
83
+ pivot_index = partition!(values, left, right, (left + right) / 2)
84
+ if target == pivot_index
85
+ return values[target]
86
+ elsif target < pivot_index
87
+ right = pivot_index - 1
88
+ else
89
+ left = pivot_index + 1
90
+ end
91
+ end
92
+ end
93
+ private_class_method :quickselect!
94
+
95
+ def partition!(values, left, right, pivot_index)
96
+ pivot = values[pivot_index]
97
+ values[pivot_index], values[right] = values[right], values[pivot_index]
98
+ store_index = left
99
+
100
+ (left...right).each do |index|
101
+ next unless values[index] < pivot
102
+
103
+ values[store_index], values[index] = values[index], values[store_index]
104
+ store_index += 1
105
+ end
106
+
107
+ values[right], values[store_index] = values[store_index], values[right]
108
+ store_index
109
+ end
110
+ private_class_method :partition!
111
+
112
+ def insert_sorted_value(sorted, value)
113
+ index = sorted.bsearch_index { |item| item > value } || sorted.length
114
+ sorted.insert(index, value)
115
+ end
116
+ private_class_method :insert_sorted_value
117
+
118
+ def remove_sorted_value(sorted, value)
119
+ index = sorted.bsearch_index { |item| item >= value }
120
+ return unless index
121
+
122
+ index += 1 while index < sorted.length && sorted[index] != value
123
+ sorted.delete_at(index) if index < sorted.length
42
124
  end
125
+ private_class_method :remove_sorted_value
43
126
  end
44
127
  end
45
128
  end
data/lib/muze/onset/onset_detect.rb CHANGED
@@ -11,24 +11,38 @@ module Muze
11
11
  # @param hop_length [Integer]
12
12
  # @param n_fft [Integer]
13
13
  # @return [Numo::SFloat] onset envelope per frame
14
- def onset_strength(y: nil, sr: 22_050, s: nil, hop_length: 512, n_fft: 2048)
14
+ def onset_strength(y: nil, sr: 22_050, s: nil, hop_length: 512, n_fft: 2048, lag: 1, log: false, max_size: 1, normalize: false)
15
+ validate_positive_integer!(sr, "sr")
16
+ validate_positive_integer!(hop_length, "hop_length")
17
+ validate_positive_integer!(n_fft, "n_fft")
18
+ validate_positive_integer!(lag, "lag")
19
+ validate_positive_integer!(max_size, "max_size")
20
+ raise Muze::ParameterError, "log must be true or false" unless [true, false].include?(log)
21
+ raise Muze::ParameterError, "normalize must be true or false" unless [true, false].include?(normalize)
22
+
15
23
  spectrum = if s
16
- Numo::SFloat.cast(s)
24
+ provided = Numo::SFloat.cast(s)
25
+ validate_finite_array!(provided.to_a.flatten, "s")
26
+ provided
17
27
  else
18
28
  Muze::Feature.melspectrogram(y:, sr:, n_fft:, hop_length:, n_mels: 40)
19
29
  end
20
30
 
21
31
  spectrum = spectrum.expand_dims(1) if spectrum.ndim == 1
32
+ spectrum = Muze.power_to_db(spectrum, ref: :max) if log
33
+ spectrum = local_max_filter(spectrum, max_size:) if max_size > 1
22
34
  _, frames = spectrum.shape
23
35
  envelope = Numo::SFloat.zeros(frames)
24
36
 
25
37
  frames.times do |frame_index|
26
- next if frame_index.zero?
38
+ next if frame_index < lag
27
39
 
28
- delta = spectrum[true, frame_index] - spectrum[true, frame_index - 1]
40
+ delta = spectrum[true, frame_index] - spectrum[true, frame_index - lag]
29
41
  envelope[frame_index] = delta.clip(0.0, Float::INFINITY).sum
30
42
  end
31
43
 
44
+ peak = envelope.max
45
+ envelope = envelope / peak if normalize && peak.positive?
32
46
  envelope
33
47
  end
34
48
 
@@ -39,29 +53,31 @@ module Muze
39
53
  # @param backtrack [Boolean]
40
54
  # @param units [Symbol] :frames, :samples, or :time
41
55
  # @return [Array<Integer, Float>]
42
- def onset_detect(y: nil, sr: 22_050, onset_envelope: nil, hop_length: 512, backtrack: false, units: :frames)
56
+ def onset_detect(y: nil, sr: 22_050, onset_envelope: nil, hop_length: 512, backtrack: false, units: :frames, pre_max: 1, post_max: 1, pre_avg: 1, post_avg: 1, delta: nil, wait: 0, adaptive: false, energy: nil)
57
+ validate_positive_integer!(sr, "sr")
58
+ validate_positive_integer!(hop_length, "hop_length")
59
+ validate_peak_picker_args!(pre_max:, post_max:, pre_avg:, post_avg:, wait:, delta:)
60
+ raise Muze::ParameterError, "backtrack must be true or false" unless [true, false].include?(backtrack)
61
+ raise Muze::ParameterError, "adaptive must be true or false" unless [true, false].include?(adaptive)
62
+
43
63
  envelope = if onset_envelope
44
64
  onset_envelope.is_a?(Numo::NArray) ? onset_envelope.to_a : Array(onset_envelope)
45
65
  else
46
- onset_strength(y:, sr:, hop_length:).to_a
66
+ onset_strength(y:, sr:, hop_length:).to_a
47
67
  end
68
+ validate_finite_array!(envelope, "onset_envelope")
48
69
 
49
70
  return [] if envelope.length < 3
50
71
 
51
- threshold = detection_threshold(envelope)
52
- peaks = detect_peaks(envelope, threshold)
53
- peaks = backtrack_onsets(envelope, peaks) if backtrack
54
-
55
- case units
56
- when :frames
57
- peaks
58
- when :samples
59
- peaks.map { |frame| frame * hop_length }
60
- when :time
61
- peaks.map { |frame| frame * hop_length.to_f / sr }
62
- else
63
- raise Muze::ParameterError, "units must be :frames, :samples, or :time"
72
+ threshold = delta || detection_threshold(envelope)
73
+ peaks = detect_peaks(envelope, threshold, pre_max:, post_max:, pre_avg:, post_avg:, wait:, adaptive:)
74
+ if backtrack
75
+ energy_curve = energy ? Array(energy) : envelope
76
+ validate_finite_array!(energy_curve, "energy")
77
+ peaks = backtrack_onsets(energy_curve, peaks)
64
78
  end
79
+
80
+ convert_units(peaks, units:, sr:, hop_length:)
65
81
  end
66
82
 
67
83
  def detection_threshold(envelope)
@@ -71,14 +87,22 @@ module Muze
71
87
  end
72
88
  private_class_method :detection_threshold
73
89
 
74
- def detect_peaks(envelope, threshold)
90
+ def detect_peaks(envelope, threshold, pre_max:, post_max:, pre_avg:, post_avg:, wait:, adaptive:)
75
91
  peaks = []
92
+ last_peak = -Float::INFINITY
76
93
  (1...(envelope.length - 1)).each do |index|
77
- next unless envelope[index] >= threshold
78
- next unless envelope[index] > envelope[index - 1]
79
- next unless envelope[index] >= envelope[index + 1]
94
+ local_max_start = [index - pre_max, 0].max
95
+ local_max_end = [index + post_max, envelope.length - 1].min
96
+ local_avg_start = [index - pre_avg, 0].max
97
+ local_avg_end = [index + post_avg, envelope.length - 1].min
98
+ local_threshold = adaptive ? average(envelope[local_avg_start..local_avg_end]) + threshold : threshold
99
+
100
+ next unless envelope[index] >= local_threshold
101
+ next unless envelope[index] >= envelope[local_max_start..local_max_end].max
102
+ next if index <= last_peak + wait
80
103
 
81
104
  peaks << index
105
+ last_peak = index
82
106
  end
83
107
  peaks
84
108
  end
@@ -93,5 +117,72 @@ module Muze
93
117
  end.uniq
94
118
  end
95
119
  private_class_method :backtrack_onsets
120
+
121
+ def convert_units(peaks, units:, sr:, hop_length:)
122
+ case units
123
+ when :frames
124
+ peaks
125
+ when :samples
126
+ peaks.map { |frame| frame * hop_length }
127
+ when :time
128
+ peaks.map { |frame| frame * hop_length.to_f / sr }
129
+ else
130
+ raise Muze::ParameterError, "units must be :frames, :samples, or :time"
131
+ end
132
+ end
133
+ private_class_method :convert_units
134
+
135
+ def local_max_filter(spectrum, max_size:)
136
+ rows, cols = spectrum.shape
137
+ half = max_size / 2
138
+ output = Numo::SFloat.zeros(rows, cols)
139
+
140
+ rows.times do |row|
141
+ cols.times do |col|
142
+ start_col = [col - half, 0].max
143
+ end_col = [col + half, cols - 1].min
144
+ output[row, col] = spectrum[row, start_col..end_col].max
145
+ end
146
+ end
147
+ output
148
+ end
149
+ private_class_method :local_max_filter
150
+
151
+ def average(values)
152
+ values.sum(0.0) / values.length
153
+ end
154
+ private_class_method :average
155
+
156
+ def validate_peak_picker_args!(pre_max:, post_max:, pre_avg:, post_avg:, wait:, delta:)
157
+ {
158
+ pre_max: pre_max,
159
+ post_max: post_max,
160
+ pre_avg: pre_avg,
161
+ post_avg: post_avg,
162
+ wait: wait
163
+ }.each do |label, value|
164
+ next if value.is_a?(Integer) && !value.negative?
165
+
166
+ raise Muze::ParameterError, "#{label} must be a non-negative integer"
167
+ end
168
+ return if delta.nil? || (delta.respond_to?(:finite?) && delta.finite? && !delta.negative?)
169
+
170
+ raise Muze::ParameterError, "delta must be non-negative"
171
+ end
172
+ private_class_method :validate_peak_picker_args!
173
+
174
+ def validate_positive_integer!(value, label)
175
+ return if value.is_a?(Integer) && value.positive?
176
+
177
+ raise Muze::ParameterError, "#{label} must be a positive integer"
178
+ end
179
+ private_class_method :validate_positive_integer!
180
+
181
+ def validate_finite_array!(values, label)
182
+ return if values.all? { |value| value.respond_to?(:finite?) && value.finite? }
183
+
184
+ raise Muze::ParameterError, "#{label} must contain only finite numeric values"
185
+ end
186
+ private_class_method :validate_finite_array!
96
187
  end
97
188
  end
data/lib/muze/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Muze
4
- VERSION = "0.1.0"
4
+ VERSION = "1.0.0"
5
5
  end