muze 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -1
- data/README.md +5 -0
- data/Rakefile +3 -0
- data/ext/muze/muze_ext.c +129 -12
- data/lib/muze/beat/beat_track.rb +93 -11
- data/lib/muze/core/audio.rb +129 -0
- data/lib/muze/core/cache.rb +38 -0
- data/lib/muze/core/dct.rb +24 -21
- data/lib/muze/core/frames.rb +31 -0
- data/lib/muze/core/matrix.rb +23 -0
- data/lib/muze/core/resample.rb +111 -19
- data/lib/muze/core/stft.rb +312 -52
- data/lib/muze/core/windows.rb +113 -17
- data/lib/muze/display/specshow.rb +307 -41
- data/lib/muze/effects/harmonic_percussive.rb +83 -18
- data/lib/muze/effects/streaming.rb +101 -0
- data/lib/muze/effects/time_stretch.rb +353 -36
- data/lib/muze/feature/aggregation.rb +49 -0
- data/lib/muze/feature/chroma.rb +43 -15
- data/lib/muze/feature/context.rb +81 -0
- data/lib/muze/feature/mfcc.rb +78 -38
- data/lib/muze/feature/spectral.rb +258 -39
- data/lib/muze/filters/chroma_filter.rb +21 -2
- data/lib/muze/filters/mel.rb +47 -1
- data/lib/muze/io/audio_loader/ffmpeg_backend.rb +179 -15
- data/lib/muze/io/audio_loader/wavify_backend.rb +118 -11
- data/lib/muze/io/audio_loader.rb +178 -48
- data/lib/muze/io/audio_writer.rb +48 -0
- data/lib/muze/native.rb +91 -8
- data/lib/muze/onset/onset_detect.rb +114 -23
- data/lib/muze/version.rb +1 -1
- data/lib/muze.rb +237 -60
- metadata +11 -21
- data/benchmarks/baseline.json +0 -24
- data/benchmarks/native_vs_ruby.rb +0 -23
- data/benchmarks/quality_metrics.rb +0 -265
- data/benchmarks/quality_thresholds.md +0 -28
- data/benchmarks/support/fixture_library.rb +0 -107
data/lib/muze/io/audio_loader.rb
CHANGED
|
@@ -9,31 +9,40 @@ module Muze
|
|
|
9
9
|
module AudioLoader
|
|
10
10
|
module_function
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
12
|
+
# Loader backends in lookup order.
BACKENDS = [WavifyBackend, FFMPEGBackend].freeze

# Extension names (leading dot removed) gathered from every backend, sorted.
# `uniq` keeps an extension that more than one backend declares from being
# listed twice.
SUPPORTED_FORMATS = BACKENDS
                    .flat_map { |backend| backend::SUPPORTED_EXTENSIONS }
                    .map { |ext| ext.delete_prefix(".") }
                    .uniq
                    .sort
                    .freeze
|
|
17
|
+
|
|
18
|
+
# @param path [String, Pathname, IO]
|
|
19
|
+
# @param sr [Integer, nil] destination sample rate; nil preserves source rate
|
|
20
|
+
# @param mono [Boolean, Symbol]
|
|
17
21
|
# @param offset [Float] seconds from start
|
|
18
22
|
# @param duration [Float, nil] duration in seconds
|
|
23
|
+
# @param dtype [Class, Symbol]
|
|
24
|
+
# @param normalize [Boolean]
|
|
25
|
+
# @param format [Symbol, String, nil]
|
|
26
|
+
# @param weights [Array<Float>, nil]
|
|
27
|
+
# @param max_bytes [Integer, nil]
|
|
19
28
|
# @return [Array(Numo::SFloat, Integer)] waveform and sample rate
|
|
20
|
-
def load(path, sr: 22_050, mono: true, offset: 0.0, duration: nil)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
[
|
|
36
|
-
rescue Muze::AudioLoadError
|
|
29
|
+
def load(path, sr: 22_050, mono: true, offset: 0.0, duration: nil, dtype: Numo::SFloat, normalize: false, format: nil, weights: nil, max_bytes: nil)
|
|
30
|
+
source = resolve_source(path, format:)
|
|
31
|
+
validate_args!(sr:, mono:, offset:, duration:, dtype:, normalize:, weights:, max_bytes:)
|
|
32
|
+
validate_source_size!(source, max_bytes:)
|
|
33
|
+
|
|
34
|
+
backend = select_backend(source)
|
|
35
|
+
raw_samples, source_sr, _channels = backend.read(source.fetch(:input), offset:, duration:)
|
|
36
|
+
sliced = backend.applies_time_window? ? raw_samples : slice_by_time(raw_samples, source_sr, offset:, duration:)
|
|
37
|
+
|
|
38
|
+
signal = downmix(sliced, mono:, weights:)
|
|
39
|
+
target_sr = sr || source_sr
|
|
40
|
+
|
|
41
|
+
resampled = resample(signal, source_sr, target_sr)
|
|
42
|
+
output = cast_signal(resampled, dtype)
|
|
43
|
+
output = normalize_signal(output) if normalize
|
|
44
|
+
[output, target_sr]
|
|
45
|
+
rescue Muze::AudioLoadError, Muze::ParameterError
|
|
37
46
|
raise
|
|
38
47
|
rescue Muze::UnsupportedFormatError, Muze::DependencyError => e
|
|
39
48
|
raise Muze::AudioLoadError, e.message
|
|
@@ -41,27 +50,110 @@ module Muze
|
|
|
41
50
|
raise Muze::AudioLoadError, "Failed to load #{path}: #{e.message}"
|
|
42
51
|
end
|
|
43
52
|
|
|
44
|
-
|
|
45
|
-
|
|
53
|
+
# @param path [String, Pathname, IO]
|
|
54
|
+
# @param chunk_frames [Integer]
|
|
55
|
+
# @yieldparam chunk [Numo::SFloat, Numo::DFloat]
|
|
56
|
+
# @yieldparam sample_rate [Integer]
|
|
57
|
+
# @return [Enumerator, nil]
|
|
58
|
+
def load_stream(path, sr: nil, mono: true, offset: 0.0, duration: nil, dtype: Numo::SFloat, format: nil, weights: nil, max_bytes: nil, chunk_frames: 16_384)
  # Without a block, hand back an enumerator bound to the same arguments.
  unless block_given?
    return enum_for(__method__, path, sr:, mono:, offset:, duration:, dtype:, format:, weights:, max_bytes:, chunk_frames:)
  end

  source = resolve_source(path, format:)
  validate_args!(sr:, mono:, offset:, duration:, dtype:, normalize: false, weights:, max_bytes:)
  validate_stream_args!(chunk_frames:)
  validate_source_size!(source, max_bytes:)

  reader = select_backend(source)
  # NOTE(review): every chunk is downmixed and resampled on its own, so no
  # resampler state is carried across chunk boundaries - confirm acceptable.
  reader.read_stream(source.fetch(:input), chunk_frames:, offset:, duration:) do |raw_samples, source_sr, _channels|
    target_sr = sr || source_sr
    chunk = resample(downmix(raw_samples, mono:, weights:), source_sr, target_sr)
    next if chunk.empty?

    yield cast_signal(chunk, dtype), target_sr
  end

  nil
rescue Muze::AudioLoadError, Muze::ParameterError
  raise
rescue Muze::UnsupportedFormatError, Muze::DependencyError => e
  raise Muze::AudioLoadError, e.message
rescue StandardError => e
  # NOTE(review): this rescue also covers the caller's block, so a
  # StandardError raised by the consumer is re-raised as AudioLoadError.
  raise Muze::AudioLoadError, "Failed to stream #{path}: #{e.message}"
end
|
|
81
|
+
|
|
82
|
+
# @param path [String, Pathname, IO]
|
|
83
|
+
# @return [Hash]
|
|
84
|
+
def info(path, format: nil)
  # Resolve the source, pick the backend for its extension, and delegate.
  source = resolve_source(path, format:)
  backend = select_backend(source)
  backend.info(source.fetch(:input))
rescue Muze::AudioLoadError
  raise
rescue Muze::UnsupportedFormatError, Muze::DependencyError => e
  raise Muze::AudioLoadError, e.message
rescue StandardError => e
  raise Muze::AudioLoadError, "Failed to inspect #{path}: #{e.message}"
end
|
|
95
|
+
|
|
96
|
+
# Turns the caller-supplied source into a descriptor hash.
#
# @param path [String, #to_path, #read] file path or readable object
# @param format [Symbol, String, nil] explicit format overriding the file extension
# @return [Hash] with keys :input, :extension (dotted, lower-case) and :path?
# @raise [Muze::AudioLoadError] when the file is missing or the source type is unsupported
def resolve_source(path, format:)
  candidate = path.respond_to?(:to_path) ? path.to_path : path

  unless candidate.is_a?(String)
    unless candidate.respond_to?(:read)
      raise Muze::AudioLoadError, "Audio source must be a String, Pathname, or IO-like object"
    end

    # Readable objects carry no file name, so the format defaults to WAV.
    return { input: candidate, extension: normalized_extension(format || :wav), path?: false }
  end

  raise Muze::AudioLoadError, "File not found: #{candidate}" unless File.exist?(candidate)

  { input: candidate, extension: normalized_extension(format || File.extname(candidate)), path?: true }
end
|
|
108
|
+
private_class_method :resolve_source
|
|
109
|
+
|
|
110
|
+
# Converts a format name or file extension to a lower-case, dot-prefixed string.
#
# @param format [#to_s] e.g. :wav, "MP3" or ".flac"
# @return [String] e.g. ".wav"
def normalized_extension(format)
  name = format.to_s
  dotted = name.start_with?(".") ? name : ".#{name}"
  dotted.downcase
end
|
|
115
|
+
private_class_method :normalized_extension
|
|
116
|
+
|
|
117
|
+
# Validates the keyword options accepted by the loader entry points.
#
# Non-numeric or NaN values for offset/duration, and non-numeric weight
# entries, are reported as ParameterError here instead of failing later with
# NoMethodError/TypeError.
#
# @param sr [Integer, nil]
# @param mono [Boolean, Symbol] true, false, :mean, :left, :right or :weighted
# @param offset [Numeric] must be >= 0
# @param duration [Numeric, nil] must be positive when given
# @param dtype [Class, Symbol] checked through dtype_class
# @param normalize [Boolean]
# @param weights [Array<Numeric>, nil] required when mono is :weighted
# @param max_bytes [Integer, nil]
# @return [nil]
# @raise [Muze::ParameterError] on the first invalid option
def validate_args!(sr:, mono:, offset:, duration:, dtype:, normalize:, weights:, max_bytes:)
  real_number = ->(value) { value.is_a?(Numeric) && value.real? }

  raise Muze::ParameterError, "sr must be positive or nil" unless sr.nil? || (sr.is_a?(Integer) && sr.positive?)
  raise Muze::ParameterError, "mono must be true, false, :mean, :left, :right, or :weighted" unless [true, false, :mean, :left, :right, :weighted].include?(mono)
  # `>= 0` is false for NaN, so NaN offsets are rejected as well.
  raise Muze::ParameterError, "offset must be >= 0" unless real_number.call(offset) && offset >= 0
  raise Muze::ParameterError, "normalize must be true or false" unless [true, false].include?(normalize)

  if mono == :weighted
    raise Muze::ParameterError, "weights must be an Array when mono is :weighted" unless weights.is_a?(Array)

    numeric_weights = weights.all? { |weight| real_number.call(weight) && weight.finite? }
    raise Muze::ParameterError, "weights must contain only finite numbers" unless numeric_weights
  end

  raise Muze::ParameterError, "max_bytes must be positive" if max_bytes && (!max_bytes.is_a?(Integer) || max_bytes <= 0)
  dtype_class(dtype)
  return if duration.nil? || (real_number.call(duration) && duration.positive?)

  raise Muze::ParameterError, "duration must be positive"
end
|
|
51
129
|
private_class_method :validate_args!
|
|
52
130
|
|
|
53
|
-
def
|
|
54
|
-
|
|
131
|
+
# Ensures the streaming chunk size is a positive Integer.
#
# @param chunk_frames [Integer]
# @return [nil]
# @raise [Muze::ParameterError] otherwise
def validate_stream_args!(chunk_frames:)
  acceptable = chunk_frames.is_a?(Integer) && chunk_frames.positive?
  raise Muze::ParameterError, "chunk_frames must be a positive integer" unless acceptable
end
|
|
136
|
+
private_class_method :validate_stream_args!
|
|
60
137
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
138
|
+
# Enforces the optional max_bytes limit on a resolved source.
#
# Path sources are measured with File.size. IO-like sources are measured
# through their own #size when they expose one (e.g. StringIO), so the limit
# is no longer silently skipped for in-memory input. Sources whose size
# cannot be determined are let through.
#
# @param source [Hash] descriptor with :input and :path? keys
# @param max_bytes [Integer, nil] nil disables the check
# @return [nil]
# @raise [Muze::AudioLoadError] when the source is larger than max_bytes
def validate_source_size!(source, max_bytes:)
  return unless max_bytes

  input = source.fetch(:input)
  size =
    if source.fetch(:path?)
      File.size(input)
    elsif input.respond_to?(:size)
      input.size
    end
  return unless size.is_a?(Integer)

  raise Muze::AudioLoadError, "Audio file is too large (#{size} bytes > #{max_bytes} bytes)" if size > max_bytes
end
|
|
144
|
+
private_class_method :validate_source_size!
|
|
145
|
+
|
|
146
|
+
# Picks the first backend in BACKENDS that supports the source's extension.
#
# @param source [Hash] descriptor with :extension and :path? keys
# @return [Module] the chosen backend
# @raise [Muze::UnsupportedFormatError] when no backend matches, or a non-path
#   source maps to a backend other than WavifyBackend
# @raise [Muze::DependencyError] when the chosen backend reports it is unavailable
def select_backend(source)
  extension = source.fetch(:extension)
  chosen = BACKENDS.find { |candidate| candidate.supported_extension?(extension) }
  raise Muze::UnsupportedFormatError, unsupported_format_message(extension) if chosen.nil?

  io_source = !source.fetch(:path?)
  if io_source && chosen != WavifyBackend
    raise Muze::UnsupportedFormatError, "IO/StringIO loading is currently supported for WAV input only"
  end
  raise Muze::DependencyError, chosen.installation_message(extension) unless chosen.available?

  chosen
end
|
|
66
158
|
private_class_method :select_backend
|
|
67
159
|
|
|
@@ -85,6 +177,31 @@ module Muze
|
|
|
85
177
|
end
|
|
86
178
|
private_class_method :slice_by_time
|
|
87
179
|
|
|
180
|
+
# Applies the requested channel reduction.
#
# @param samples [Array] flat samples or an array of per-frame channel arrays
# @param mono [Boolean, Symbol] false keeps input as is; true/:mean averages via
#   downmix_to_mono; :weighted uses downmix_weighted; any other value selects a
#   single channel (:left is the first, otherwise the last)
# @param weights [Array<Numeric>, nil] only used for :weighted
# @return [Array]
def downmix(samples, mono:, weights:)
  case mono
  when false then samples
  when true, :mean then downmix_to_mono(samples)
  when :weighted then downmix_weighted(samples, weights)
  else
    lead = samples.first
    # Input without per-frame arrays has no channel to pick from.
    if lead.is_a?(Array)
      channel = mono == :left ? 0 : lead.length - 1
      samples.map { |frame| frame.fetch(channel) }
    else
      samples
    end
  end
end
|
|
189
|
+
private_class_method :downmix
|
|
190
|
+
|
|
191
|
+
# Collapses each frame to one sample using a normalised weighted sum.
#
# @param samples [Array] array of per-frame channel arrays; empty or flat input is returned unchanged
# @param weights [Array<Numeric>] one weight per channel
# @return [Array] one value per frame, divided by the sum of the weights
# @raise [Muze::ParameterError] when the weight count differs from the first
#   frame's channel count, or the weights sum to (almost) zero
def downmix_weighted(samples, weights)
  return samples if samples.empty? || !samples.first.is_a?(Array)

  channel_count = samples.first.length
  raise Muze::ParameterError, "weights length must match channel count" unless weights.length == channel_count

  total_weight = weights.sum(0.0)
  raise Muze::ParameterError, "weights must not sum to zero" if total_weight.abs <= 1.0e-12

  samples.map do |frame|
    mixed = frame.zip(weights).sum { |sample, weight| sample * weight }
    mixed / total_weight
  end
end
|
|
203
|
+
private_class_method :downmix_weighted
|
|
204
|
+
|
|
88
205
|
def downmix_to_mono(samples)
|
|
89
206
|
return samples if samples.empty?
|
|
90
207
|
return samples unless samples.first.is_a?(Array)
|
|
@@ -94,24 +211,37 @@ module Muze
|
|
|
94
211
|
private_class_method :downmix_to_mono
|
|
95
212
|
|
|
96
213
|
# Resamples through Muze::Core::Resample using the :sinc method.
# Empty input short-circuits to a fresh empty Array.
#
# @param samples [Array]
# @param source_sr [Integer]
# @param target_sr [Integer]
def resample(samples, source_sr, target_sr)
  if samples.empty?
    []
  else
    Muze::Core::Resample.resample(samples, orig_sr: source_sr, target_sr: target_sr, res_type: :sinc)
  end
end
|
|
218
|
+
private_class_method :resample
|
|
219
|
+
|
|
220
|
+
# Casts the signal with the class that dtype_class resolves for +dtype+.
#
# @param signal [Array, Object] anything the resolved class can cast
# @param dtype [Class, Symbol]
def cast_signal(signal, dtype)
  target = dtype_class(dtype)
  target.cast(signal)
end
|
|
223
|
+
private_class_method :cast_signal
|
|
224
|
+
|
|
225
|
+
# Maps a dtype symbol or class to the matching Numo float class.
#
# @param dtype [Class, Symbol] :sfloat/:float32/Numo::SFloat or :dfloat/:float64/Numo::DFloat
# @return [Class] Numo::SFloat or Numo::DFloat
# @raise [Muze::ParameterError] for anything else
def dtype_class(dtype)
  return Numo::SFloat if %i[sfloat float32].include?(dtype) || dtype == Numo::SFloat
  return Numo::DFloat if %i[dfloat float64].include?(dtype) || dtype == Numo::DFloat

  raise Muze::ParameterError, "dtype must be :sfloat, :float32, :dfloat, :float64, Numo::SFloat, or Numo::DFloat"
end
|
|
114
|
-
private_class_method :
|
|
236
|
+
private_class_method :dtype_class
|
|
237
|
+
|
|
238
|
+
# Scales the signal so its largest absolute value becomes 1.
#
# @param signal [Numo::NArray] presumably the cast output of the loader - TODO confirm
# @return [Numo::NArray] the input itself when it is empty or has no positive
#   peak, otherwise a scaled copy cast back to the input's class
def normalize_signal(signal)
  # Nothing to scale; also avoids running a max-reduction on an empty array.
  return signal if signal.empty?

  peak = signal.abs.max
  return signal if peak <= 0.0

  (signal / peak).cast_to(signal.class)
end
|
|
244
|
+
private_class_method :normalize_signal
|
|
115
245
|
end
|
|
116
246
|
end
|
|
117
247
|
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "wavify/core/format"
|
|
4
|
+
require "wavify/core/sample_buffer"
|
|
5
|
+
require "wavify/codecs/base"
|
|
6
|
+
require "wavify/codecs/wav"
|
|
7
|
+
|
|
8
|
+
module Muze
  module IO
    # Writes a signal to a 32-bit float WAV file through Wavify.
    module AudioWriter
      module_function

      # @param path [String, #to_path] destination; returned unchanged on success
      # @param y [Object] audio accepted by Muze::Core::Audio.validate_audio!
      # @param sr [Integer] sample rate, must be positive
      # @param normalize [Boolean] run Muze::Core::Audio.normalize before writing
      # @param format [Symbol, String] only :wav / :wave (case-insensitive) are accepted
      # @return [String, #to_path] the +path+ argument
      # @raise [Muze::ParameterError, Muze::UnsupportedFormatError] on invalid arguments
      # @raise [Muze::AudioLoadError] when the write fails with a SystemCallError or Wavify::Error
      def write(path, y, sr:, normalize: false, format: :wav)
        raise Muze::ParameterError, "sr must be positive" unless sr.is_a?(Integer) && sr.positive?
        raise Muze::ParameterError, "normalize must be true or false" unless [true, false].include?(normalize)

        requested = format.to_s.downcase.to_sym
        raise Muze::UnsupportedFormatError, "only WAV output is supported" unless %i[wav wave].include?(requested)

        signal = Muze::Core::Audio.validate_audio!(y, allow_empty: true)
        signal = Muze::Core::Audio.normalize(signal) if normalize

        # NOTE(review): a 2-D signal is read as (frames, channels) - confirm against callers.
        wav_format = Wavify::Core::Format.new(
          channels: signal.ndim == 2 ? signal.shape[1] : 1,
          sample_rate: sr,
          bit_depth: 32,
          sample_format: :float
        )
        buffer = Wavify::Core::SampleBuffer.new(flatten_samples(signal), wav_format)
        Wavify::Codecs::Wav.write(output_target(path), buffer)
        path
      rescue Muze::Error
        raise
      rescue SystemCallError, Wavify::Error => e
        raise Muze::AudioLoadError, "Failed to write WAV output #{path}: #{e.message}"
      end

      # Flattens the signal into a single Array of samples, frame after frame.
      def flatten_samples(signal)
        rows = signal.to_a
        return rows if signal.ndim == 1

        rows.flat_map { |frame| frame.respond_to?(:to_a) ? frame.to_a : Array(frame) }
      end
      private_class_method :flatten_samples

      # Unwraps path-like objects (anything answering #to_path) to their path.
      def output_target(path)
        if path.respond_to?(:to_path)
          path.to_path
        else
          path
        end
      end
      private_class_method :output_target
    end
  end
end
|
data/lib/muze/native.rb
CHANGED
|
@@ -5,12 +5,16 @@ module Muze
|
|
|
5
5
|
module Native
|
|
6
6
|
module_function
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
8
|
+
# True when "muze/muze_ext" could be required. Setting the environment
# variable MUZE_DISABLE_NATIVE to "1" skips the require and yields false.
EXTENSION_LOADED =
  ENV.fetch("MUZE_DISABLE_NATIVE", "0") != "1" &&
  begin
    require "muze/muze_ext"
    true
  rescue LoadError
    false
  end
|
|
14
18
|
|
|
15
19
|
# @return [Boolean]
|
|
16
20
|
def extension_loaded?
|
|
@@ -23,6 +27,11 @@ module Muze
|
|
|
23
27
|
# @param hop_length [Integer]
|
|
24
28
|
# @return [Array<Array<Float>>]
|
|
25
29
|
def frame_slices(signal, frame_length, hop_length)
|
|
30
|
+
raise Muze::ParameterError, "signal must be an Array" unless signal.is_a?(Array)
|
|
31
|
+
unless frame_length.is_a?(Integer) && hop_length.is_a?(Integer) && frame_length.positive? && hop_length.positive?
|
|
32
|
+
raise Muze::ParameterError, "frame_length and hop_length must be positive"
|
|
33
|
+
end
|
|
34
|
+
|
|
26
35
|
if signal.length <= frame_length
|
|
27
36
|
return [signal + Array.new(frame_length - signal.length, 0.0)]
|
|
28
37
|
end
|
|
@@ -37,9 +46,83 @@ module Muze
|
|
|
37
46
|
# @param values [Array<Float>]
|
|
38
47
|
# @return [Float]
|
|
39
48
|
def median1d(values)
  raise Muze::ParameterError, "values must be an Array" unless values.is_a?(Array)
  return 0.0 if values.empty?

  # Work on a float copy so the caller's array is left untouched.
  floats = values.map { |value| value.to_f }
  # Selects the element at index length / 2, i.e. the upper of the two
  # middle values when the length is even.
  quickselect!(floats, floats.length / 2)
end
|
|
55
|
+
|
|
56
|
+
# @param values [Array<Float>]
|
|
57
|
+
# @param half [Integer]
|
|
58
|
+
# @return [Array<Float>]
|
|
59
|
+
def median_filter1d(values, half)
  raise Muze::ParameterError, "values must be an Array" unless values.is_a?(Array)
  raise Muze::ParameterError, "half must be non-negative" unless half.is_a?(Integer) && half >= 0
  return [] if values.empty?

  size = values.length
  # Sorted contents of the current window. It is seeded with the samples that
  # sit left of the first entering index; without the seed the leading
  # samples never join the window, and a large +half+ leaves it empty.
  window = values.first(half).sort

  Array.new(size) do |index|
    # Drop the sample that slid out on the left.
    if index > half
      leaving = values[index - half - 1]
      position = window.bsearch_index { |item| item >= leaving }
      window.delete_at(position) if position && window[position] == leaving
    end

    # Admit the sample that slid in on the right; the window is truncated
    # at the end of the input.
    entering = index + half
    if entering < size
      incoming = values[entering]
      window.insert(window.bsearch_index { |item| item > incoming } || window.length, incoming)
    end

    # Middle element of the sorted window (upper middle for even sizes).
    window[window.length / 2].to_f
  end
end
|
|
75
|
+
|
|
76
|
+
# Returns the element that would sit at index +target+ if +values+ were
# sorted, partially reordering +values+ in place.
#
# @param values [Array] mutated
# @param target [Integer] index into the sorted order
def quickselect!(values, target)
  low = 0
  high = values.length - 1

  until low == high
    # Partition around the middle element of the active range, then keep only
    # the side that contains the target index.
    pivot_at = partition!(values, low, high, (low + high) / 2)
    return values[target] if pivot_at == target

    if target < pivot_at
      high = pivot_at - 1
    else
      low = pivot_at + 1
    end
  end

  values[low]
end
|
|
93
|
+
private_class_method :quickselect!
|
|
94
|
+
|
|
95
|
+
# Partitions values[left..right] in place around values[pivot_index].
# Elements smaller than the pivot end up before it.
#
# @return [Integer] final index of the pivot
def partition!(values, left, right, pivot_index)
  pivot = values[pivot_index]
  # Park the pivot at the right edge while the rest is scanned.
  values[pivot_index], values[right] = values[right], values[pivot_index]

  boundary = left
  left.upto(right - 1) do |cursor|
    if values[cursor] < pivot
      values[boundary], values[cursor] = values[cursor], values[boundary]
      boundary += 1
    end
  end

  # Move the pivot to its final slot.
  values[right], values[boundary] = values[boundary], values[right]
  boundary
end
|
|
110
|
+
private_class_method :partition!
|
|
111
|
+
|
|
112
|
+
# Inserts +value+ into the ascending array +sorted+, after any equal items.
#
# @return [Array] the same, mutated array
def insert_sorted_value(sorted, value)
  position = sorted.bsearch_index { |item| item > value }
  sorted.insert(position || sorted.length, value)
end
|
|
116
|
+
private_class_method :insert_sorted_value
|
|
117
|
+
|
|
118
|
+
# Removes one occurrence of +value+ from the ascending array +sorted+.
#
# @return [Object, nil] the removed element, or nil when nothing was removed
def remove_sorted_value(sorted, value)
  position = sorted.bsearch_index { |item| item >= value }
  return unless position

  # Advance to an exact match, or run off the end when there is none.
  position += 1 until position >= sorted.length || sorted[position] == value
  sorted.delete_at(position) if position < sorted.length
end
|
|
125
|
+
private_class_method :remove_sorted_value
|
|
43
126
|
end
|
|
44
127
|
end
|
|
45
128
|
end
|
|
@@ -11,24 +11,38 @@ module Muze
|
|
|
11
11
|
# @param hop_length [Integer]
|
|
12
12
|
# @param n_fft [Integer]
|
|
13
13
|
# @return [Numo::SFloat] onset envelope per frame
|
|
14
|
-
def onset_strength(y: nil, sr: 22_050, s: nil, hop_length: 512, n_fft: 2048)
|
|
14
|
+
def onset_strength(y: nil, sr: 22_050, s: nil, hop_length: 512, n_fft: 2048, lag: 1, log: false, max_size: 1, normalize: false)
  { "sr" => sr, "hop_length" => hop_length, "n_fft" => n_fft, "lag" => lag, "max_size" => max_size }.each do |label, value|
    validate_positive_integer!(value, label)
  end
  raise Muze::ParameterError, "log must be true or false" unless [true, false].include?(log)
  raise Muze::ParameterError, "normalize must be true or false" unless [true, false].include?(normalize)

  # Use the caller's spectrum when given, otherwise a 40-band mel spectrogram of y.
  spectrum =
    if s
      Numo::SFloat.cast(s).tap { |provided| validate_finite_array!(provided.to_a.flatten, "s") }
    else
      Muze::Feature.melspectrogram(y:, sr:, n_fft:, hop_length:, n_mels: 40)
    end

  spectrum = spectrum.expand_dims(1) if spectrum.ndim == 1
  spectrum = Muze.power_to_db(spectrum, ref: :max) if log
  spectrum = local_max_filter(spectrum, max_size:) if max_size > 1

  frame_count = spectrum.shape[1]
  envelope = Numo::SFloat.zeros(frame_count)

  # Each frame from +lag+ onward gets the summed positive part of its
  # difference from the frame +lag+ steps earlier; earlier frames stay 0.
  (lag...frame_count).each do |frame_index|
    rise = spectrum[true, frame_index] - spectrum[true, frame_index - lag]
    envelope[frame_index] = rise.clip(0.0, Float::INFINITY).sum
  end

  peak = envelope.max
  normalize && peak.positive? ? envelope / peak : envelope
end
|
|
34
48
|
|
|
@@ -39,29 +53,31 @@ module Muze
|
|
|
39
53
|
# @param backtrack [Boolean]
|
|
40
54
|
# @param units [Symbol] :frames, :samples, or :time
|
|
41
55
|
# @return [Array<Integer, Float>]
|
|
42
|
-
def onset_detect(y: nil, sr: 22_050, onset_envelope: nil, hop_length: 512, backtrack: false, units: :frames)
|
|
56
|
+
def onset_detect(y: nil, sr: 22_050, onset_envelope: nil, hop_length: 512, backtrack: false, units: :frames, pre_max: 1, post_max: 1, pre_avg: 1, post_avg: 1, delta: nil, wait: 0, adaptive: false, energy: nil)
  validate_positive_integer!(sr, "sr")
  validate_positive_integer!(hop_length, "hop_length")
  validate_peak_picker_args!(pre_max:, post_max:, pre_avg:, post_avg:, wait:, delta:)
  raise Muze::ParameterError, "backtrack must be true or false" unless [true, false].include?(backtrack)
  raise Muze::ParameterError, "adaptive must be true or false" unless [true, false].include?(adaptive)

  # Prefer the caller's envelope; otherwise derive one from y.
  envelope =
    if !onset_envelope
      onset_strength(y:, sr:, hop_length:).to_a
    elsif onset_envelope.is_a?(Numo::NArray)
      onset_envelope.to_a
    else
      Array(onset_envelope)
    end
  validate_finite_array!(envelope, "onset_envelope")

  # NOTE(review): this early return happens before +units+ is checked, so an
  # invalid units value goes unreported for envelopes shorter than 3 frames.
  return [] if envelope.length < 3

  # An explicit delta replaces the automatically derived threshold.
  peaks = detect_peaks(envelope, delta || detection_threshold(envelope), pre_max:, post_max:, pre_avg:, post_avg:, wait:, adaptive:)

  if backtrack
    energy_curve = energy ? Array(energy) : envelope
    validate_finite_array!(energy_curve, "energy")
    peaks = backtrack_onsets(energy_curve, peaks)
  end

  convert_units(peaks, units:, sr:, hop_length:)
end
|
|
66
82
|
|
|
67
83
|
def detection_threshold(envelope)
|
|
@@ -71,14 +87,22 @@ module Muze
|
|
|
71
87
|
end
|
|
72
88
|
private_class_method :detection_threshold
|
|
73
89
|
|
|
74
|
-
def detect_peaks(envelope, threshold)
|
|
90
|
+
# Picks interior frames (first and last excluded) that clear the threshold,
# are the maximum of their surrounding window, and lie more than +wait+
# frames after the previously picked peak.
#
# @param envelope [Array<Numeric>]
# @param threshold [Numeric] absolute threshold, or the offset added to the
#   local mean when +adaptive+ is true
# @param pre_max [Integer] frames before the candidate in the maximum window
# @param post_max [Integer] frames after the candidate in the maximum window
# @param pre_avg [Integer] frames before the candidate in the averaging window
# @param post_avg [Integer] frames after the candidate in the averaging window
# @param wait [Integer] minimum gap after the previous peak
# @param adaptive [Boolean]
# @return [Array<Integer>] frame indices in ascending order
def detect_peaks(envelope, threshold, pre_max:, post_max:, pre_avg:, post_avg:, wait:, adaptive:)
  final_index = envelope.length - 1
  previous_peak = -Float::INFINITY

  (1...final_index).each_with_object([]) do |index, picked|
    floor = threshold
    if adaptive
      neighbourhood = envelope[[index - pre_avg, 0].max..[index + post_avg, final_index].min]
      floor = average(neighbourhood) + threshold
    end
    next unless envelope[index] >= floor

    surroundings = envelope[[index - pre_max, 0].max..[index + post_max, final_index].min]
    next unless envelope[index] >= surroundings.max
    next if index <= previous_peak + wait

    picked << index
    previous_peak = index
  end
end
|
|
@@ -93,5 +117,72 @@ module Muze
|
|
|
93
117
|
end.uniq
|
|
94
118
|
end
|
|
95
119
|
private_class_method :backtrack_onsets
|
|
120
|
+
|
|
121
|
+
# Converts frame indices to the requested unit.
#
# @param peaks [Array<Integer>] frame indices
# @param units [Symbol] :frames (unchanged), :samples (frame * hop_length) or
#   :time (frame * hop_length / sr, as Float)
# @param sr [Integer]
# @param hop_length [Integer]
# @return [Array<Integer, Float>]
# @raise [Muze::ParameterError] for any other unit
def convert_units(peaks, units:, sr:, hop_length:)
  return peaks if units == :frames
  return peaks.map { |frame| frame * hop_length } if units == :samples
  return peaks.map { |frame| frame * hop_length.to_f / sr } if units == :time

  raise Muze::ParameterError, "units must be :frames, :samples, or :time"
end
|
|
133
|
+
private_class_method :convert_units
|
|
134
|
+
|
|
135
|
+
# Replaces every cell with the maximum of its row over the columns within
# max_size / 2 of it, truncating the window at the first and last column.
#
# @param spectrum [Numo::NArray] two-dimensional
# @param max_size [Integer]
# @return [Numo::SFloat] same shape as +spectrum+
def local_max_filter(spectrum, max_size:)
  rows, cols = spectrum.shape
  reach = max_size / 2
  last_col = cols - 1
  filtered = Numo::SFloat.zeros(rows, cols)

  cols.times do |col|
    window = [col - reach, 0].max..[col + reach, last_col].min
    rows.times { |row| filtered[row, col] = spectrum[row, window].max }
  end

  filtered
end
|
|
149
|
+
private_class_method :local_max_filter
|
|
150
|
+
|
|
151
|
+
# Arithmetic mean as a Float (NaN for an empty collection).
def average(values)
  total = values.sum(0.0)
  total / values.length
end
|
|
154
|
+
private_class_method :average
|
|
155
|
+
|
|
156
|
+
# Checks the peak-picking window sizes and the optional delta.
#
# @param pre_max [Integer] non-negative
# @param post_max [Integer] non-negative
# @param pre_avg [Integer] non-negative
# @param post_avg [Integer] non-negative
# @param wait [Integer] non-negative
# @param delta [Numeric, nil] finite and non-negative when given
# @return [nil]
# @raise [Muze::ParameterError] naming the first offending argument
def validate_peak_picker_args!(pre_max:, post_max:, pre_avg:, post_avg:, wait:, delta:)
  window_args = { pre_max:, post_max:, pre_avg:, post_avg:, wait: }
  offender = window_args.find { |_label, value| !value.is_a?(Integer) || value.negative? }
  raise Muze::ParameterError, "#{offender.first} must be a non-negative integer" if offender

  return if delta.nil?
  return if delta.respond_to?(:finite?) && delta.finite? && !delta.negative?

  raise Muze::ParameterError, "delta must be non-negative"
end
|
|
172
|
+
private_class_method :validate_peak_picker_args!
|
|
173
|
+
|
|
174
|
+
# Raises unless +value+ is an Integer greater than zero.
#
# @param value [Object]
# @param label [String] argument name used in the error message
# @return [nil]
# @raise [Muze::ParameterError]
def validate_positive_integer!(value, label)
  acceptable = value.is_a?(Integer) && value.positive?
  raise Muze::ParameterError, "#{label} must be a positive integer" unless acceptable
end
|
|
179
|
+
private_class_method :validate_positive_integer!
|
|
180
|
+
|
|
181
|
+
# Raises when any element does not answer #finite? or is not finite.
#
# @param values [Enumerable]
# @param label [String] argument name used in the error message
# @return [nil]
# @raise [Muze::ParameterError]
def validate_finite_array!(values, label)
  tainted = values.any? { |value| !value.respond_to?(:finite?) || !value.finite? }
  raise Muze::ParameterError, "#{label} must contain only finite numeric values" if tainted
end
|
|
186
|
+
private_class_method :validate_finite_array!
|
|
96
187
|
end
|
|
97
188
|
end
|
data/lib/muze/version.rb
CHANGED