muze 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -1
  3. data/README.md +5 -0
  4. data/Rakefile +3 -0
  5. data/ext/muze/muze_ext.c +129 -12
  6. data/lib/muze/beat/beat_track.rb +93 -11
  7. data/lib/muze/core/audio.rb +129 -0
  8. data/lib/muze/core/cache.rb +38 -0
  9. data/lib/muze/core/dct.rb +24 -21
  10. data/lib/muze/core/frames.rb +31 -0
  11. data/lib/muze/core/matrix.rb +23 -0
  12. data/lib/muze/core/resample.rb +111 -19
  13. data/lib/muze/core/stft.rb +312 -52
  14. data/lib/muze/core/windows.rb +113 -17
  15. data/lib/muze/display/specshow.rb +307 -41
  16. data/lib/muze/effects/harmonic_percussive.rb +83 -18
  17. data/lib/muze/effects/streaming.rb +101 -0
  18. data/lib/muze/effects/time_stretch.rb +353 -36
  19. data/lib/muze/feature/aggregation.rb +49 -0
  20. data/lib/muze/feature/chroma.rb +43 -15
  21. data/lib/muze/feature/context.rb +81 -0
  22. data/lib/muze/feature/mfcc.rb +78 -38
  23. data/lib/muze/feature/spectral.rb +258 -39
  24. data/lib/muze/filters/chroma_filter.rb +21 -2
  25. data/lib/muze/filters/mel.rb +47 -1
  26. data/lib/muze/io/audio_loader/ffmpeg_backend.rb +179 -15
  27. data/lib/muze/io/audio_loader/wavify_backend.rb +118 -11
  28. data/lib/muze/io/audio_loader.rb +178 -48
  29. data/lib/muze/io/audio_writer.rb +48 -0
  30. data/lib/muze/native.rb +91 -8
  31. data/lib/muze/onset/onset_detect.rb +114 -23
  32. data/lib/muze/version.rb +1 -1
  33. data/lib/muze.rb +237 -60
  34. metadata +11 -21
  35. data/benchmarks/baseline.json +0 -24
  36. data/benchmarks/native_vs_ruby.rb +0 -23
  37. data/benchmarks/quality_metrics.rb +0 -265
  38. data/benchmarks/quality_thresholds.md +0 -28
  39. data/benchmarks/support/fixture_library.rb +0 -107
@@ -4,6 +4,7 @@ module Muze
4
4
  # Filterbank generation utilities.
5
5
  module Filters
6
6
  module_function
7
+ MEL_CACHE = Muze::Core::BoundedCache.new(max_size: 64)
7
8
 
8
9
  # @param sr [Integer]
9
10
  # @param n_fft [Integer]
@@ -11,13 +12,39 @@ module Muze
11
12
  # @param fmin [Float]
12
13
  # @param fmax [Float, nil]
13
14
  # @param htk [Boolean]
15
+ # @param norm [Symbol, nil]
14
16
  # @return [Numo::SFloat] shape: [n_mels, 1 + n_fft/2]
15
- def mel(sr: 22_050, n_fft: 2048, n_mels: 128, fmin: 0.0, fmax: nil, htk: false)
17
def mel(sr: 22_050, n_fft: 2048, n_mels: 128, fmin: 0.0, fmax: nil, htk: false, norm: nil)
  # Every argument takes part in the cache key, so two calls share an entry
  # only when all parameters are equal.
  # NOTE(review): assumes MEL_CACHE.fetch runs the block on a miss and stores
  # the result -- confirm against Muze::Core::BoundedCache.
  cache_key = [sr, n_fft, n_mels, fmin, fmax, htk, norm]
  filterbank = MEL_CACHE.fetch(cache_key) do
    build_mel(sr: sr, n_fft: n_fft, n_mels: n_mels, fmin: fmin, fmax: fmax, htk: htk, norm: norm)
  end

  # Return a copy rather than the object held by the cache.
  filterbank.dup
end
21
+
22
# Computes `n_mels` frequencies spaced evenly on the mel scale between
# `fmin` and `fmax`.
#
# @param n_mels [Integer] number of frequencies to generate; must be positive
# @param fmin [Float] lowest frequency in Hz
# @param fmax [Float] highest frequency in Hz
# @param htk [Boolean] forwarded to `hz_to_mel` / `mel_to_hz`
# @return [Numo::SFloat] the generated frequencies, converted back to Hz
# @raise [Muze::ParameterError] when the bounds are invalid or n_mels is not positive
def mel_frequencies(n_mels:, fmin:, fmax:, htk: false)
  # No sample rate is available here, so 2 * fmax stands in for `sr`; that
  # makes the Nyquist check trivially true and leaves the fmin/fmax checks.
  validate_frequency_bounds!(sr: fmax * 2.0, fmin: fmin, fmax: fmax)
  raise Muze::ParameterError, "n_mels must be positive" unless n_mels.positive?

  lowest = hz_to_mel(fmin, htk: htk)
  highest = hz_to_mel(fmax, htk: htk)
  # With a single band the divisor would be zero, so it is clamped to 1.
  steps = [n_mels - 1, 1].max.to_f
  hz_values = (0...n_mels).map do |position|
    mel_to_hz(lowest + ((highest - lowest) * position / steps), htk: htk)
  end

  Numo::SFloat.cast(hz_values)
end
39
+
40
+ def build_mel(sr:, n_fft:, n_mels:, fmin:, fmax:, htk:, norm:)
16
41
  raise Muze::ParameterError, "sr must be positive" unless sr.positive?
17
42
  raise Muze::ParameterError, "n_fft must be positive" unless n_fft.positive?
18
43
  raise Muze::ParameterError, "n_mels must be positive" unless n_mels.positive?
44
+ raise Muze::ParameterError, "norm must be nil or :slaney" unless norm.nil? || norm == :slaney
19
45
 
20
46
  fmax ||= sr / 2.0
47
+ validate_frequency_bounds!(sr:, fmin:, fmax:)
21
48
  mel_min = hz_to_mel(fmin, htk:)
22
49
  mel_max = hz_to_mel(fmax, htk:)
23
50
 
@@ -47,10 +74,29 @@ module Muze
47
74
 
48
75
  matrix[mel_index, bin] = (right - bin).to_f / (right - center)
49
76
  end
77
+
78
+ if norm == :slaney
79
+ enorm = 2.0 / (hz_points[mel_index + 2] - hz_points[mel_index])
80
+ matrix[mel_index, true] = matrix[mel_index, true] * enorm
81
+ end
82
+ end
83
+
84
+ empty_filters = n_mels.times.select { |mel_index| matrix[mel_index, true].sum <= 0.0 }
85
+ unless empty_filters.empty?
86
+ raise Muze::ParameterError, "mel filterbank contains empty filters: #{empty_filters.join(', ')}"
50
87
  end
51
88
 
52
89
  matrix
53
90
  end
91
+ private_class_method :build_mel
92
+
93
# Checks that a frequency range is usable for the given sample rate.
# The checks run in a fixed order and the first failing one is reported.
#
# @param sr [Numeric] sample rate; fmax may not exceed half of it
# @param fmin [Numeric] lower bound, must be >= 0 and below fmax
# @param fmax [Numeric] upper bound, must be positive
# @return [nil]
# @raise [Muze::ParameterError] describing the first violated constraint
def validate_frequency_bounds!(sr:, fmin:, fmax:)
  problem =
    if fmin.negative?
      "fmin must be >= 0"
    elsif !fmax.positive?
      "fmax must be positive"
    elsif !(fmin < fmax)
      "fmin must be less than fmax"
    elsif !(fmax <= (sr / 2.0)) # sr is only touched once the earlier checks pass
      "fmax must be <= sr / 2"
    end

  raise Muze::ParameterError, problem if problem
end
99
+ private_class_method :validate_frequency_bounds!
54
100
 
55
101
  # @param hz [Float]
56
102
  # @param htk [Boolean]
@@ -2,6 +2,7 @@
2
2
 
3
3
  require "json"
4
4
  require "open3"
5
+ require "timeout"
5
6
 
6
7
  module Muze
7
8
  module IO
@@ -11,6 +12,7 @@ module Muze
11
12
  module_function
12
13
 
13
14
  SUPPORTED_EXTENSIONS = %w[.flac .mp3 .ogg].freeze
15
+ DEFAULT_TIMEOUT_SECONDS = 30
14
16
  INSTALLATION_STEPS = [
15
17
  "Install ffmpeg and ensure both `ffmpeg` and `ffprobe` are available on PATH.",
16
18
  "macOS: `brew install ffmpeg`.",
@@ -25,7 +27,8 @@ module Muze
25
27
 
26
28
  # @return [Boolean]
27
29
  def available?
28
- command_available?("ffmpeg") && command_available?("ffprobe")
30
+ @available = command_available?("ffmpeg") && command_available?("ffprobe") if @available.nil?
31
+ @available
29
32
  end
30
33
 
31
34
  # @param extension [String]
@@ -35,23 +38,58 @@ module Muze
35
38
  "Unable to load #{format} because the FFmpeg backend is unavailable. #{INSTALLATION_STEPS.join(' ')}"
36
39
  end
37
40
 
41
# Always true for this backend.
# NOTE(review): presumably tells the loader that `offset:` / `duration:` are
# already applied by `read`, so no further trimming is needed -- confirm
# against the caller.
#
# @return [Boolean]
def applies_time_window? = true
45
+
38
46
  # @param path [String]
47
+ # @param offset [Float]
48
+ # @param duration [Float, nil]
39
49
  # @return [Array(Array<Float>, Integer, Integer)]
40
- def read(path)
50
+ def read(path, offset: 0.0, duration: nil)
41
51
  raise Muze::DependencyError, installation_message(File.extname(path).downcase) unless available?
42
52
 
43
53
  source_sr, channels = probe_stream(path)
44
- [decode_samples(path, channels), source_sr, channels]
54
+ [decode_samples(path, channels, offset:, duration:), source_sr, channels]
55
+ end
56
+
57
# Decodes an audio file through ffmpeg and yields it chunk by chunk.
#
# @param path [String]
# @param chunk_frames [Integer] requested chunk size in frames
# @param offset [Float] start position forwarded to the decoder
# @param duration [Float, nil] length forwarded to the decoder; nil means no limit
# @yieldparam samples [Array<Float>, Array<Array<Float>>]
# @yieldparam sample_rate [Integer]
# @yieldparam channels [Integer]
# @return [void]
# @raise [Muze::DependencyError] when the FFmpeg backend is unavailable
def read_stream(path, chunk_frames:, offset: 0.0, duration: nil)
  unless available?
    extension = File.extname(path).downcase
    raise Muze::DependencyError, installation_message(extension)
  end

  sample_rate, channel_count = probe_stream(path)
  stream_decoded_samples(
    path,
    source_sr: sample_rate,
    channels: channel_count,
    chunk_frames: chunk_frames,
    offset: offset,
    duration: duration
  ) do |chunk|
    yield chunk, sample_rate, channel_count
  end
end
69
+
70
# Collects stream metadata for an audio file via ffprobe.
#
# @param path [String]
# @return [Hash] with :sample_rate, :channels, :duration and :format, where
#   :format is the file extension without the leading dot (case preserved)
# @raise [Muze::DependencyError] when the FFmpeg backend is unavailable
def info(path)
  raise Muze::DependencyError, installation_message(File.extname(path).downcase) unless available?

  sample_rate, channels, duration = probe_stream(path, include_duration: true)
  format = File.extname(path).delete_prefix(".")
  { sample_rate:, channels:, duration:, format: }
end
46
83
 
47
84
  # @param path [String]
48
85
  # @return [Array] `[sample_rate, channels]`, followed by the duration (Float or nil) when `include_duration` is true
49
- def probe_stream(path)
50
- stdout, stderr, status = Open3.capture3(
86
+ def probe_stream(path, include_duration: false)
87
+ stream_entries = include_duration ? "stream=sample_rate,channels,duration:format=duration" : "stream=sample_rate,channels"
88
+ stdout, stderr, status = capture_with_timeout(
51
89
  "ffprobe",
52
90
  "-v", "error",
53
91
  "-select_streams", "a:0",
54
- "-show_entries", "stream=sample_rate,channels",
92
+ "-show_entries", stream_entries,
55
93
  "-of", "json",
56
94
  path
57
95
  )
@@ -60,14 +98,16 @@ module Muze
60
98
  raise Muze::AudioLoadError, "ffprobe failed for #{path}: #{stderr.strip}"
61
99
  end
62
100
 
63
- parse_probe_output(stdout, path)
101
+ parse_probe_output(stdout, path, stderr:, include_duration:)
64
102
  end
65
103
  private_class_method :probe_stream
66
104
 
67
105
  # @param raw_output [String]
68
106
  # @param path [String]
69
- # @return [Array(Integer, Integer)]
70
- def parse_probe_output(raw_output, path)
107
+ # @param stderr [String]
108
+ # @param include_duration [Boolean]
109
+ # @return [Array(Integer, Integer, Float)]
110
+ def parse_probe_output(raw_output, path, stderr:, include_duration:)
71
111
  data = JSON.parse(raw_output)
72
112
  stream = data.fetch("streams", []).first
73
113
  raise Muze::AudioLoadError, "No audio stream found in #{path}" unless stream
@@ -79,20 +119,33 @@ module Muze
79
119
  raise Muze::AudioLoadError, "Invalid stream metadata for #{path}: sample_rate=#{source_sr}, channels=#{channels}"
80
120
  end
81
121
 
82
- [source_sr, channels]
122
+ return [source_sr, channels] unless include_duration
123
+
124
+ duration = stream["duration"] || data.fetch("format", {})["duration"]
125
+ [source_sr, channels, duration ? duration.to_f : nil]
83
126
  rescue JSON::ParserError, KeyError, TypeError, ArgumentError => e
84
- raise Muze::AudioLoadError, "Failed to parse ffprobe output for #{path}: #{e.message}"
127
+ stderr_detail = stderr.to_s.strip
128
+ suffix = stderr_detail.empty? ? "" : " stderr: #{stderr_detail}"
129
+ raise Muze::AudioLoadError, "Failed to parse ffprobe output for #{path}: #{e.message}#{suffix}"
85
130
  end
86
131
  private_class_method :parse_probe_output
87
132
 
88
133
  # @param path [String]
89
134
  # @param channels [Integer]
135
+ # @param offset [Float]
136
+ # @param duration [Float, nil]
90
137
  # @return [Array<Float>, Array<Array<Float>>]
91
- def decode_samples(path, channels)
92
- raw_samples, stderr, status = Open3.capture3(
138
+ def decode_samples(path, channels, offset:, duration:)
139
+ floats, stderr, status = stream_float32le_with_timeout(
93
140
  "ffmpeg",
94
141
  "-v", "error",
142
+ "-nostdin",
143
+ *seek_args(offset),
95
144
  "-i", path,
145
+ "-map", "0:a:0",
146
+ "-vn",
147
+ "-sn",
148
+ *duration_args(duration),
96
149
  "-f", "f32le",
97
150
  "-acodec", "pcm_f32le",
98
151
  "pipe:1"
@@ -102,7 +155,6 @@ module Muze
102
155
  raise Muze::AudioLoadError, "ffmpeg failed for #{path}: #{stderr.strip}"
103
156
  end
104
157
 
105
- floats = raw_samples.unpack("e*")
106
158
  return floats if channels == 1
107
159
 
108
160
  unless (floats.length % channels).zero?
@@ -113,12 +165,124 @@ module Muze
113
165
  end
114
166
  private_class_method :decode_samples
115
167
 
168
# Decodes `path` with ffmpeg and yields the audio in chunks of whole frames.
#
# Floats arrive from ffmpeg in arbitrary batch sizes; they are buffered and
# re-cut so that every yielded chunk except possibly the last holds exactly
# `chunk_frames` frames.
#
# @param path [String]
# @param source_sr [Integer] accepted for interface compatibility; not used here
# @param channels [Integer] channel count of the stream
# @param chunk_frames [Integer] frames per chunk; values below 1 are treated as 1
# @param offset [Float] start position passed to ffmpeg via `-ss`
# @param duration [Float, nil] length passed to ffmpeg via `-t`
# @yieldparam samples [Array<Float>, Array<Array<Float>>]
# @return [void]
# @raise [Muze::AudioLoadError] when ffmpeg fails or emits an incomplete frame
def stream_decoded_samples(path, source_sr:, channels:, chunk_frames:, offset:, duration:)
  # Clamp the frame count, not the sample count, so the chunk size stays a
  # multiple of `channels` even for chunk_frames <= 0. The outer clamp only
  # guards against a zero chunk size.
  samples_per_chunk = [[chunk_frames, 1].max * channels, 1].max
  pending = []

  _, stderr, status = stream_float32le_with_timeout(
    "ffmpeg",
    "-v", "error",
    "-nostdin",
    *seek_args(offset),
    "-i", path,
    "-map", "0:a:0",
    "-vn",
    "-sn",
    *duration_args(duration),
    "-f", "f32le",
    "-acodec", "pcm_f32le",
    "pipe:1"
  ) do |floats|
    pending.concat(floats)
    while pending.length >= samples_per_chunk
      yield samples_from_floats(pending.shift(samples_per_chunk), channels)
    end
  end

  raise Muze::AudioLoadError, "ffmpeg failed for #{path}: #{stderr.strip}" unless status.success?

  unless (pending.length % channels).zero?
    raise Muze::AudioLoadError, "Decoded samples are not divisible by channels (#{pending.length} / #{channels})"
  end

  # Flush the final, shorter chunk.
  yield samples_from_floats(pending, channels) unless pending.empty?
end
199
+ private_class_method :stream_decoded_samples
200
+
201
# Shapes a flat, interleaved float array into per-frame arrays.
#
# @param floats [Array<Float>] interleaved samples
# @param channels [Integer]
# @return [Array<Float>, Array<Array<Float>>] the input itself for mono,
#   otherwise one array of `channels` values per frame
def samples_from_floats(floats, channels)
  if channels == 1
    floats
  else
    floats.each_slice(channels).to_a
  end
end
206
+ private_class_method :samples_from_floats
207
+
208
# Builds the ffmpeg `-ss` arguments for a start offset.
#
# @param offset [Numeric]
# @return [Array<String>] empty unless the offset is positive
def seek_args(offset)
  return [] unless offset.positive?

  ["-ss", offset.to_s]
end
211
+ private_class_method :seek_args
212
+
213
# Builds the ffmpeg `-t` arguments for a duration limit.
#
# @param duration [Numeric, nil]
# @return [Array<String>] empty when no duration was given
def duration_args(duration)
  return [] unless duration

  ["-t", duration.to_s]
end
216
+ private_class_method :duration_args
217
+
218
# Runs a command and captures its output, killing it if it exceeds the
# timeout.
#
# Wrapping `Open3.capture3` in `Timeout.timeout` does not stop the child:
# capture3 joins the child's wait thread in an `ensure`, so a hung process
# keeps the caller blocked after the timeout fires. Here the wait is bounded
# with `Thread#join(limit)` and the child is killed before raising.
#
# @param command [Array<String>] program and arguments
# @param timeout [Numeric] seconds to wait before the child is killed
# @return [Array(String, String, Process::Status)] stdout, stderr, exit status
# @raise [Muze::AudioLoadError] when the command does not finish in time
def capture_with_timeout(*command, timeout: DEFAULT_TIMEOUT_SECONDS)
  Open3.popen3(*command) do |stdin, stdout, stderr, wait_thread|
    stdin.close
    # Drain both pipes concurrently so the child cannot block on a full pipe.
    readers = [stdout, stderr].map do |io|
      Thread.new do
        # A reader may be interrupted by the pipe being closed on timeout.
        Thread.current.report_on_exception = false
        io.read
      end
    end

    unless wait_thread.join(timeout)
      begin
        Process.kill("KILL", wait_thread.pid)
      rescue Errno::ESRCH
        nil # the child exited between the timeout and the kill
      end
      raise Muze::AudioLoadError, "#{command.first} timed out after #{timeout}s"
    end

    [readers[0].value, readers[1].value, wait_thread.value]
  end
end
223
+ private_class_method :capture_with_timeout
224
+
225
# Runs a command whose stdout is raw little-endian float32 data and decodes
# it incrementally.
#
# With a block, each decoded batch is passed to the block (on the reader
# thread) and the first return element is nil; without one, all floats are
# collected and returned.
#
# The deadline is enforced inside the `popen3` block. Rescuing
# `Timeout::Error` outside it is too late: popen3's `ensure` has by then
# already waited for the child to exit, so a hung process is never killed.
# The deadline covers the whole run, including time spent in the block.
#
# @param command [Array<String>] program and arguments
# @param timeout [Numeric] seconds allowed for the whole run
# @yieldparam floats [Array<Float>] one decoded batch
# @return [Array(Array<Float>|nil, String, Process::Status)] floats, stderr, status
# @raise [Muze::AudioLoadError] on timeout or when the output ends mid-sample
def stream_float32le_with_timeout(*command, timeout: DEFAULT_TIMEOUT_SECONDS, &consumer)
  floats = consumer ? nil : []
  stderr_data = +""
  status = nil
  reader_error = nil
  leftover = +"".b

  Open3.popen3(*command) do |stdin, stdout, stderr, process_wait|
    stdin.close
    stdout.binmode

    reader = Thread.new do
      until stdout.eof?
        # A read may end mid-sample; carry the remainder into the next pass.
        data = leftover + stdout.readpartial(16 * 1024)
        whole_bytes = data.bytesize - (data.bytesize % 4)
        decoded = whole_bytes.positive? ? data.byteslice(0, whole_bytes).unpack("e*") : []
        consumer ? consumer.call(decoded) : floats.concat(decoded)
        leftover = data.byteslice(whole_bytes, data.bytesize - whole_bytes) || +"".b
      end
    rescue EOFError
      nil
    rescue StandardError => e
      # Hand the failure to the calling thread instead of dying silently.
      reader_error = e
    end

    stderr_reader = Thread.new do
      stderr_data << stderr.read.to_s
    rescue IOError
      nil # the pipe was closed while unwinding after a timeout
    end

    deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout
    time_left = -> { [deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC), 0].max }

    if reader.join(time_left.call) && reader_error
      # Decoding or the consumer failed: stop the child rather than leave it
      # blocked writing to a pipe nobody reads any more.
      terminate_process(process_wait)
      raise reader_error
    end

    unless [reader, stderr_reader, process_wait].all? { |thread| thread.join(time_left.call) }
      terminate_process(process_wait)
      raise Muze::AudioLoadError, "#{command.first} timed out after #{timeout}s"
    end

    raise Muze::AudioLoadError, "ffmpeg emitted a partial f32 sample" unless leftover.empty?

    status = process_wait.value
  end

  [floats, stderr_data, status]
end
265
+ private_class_method :stream_float32le_with_timeout
266
+
267
# Stops a child process: sends TERM, waits up to 0.2s, then sends KILL if
# the wait thread is still alive. A process that is already gone is ignored.
#
# @param wait_thread [Process::Waiter, nil] wait thread of the child
# @return [Integer, nil]
def terminate_process(wait_thread)
  pid = wait_thread&.pid
  return unless pid

  begin
    Process.kill("TERM", pid)
    wait_thread.join(0.2)
    Process.kill("KILL", pid) if wait_thread.alive?
  rescue Errno::ESRCH
    nil
  end
end
276
+ private_class_method :terminate_process
277
+
116
278
  # @param command [String]
117
279
  # @return [Boolean]
118
280
  def command_available?(command)
119
- system(command, "-version", out: File::NULL, err: File::NULL)
281
+ Timeout.timeout(5) { system(command, "-version", out: File::NULL, err: File::NULL) }
120
282
  rescue Errno::ENOENT
121
283
  false
284
+ rescue Timeout::Error
285
+ false
122
286
  end
123
287
  private_class_method :command_available?
124
288
  end
@@ -27,25 +27,132 @@ module Muze
27
27
  true
28
28
  end
29
29
 
30
# Always true for this backend.
# NOTE(review): presumably signals that `read` already honours `offset:` and
# `duration:` -- confirm against the caller.
#
# @return [Boolean]
def applies_time_window? = true
34
+
35
# Builds the error text shown when the WAV backend cannot be used.
#
# @param extension [String] file extension, with or without a leading dot
# @return [String]
def installation_message(extension)
  format_name = extension.delete_prefix(".")
  "Unable to load #{format_name} because the WAV backend is unavailable."
end
40
+
30
41
  # @param path [String]
42
+ # @param offset [Float]
43
+ # @param duration [Float, nil]
31
44
  # @return [Array(Array<Float>, Integer, Integer)]
32
- def read(path)
33
- buffer = Wavify::Codecs::Wav.read(path)
34
- float_format = buffer.format.with(sample_format: :float, bit_depth: 32)
35
- converted = buffer.convert(float_format)
45
+ def read(path, offset: 0.0, duration: nil)
46
+ metadata = Wavify::Codecs::Wav.metadata(path)
47
+ source_format = metadata.fetch(:format)
48
+ float_format = source_format.with(sample_format: :float, bit_depth: 32)
49
+ samples = stream_samples(path, float_format:, offset:, duration:)
50
+
51
+ [samples_from_interleaved(samples, float_format.channels), float_format.sample_rate, float_format.channels]
52
+ end
53
+
54
# Streams a time window of a WAV file as 32-bit float chunks.
#
# @param path [String]
# @param chunk_frames [Integer] requested chunk size in frames
# @param offset [Float] window start, forwarded to `stream_sample_chunks`
# @param duration [Float, nil] window length, forwarded to `stream_sample_chunks`
# @yieldparam samples [Array<Float>, Array<Array<Float>>]
# @yieldparam sample_rate [Integer]
# @yieldparam channels [Integer]
# @return [void]
def read_stream(path, chunk_frames:, offset: 0.0, duration: nil)
  target_format = Wavify::Codecs::Wav.metadata(path).fetch(:format).with(sample_format: :float, bit_depth: 32)

  stream_sample_chunks(
    path,
    float_format: target_format,
    offset: offset,
    duration: duration,
    chunk_frames: chunk_frames
  ) do |interleaved|
    frames = samples_from_interleaved(interleaved, target_format.channels)
    yield frames, target_format.sample_rate, target_format.channels
  end
end
67
+
68
# Reads WAV metadata without decoding any samples.
#
# @param path [String]
# @return [Hash] with :sample_rate, :channels, :duration (taken from the
#   metadata duration's `total_seconds`) and :format
def info(path)
  metadata = Wavify::Codecs::Wav.metadata(path)
  wav_format = metadata.fetch(:format)
  seconds = metadata.fetch(:duration).total_seconds

  {
    sample_rate: wav_format.sample_rate,
    channels: wav_format.channels,
    duration: seconds,
    format: format_label(path)
  }
end
80
+
81
# Collects the requested time window of a WAV file into one interleaved
# array, reading the file chunk by chunk and stopping once the window ends.
#
# @param path [String]
# @param float_format [Object] target format; chunks in another format are converted to it
# @param offset [Float] window start in seconds
# @param duration [Float, nil] window length in seconds; nil reads to the end
# @return [Array<Float>] interleaved samples
def stream_samples(path, float_format:, offset:, duration:)
  first_frame = (offset * float_format.sample_rate).floor
  stop_frame = nil
  stop_frame = first_frame + (duration * float_format.sample_rate).floor if duration
  collected = []
  cursor = 0 # index of the first frame of the next chunk

  Wavify::Codecs::Wav.stream_read(path) do |chunk|
    buffer = chunk.format == float_format ? chunk : chunk.convert(float_format)
    chunk_first = cursor
    cursor += buffer.sample_frame_count

    # Intersect the chunk's frame range with the requested window.
    from = [first_frame, chunk_first].max
    upto = stop_frame ? [stop_frame, cursor].min : cursor
    collected.concat(buffer.slice(from - chunk_first, upto - from).samples) if from < upto

    break if stop_frame && cursor >= stop_frame
  end

  collected
end
108
+ private_class_method :stream_samples
36
109
 
37
- samples = samples_from_buffer(converted)
38
- [samples, converted.format.sample_rate, converted.format.channels]
110
# Streams the requested time window of a WAV file as interleaved chunks of
# whole frames.
#
# Source chunks are intersected with the window, buffered, and re-cut so
# that every yielded chunk except possibly the last holds exactly
# `chunk_frames` frames.
#
# @param path [String]
# @param float_format [Object] target format; chunks in another format are converted to it
# @param offset [Float] window start in seconds
# @param duration [Float, nil] window length in seconds; nil reads to the end
# @param chunk_frames [Integer] frames per chunk; values below 1 are treated as 1
# @yieldparam samples [Array<Float>] interleaved samples of one chunk
# @return [void]
def stream_sample_chunks(path, float_format:, offset:, duration:, chunk_frames:)
  start_frame = (offset * float_format.sample_rate).floor
  end_frame = duration ? start_frame + (duration * float_format.sample_rate).floor : nil
  current_frame = 0
  # Clamp the frame count, not the sample count, so the chunk size stays a
  # multiple of the channel count even for chunk_frames <= 0. The outer
  # clamp only guards against a zero chunk size.
  chunk_samples = [[chunk_frames, 1].max * float_format.channels, 1].max
  pending = []

  Wavify::Codecs::Wav.stream_read(path) do |chunk|
    converted = chunk.format == float_format ? chunk : chunk.convert(float_format)
    chunk_start = current_frame
    chunk_end = current_frame + converted.sample_frame_count
    copy_start = [start_frame, chunk_start].max
    copy_end = end_frame ? [end_frame, chunk_end].min : chunk_end

    if copy_start < copy_end
      pending.concat(converted.slice(copy_start - chunk_start, copy_end - copy_start).samples)
      yield pending.shift(chunk_samples) while pending.length >= chunk_samples
    end

    current_frame = chunk_end
    # Nothing after the window is needed, so stop reading the file.
    break if end_frame && current_frame >= end_frame
  end

  # Flush the final, shorter chunk.
  yield pending unless pending.empty?
end
140
+ private_class_method :stream_sample_chunks
40
141
 
41
- # @param buffer [Wavify::Core::SampleBuffer]
42
142
  # @return [Array<Float>, Array<Array<Float>>]
43
- def samples_from_buffer(buffer)
44
- return buffer.samples if buffer.format.channels == 1
143
+ def samples_from_interleaved(samples, channels)
144
+ return samples if channels == 1
145
+
146
+ samples.each_slice(channels).map(&:dup)
147
+ end
148
+ private_class_method :samples_from_interleaved
149
+
150
# Derives the format label reported by `info`.
#
# @param source [String, Object] a file path, or any other kind of source
# @return [String] the path's extension without the dot (case preserved)
#   for String sources, "wav" for anything else
def format_label(source)
  case source
  when String then File.extname(source).delete_prefix(".")
  else "wav"
  end
end
48
- private_class_method :samples_from_buffer
155
+ private_class_method :format_label
49
156
  end
50
157
  end
51
158
  end