muze 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -1
- data/README.md +5 -0
- data/Rakefile +3 -0
- data/ext/muze/muze_ext.c +129 -12
- data/lib/muze/beat/beat_track.rb +93 -11
- data/lib/muze/core/audio.rb +129 -0
- data/lib/muze/core/cache.rb +38 -0
- data/lib/muze/core/dct.rb +24 -21
- data/lib/muze/core/frames.rb +31 -0
- data/lib/muze/core/matrix.rb +23 -0
- data/lib/muze/core/resample.rb +111 -19
- data/lib/muze/core/stft.rb +312 -52
- data/lib/muze/core/windows.rb +113 -17
- data/lib/muze/display/specshow.rb +307 -41
- data/lib/muze/effects/harmonic_percussive.rb +83 -18
- data/lib/muze/effects/streaming.rb +101 -0
- data/lib/muze/effects/time_stretch.rb +353 -36
- data/lib/muze/feature/aggregation.rb +49 -0
- data/lib/muze/feature/chroma.rb +43 -15
- data/lib/muze/feature/context.rb +81 -0
- data/lib/muze/feature/mfcc.rb +78 -38
- data/lib/muze/feature/spectral.rb +258 -39
- data/lib/muze/filters/chroma_filter.rb +21 -2
- data/lib/muze/filters/mel.rb +47 -1
- data/lib/muze/io/audio_loader/ffmpeg_backend.rb +179 -15
- data/lib/muze/io/audio_loader/wavify_backend.rb +118 -11
- data/lib/muze/io/audio_loader.rb +178 -48
- data/lib/muze/io/audio_writer.rb +48 -0
- data/lib/muze/native.rb +91 -8
- data/lib/muze/onset/onset_detect.rb +114 -23
- data/lib/muze/version.rb +1 -1
- data/lib/muze.rb +237 -60
- metadata +11 -21
- data/benchmarks/baseline.json +0 -24
- data/benchmarks/native_vs_ruby.rb +0 -23
- data/benchmarks/quality_metrics.rb +0 -265
- data/benchmarks/quality_thresholds.md +0 -28
- data/benchmarks/support/fixture_library.rb +0 -107
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Muze
|
|
4
|
+
module Core
|
|
5
|
+
# Small dense matrix helpers used by feature extractors.
|
|
6
|
+
module Matrix
|
|
7
|
+
module_function
|
|
8
|
+
|
|
9
|
+
def multiply(left, right)
|
|
10
|
+
left_matrix = Numo::SFloat.cast(left)
|
|
11
|
+
right_matrix = Numo::SFloat.cast(right)
|
|
12
|
+
left_matrix = left_matrix.expand_dims(1) if left_matrix.ndim == 1
|
|
13
|
+
right_matrix = right_matrix.expand_dims(1) if right_matrix.ndim == 1
|
|
14
|
+
|
|
15
|
+
_, left_cols = left_matrix.shape
|
|
16
|
+
right_rows, = right_matrix.shape
|
|
17
|
+
raise Muze::ParameterError, "Matrix dimensions do not align" unless left_cols == right_rows
|
|
18
|
+
|
|
19
|
+
left_matrix.dot(right_matrix).cast_to(Numo::SFloat)
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
data/lib/muze/core/resample.rb
CHANGED
|
@@ -10,16 +10,32 @@ module Muze
|
|
|
10
10
|
# @param y [Numo::SFloat, Array<Float>] waveform signal
|
|
11
11
|
# @param orig_sr [Integer] source sampling rate
|
|
12
12
|
# @param target_sr [Integer] destination sampling rate
|
|
13
|
-
# @param res_type [Symbol] :linear or :
|
|
13
|
+
# @param res_type [Symbol] :nearest, :linear, :sinc, or :polyphase
|
|
14
|
+
# @param target_length [Integer, nil]
|
|
15
|
+
# @param taps [Integer]
|
|
16
|
+
# @param beta [Float]
|
|
17
|
+
# @param cutoff [Float, nil]
|
|
14
18
|
# @return [Numo::SFloat] resampled waveform
|
|
15
|
-
def resample(y, orig_sr:, target_sr:, res_type: :sinc)
|
|
19
|
+
def resample(y, orig_sr:, target_sr:, res_type: :sinc, target_length: nil, taps: 16, beta: 8.6, cutoff: nil)
|
|
16
20
|
validate_sample_rates!(orig_sr, target_sr)
|
|
17
|
-
|
|
18
|
-
|
|
21
|
+
validate_resample_options!(target_length:, taps:, beta:, cutoff:)
|
|
22
|
+
|
|
23
|
+
signal = Muze::Core::Audio.validate_audio!(y, allow_empty: true)
|
|
24
|
+
return signal if signal.empty?
|
|
25
|
+
|
|
26
|
+
if signal.ndim == 2
|
|
27
|
+
return resample_channels(signal, orig_sr:, target_sr:, res_type:, target_length:, taps:, beta:, cutoff:)
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
source = signal.to_a
|
|
31
|
+
return adjust_length(source, target_length) if orig_sr == target_sr && target_length
|
|
32
|
+
return signal if orig_sr == target_sr
|
|
19
33
|
|
|
20
34
|
case res_type
|
|
21
|
-
when :
|
|
22
|
-
when :
|
|
35
|
+
when :nearest then nearest_resample(source, orig_sr, target_sr, target_length:)
|
|
36
|
+
when :linear then linear_resample(source, orig_sr, target_sr, target_length:)
|
|
37
|
+
when :sinc then sinc_resample(source, orig_sr, target_sr, target_length:, taps:, beta:, cutoff:)
|
|
38
|
+
when :polyphase then polyphase_resample(source, orig_sr, target_sr, target_length:, taps:, beta:, cutoff:)
|
|
23
39
|
else
|
|
24
40
|
raise Muze::ParameterError, "Unsupported res_type: #{res_type}"
|
|
25
41
|
end
|
|
@@ -32,17 +48,73 @@ module Muze
|
|
|
32
48
|
end
|
|
33
49
|
private_class_method :validate_sample_rates!
|
|
34
50
|
|
|
35
|
-
def
|
|
51
|
+
def validate_resample_options!(target_length:, taps:, beta:, cutoff:)
|
|
52
|
+
raise Muze::ParameterError, "target_length must be positive" if target_length && (!target_length.is_a?(Integer) || target_length <= 0)
|
|
53
|
+
raise Muze::ParameterError, "taps must be positive" unless taps.is_a?(Integer) && taps.positive?
|
|
54
|
+
raise Muze::ParameterError, "beta must be finite and non-negative" unless beta.respond_to?(:finite?) && beta.finite? && !beta.negative?
|
|
55
|
+
return if cutoff.nil? || (cutoff.respond_to?(:finite?) && cutoff.finite? && cutoff.positive? && cutoff <= 1.0)
|
|
56
|
+
|
|
57
|
+
raise Muze::ParameterError, "cutoff must be > 0 and <= 1"
|
|
58
|
+
end
|
|
59
|
+
private_class_method :validate_resample_options!
|
|
60
|
+
|
|
61
|
+
def resample_channels(signal, orig_sr:, target_sr:, res_type:, target_length:, taps:, beta:, cutoff:)
|
|
62
|
+
frames, channels = signal.shape
|
|
63
|
+
return signal if frames.zero? || channels.zero?
|
|
64
|
+
|
|
65
|
+
resampled_channels = channels.times.map do |channel_index|
|
|
66
|
+
resample(
|
|
67
|
+
signal[true, channel_index],
|
|
68
|
+
orig_sr:,
|
|
69
|
+
target_sr:,
|
|
70
|
+
res_type:,
|
|
71
|
+
target_length:,
|
|
72
|
+
taps:,
|
|
73
|
+
beta:,
|
|
74
|
+
cutoff:
|
|
75
|
+
).to_a
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
output_length = resampled_channels.first.length
|
|
79
|
+
output = Numo::SFloat.zeros(output_length, channels)
|
|
80
|
+
channels.times do |channel_index|
|
|
81
|
+
output[true, channel_index] = Numo::SFloat.cast(resampled_channels[channel_index])
|
|
82
|
+
end
|
|
83
|
+
output
|
|
84
|
+
end
|
|
85
|
+
private_class_method :resample_channels
|
|
86
|
+
|
|
87
|
+
def output_length(source_length, orig_sr, target_sr, target_length:)
|
|
88
|
+
target_length || [(source_length * target_sr.to_f / orig_sr).round, 1].max
|
|
89
|
+
end
|
|
90
|
+
private_class_method :output_length
|
|
91
|
+
|
|
92
|
+
def nearest_resample(signal, orig_sr, target_sr, target_length:)
|
|
36
93
|
source_length = signal.length
|
|
37
94
|
return Numo::SFloat.cast(signal) if source_length <= 1
|
|
38
95
|
|
|
39
|
-
|
|
40
|
-
|
|
96
|
+
target_size = output_length(source_length, orig_sr, target_sr, target_length:)
|
|
97
|
+
scale = source_length.to_f / target_size
|
|
98
|
+
output = Array.new(target_size) do |index|
|
|
99
|
+
source_index = [(index * scale).round, source_length - 1].min
|
|
100
|
+
signal[source_index]
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
Numo::SFloat.cast(output)
|
|
104
|
+
end
|
|
105
|
+
private_class_method :nearest_resample
|
|
106
|
+
|
|
107
|
+
def linear_resample(signal, orig_sr, target_sr, target_length:)
|
|
108
|
+
source_length = signal.length
|
|
109
|
+
return Numo::SFloat.cast(signal) if source_length <= 1
|
|
41
110
|
|
|
42
|
-
|
|
43
|
-
|
|
111
|
+
target_size = output_length(source_length, orig_sr, target_sr, target_length:)
|
|
112
|
+
return Numo::SFloat.cast(signal[0, target_size]) if target_size <= 1
|
|
44
113
|
|
|
45
|
-
|
|
114
|
+
scale = (source_length - 1).to_f / (target_size - 1)
|
|
115
|
+
output = Array.new(target_size, 0.0)
|
|
116
|
+
|
|
117
|
+
target_size.times do |index|
|
|
46
118
|
source_position = index * scale
|
|
47
119
|
left = source_position.floor
|
|
48
120
|
right = [left + 1, source_length - 1].min
|
|
@@ -54,17 +126,15 @@ module Muze
|
|
|
54
126
|
end
|
|
55
127
|
private_class_method :linear_resample
|
|
56
128
|
|
|
57
|
-
def sinc_resample(signal, orig_sr, target_sr)
|
|
129
|
+
def sinc_resample(signal, orig_sr, target_sr, target_length:, taps:, beta:, cutoff:)
|
|
58
130
|
ratio = target_sr.to_f / orig_sr
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
beta = 8.6
|
|
62
|
-
cutoff = [ratio, 1.0].min
|
|
131
|
+
target_size = output_length(signal.length, orig_sr, target_sr, target_length:)
|
|
132
|
+
cutoff ||= [ratio, 1.0].min
|
|
63
133
|
|
|
64
134
|
i0_beta = bessel_i0(beta)
|
|
65
|
-
output = Array.new(
|
|
135
|
+
output = Array.new(target_size, 0.0)
|
|
66
136
|
|
|
67
|
-
|
|
137
|
+
target_size.times do |index|
|
|
68
138
|
source_position = index / ratio
|
|
69
139
|
left = source_position.floor - taps + 1
|
|
70
140
|
right = source_position.floor + taps
|
|
@@ -92,6 +162,28 @@ module Muze
|
|
|
92
162
|
end
|
|
93
163
|
private_class_method :sinc_resample
|
|
94
164
|
|
|
165
|
+
def polyphase_resample(signal, orig_sr, target_sr, target_length:, taps:, beta:, cutoff:)
|
|
166
|
+
divisor = orig_sr.gcd(target_sr)
|
|
167
|
+
up = target_sr / divisor
|
|
168
|
+
down = orig_sr / divisor
|
|
169
|
+
return sinc_resample(signal, orig_sr, target_sr, target_length:, taps:, beta:, cutoff:) if up > 32 || down > 32
|
|
170
|
+
|
|
171
|
+
expanded = Array.new(signal.length * up, 0.0)
|
|
172
|
+
signal.each_with_index { |sample, index| expanded[index * up] = sample }
|
|
173
|
+
filtered = sinc_resample(expanded, orig_sr * up, orig_sr * up, target_length: expanded.length, taps:, beta:, cutoff: cutoff || (1.0 / [up, down].max))
|
|
174
|
+
decimated = filtered.to_a.each_slice(down).map(&:first)
|
|
175
|
+
adjust_length(decimated, target_length || output_length(signal.length, orig_sr, target_sr, target_length:))
|
|
176
|
+
end
|
|
177
|
+
private_class_method :polyphase_resample
|
|
178
|
+
|
|
179
|
+
def adjust_length(signal, target_length)
|
|
180
|
+
return Numo::SFloat.cast(signal) unless target_length
|
|
181
|
+
return Numo::SFloat.cast(signal[0, target_length]) if signal.length >= target_length
|
|
182
|
+
|
|
183
|
+
Numo::SFloat.cast(signal + Array.new(target_length - signal.length, 0.0))
|
|
184
|
+
end
|
|
185
|
+
private_class_method :adjust_length
|
|
186
|
+
|
|
95
187
|
def sinc(value)
|
|
96
188
|
return 1.0 if value.abs < EPSILON
|
|
97
189
|
|