muze 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Usage: bundle exec ruby examples/feature_report.rb path/to/audio.wav
5
+
6
+ require_relative "../lib/muze"
7
+
8
# The audio file to analyse is the first (and only required) argument.
input_path = ARGV.first
abort("Usage: bundle exec ruby examples/feature_report.rb path/to/audio.wav") if input_path.nil?

# Framing parameters shared by every frame-based feature below.
n_fft = 1024
hop_length = 256

y, sr = Muze.load(input_path, sr: 22_050, mono: true)

# Keyword bundle reused by the spectral features so they all see the same framing.
spectral_opts = { y:, sr:, n_fft:, hop_length: }

mfcc = Muze.mfcc(**spectral_opts, n_mfcc: 13, n_mels: 40)
delta = Muze.delta(mfcc, order: 1, width: 9)
centroid = Muze.spectral_centroid(**spectral_opts)
bandwidth = Muze.spectral_bandwidth(**spectral_opts)
rolloff = Muze.spectral_rolloff(**spectral_opts)
flatness = Muze.spectral_flatness(y:, n_fft:, hop_length:)
zcr = Muze.zero_crossing_rate(y, frame_length: n_fft, hop_length:)
rms = Muze.rms(y:, frame_length: n_fft, hop_length:)

puts "Input: #{input_path}"
puts "Sample rate: #{sr} Hz"
puts format("Duration: %.2f s", y.size.to_f / sr)

# Shapes of the matrix-valued features.
{ "MFCC" => mfcc, "Delta" => delta }.each do |label, matrix|
  puts "#{label} shape: #{matrix.shape.join(' x ')}"
end

# One summary line per feature: the mean over all of its values.
[
  ["Mean spectral centroid: %.2f Hz", centroid],
  ["Mean spectral bandwidth: %.2f Hz", bandwidth],
  ["Mean spectral rolloff: %.2f Hz", rolloff],
  ["Mean spectral flatness: %.4f", flatness],
  ["Mean zero-crossing rate: %.4f", zcr],
  ["Mean RMS: %.4f", rms]
].each do |template, values|
  puts format(template, values.mean.to_f)
end
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Usage: bundle exec ruby examples/hpss_demo.rb path/to/audio.wav [output_prefix]
5
+
6
+ require "fileutils"
7
+ require_relative "../lib/muze"
8
+
9
# Writes a mel-spectrogram plot of +signal+ to +output_path+.
#
# The spectrogram is computed with a fixed 2048-point FFT and 128 mel bands,
# passed through Muze.power_to_db, and handed to Muze.specshow with a mel y-axis.
#
# @param signal [Object] waveform accepted by Muze.melspectrogram
# @param sr [Integer] sampling rate of +signal+
# @param hop_length [Integer] hop between analysis frames
# @param output_path [String] destination of the rendered plot
# @return [Object] whatever Muze.specshow returns
def render_mel(signal, sr:, hop_length:, output_path:)
  mel_power = Muze.melspectrogram(y: signal, sr: sr, n_fft: 2048, hop_length: hop_length, n_mels: 128)
  mel_db = Muze.power_to_db(mel_power)
  Muze.specshow(mel_db, sr: sr, hop_length: hop_length, y_axis: :mel, output: output_path)
end
13
+
14
# First argument: input audio file (required). Second: output prefix (optional).
input_path = ARGV.first
abort("Usage: bundle exec ruby examples/hpss_demo.rb path/to/audio.wav [output_prefix]") if input_path.nil?

# Default prefix lives in an "output" directory next to this script.
default_prefix = File.expand_path("output/#{File.basename(input_path, ".*")}", __dir__)
output_prefix = ARGV[1] || default_prefix
FileUtils.mkdir_p(File.dirname(output_prefix))

hop_length = 512

y, sr = Muze.load(input_path, sr: 22_050, mono: true)
harmonic, percussive = Muze.hpss(y, kernel_size: 31, n_fft: 2048, hop_length:)

# Both separated components go through the same rendering and reporting steps.
components = { harmonic:, percussive: }
wave_paths = components.to_h { |name, _signal| [name, "#{output_prefix}_#{name}_wave.svg"] }
mel_paths = components.to_h { |name, _signal| [name, "#{output_prefix}_#{name}_mel.svg"] }

components.each { |name, signal| Muze.waveshow(signal, sr:, output: wave_paths[name]) }
components.each { |name, signal| render_mel(signal, sr:, hop_length:, output_path: mel_paths[name]) }

# Mean RMS of each component, using the same frame size as the separation.
rms_means = components.transform_values do |signal|
  Muze.rms(y: signal, frame_length: 2048, hop_length:).mean.to_f
end

puts "Input: #{input_path}"
puts format("Harmonic RMS: %.4f", rms_means[:harmonic])
puts format("Percussive RMS: %.4f", rms_means[:percussive])
(wave_paths.values + mel_paths.values).each { |path| puts "Wrote: #{path}" }
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Usage: bundle exec ruby examples/load_and_specshow.rb path/to/audio.wav [output.svg]
5
+
6
+ require "fileutils"
7
+ require_relative "../lib/muze"
8
+
9
# First argument: input audio file (required). Second: output SVG path (optional).
input_path = ARGV.first
abort("Usage: bundle exec ruby examples/load_and_specshow.rb path/to/audio.wav [output.svg]") if input_path.nil?

# Default output lives in an "output" directory next to this script.
default_output = File.expand_path("output/#{File.basename(input_path, ".*")}_mel_spectrogram.svg", __dir__)
output_path = ARGV[1] || default_output
FileUtils.mkdir_p(File.dirname(output_path))

# Mel-spectrogram settings; hop_length is reused when plotting.
mel_opts = { n_fft: 2048, hop_length: 512, n_mels: 128 }

y, sr = Muze.load(input_path, sr: 22_050, mono: true)
mel = Muze.melspectrogram(y:, sr:, **mel_opts)
Muze.specshow(
  Muze.power_to_db(mel),
  sr:,
  hop_length: mel_opts[:hop_length],
  y_axis: :mel,
  output: output_path
)

puts "Input: #{input_path}"
puts "Sample rate: #{sr} Hz"
puts format("Duration: %.2f s", y.size.to_f / sr)
puts "Mel shape: #{mel.shape.join(' x ')}"
puts "Wrote: #{output_path}"
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "mkmf"
4
+
5
# Generate the Makefile for the native extension, using "muze/muze_ext" as the
# extension target name.
create_makefile("muze/muze_ext")
@@ -0,0 +1,75 @@
1
+ #include "ruby.h"
2
+
3
+ static VALUE mMuze;
4
+ static VALUE mNative;
5
+
6
+ static VALUE native_frame_slices(VALUE self, VALUE rb_signal, VALUE rb_frame_length, VALUE rb_hop_length) {
7
+ Check_Type(rb_signal, T_ARRAY);
8
+ const long signal_length = RARRAY_LEN(rb_signal);
9
+ const long frame_length = NUM2LONG(rb_frame_length);
10
+ const long hop_length = NUM2LONG(rb_hop_length);
11
+
12
+ if (frame_length <= 0 || hop_length <= 0) {
13
+ rb_raise(rb_eArgError, "frame_length and hop_length must be positive");
14
+ }
15
+
16
+ if (signal_length <= frame_length) {
17
+ VALUE frame = rb_ary_new2(frame_length);
18
+ for (long i = 0; i < frame_length; i++) {
19
+ VALUE sample = i < signal_length ? rb_ary_entry(rb_signal, i) : DBL2NUM(0.0);
20
+ rb_ary_push(frame, sample);
21
+ }
22
+ VALUE single = rb_ary_new();
23
+ rb_ary_push(single, frame);
24
+ return single;
25
+ }
26
+
27
+ const long frame_count = ((signal_length - frame_length) / hop_length) + 1;
28
+ VALUE frames = rb_ary_new2(frame_count);
29
+
30
+ for (long frame_idx = 0; frame_idx < frame_count; frame_idx++) {
31
+ long start = frame_idx * hop_length;
32
+ VALUE frame = rb_ary_new2(frame_length);
33
+
34
+ for (long i = 0; i < frame_length; i++) {
35
+ rb_ary_push(frame, rb_ary_entry(rb_signal, start + i));
36
+ }
37
+
38
+ rb_ary_push(frames, frame);
39
+ }
40
+
41
+ return frames;
42
+ }
43
+
44
/*
 * qsort-compatible comparator for doubles: returns -1, 0 or 1 when the first
 * value is respectively less than, equal to, or greater than the second.
 * Operands that compare neither less nor greater (e.g. NaN) yield 0.
 */
static int cmp_double(const void *a, const void *b) {
  const double lhs = *(const double *)a;
  const double rhs = *(const double *)b;
  return (lhs > rhs) - (lhs < rhs);
}
51
+
52
+ static VALUE native_median1d(VALUE self, VALUE rb_values) {
53
+ Check_Type(rb_values, T_ARRAY);
54
+ const long count = RARRAY_LEN(rb_values);
55
+ if (count == 0) return DBL2NUM(0.0);
56
+
57
+ double *values = ALLOC_N(double, count);
58
+
59
+ for (long i = 0; i < count; i++) {
60
+ values[i] = NUM2DBL(rb_ary_entry(rb_values, i));
61
+ }
62
+
63
+ qsort(values, count, sizeof(double), cmp_double);
64
+ const double median = values[count / 2];
65
+ xfree(values);
66
+ return DBL2NUM(median);
67
+ }
68
+
69
+ void Init_muze_ext(void) {
70
+ mMuze = rb_define_module("Muze");
71
+ mNative = rb_define_module_under(mMuze, "Native");
72
+
73
+ rb_define_singleton_method(mNative, "frame_slices", native_frame_slices, 3);
74
+ rb_define_singleton_method(mNative, "median1d", native_median1d, 1);
75
+ }
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
+
3
module Muze
  # Beat tracking functions.
  module Beat
    module_function

    # Estimates a tempo from an onset envelope and picks beat frames at roughly
    # that spacing.
    #
    # When +onset_envelope+ is given it is used directly; otherwise the envelope
    # is computed from +y+ via Muze::Onset.onset_strength.
    #
    # @param y [Numo::SFloat, Array<Float>, nil] waveform, used only when no envelope is given
    # @param sr [Integer] sampling rate
    # @param onset_envelope [Numo::SFloat, Array<Float>, nil] precomputed onset envelope
    # @param hop_length [Integer] hop between envelope frames, in samples
    # @param start_bpm [Float] tempo returned when the envelope is too short to analyse
    # @param tightness [Integer] how strongly beats are held to the tempo grid
    # @return [Array(Float, Array<Integer>)] estimated tempo and beat frames
    def beat_track(y: nil, sr: 22_050, onset_envelope: nil, hop_length: 512, start_bpm: 120.0, tightness: 100)
      envelope = if onset_envelope
                   onset_envelope.is_a?(Numo::NArray) ? onset_envelope.to_a : Array(onset_envelope)
                 else
                   Muze::Onset.onset_strength(y:, sr:, hop_length:).to_a
                 end

      tempo = estimate_tempo(envelope, sr:, hop_length:, start_bpm:)
      beats = track_beats(envelope, tempo:, sr:, hop_length:, tightness:)
      [tempo, beats]
    end

    # Thin wrapper that forwards all arguments to Muze::Feature.tempogram.
    #
    # @param y [Numo::SFloat, Array<Float>, nil]
    # @param onset_envelope [Numo::SFloat, Array<Float>, nil]
    # @param sr [Integer]
    # @param hop_length [Integer]
    # @param win_length [Integer]
    # @return [Numo::SFloat]
    def tempogram(y: nil, onset_envelope: nil, sr: 22_050, hop_length: 512, win_length: 384)
      Muze::Feature.tempogram(y:, onset_envelope:, sr:, hop_length:, win_length:)
    end

    # Picks the tempo whose lag maximises the (unnormalised) autocorrelation of
    # the envelope, searching lags that correspond to 30-240 BPM.
    #
    # Falls back to +start_bpm+ when the envelope has fewer than 4 frames or the
    # lag search range is empty.
    #
    # @param envelope [Array<Float>]
    # @return [Float] tempo in beats per minute
    def estimate_tempo(envelope, sr:, hop_length:, start_bpm:)
      return start_bpm if envelope.length < 4

      min_bpm = 30.0
      max_bpm = 240.0
      min_lag = [(sr * 60.0 / (hop_length * max_bpm)).round, 1].max
      max_lag = [(sr * 60.0 / (hop_length * min_bpm)).round, envelope.length - 1].min
      return start_bpm if min_lag >= max_lag

      best_lag = min_lag
      best_score = -Float::INFINITY

      (min_lag..max_lag).each do |lag|
        score = 0.0
        (lag...envelope.length).each { |index| score += envelope[index] * envelope[index - lag] }
        # Strict comparison: on ties the smallest lag (fastest tempo) wins.
        next unless score > best_score

        best_score = score
        best_lag = lag
      end

      60.0 * sr / (hop_length * best_lag)
    end
    private_class_method :estimate_tempo

    # Walks a fixed grid of beat targets, starting at the first detected onset
    # peak and advancing by one beat interval, and snaps each target to the
    # best nearby peak (or keeps the target itself when no peak is in range).
    #
    # @param envelope [Array<Float>]
    # @return [Array<Integer>] beat frames with duplicates removed
    def track_beats(envelope, tempo:, sr:, hop_length:, tightness:)
      interval = [(60.0 * sr / (tempo * hop_length)).round, 1].max
      peaks = Muze::Onset.onset_detect(onset_envelope: envelope, backtrack: false)
      return [] if peaks.empty?

      # The radius depends only on interval and tightness, so compute it once
      # instead of once per peak per target.
      radius = search_radius(interval, tightness)

      beats = [peaks.first]
      target = peaks.first + interval

      while target < envelope.length
        candidates = peaks.select { |peak| (peak - target).abs <= radius }
        beats << select_beat_candidate(candidates, target:, interval:, envelope:, tightness:)
        target += interval
      end

      beats.uniq
    end
    private_class_method :track_beats

    # Half-width, in frames, of the window searched around each beat target.
    # Shrinks from a full interval (tightness <= 0) to 60% of it (tightness >= 400),
    # and is never smaller than 1.
    def search_radius(interval, tightness)
      normalized = normalized_tightness(tightness)
      radius_scale = 1.0 - (0.4 * normalized)
      [(interval * radius_scale).round, 1].max
    end
    private_class_method :search_radius

    # Chooses the candidate peak with the best trade-off between envelope
    # strength and distance from +target+ (distance is measured in beat
    # intervals and weighted by 1.0-5.0 depending on tightness).
    #
    # @return [Integer] the chosen frame, or +target+ when there are no candidates
    def select_beat_candidate(candidates, target:, interval:, envelope:, tightness:)
      return target unless candidates.any?

      penalty_weight = 1.0 + (4.0 * normalized_tightness(tightness))
      candidates.max_by do |candidate|
        strength = envelope[candidate] || 0.0
        normalized_distance = (candidate - target).abs / interval.to_f
        strength - (penalty_weight * normalized_distance)
      end
    end
    private_class_method :select_beat_candidate

    # Maps +tightness+ onto 0.0..1.0: non-positive values give 0.0, and
    # tightness / 100 is capped at 4 before being divided by 4 (so 400 and
    # above give 1.0).
    def normalized_tightness(tightness)
      value = tightness.to_f
      return 0.0 if value <= 0.0

      [value / 100.0, 4.0].min / 4.0
    end
    private_class_method :normalized_tightness
  end
end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
module Muze
  module Core
    # Discrete cosine transform helpers.
    module DCT
      module_function

      # Computes a type-II DCT along one axis of a 1-D or 2-D input.
      #
      # @param x [Numo::NArray] input data; 1-D input is treated as a single column
      # @param type [Integer] DCT variant; only 2 is accepted
      # @param n [Integer, nil] output length along the transformed axis (the input
      #   is truncated or zero-padded to this length); defaults to the input length
      # @param axis [Integer] axis to transform, 0 or 1
      # @param norm [Symbol, nil] :ortho for orthonormal scaling; any other value
      #   multiplies each raw coefficient by 2
      # @return [Numo::SFloat]
      # @raise [Muze::ParameterError] when type is not 2 or axis is not 0 or 1
      def dct(x, type: 2, n: nil, axis: 0, norm: :ortho)
        raise Muze::ParameterError, "only DCT type 2 is supported" unless type == 2
        raise Muze::ParameterError, "axis must be 0 or 1" unless [0, 1].include?(axis)

        # Normalise the layout so the transform always runs down the rows.
        columns = Numo::SFloat.cast(x)
        columns = columns.expand_dims(1) if columns.ndim == 1
        columns = columns.transpose if axis == 1

        input_length, column_count = columns.shape
        output_length = n || input_length
        coefficients = Numo::SFloat.zeros(output_length, column_count)

        column_count.times do |col|
          column_values = columns[true, col].to_a
          dct_vector(column_values, output_length, norm:).each_with_index do |value, row|
            coefficients[row, col] = value
          end
        end

        return coefficients.transpose if axis == 1

        coefficients
      end

      # Direct (O(n^2)) DCT-II of a plain Ruby array, after truncating or
      # zero-padding it to exactly +n+ samples.
      #
      # @param signal [Array<Float>]
      # @param n [Integer] number of input samples used and coefficients returned
      # @param norm [Symbol, nil] forwarded to the normalisation step
      # @return [Array<Float>]
      def dct_vector(signal, n, norm:)
        missing = n - signal.length
        samples = missing.positive? ? signal + Array.new(missing, 0.0) : signal[0, n]

        (0...n).map do |k|
          # Plain left-to-right accumulation starting from 0.0.
          raw = (0...n).reduce(0.0) do |acc, idx|
            acc + (samples[idx] * Math.cos(Math::PI * (idx + 0.5) * k / n))
          end

          normalize_dct(raw, k, n, norm)
        end
      end
      private_class_method :dct_vector

      # Scales one raw coefficient: orthonormal scaling for :ortho
      # (sqrt(1/length) for index 0, sqrt(2/length) otherwise), a factor of 2
      # for any other +norm+.
      def normalize_dct(value, index, length, norm)
        if norm == :ortho
          numerator = index.zero? ? 1.0 : 2.0
          value * Math.sqrt(numerator / length)
        else
          value * 2.0
        end
      end
      private_class_method :normalize_dct
    end
  end
end
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+
3
module Muze
  module Core
    # Sample-rate conversion helpers.
    module Resample
      # Threshold below which a magnitude is treated as zero.
      EPSILON = 1.0e-12
      module_function

      # Converts a waveform from one sampling rate to another.
      #
      # The input is returned (cast to SFloat) unchanged when it is empty or the
      # two rates are equal; in that case +res_type+ is not inspected.
      #
      # @param y [Numo::SFloat, Array<Float>] waveform signal
      # @param orig_sr [Integer] source sampling rate
      # @param target_sr [Integer] destination sampling rate
      # @param res_type [Symbol] :linear or :sinc
      # @return [Numo::SFloat] resampled waveform
      # @raise [Muze::ParameterError] for non-positive or non-integer rates, or an
      #   unknown +res_type+
      def resample(y, orig_sr:, target_sr:, res_type: :sinc)
        validate_sample_rates!(orig_sr, target_sr)
        samples = y.is_a?(Numo::NArray) ? y.to_a : Array(y)
        return Numo::SFloat.cast(samples) if samples.empty? || orig_sr == target_sr

        if res_type == :linear
          linear_resample(samples, orig_sr, target_sr)
        elsif res_type == :sinc
          sinc_resample(samples, orig_sr, target_sr)
        else
          raise Muze::ParameterError, "Unsupported res_type: #{res_type}"
        end
      end

      # Raises unless both rates are positive Integers.
      def validate_sample_rates!(orig_sr, target_sr)
        rates_ok = [orig_sr, target_sr].all? { |rate| rate.is_a?(Integer) && rate.positive? }
        raise Muze::ParameterError, "orig_sr and target_sr must be positive integers" unless rates_ok
      end
      private_class_method :validate_sample_rates!

      # Linear interpolation that maps the first and last source samples onto
      # the first and last output samples.
      def linear_resample(signal, orig_sr, target_sr)
        source_length = signal.length
        return Numo::SFloat.cast(signal) if source_length <= 1

        target_length = [(source_length * target_sr.to_f / orig_sr).round, 1].max
        return Numo::SFloat.cast(signal[0, target_length]) if target_length <= 1

        last_index = source_length - 1
        step = last_index.to_f / (target_length - 1)

        interpolated = Array.new(target_length) do |out_index|
          position = out_index * step
          lower = position.floor
          upper = lower + 1 > last_index ? last_index : lower + 1
          fraction = position - lower
          ((1.0 - fraction) * signal[lower]) + (fraction * signal[upper])
        end

        Numo::SFloat.cast(interpolated)
      end
      private_class_method :linear_resample

      # Windowed-sinc interpolation. Each output sample is a weighted average of
      # up to 32 neighbouring source samples (16 on each side), weighted by a
      # sinc kernel tapered with a Kaiser window (beta 8.6). The weights are
      # renormalised to sum to one, so samples missing at the signal edges do
      # not change the gain.
      def sinc_resample(signal, orig_sr, target_sr)
        ratio = target_sr.to_f / orig_sr
        target_length = [(signal.length * ratio).round, 1].max
        taps = 16
        beta = 8.6
        # When downsampling, lower the kernel cutoff in proportion to the ratio.
        cutoff = [ratio, 1.0].min
        window_norm = bessel_i0(beta)
        final_index = signal.length - 1

        resampled = Array.new(target_length) do |out_index|
          position = out_index / ratio
          base = position.floor
          # Clip the kernel support to indices that exist in the signal.
          first = [base - taps + 1, 0].max
          last = [base + taps, final_index].min

          weighted_total = 0.0
          weight_total = 0.0

          (first..last).each do |src_index|
            offset = position - src_index
            relative = offset / taps.to_f
            next if relative.abs > 1.0

            taper = bessel_i0(beta * Math.sqrt(1.0 - (relative * relative))) / window_norm
            weight = cutoff * sinc(cutoff * offset) * taper
            weighted_total += signal[src_index] * weight
            weight_total += weight
          end

          weight_total.abs > EPSILON ? (weighted_total / weight_total) : 0.0
        end

        Numo::SFloat.cast(resampled)
      end
      private_class_method :sinc_resample

      # Normalised sinc, sin(pi * v) / (pi * v), with the removable singularity
      # at zero handled explicitly.
      def sinc(value)
        return 1.0 if value.abs < EPSILON

        angle = Math::PI * value
        Math.sin(angle) / angle
      end
      private_class_method :sinc

      # Approximation of modified Bessel function I0 via its power series,
      # stopping once a term drops below 1.0e-12.
      def bessel_i0(value)
        half_squared = (value / 2.0)**2
        total = 1.0
        term = 1.0

        (1..).each do |k|
          term *= half_squared / (k * k)
          total += term
          break if term < 1.0e-12
        end

        total
      end
      private_class_method :bessel_i0
    end
  end
end