muze 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +4 -0
- data/CHANGELOG.md +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +93 -0
- data/Rakefile +30 -0
- data/benchmarks/baseline.json +24 -0
- data/benchmarks/native_vs_ruby.rb +23 -0
- data/benchmarks/quality_metrics.rb +265 -0
- data/benchmarks/quality_thresholds.md +28 -0
- data/benchmarks/support/fixture_library.rb +107 -0
- data/examples/beat_tracking.rb +26 -0
- data/examples/chroma_svg.rb +33 -0
- data/examples/feature_report.rb +37 -0
- data/examples/hpss_demo.rb +46 -0
- data/examples/load_and_specshow.rb +30 -0
- data/ext/muze/extconf.rb +6 -0
- data/ext/muze/muze_ext.c +75 -0
- data/lib/muze/beat/beat_track.rb +107 -0
- data/lib/muze/core/dct.rb +63 -0
- data/lib/muze/core/resample.rb +122 -0
- data/lib/muze/core/stft.rb +231 -0
- data/lib/muze/core/windows.rb +69 -0
- data/lib/muze/display/specshow.rb +100 -0
- data/lib/muze/effects/harmonic_percussive.rb +62 -0
- data/lib/muze/effects/time_stretch.rb +171 -0
- data/lib/muze/errors.rb +18 -0
- data/lib/muze/feature/chroma.rb +68 -0
- data/lib/muze/feature/mfcc.rb +120 -0
- data/lib/muze/feature/spectral.rb +266 -0
- data/lib/muze/filters/chroma_filter.rb +54 -0
- data/lib/muze/filters/mel.rb +91 -0
- data/lib/muze/io/audio_loader/ffmpeg_backend.rb +127 -0
- data/lib/muze/io/audio_loader/wavify_backend.rb +52 -0
- data/lib/muze/io/audio_loader.rb +117 -0
- data/lib/muze/native.rb +45 -0
- data/lib/muze/onset/onset_detect.rb +97 -0
- data/lib/muze/version.rb +5 -0
- data/lib/muze.rb +251 -0
- metadata +132 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Usage: bundle exec ruby examples/feature_report.rb path/to/audio.wav
|
|
5
|
+
|
|
6
|
+
require_relative "../lib/muze"
|
|
7
|
+
|
|
8
|
+
# Script body: loads the audio file named on the command line, runs the Muze
# feature extractors over it and prints a short text report to stdout.
input_path = ARGV.first
abort("Usage: bundle exec ruby examples/feature_report.rb path/to/audio.wav") if input_path.nil?

# Analysis settings. The same FFT size and hop are used for every feature;
# the time-domain features take the FFT size as their frame length.
target_sr = 22_050
spectral_frame = { n_fft: 1024, hop_length: 256 }
time_frame = { frame_length: spectral_frame[:n_fft], hop_length: spectral_frame[:hop_length] }

y, sr = Muze.load(input_path, sr: target_sr, mono: true)
mfcc = Muze.mfcc(y: y, sr: sr, n_mfcc: 13, **spectral_frame, n_mels: 40)
delta = Muze.delta(mfcc, order: 1, width: 9)
centroid = Muze.spectral_centroid(y: y, sr: sr, **spectral_frame)
bandwidth = Muze.spectral_bandwidth(y: y, sr: sr, **spectral_frame)
rolloff = Muze.spectral_rolloff(y: y, sr: sr, **spectral_frame)
flatness = Muze.spectral_flatness(y: y, **spectral_frame)
zcr = Muze.zero_crossing_rate(y, **time_frame)
rms = Muze.rms(y: y, **time_frame)

puts "Input: #{input_path}"
puts "Sample rate: #{sr} Hz"
puts format("Duration: %.2f s", y.size.to_f / sr)
puts "MFCC shape: #{mfcc.shape.join(' x ')}"
puts "Delta shape: #{delta.shape.join(' x ')}"

# Each remaining line reports the mean of one feature series.
[
  ["Mean spectral centroid: %.2f Hz", centroid],
  ["Mean spectral bandwidth: %.2f Hz", bandwidth],
  ["Mean spectral rolloff: %.2f Hz", rolloff],
  ["Mean spectral flatness: %.4f", flatness],
  ["Mean zero-crossing rate: %.4f", zcr],
  ["Mean RMS: %.4f", rms]
].each do |template, series|
  puts format(template, series.mean.to_f)
end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Usage: bundle exec ruby examples/hpss_demo.rb path/to/audio.wav [output_prefix]
|
|
5
|
+
|
|
6
|
+
require "fileutils"
|
|
7
|
+
require_relative "../lib/muze"
|
|
8
|
+
|
|
9
|
+
# Renders a mel spectrogram of +signal+ to +output_path+.
#
# The spectrogram is computed with a fixed 2048-point FFT and 128 mel bands,
# passed through Muze.power_to_db and handed to Muze.specshow with a mel
# y-axis.
#
# @param signal waveform passed to Muze.melspectrogram as +y+
# @param sr sampling rate forwarded to both Muze calls
# @param hop_length hop size forwarded to both Muze calls
# @param output_path destination passed to Muze.specshow as +output+
# @return whatever Muze.specshow returns
def render_mel(signal, sr:, hop_length:, output_path:)
  mel_power = Muze.melspectrogram(y: signal, sr: sr, n_fft: 2048, hop_length: hop_length, n_mels: 128)
  mel_db = Muze.power_to_db(mel_power)
  Muze.specshow(mel_db, sr: sr, hop_length: hop_length, y_axis: :mel, output: output_path)
end
|
|
13
|
+
|
|
14
|
+
# Script body: splits the input audio into harmonic and percussive parts with
# Muze.hpss, writes a waveform SVG and a mel-spectrogram SVG for each part,
# and prints the mean RMS of both parts plus the written paths.
input_path, prefix_argument = ARGV
abort("Usage: bundle exec ruby examples/hpss_demo.rb path/to/audio.wav [output_prefix]") if input_path.nil?

# Without an explicit prefix, outputs go to examples/output/<input basename>.
sample_name = File.basename(input_path, ".*")
output_prefix = prefix_argument || File.expand_path("output/#{sample_name}", __dir__)
FileUtils.mkdir_p(File.dirname(output_prefix))

target_sr = 22_050
hop_length = 512

y, sr = Muze.load(input_path, sr: target_sr, mono: true)
harmonic, percussive = Muze.hpss(y, kernel_size: 31, n_fft: 2048, hop_length: hop_length)

harmonic_wave_path = "#{output_prefix}_harmonic_wave.svg"
percussive_wave_path = "#{output_prefix}_percussive_wave.svg"
harmonic_mel_path = "#{output_prefix}_harmonic_mel.svg"
percussive_mel_path = "#{output_prefix}_percussive_mel.svg"

# Waveforms first, then mel spectrograms, harmonic before percussive.
[[harmonic, harmonic_wave_path], [percussive, percussive_wave_path]].each do |component, path|
  Muze.waveshow(component, sr: sr, output: path)
end
[[harmonic, harmonic_mel_path], [percussive, percussive_mel_path]].each do |component, path|
  render_mel(component, sr: sr, hop_length: hop_length, output_path: path)
end

harmonic_rms, percussive_rms = [harmonic, percussive].map do |component|
  Muze.rms(y: component, frame_length: 2048, hop_length: hop_length).mean.to_f
end

puts "Input: #{input_path}"
puts format("Harmonic RMS: %.4f", harmonic_rms)
puts format("Percussive RMS: %.4f", percussive_rms)
[harmonic_wave_path, percussive_wave_path, harmonic_mel_path, percussive_mel_path].each do |path|
  puts "Wrote: #{path}"
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Usage: bundle exec ruby examples/load_and_specshow.rb path/to/audio.wav [output.svg]
|
|
5
|
+
|
|
6
|
+
require "fileutils"
|
|
7
|
+
require_relative "../lib/muze"
|
|
8
|
+
|
|
9
|
+
# Script body: loads the input audio, computes a mel spectrogram in dB and
# writes it as an SVG via Muze.specshow, then prints a short summary.
input_path, output_argument = ARGV
abort("Usage: bundle exec ruby examples/load_and_specshow.rb path/to/audio.wav [output.svg]") if input_path.nil?

# Without an explicit output, write examples/output/<basename>_mel_spectrogram.svg.
sample_name = File.basename(input_path, ".*")
output_path = output_argument || File.expand_path("output/#{sample_name}_mel_spectrogram.svg", __dir__)
FileUtils.mkdir_p(File.dirname(output_path))

target_sr = 22_050
hop_length = 512
mel_options = { n_fft: 2048, hop_length: hop_length, n_mels: 128 }

y, sr = Muze.load(input_path, sr: target_sr, mono: true)
mel = Muze.melspectrogram(y: y, sr: sr, **mel_options)
Muze.specshow(Muze.power_to_db(mel), sr: sr, hop_length: hop_length, y_axis: :mel, output: output_path)

puts "Input: #{input_path}"
puts "Sample rate: #{sr} Hz"
puts format("Duration: %.2f s", y.size.to_f / sr)
puts "Mel shape: #{mel.shape.join(' x ')}"
puts "Wrote: #{output_path}"
|
data/ext/muze/extconf.rb
ADDED
data/ext/muze/muze_ext.c
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
#include "ruby.h"
|
|
2
|
+
|
|
3
|
+
static VALUE mMuze;
|
|
4
|
+
static VALUE mNative;
|
|
5
|
+
|
|
6
|
+
/*
 * Builds one frame of `frame_length` entries taken from `rb_signal` starting
 * at `start`. Positions at or beyond `signal_length` are filled with 0.0.
 */
static VALUE build_frame(VALUE rb_signal, long start, long frame_length, long signal_length) {
  VALUE frame = rb_ary_new2(frame_length);

  for (long offset = 0; offset < frame_length; offset++) {
    const long position = start + offset;
    rb_ary_push(frame, position < signal_length ? rb_ary_entry(rb_signal, position) : DBL2NUM(0.0));
  }

  return frame;
}

/*
 * Muze::Native.frame_slices(signal, frame_length, hop_length) -> Array of Arrays
 *
 * Splits `signal` into frames of `frame_length` entries whose starts are
 * `hop_length` apart. A signal no longer than one frame yields a single frame
 * padded with 0.0; otherwise only frames that fit entirely inside the signal
 * are produced, so trailing samples that do not fill a frame are dropped.
 *
 * Raises TypeError when `signal` is not an Array and ArgumentError when
 * `frame_length` or `hop_length` is not positive.
 */
static VALUE native_frame_slices(VALUE self, VALUE rb_signal, VALUE rb_frame_length, VALUE rb_hop_length) {
  Check_Type(rb_signal, T_ARRAY);
  const long signal_length = RARRAY_LEN(rb_signal);
  const long frame_length = NUM2LONG(rb_frame_length);
  const long hop_length = NUM2LONG(rb_hop_length);

  if (frame_length <= 0 || hop_length <= 0) {
    rb_raise(rb_eArgError, "frame_length and hop_length must be positive");
  }

  /* A short signal still produces exactly one (zero-padded) frame. */
  const long frame_count =
    signal_length <= frame_length ? 1 : ((signal_length - frame_length) / hop_length) + 1;
  VALUE frames = rb_ary_new2(frame_count);

  for (long frame_idx = 0; frame_idx < frame_count; frame_idx++) {
    rb_ary_push(frames, build_frame(rb_signal, frame_idx * hop_length, frame_length, signal_length));
  }

  return frames;
}
|
|
43
|
+
|
|
44
|
+
/*
 * qsort comparator for doubles in ascending order.
 * Returns -1, 0 or 1; operands that compare neither less nor greater yield 0.
 */
static int cmp_double(const void *a, const void *b) {
  const double lhs = *(const double *)a;
  const double rhs = *(const double *)b;

  /* Each comparison evaluates to 0 or 1, so the difference is -1, 0 or 1. */
  return (lhs > rhs) - (lhs < rhs);
}
|
|
51
|
+
|
|
52
|
+
/*
 * Muze::Native.median1d(values) -> Float
 *
 * Returns the element at index count / 2 of the sorted values, i.e. the upper
 * middle element for an even count. An empty Array yields 0.0.
 *
 * Raises TypeError when `values` is not an Array or when an element cannot be
 * converted to a double.
 */
static VALUE native_median1d(VALUE self, VALUE rb_values) {
  Check_Type(rb_values, T_ARRAY);
  const long count = RARRAY_LEN(rb_values);
  if (count == 0) return DBL2NUM(0.0);

  /*
   * NUM2DBL raises on non-numeric elements. A buffer from ALLOC_N would leak
   * on that path because xfree would never run; an ALLOCV temporary buffer is
   * reclaimed by the VM when the exception unwinds this frame.
   */
  VALUE buffer_guard;
  double *values = ALLOCV_N(double, buffer_guard, count);

  for (long i = 0; i < count; i++) {
    values[i] = NUM2DBL(rb_ary_entry(rb_values, i));
  }

  qsort(values, (size_t)count, sizeof(double), cmp_double);
  const double median = values[count / 2];
  ALLOCV_END(buffer_guard);
  return DBL2NUM(median);
}
|
|
68
|
+
|
|
69
|
+
void Init_muze_ext(void) {
|
|
70
|
+
mMuze = rb_define_module("Muze");
|
|
71
|
+
mNative = rb_define_module_under(mMuze, "Native");
|
|
72
|
+
|
|
73
|
+
rb_define_singleton_method(mNative, "frame_slices", native_frame_slices, 3);
|
|
74
|
+
rb_define_singleton_method(mNative, "median1d", native_median1d, 1);
|
|
75
|
+
}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Muze
|
|
4
|
+
# Beat tracking functions.
|
|
5
|
+
module Beat
|
|
6
|
+
module_function
|
|
7
|
+
|
|
8
|
+
# Estimates a tempo and beat frames for a signal or a precomputed onset
# envelope. When +onset_envelope+ is given it is used as-is (converted to a
# plain Array); otherwise the envelope comes from Muze::Onset.onset_strength.
#
# @param y [Numo::SFloat, Array<Float>, nil] waveform, used only without +onset_envelope+
# @param sr [Integer] sampling rate
# @param onset_envelope [Numo::SFloat, Array<Float>, nil] precomputed onset envelope
# @param hop_length [Integer] hop size between envelope frames
# @param start_bpm [Float] tempo handed to the tempo estimator as its fallback
# @param tightness [Integer] forwarded to the beat tracker
# @return [Array(Float, Array<Integer>)] estimated tempo and beat frames
def beat_track(y: nil, sr: 22_050, onset_envelope: nil, hop_length: 512, start_bpm: 120.0, tightness: 100)
  envelope =
    if !onset_envelope
      Muze::Onset.onset_strength(y: y, sr: sr, hop_length: hop_length).to_a
    elsif onset_envelope.is_a?(Numo::NArray)
      onset_envelope.to_a
    else
      Array(onset_envelope)
    end

  tempo = estimate_tempo(envelope, sr: sr, hop_length: hop_length, start_bpm: start_bpm)
  [tempo, track_beats(envelope, tempo: tempo, sr: sr, hop_length: hop_length, tightness: tightness)]
end
|
|
26
|
+
|
|
27
|
+
# Convenience wrapper that forwards every argument unchanged to
# Muze::Feature.tempogram.
#
# @param y [Numo::SFloat, Array<Float>, nil]
# @param onset_envelope [Numo::SFloat, Array<Float>, nil]
# @param sr [Integer]
# @param hop_length [Integer]
# @param win_length [Integer]
# @return [Numo::SFloat]
def tempogram(y: nil, onset_envelope: nil, sr: 22_050, hop_length: 512, win_length: 384)
  options = { y: y, onset_envelope: onset_envelope, sr: sr, hop_length: hop_length, win_length: win_length }
  Muze::Feature.tempogram(**options)
end
|
|
36
|
+
|
|
37
|
+
# Estimates a tempo from the autocorrelation of an onset envelope.
#
# Every lag whose period lies between 30 and 240 BPM (and fits inside the
# envelope) is scored with the raw autocorrelation sum; the best-scoring lag
# is converted back to BPM.
#
# +start_bpm+ is returned unchanged whenever no estimate is possible: the
# envelope has fewer than 4 frames, the lag range is empty, or no lag has a
# positive score (for example a silent, all-zero envelope). Without the last
# rule such input would report the tempo of the shortest lag.
#
# @param envelope [Array<Float>] onset strength per frame
# @param sr [Integer] sampling rate
# @param hop_length [Integer] hop size between envelope frames
# @param start_bpm [Float] fallback tempo
# @return [Float] tempo in beats per minute
def estimate_tempo(envelope, sr:, hop_length:, start_bpm:)
  frame_count = envelope.length
  return start_bpm if frame_count < 4

  min_bpm = 30.0
  max_bpm = 240.0
  min_lag = [(sr * 60.0 / (hop_length * max_bpm)).round, 1].max
  max_lag = [(sr * 60.0 / (hop_length * min_bpm)).round, frame_count - 1].min
  return start_bpm if min_lag >= max_lag

  best_lag = nil
  best_score = 0.0 # only lags with a strictly positive correlation qualify

  (min_lag..max_lag).each do |lag|
    score = 0.0
    (lag...frame_count).each { |index| score += envelope[index] * envelope[index - lag] }
    next unless score > best_score

    best_score = score
    best_lag = lag
  end

  return start_bpm unless best_lag

  60.0 * sr / (hop_length * best_lag)
end
|
|
60
|
+
private_class_method :estimate_tempo
|
|
61
|
+
|
|
62
|
+
# Places beats on an onset envelope at roughly one tempo period apart.
#
# The first detected onset peak is the first beat. After that a target frame
# advances by one beat interval at a time; for each target the best peak
# within the search radius is chosen, or the target itself when no peak is in
# range. Duplicate frames are removed from the result.
#
# @param envelope [Array<Float>] onset strength per frame
# @param tempo [Float] tempo in beats per minute
# @param sr [Integer] sampling rate
# @param hop_length [Integer] hop size between envelope frames
# @param tightness [Integer] controls the search radius and distance penalty
# @return [Array<Integer>] beat frames, empty when no onset peak is found
def track_beats(envelope, tempo:, sr:, hop_length:, tightness:)
  interval = [(60.0 * sr / (tempo * hop_length)).round, 1].max
  peaks = Muze::Onset.onset_detect(onset_envelope: envelope, backtrack: false)
  return [] if peaks.empty?

  # The radius depends only on interval and tightness, so compute it once
  # instead of once per peak per target.
  radius = search_radius(interval, tightness)

  beats = [peaks.first]
  target = peaks.first + interval

  while target < envelope.length
    candidates = peaks.select { |peak| (peak - target).abs <= radius }
    beats << select_beat_candidate(candidates, target: target, interval: interval, envelope: envelope,
                                               tightness: tightness)
    target += interval
  end

  beats.uniq
end
|
|
78
|
+
private_class_method :track_beats
|
|
79
|
+
|
|
80
|
+
# Number of frames around a beat target in which onset peaks are considered.
#
# The radius is the beat interval shrunk by up to 40% as the normalized
# tightness goes from 0 to 1, rounded, and never smaller than 1.
#
# @param interval [Integer] beat interval in frames
# @param tightness [Numeric] raw tightness setting
# @return [Integer]
def search_radius(interval, tightness)
  shrink = 0.4 * normalized_tightness(tightness)
  radius = (interval * (1.0 - shrink)).round
  radius < 1 ? 1 : radius
end
|
|
85
|
+
private_class_method :search_radius
|
|
86
|
+
|
|
87
|
+
# Picks the beat frame for one target position.
#
# With no usable candidates the target itself is returned. Otherwise each
# candidate is scored as its envelope strength minus a penalty proportional
# to its distance from the target (in beat intervals); the penalty weight
# grows from 1 to 5 with the normalized tightness. The first candidate with
# the highest score wins.
#
# @param candidates [Array<Integer>] peak frames near the target
# @param target [Integer] expected beat frame
# @param interval [Integer] beat interval in frames
# @param envelope [Array<Float>] onset strength per frame
# @param tightness [Numeric] raw tightness setting
# @return [Integer]
def select_beat_candidate(candidates, target:, interval:, envelope:, tightness:)
  return target if candidates.none?

  distance_weight = 1.0 + (4.0 * normalized_tightness(tightness))
  frames_per_beat = interval.to_f

  score = lambda do |frame|
    onset_strength = envelope[frame] || 0.0 # frames outside the envelope count as silent
    offset_in_beats = (frame - target).abs / frames_per_beat
    onset_strength - (distance_weight * offset_in_beats)
  end

  candidates.max_by(&score)
end
|
|
97
|
+
private_class_method :select_beat_candidate
|
|
98
|
+
|
|
99
|
+
# Maps a raw tightness setting onto the range 0.0..1.0.
#
# Non-positive values give 0.0; positive values scale linearly (100 gives
# 0.25) and saturate at 1.0 from 400 upwards.
#
# @param tightness [Numeric, #to_f]
# @return [Float]
def normalized_tightness(tightness)
  raw = tightness.to_f
  raw <= 0.0 ? 0.0 : [raw / 100.0, 4.0].min / 4.0
end
|
|
105
|
+
private_class_method :normalized_tightness
|
|
106
|
+
end
|
|
107
|
+
end
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Muze
|
|
4
|
+
module Core
|
|
5
|
+
# DCT utilities.
|
|
6
|
+
module DCT
|
|
7
|
+
module_function
|
|
8
|
+
|
|
9
|
+
# Type-II discrete cosine transform along one axis of a 1-D or 2-D array.
#
# The input is cast to single precision. A 1-D input is expanded with
# +expand_dims(1)+ before transforming, so the result is always 2-D. Each
# vector along +axis+ is truncated or zero-padded to +n+ samples before it is
# transformed.
#
# @param x [Numo::NArray] input data
# @param type [Integer] DCT type; only 2 is supported
# @param n [Integer, nil] transform length, defaults to the input length along +axis+
# @param axis [Integer] 0 or 1
# @param norm [Symbol, nil] +:ortho+ for orthonormal scaling, anything else for plain 2x scaling
# @return [Numo::SFloat]
# @raise [Muze::ParameterError] for an unsupported +type+ or +axis+
def dct(x, type: 2, n: nil, axis: 0, norm: :ortho)
  raise Muze::ParameterError, "only DCT type 2 is supported" if type != 2
  raise Muze::ParameterError, "axis must be 0 or 1" unless [0, 1].include?(axis)

  # Always transform down the first axis; axis 1 is handled by transposing
  # on the way in and on the way out.
  transposed = axis == 1
  data = Numo::SFloat.cast(x)
  data = data.expand_dims(1) if data.ndim == 1
  data = data.transpose if transposed

  row_count, col_count = data.shape
  out_length = n || row_count
  output = Numo::SFloat.zeros(out_length, col_count)

  col_count.times do |col|
    coefficients = dct_vector(data[true, col].to_a, out_length, norm: norm)
    coefficients.each_with_index { |value, row| output[row, col] = value }
  end

  transposed ? output.transpose : output
end
|
|
35
|
+
|
|
36
|
+
# Computes the +n+-point type-II DCT of one vector by direct summation.
#
# The input is cut to its first +n+ samples, or zero-padded up to +n+, and
# every coefficient is scaled by +normalize_dct+.
#
# @param signal [Array<Float>] input samples
# @param n [Integer] transform length
# @param norm [Symbol, nil] normalization mode passed to +normalize_dct+
# @return [Array<Float>] +n+ coefficients
def dct_vector(signal, n, norm:)
  samples = signal.length < n ? signal + Array.new(n - signal.length, 0.0) : signal[0, n]

  Array.new(n) do |k|
    # Plain left-to-right accumulation keeps the floating-point result stable.
    acc = 0.0
    samples.each_with_index do |sample, idx|
      acc += sample * Math.cos(Math::PI * (idx + 0.5) * k / n)
    end

    normalize_dct(acc, k, n, norm)
  end
end
|
|
52
|
+
private_class_method :dct_vector
|
|
53
|
+
|
|
54
|
+
# Scales one raw DCT sum.
#
# With +:ortho+ the coefficient is multiplied by sqrt(1/length) for index 0
# and sqrt(2/length) otherwise; any other +norm+ multiplies it by 2.
#
# @param value [Float] raw cosine sum
# @param index [Integer] coefficient index
# @param length [Integer] transform length
# @param norm [Symbol, nil] normalization mode
# @return [Float]
def normalize_dct(value, index, length, norm)
  return value * 2.0 if norm != :ortho

  numerator = index.zero? ? 1.0 : 2.0
  value * Math.sqrt(numerator / length)
end
|
|
60
|
+
private_class_method :normalize_dct
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Muze
|
|
4
|
+
module Core
|
|
5
|
+
# Resampling utilities.
|
|
6
|
+
module Resample
|
|
7
|
+
EPSILON = 1.0e-12
|
|
8
|
+
module_function
|
|
9
|
+
|
|
10
|
+
# Resamples a waveform from +orig_sr+ to +target_sr+.
#
# An empty signal, or equal source and target rates, returns the input cast
# to Numo::SFloat without resampling (and without checking +res_type+).
#
# @param y [Numo::SFloat, Array<Float>] waveform signal
# @param orig_sr [Integer] source sampling rate
# @param target_sr [Integer] destination sampling rate
# @param res_type [Symbol] :linear or :sinc
# @return [Numo::SFloat] resampled waveform
# @raise [Muze::ParameterError] for invalid sample rates or an unsupported +res_type+
def resample(y, orig_sr:, target_sr:, res_type: :sinc)
  validate_sample_rates!(orig_sr, target_sr)

  samples = y.is_a?(Numo::NArray) ? y.to_a : Array(y)
  nothing_to_do = samples.empty? || orig_sr == target_sr
  return Numo::SFloat.cast(samples) if nothing_to_do

  resampler =
    case res_type
    when :linear then :linear_resample
    when :sinc then :sinc_resample
    end
  raise Muze::ParameterError, "Unsupported res_type: #{res_type}" unless resampler

  send(resampler, samples, orig_sr, target_sr)
end
|
|
27
|
+
|
|
28
|
+
# Ensures both sampling rates are positive Integers.
#
# @param orig_sr [Object] source sampling rate to check
# @param target_sr [Object] destination sampling rate to check
# @return [nil]
# @raise [Muze::ParameterError] when either rate is not a positive Integer
def validate_sample_rates!(orig_sr, target_sr)
  rates = [orig_sr, target_sr]
  valid = rates.all? { |rate| rate.is_a?(Integer) } && rates.all?(&:positive?)
  raise Muze::ParameterError, "orig_sr and target_sr must be positive integers" unless valid
end
|
|
33
|
+
private_class_method :validate_sample_rates!
|
|
34
|
+
|
|
35
|
+
# Resamples by linear interpolation between neighbouring input samples.
#
# The output length is the input length scaled by target_sr / orig_sr,
# rounded and at least 1. The first and last output samples line up with the
# first and last input samples. Signals of at most one sample are returned
# unchanged.
#
# @param signal [Array<Float>] input samples
# @param orig_sr [Integer] source sampling rate
# @param target_sr [Integer] destination sampling rate
# @return [Numo::SFloat]
def linear_resample(signal, orig_sr, target_sr)
  input_size = signal.length
  return Numo::SFloat.cast(signal) if input_size <= 1

  output_size = [(input_size * target_sr.to_f / orig_sr).round, 1].max
  return Numo::SFloat.cast(signal[0, output_size]) if output_size <= 1

  last_input = input_size - 1
  step = last_input.to_f / (output_size - 1)

  interpolated = Array.new(output_size) do |out_index|
    position = out_index * step
    lower = position.floor
    upper = [lower + 1, last_input].min # clamp at the final sample
    fraction = position - lower
    ((1.0 - fraction) * signal[lower]) + (fraction * signal[upper])
  end

  Numo::SFloat.cast(interpolated)
end
|
|
55
|
+
private_class_method :linear_resample
|
|
56
|
+
|
|
57
|
+
# Resamples with a Kaiser-windowed sinc kernel.
#
# Each output sample is a weighted average of the input samples from 15
# before to 16 after the floor of the matching source position. Weights are
# sinc(cutoff * distance) shaped by a Kaiser window (beta 8.6) and then
# normalized by their sum. The cutoff is lowered to the rate ratio when
# downsampling.
#
# @param signal [Array<Float>] input samples
# @param orig_sr [Integer] source sampling rate
# @param target_sr [Integer] destination sampling rate
# @return [Numo::SFloat]
def sinc_resample(signal, orig_sr, target_sr)
  ratio = target_sr.to_f / orig_sr
  output_size = [(signal.length * ratio).round, 1].max
  half_width = 16
  kaiser_beta = 8.6
  cutoff = [ratio, 1.0].min
  window_norm = bessel_i0(kaiser_beta)
  last_input = signal.length - 1

  resampled = Array.new(output_size) do |out_index|
    center = out_index / ratio
    base = center.floor
    # Only taps that fall inside the signal contribute.
    first_tap = [base - half_width + 1, 0].max
    last_tap = [base + half_width, last_input].min

    weighted_sum = 0.0
    total_weight = 0.0

    (first_tap..last_tap).each do |tap|
      offset = center - tap
      window_position = offset / half_width.to_f
      next if window_position.abs > 1.0

      taper = bessel_i0(kaiser_beta * Math.sqrt(1.0 - (window_position * window_position))) / window_norm
      weight = cutoff * sinc(cutoff * offset) * taper
      weighted_sum += signal[tap] * weight
      total_weight += weight
    end

    # Guard against a vanishing weight sum before normalizing.
    total_weight.abs > EPSILON ? (weighted_sum / total_weight) : 0.0
  end

  Numo::SFloat.cast(resampled)
end
|
|
93
|
+
private_class_method :sinc_resample
|
|
94
|
+
|
|
95
|
+
# Normalized sinc function, sin(pi * value) / (pi * value).
#
# @param value [Float]
# @return [Float] 1.0 when the magnitude of +value+ is below EPSILON
def sinc(value)
  if value.abs < EPSILON
    1.0
  else
    angle = Math::PI * value
    Math.sin(angle) / angle
  end
end
|
|
101
|
+
private_class_method :sinc
|
|
102
|
+
|
|
103
|
+
# Modified Bessel function of the first kind, order zero, approximated by
# summing its power series until a term drops below 1.0e-12.
#
# @param value [Float]
# @return [Float]
def bessel_i0(value)
  quarter_square = (value / 2.0)**2
  total = 1.0
  term = 1.0

  (1..).each do |k|
    # Each term is the previous one times (value / 2)^2 / k^2.
    term *= quarter_square / (k * k)
    total += term
    break if term < 1.0e-12
  end

  total
end
|
|
119
|
+
private_class_method :bessel_i0
|
|
120
|
+
end
|
|
121
|
+
end
|
|
122
|
+
end
|