sonus 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +314 -0
- data/Rakefile +8 -0
- data/benchmark/extract_all_features.rb +17 -0
- data/examples/analyze_wav.rb +25 -0
- data/examples/example_helper.rb +84 -0
- data/examples/extract_basic.rb +23 -0
- data/examples/extract_spectral_flux.rb +37 -0
- data/examples/read_wav.rb +19 -0
- data/lib/sonus/analyzer.rb +109 -0
- data/lib/sonus/dsp/fft.rb +55 -0
- data/lib/sonus/dsp/fft_backend/fftw.rb +103 -0
- data/lib/sonus/dsp/fft_backend/ruby.rb +80 -0
- data/lib/sonus/dsp/mel_filter_bank.rb +98 -0
- data/lib/sonus/dsp/windowing.rb +47 -0
- data/lib/sonus/error.rb +9 -0
- data/lib/sonus/features/amplitude_spectrum.rb +14 -0
- data/lib/sonus/features/base.rb +27 -0
- data/lib/sonus/features/buffer.rb +13 -0
- data/lib/sonus/features/chroma.rb +35 -0
- data/lib/sonus/features/complex_spectrum.rb +13 -0
- data/lib/sonus/features/energy.rb +13 -0
- data/lib/sonus/features/loudness.rb +57 -0
- data/lib/sonus/features/mel_bands.rb +27 -0
- data/lib/sonus/features/mfcc.rb +30 -0
- data/lib/sonus/features/perceptual_sharpness.rb +26 -0
- data/lib/sonus/features/perceptual_spread.rb +18 -0
- data/lib/sonus/features/power_spectrum.rb +13 -0
- data/lib/sonus/features/rms.rb +16 -0
- data/lib/sonus/features/spectral_centroid.rb +30 -0
- data/lib/sonus/features/spectral_crest.rb +19 -0
- data/lib/sonus/features/spectral_flatness.rb +22 -0
- data/lib/sonus/features/spectral_flux.rb +24 -0
- data/lib/sonus/features/spectral_kurtosis.rb +34 -0
- data/lib/sonus/features/spectral_rolloff.rb +26 -0
- data/lib/sonus/features/spectral_skewness.rb +34 -0
- data/lib/sonus/features/spectral_slope.rb +25 -0
- data/lib/sonus/features/spectral_spread.rb +32 -0
- data/lib/sonus/features/windowed_signal.rb +16 -0
- data/lib/sonus/features/zcr.rb +17 -0
- data/lib/sonus/features.rb +109 -0
- data/lib/sonus/version.rb +5 -0
- data/lib/sonus/wav/format.rb +15 -0
- data/lib/sonus/wav/reader.rb +224 -0
- data/lib/sonus.rb +204 -0
- metadata +160 -0
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module DSP
    # Facade over the FFT backends. The selected backend is memoized in a
    # module-level instance variable; it is auto-detected on first use or set
    # explicitly through {.backend=}.
    module FFT
      module_function

      # Transforms +signal+ with the currently selected backend.
      #
      # @param signal [Object] samples, passed unchanged to the backend
      # @return [Object] whatever the backend's +fft+ returns
      # @raise [Sonus::FFTBackendError] if the stored backend is neither
      #   +:fftw+ nor +:ruby+
      def fft(signal)
        selected = backend
        return FFTBackend::FFTW.fft(signal) if selected == :fftw
        return FFTBackend::Ruby.fft(signal) if selected == :ruby

        raise Sonus::FFTBackendError, "Unknown FFT backend: #{selected}"
      end

      # Magnitudes of the first (length / 2) + 1 bins of the transform.
      #
      # @param signal [Object] samples, passed to {.fft}
      # @return [Array] absolute value of each retained bin
      def amplitude_spectrum(signal)
        bins = fft(signal)
        kept = (bins.length / 2) + 1
        bins.first(kept).map(&:abs)
      end

      # @return [Symbol] the memoized backend, auto-detected when unset
      def backend
        return @backend if @backend

        @backend = autodetect_backend
      end

      # Selects the backend explicitly.
      #
      # @param value [#to_sym] +:ruby+ or +:fftw+
      # @raise [ArgumentError] for any other name
      # @raise [Sonus::FFTBackendError] if +:fftw+ is requested but the FFTW
      #   backend reports itself unavailable
      def backend=(value)
        requested = value.to_sym

        unless %i[ruby fftw].include?(requested)
          raise ArgumentError, "Unsupported backend: #{value}"
        end

        if requested == :fftw && !FFTBackend::FFTW.available?
          raise Sonus::FFTBackendError, "FFTW backend requested but unavailable"
        end

        @backend = requested
      end

      # Forgets the memoized backend so the next {.backend} call
      # auto-detects again.
      def reset_backend!
        return unless instance_variable_defined?(:@backend)

        remove_instance_variable(:@backend)
      end

      # Prefers FFTW when it reports itself available, otherwise falls back
      # to the pure-Ruby implementation.
      def autodetect_backend
        return :fftw if FFTBackend::FFTW.available?

        :ruby
      end
      private_class_method :autodetect_backend
    end
  end
end
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "thread"
|
|
4
|
+
|
|
5
|
+
module Sonus
  module DSP
    module FFTBackend
      # FFT backend that delegates to the +FFTW3+ bindings. One context
      # (input buffer, output buffer, plan) is built per transform size and
      # cached in a mutex-guarded Hash.
      #
      # NOTE(review): the mutex only guards the context cache. The cached
      # buffers for a given size are shared, and write/execute/read in {.fft}
      # run outside the lock, so concurrent same-size transforms (or a
      # concurrent {.reset!}) look unsafe -- confirm against intended usage.
      module FFTW
        DEFAULT_PRECISION = :double
        PLAN_FLAGS = [:estimate].freeze

        # Bundle of the aligned buffers and the plan built for one size.
        Context = Struct.new(:input, :output, :plan, keyword_init: true)

        @mutex = Mutex.new
        @contexts = {}

        module_function

        # @return [Boolean] the result of +FFTW3.double_available?+, or
        #   +false+ when loading or probing the library raises
        def available?
          fftw3
          ::FFTW3.double_available?
        rescue LoadError, StandardError
          false
        end

        # Computes the full complex spectrum of a real signal, zero-padded
        # to the next power of two.
        #
        # @param signal [Array<Numeric>] real samples
        # @return [Array<Complex>] spectrum with as many bins as the padded
        #   size; +[]+ for an empty signal
        # @raise [Sonus::FFTBackendError] if FFTW is unavailable or any step
        #   of the transform raises
        def fft(signal)
          raise Sonus::FFTBackendError, "FFTW backend is not available" unless available?

          # next_power_of_two never returns 0 (it yields 1 for lengths <= 1),
          # so emptiness has to be checked on the signal itself. This keeps
          # the result for [] identical to the Ruby backend's.
          return [] if signal.empty?

          size = Sonus::DSP::FFTBackend::Ruby.next_power_of_two(signal.length)
          context = context_for(size)
          write_input(context.input, signal, size)
          context.plan.execute
          read_output(context.output, size)
        rescue Sonus::FFTBackendError
          raise
        rescue StandardError => error
          raise Sonus::FFTBackendError, "FFTW execution failed: #{error.message}"
        end

        # Destroys every cached plan, frees its buffers and empties the cache.
        def reset!
          @mutex.synchronize do
            @contexts.each_value do |context|
              context.plan&.destroy!
              context.input&.free!
              context.output&.free!
            end
            @contexts.clear
          end
        end

        # Lazily requires the binding and returns the +FFTW3+ constant.
        def fftw3
          require "fftw3" unless defined?(::FFTW3)
          ::FFTW3
        end

        # @param size [Integer] transform size
        # @return [Context] the cached context for +size+, built on first use
        def context_for(size)
          @mutex.synchronize do
            @contexts[size] ||= build_context(size)
          end
        end

        # Allocates a real input buffer of +size+ elements, a complex output
        # buffer of (size / 2) + 1 elements and a real-to-complex plan over
        # them. Buffers allocated before a failure are freed before the
        # error is re-raised.
        def build_context(size)
          library = fftw3
          input = library::AlignedMemory.new(size, type: :real, precision: DEFAULT_PRECISION)
          output = library::AlignedMemory.new((size / 2) + 1, type: :complex, precision: DEFAULT_PRECISION)
          plan = library::Plan.dft_r2c_1d(
            size,
            input,
            output,
            flags: PLAN_FLAGS,
            precision: DEFAULT_PRECISION
          )

          Context.new(input: input, output: output, plan: plan)
        rescue StandardError
          input&.free!
          output&.free!
          raise
        end

        # Writes +signal+ as floats into +buffer+, zero-padded to +size+.
        def write_input(buffer, signal, size)
          padded = signal.first(size).map(&:to_f)
          padded.fill(0.0, padded.length...size)
          buffer.write(padded)
        end

        # Expands the half spectrum read from +buffer+ into +size+ bins by
        # mirroring the interior bins as complex conjugates.
        def read_output(buffer, size)
          output = buffer.read
          spectrum = Array.new(size) { Complex(0.0, 0.0) }
          spectrum[0, output.length] = output

          # The first and last bins of the half spectrum have no mirror.
          (1...(output.length - 1)).each do |index|
            spectrum[size - index] = spectrum[index].conjugate
          end

          spectrum
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module DSP
    module FFTBackend
      # Pure-Ruby iterative radix-2 FFT. Input is zero-padded to a power of
      # two before the transform.
      module Ruby
        module_function

        # @param signal [Array<Numeric>] real samples
        # @return [Array<Complex>] spectrum with one bin per padded sample;
        #   +[]+ for an empty signal
        def fft(signal)
          samples = pad_to_power_of_two(signal)
          return [] if samples.empty?

          bins = samples.map { |sample| Complex(sample.to_f, 0.0) }
          bit_reversal_permute!(bins)

          total = bins.length
          half = 1

          # Each pass merges pairs of half-length transforms into
          # transforms of twice that length.
          while half < total
            span = half * 2
            twiddle_step = Complex.polar(1.0, -Math::PI / half)

            0.step(total - 1, span) do |offset|
              twiddle = Complex(1.0, 0.0)

              offset.upto(offset + half - 1) do |upper|
                lower = upper + half
                rotated = twiddle * bins[lower]
                kept = bins[upper]

                bins[upper] = kept + rotated
                bins[lower] = kept - rotated
                twiddle *= twiddle_step
              end
            end

            half = span
          end

          bins
        end

        # @param value [Numeric]
        # @return [Integer] the smallest power of two that is >= +value+,
        #   and 1 for anything <= 1
        def next_power_of_two(value)
          candidate = 1
          candidate *= 2 until candidate >= value
          candidate
        end

        # @param signal [Array<Numeric>]
        # @return [Array<Float>] a float copy of +signal+ extended with 0.0
        #   up to the next power-of-two length; +[]+ for empty input
        def pad_to_power_of_two(signal)
          return [] if signal.empty?

          target = next_power_of_two(signal.length)
          floats = signal.first(target).map(&:to_f)
          floats + Array.new(target - floats.length, 0.0)
        end

        # Reorders +array+ in place into bit-reversed index order. The
        # reversed counter is advanced incrementally instead of being
        # recomputed for every position.
        def bit_reversal_permute!(array)
          count = array.length
          reversed = 0

          (1...(count - 1)).each do |position|
            mask = count >> 1
            until reversed < mask
              reversed -= mask
              mask >>= 1
            end
            reversed += mask

            next unless position < reversed

            array[position], array[reversed] = array[reversed], array[position]
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module DSP
    # Builds triangular mel-spaced filter banks and memoizes them per
    # parameter set in a module-level Hash. The cached arrays are returned
    # as-is, so repeated calls with equal parameters share one object.
    module MelFilterBank
      @cache = {}

      module_function

      # @param hz [Numeric] frequency in Hz
      # @return [Float] 2595 * log10(1 + hz / 700)
      def hz_to_mel(hz)
        2595.0 * Math.log10(1.0 + (hz.to_f / 700.0))
      end

      # Inverse of {.hz_to_mel}.
      #
      # @param mel [Numeric]
      # @return [Float] frequency in Hz
      def mel_to_hz(mel)
        700.0 * ((10**(mel.to_f / 2595.0)) - 1.0)
      end

      # Returns the (cached) filter bank for the given parameters.
      #
      # @param num_filters [Integer] number of triangular filters
      # @param fft_size [Integer] FFT length; each filter has
      #   (fft_size / 2) + 1 weights
      # @param sample_rate [Numeric] sampling rate in Hz
      # @param freq_min [Numeric] lower edge in Hz
      # @param freq_max [Numeric, nil] upper edge in Hz; defaults to half
      #   the sample rate
      # @return [Array<Array<Float>>] one weight array per filter
      # @raise [ArgumentError] if +num_filters+ or +fft_size+ is not
      #   positive, or +sample_rate+ is not a positive finite number
      def create(num_filters:, fft_size:, sample_rate:, freq_min: 0.0, freq_max: nil)
        normalized_num_filters = Integer(num_filters)
        normalized_fft_size = Integer(fft_size)
        normalized_sample_rate = Float(sample_rate)
        normalized_freq_min = Float(freq_min)
        normalized_freq_max = freq_max.nil? ? normalized_sample_rate / 2.0 : Float(freq_max)

        raise ArgumentError, "num_filters must be positive" if normalized_num_filters <= 0
        raise ArgumentError, "fft_size must be positive" if normalized_fft_size <= 0

        # Bin positions are computed by dividing by the sample rate and
        # flooring; a zero or NaN rate would otherwise fail inside that
        # computation with a FloatDomainError, and a negative rate would
        # silently yield all-zero filters.
        unless normalized_sample_rate.finite? && normalized_sample_rate.positive?
          raise ArgumentError, "sample_rate must be a positive, finite number"
        end

        key = [
          normalized_num_filters,
          normalized_fft_size,
          normalized_sample_rate,
          normalized_freq_min,
          normalized_freq_max
        ]

        @cache[key] ||= build_filter_bank(
          num_filters: normalized_num_filters,
          fft_size: normalized_fft_size,
          sample_rate: normalized_sample_rate,
          freq_min: normalized_freq_min,
          freq_max: normalized_freq_max
        )
      end

      # Places num_filters + 2 points evenly on the mel axis, converts them
      # to FFT bin indices clamped to 0..(fft_size / 2), and draws one
      # triangle per consecutive (left, center, right) triple.
      def build_filter_bank(num_filters:, fft_size:, sample_rate:, freq_min:, freq_max:)
        mel_min = hz_to_mel(freq_min)
        mel_max = hz_to_mel(freq_max)
        mel_points = Array.new(num_filters + 2) do |index|
          mel_min + ((mel_max - mel_min) * index / (num_filters + 1.0))
        end
        hz_points = mel_points.map { |mel| mel_to_hz(mel) }

        max_bin = (fft_size / 2)
        bin_points = hz_points.map do |hz|
          bin = ((fft_size + 1) * hz / sample_rate).floor
          bin.clamp(0, max_bin)
        end

        filters = Array.new(num_filters) { Array.new(max_bin + 1, 0.0) }

        num_filters.times do |filter_index|
          left, center, right = bin_points[filter_index, 3]

          fill_rising_slope(filters[filter_index], left, center)
          fill_falling_slope(filters[filter_index], center, right)
        end

        filters
      end
      private_class_method :build_filter_bank

      # Writes the ramp from 0.0 at +left+ up to (but excluding) +center+.
      # Does nothing when the two bins coincide or are out of order.
      def fill_rising_slope(filter, left, center)
        return if center <= left

        (left...center).each do |bin|
          filter[bin] = (bin - left).to_f / (center - left)
        end
      end
      private_class_method :fill_rising_slope

      # Writes the ramp from 1.0 at +center+ down towards +right+
      # (exclusive). Does nothing when the two bins coincide or are out of
      # order, in which case the center bin is left untouched.
      def fill_falling_slope(filter, center, right)
        return if right <= center

        (center...right).each do |bin|
          filter[bin] = (right - bin).to_f / (right - center)
        end
      end
      private_class_method :fill_falling_slope
    end
  end
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module DSP
    # Symmetric window functions and a helper that applies one to a signal.
    # Every coefficient formula divides by (size - 1).
    module Windowing
      module_function

      # Multiplies each sample by the coefficient of the chosen window.
      #
      # Signals of length 0 or 1 are returned (as floats) before the window
      # type is even looked at, so an unknown type only raises for longer
      # signals.
      #
      # @param signal [Array<Numeric>]
      # @param type [#to_sym] +:rect+, +:hanning+/+:hann+, +:hamming+,
      #   +:blackman+ or +:sine+
      # @return [Array<Float>] a new array of windowed samples
      # @raise [ArgumentError] for an unknown window type
      def apply(signal, type = :hanning)
        samples = signal.map(&:to_f)
        count = samples.length
        return samples.dup if count <= 1

        kind = type.to_sym
        return samples.dup if kind == :rect

        coefficient =
          case kind
          when :hanning, :hann then :hanning
          when :hamming then :hamming
          when :blackman then :blackman
          when :sine then :sine
          else raise ArgumentError, "Unsupported windowing function: #{type}"
          end

        samples.each_with_index.map do |sample, position|
          sample * send(coefficient, count, position)
        end
      end

      # @return [Float] Hann coefficient at +index+ of a +size+-point window
      def hanning(size, index)
        angle = (2.0 * Math::PI * index) / (size - 1)
        0.5 * (1.0 - Math.cos(angle))
      end

      # @return [Float] Hamming coefficient at +index+
      def hamming(size, index)
        angle = (2.0 * Math::PI * index) / (size - 1)
        0.54 - (0.46 * Math.cos(angle))
      end

      # @return [Float] Blackman coefficient at +index+
      def blackman(size, index)
        angle = (2.0 * Math::PI * index) / (size - 1)
        0.42 - (0.5 * Math.cos(angle)) + (0.08 * Math.cos(2.0 * angle))
      end

      # @return [Float] sine-window coefficient at +index+
      def sine(size, index)
        angle = (Math::PI * index) / (size - 1)
        Math.sin(angle)
      end
    end
  end
end
|
data/lib/sonus/error.rb
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Magnitudes of the first (length / 2) + 1 bins of the cached complex
    # spectrum.
    class AmplitudeSpectrum < Base
      feature :amplitude_spectrum, dependencies: [:complex_spectrum]

      # @param context [Hash] must hold the complex spectrum under
      #   context[:cache][:complex_spectrum]
      # @return [Array] absolute value of each retained bin
      # @raise [KeyError] if the complex spectrum has not been cached
      def self.extract(context)
        bins = context[:cache].fetch(:complex_spectrum)
        kept = (bins.length / 2) + 1
        bins.first(kept).map { |bin| bin.abs }
      end
    end
  end
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Superclass for feature extractors. A subclass declares itself with the
    # +feature+ macro, which stores its metadata on the subclass and hands
    # the class to Sonus::Features.register.
    class Base
      # @return [Symbol, nil] key set by {.feature}, nil before it is called
      def self.feature_key
        @feature_key
      end

      # @return [Array<Symbol>, nil] dependency keys set by {.feature}
      def self.dependencies
        @dependencies
      end

      # Declares the feature implemented by this class and registers it.
      #
      # @param key [#to_sym] feature name
      # @param dependencies [Array<#to_sym>] features that must be computed
      #   first
      # @param public_feature [Boolean] stored for {.public_feature?}
      def self.feature(key, dependencies: [], public_feature: true)
        @feature_key = key.to_sym
        @dependencies = dependencies.map { |dependency| dependency.to_sym }
        @public_feature = public_feature

        Sonus::Features.register(self)
      end

      # @return [Boolean] false only when +public_feature: false+ was
      #   declared; true otherwise, including when {.feature} was never
      #   called
      def self.public_feature?
        !(@public_feature == false)
      end

      # Subclasses override this to compute the feature.
      #
      # @raise [NotImplementedError] always, in the base class
      def self.extract(_context)
        raise NotImplementedError, "Feature extract must be implemented"
      end
    end
  end
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Folds the amplitude spectrum into pitch-class bands and normalizes the
    # result so the bands sum to 1 (unless every band is zero).
    class Chroma < Base
      feature :chroma, dependencies: [:amplitude_spectrum]

      # @param context [Hash] reads context[:cache][:amplitude_spectrum],
      #   :sample_rate and the optional :number_of_chroma_bands (default 12)
      # @return [Array<Float>] one value per chroma band
      def self.extract(context)
        magnitudes = context[:cache].fetch(:amplitude_spectrum)
        band_count = Integer(context.fetch(:number_of_chroma_bands, 12))
        bands = Array.new(band_count, 0.0)
        return bands if magnitudes.empty?

        # The spectrum holds (fft_size / 2) + 1 bins, so invert that here.
        transform_size = [(magnitudes.length - 1) * 2, 1].max
        rate = context.fetch(:sample_rate).to_f

        magnitudes.each_with_index do |magnitude, bin|
          hertz = (bin * rate) / transform_size
          next if hertz <= 0.0 # bin 0 has no pitch

          # MIDI note number relative to A4 = 440 Hz = note 69.
          pitch = 69.0 + (12.0 * Math.log2(hertz / 440.0))
          pitch_class = pitch.round % 12
          slot = ((pitch_class / 12.0) * band_count).floor % band_count
          bands[slot] += magnitude
        end

        energy = bands.sum
        energy.zero? ? bands : bands.map { |value| value / energy }
      end
    end
  end
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Complex spectrum of the cached windowed signal, computed through
    # Sonus::DSP::FFT.
    class ComplexSpectrum < Base
      feature :complex_spectrum, dependencies: [:windowed_signal]

      # @param context [Hash] must hold the windowed signal under
      #   context[:cache][:windowed_signal]
      # @return [Object] the result of Sonus::DSP::FFT.fft
      # @raise [KeyError] if the windowed signal has not been cached
      def self.extract(context)
        windowed = context[:cache].fetch(:windowed_signal)
        Sonus::DSP::FFT.fft(windowed)
      end
    end
  end
end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Per-band ("specific") and total loudness. Amplitude bins are grouped
    # into equal-width bands on a Bark-like scale; each band's summed
    # amplitude is raised to the power 0.23.
    class Loudness < Base
      feature :loudness, dependencies: [:amplitude_spectrum]

      # @param context [Hash] reads context[:cache][:amplitude_spectrum],
      #   :sample_rate and the optional :number_of_bark_bands
      # @return [Hash] +{ specific: Array<Float>, total: Float }+
      def self.extract(context)
        magnitudes = context[:cache].fetch(:amplitude_spectrum)
        return { specific: [], total: 0.0 } if magnitudes.empty?

        band_count = Integer(context.fetch(:number_of_bark_bands, Sonus::DEFAULT_BARK_BANDS))
        scale = create_bark_scale(magnitudes.length, context.fetch(:sample_rate))
        edges = bark_band_limits(scale, band_count)

        per_band = Array.new(band_count) do |band|
          # Plain left-to-right accumulation; the upper edge is exclusive.
          gathered = (edges[band]...edges[band + 1]).reduce(0.0) do |running, bin|
            running + magnitudes[bin]
          end
          gathered**0.23
        end

        { specific: per_band, total: per_band.sum }
      end

      # Maps every bin index to its position on the Bark-like scale.
      #
      # @param length [Integer] number of spectrum bins
      # @param sample_rate [#to_f]
      # @return [Array<Float>] one scale value per bin
      def self.create_bark_scale(length, sample_rate)
        transform_size = [(length - 1) * 2, 1].max
        hertz_per_bin = sample_rate.to_f / transform_size

        length.times.map do |bin|
          hertz = bin * hertz_per_bin
          low_term = 13.0 * Math.atan(0.00076 * hertz)
          high_term = 3.5 * Math.atan((hertz / 7500.0)**2)
          low_term + high_term
        end
      end
      private_class_method :create_bark_scale

      # Finds the bin index at which each band starts. Bands split the
      # range 0..max(scale) into +bark_bands+ equal parts; the final limit
      # is pinned to the last bin index.
      #
      # @return [Array<Integer>] +bark_bands + 1+ bin indices
      def self.bark_band_limits(bark_scale, bark_bands)
        edges = Array.new(bark_bands + 1, 0)
        top = bark_scale.last
        band = 1
        band_ceiling = top / bark_bands

        bark_scale.each_with_index do |position, bin|
          while position > band_ceiling && band < bark_bands
            edges[band] = bin
            band += 1
            band_ceiling = (band * top) / bark_bands
          end
        end

        edges[bark_bands] = bark_scale.length - 1
        edges
      end
      private_class_method :bark_band_limits
    end
  end
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Log-compressed energy of the power spectrum in each mel filter.
    class MelBands < Base
      feature :mel_bands, dependencies: [:power_spectrum]

      # @param context [Hash] reads context[:cache][:power_spectrum],
      #   :sample_rate and the optional :number_of_mel_filters
      # @return [Array<Float>] log(energy + 1) per filter; +[]+ when the
      #   power spectrum is empty
      def self.extract(context)
        power = context[:cache].fetch(:power_spectrum)
        return [] if power.empty?

        filter_count = context.fetch(:number_of_mel_filters, Sonus::DEFAULT_MEL_FILTERS)
        # The spectrum holds (fft_size / 2) + 1 bins, so invert that here.
        transform_size = [(power.length - 1) * 2, 1].max
        rate = context.fetch(:sample_rate)

        bank = Sonus::DSP::MelFilterBank.create(
          num_filters: filter_count,
          fft_size: transform_size,
          sample_rate: rate
        )

        bank.map do |weights|
          # Bins beyond the filter's length contribute nothing.
          weighted = power.each_with_index.sum do |bin_power, bin|
            bin_power * (weights[bin] || 0.0)
          end
          Math.log(weighted + 1.0)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Cepstral coefficients obtained by applying an unnormalized cosine
    # transform to the cached mel band energies.
    class MFCC < Base
      feature :mfcc, dependencies: [:mel_bands]

      # @param context [Hash] reads context[:cache][:mel_bands] and the
      #   optional :number_of_mfcc_coefficients
      # @return [Array<Float>] the requested number of coefficients, limited
      #   to the range 1..40; +[]+ when there are no mel bands
      def self.extract(context)
        bands = context[:cache].fetch(:mel_bands)
        return [] if bands.empty?

        wanted = Integer(context.fetch(:number_of_mfcc_coefficients, Sonus::DEFAULT_MFCC_COEFFICIENTS))
        dct(bands, wanted.clamp(1, 40))
      end

      # Coefficient k is the sum over n of
      # values[n] * cos(pi / N * (n + 0.5) * k), with N = values.length.
      #
      # @return [Array<Float>] +coefficient_count+ coefficients
      def self.dct(values, coefficient_count)
        size = values.length
        return [] if size.zero?

        angular_step = Math::PI / size

        Array.new(coefficient_count) do |order|
          values.each_with_index.sum do |sample, position|
            sample * Math.cos(angular_step * (position + 0.5) * order)
          end
        end
      end
      private_class_method :dct
    end
  end
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Sharpness figure derived from the cached loudness result.
    #
    # NOTE(review): for bands below index 15 the term reads the *next*
    # band's loudness (missing entries count as 0.0), and from index 15 on
    # the exponential term is not multiplied by any loudness value. This
    # rewrite keeps that behavior exactly; whether it is intentional (for
    # example parity with a reference implementation) should be confirmed.
    class PerceptualSharpness < Base
      feature :perceptual_sharpness, dependencies: [:loudness]

      # @param context [Hash] reads context[:cache][:loudness], a Hash with
      #   :specific and :total
      # @return [Float] 0.0 when total loudness is zero, otherwise the
      #   summed band terms scaled by 0.11 / total
      def self.extract(context)
        loudness = context[:cache].fetch(:loudness)
        per_band = loudness.fetch(:specific)
        overall = loudness.fetch(:total)
        return 0.0 if overall.zero?

        weighted = per_band.each_index.sum do |band|
          ordinal = band + 1
          next 0.066 * Math.exp(0.171 * ordinal) unless band < 15

          ordinal * (per_band[ordinal] || 0.0)
        end

        weighted * (0.11 / overall)
      end
    end
  end
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Squared share of total loudness that lies outside the loudest band.
    class PerceptualSpread < Base
      feature :perceptual_spread, dependencies: [:loudness]

      # @param context [Hash] reads context[:cache][:loudness], a Hash with
      #   :specific and :total
      # @return [Float] 0.0 when total loudness is zero, otherwise
      #   ((total - max(specific)) / total) squared
      def self.extract(context)
        loudness = context[:cache].fetch(:loudness)
        overall = loudness.fetch(:total)
        return 0.0 if overall.zero?

        loudest = loudness.fetch(:specific).max
        remainder_share = (overall - loudest) / overall
        remainder_share * remainder_share
      end
    end
  end
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Element-wise square of the cached amplitude spectrum.
    class PowerSpectrum < Base
      feature :power_spectrum, dependencies: [:amplitude_spectrum]

      # @param context [Hash] must hold the amplitude spectrum under
      #   context[:cache][:amplitude_spectrum]
      # @return [Array] each amplitude multiplied by itself
      # @raise [KeyError] if the amplitude spectrum has not been cached
      def self.extract(context)
        magnitudes = context[:cache].fetch(:amplitude_spectrum)
        magnitudes.map { |magnitude| magnitude * magnitude }
      end
    end
  end
end
|