sonus 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +314 -0
- data/Rakefile +8 -0
- data/benchmark/extract_all_features.rb +17 -0
- data/examples/analyze_wav.rb +25 -0
- data/examples/example_helper.rb +84 -0
- data/examples/extract_basic.rb +23 -0
- data/examples/extract_spectral_flux.rb +37 -0
- data/examples/read_wav.rb +19 -0
- data/lib/sonus/analyzer.rb +109 -0
- data/lib/sonus/dsp/fft.rb +55 -0
- data/lib/sonus/dsp/fft_backend/fftw.rb +103 -0
- data/lib/sonus/dsp/fft_backend/ruby.rb +80 -0
- data/lib/sonus/dsp/mel_filter_bank.rb +98 -0
- data/lib/sonus/dsp/windowing.rb +47 -0
- data/lib/sonus/error.rb +9 -0
- data/lib/sonus/features/amplitude_spectrum.rb +14 -0
- data/lib/sonus/features/base.rb +27 -0
- data/lib/sonus/features/buffer.rb +13 -0
- data/lib/sonus/features/chroma.rb +35 -0
- data/lib/sonus/features/complex_spectrum.rb +13 -0
- data/lib/sonus/features/energy.rb +13 -0
- data/lib/sonus/features/loudness.rb +57 -0
- data/lib/sonus/features/mel_bands.rb +27 -0
- data/lib/sonus/features/mfcc.rb +30 -0
- data/lib/sonus/features/perceptual_sharpness.rb +26 -0
- data/lib/sonus/features/perceptual_spread.rb +18 -0
- data/lib/sonus/features/power_spectrum.rb +13 -0
- data/lib/sonus/features/rms.rb +16 -0
- data/lib/sonus/features/spectral_centroid.rb +30 -0
- data/lib/sonus/features/spectral_crest.rb +19 -0
- data/lib/sonus/features/spectral_flatness.rb +22 -0
- data/lib/sonus/features/spectral_flux.rb +24 -0
- data/lib/sonus/features/spectral_kurtosis.rb +34 -0
- data/lib/sonus/features/spectral_rolloff.rb +26 -0
- data/lib/sonus/features/spectral_skewness.rb +34 -0
- data/lib/sonus/features/spectral_slope.rb +25 -0
- data/lib/sonus/features/spectral_spread.rb +32 -0
- data/lib/sonus/features/windowed_signal.rb +16 -0
- data/lib/sonus/features/zcr.rb +17 -0
- data/lib/sonus/features.rb +109 -0
- data/lib/sonus/version.rb +5 -0
- data/lib/sonus/wav/format.rb +15 -0
- data/lib/sonus/wav/reader.rb +224 -0
- data/lib/sonus.rb +204 -0
- metadata +160 -0
data/lib/sonus.rb
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "sonus/version"
|
|
4
|
+
require_relative "sonus/error"
|
|
5
|
+
require_relative "sonus/dsp/windowing"
|
|
6
|
+
require_relative "sonus/dsp/fft_backend/ruby"
|
|
7
|
+
require_relative "sonus/dsp/fft_backend/fftw"
|
|
8
|
+
require_relative "sonus/dsp/fft"
|
|
9
|
+
require_relative "sonus/dsp/mel_filter_bank"
|
|
10
|
+
require_relative "sonus/wav/format"
|
|
11
|
+
require_relative "sonus/wav/reader"
|
|
12
|
+
require_relative "sonus/features"
|
|
13
|
+
require_relative "sonus/analyzer"
|
|
14
|
+
|
|
15
|
+
module Sonus
|
|
16
|
+
# Fallback values for the keyword arguments of Sonus.extract and
# Sonus.extract_with_cache: each DEFAULT_* constant in this group is the
# default of the matching option when the caller omits it.
DEFAULT_BUFFER_SIZE = 512
|
|
17
|
+
DEFAULT_SAMPLE_RATE = 44_100
|
|
18
|
+
DEFAULT_WINDOWING_FUNCTION = :hanning
|
|
19
|
+
DEFAULT_MFCC_COEFFICIENTS = 13
|
|
20
|
+
DEFAULT_MEL_FILTERS = 26
|
|
21
|
+
DEFAULT_CHROMA_BANDS = 12
|
|
22
|
+
DEFAULT_BARK_BANDS = 24
|
|
23
|
+
|
|
24
|
+
class << self
|
|
25
|
+
# Extracts the requested features from one frame of audio.
#
# Thin wrapper around extract_with_cache: every argument is forwarded
# unchanged and only the first element of that method's return value is
# handed back (the second element, presumably the computation cache, is
# discarded).
#
# @param signal [Array<Numeric>, #to_a] samples of the frame to analyse
# @param features [Symbol, String, Array<Symbol, String>] feature name(s)
# @param buffer_size [Integer] frame length in samples
# @param sample_rate [Numeric] sample rate of the signal
# @param windowing_function [Symbol, String] name of the window to apply
# @param number_of_mfcc_coefficients [Integer]
# @param number_of_mel_filters [Integer]
# @param mel_bands [Integer, nil] overrides number_of_mel_filters when not nil
# @param number_of_chroma_bands [Integer]
# @param chroma_bands [Integer, nil] overrides number_of_chroma_bands when not nil
# @param number_of_bark_bands [Integer]
# @param previous_signal [Array<Numeric>, nil] samples of the preceding frame
# @param prev_amplitude_spectrum [Array<Numeric>, nil] amplitude spectrum of
#   the preceding frame
# @return [Object] first element of the extract_with_cache result
def extract(
  signal,
  features,
  buffer_size: DEFAULT_BUFFER_SIZE,
  sample_rate: DEFAULT_SAMPLE_RATE,
  windowing_function: DEFAULT_WINDOWING_FUNCTION,
  number_of_mfcc_coefficients: DEFAULT_MFCC_COEFFICIENTS,
  number_of_mel_filters: DEFAULT_MEL_FILTERS,
  mel_bands: nil,
  number_of_chroma_bands: DEFAULT_CHROMA_BANDS,
  chroma_bands: nil,
  number_of_bark_bands: DEFAULT_BARK_BANDS,
  previous_signal: nil,
  prev_amplitude_spectrum: nil
)
  forwarded_options = {
    buffer_size: buffer_size,
    sample_rate: sample_rate,
    windowing_function: windowing_function,
    number_of_mfcc_coefficients: number_of_mfcc_coefficients,
    number_of_mel_filters: number_of_mel_filters,
    mel_bands: mel_bands,
    number_of_chroma_bands: number_of_chroma_bands,
    chroma_bands: chroma_bands,
    number_of_bark_bands: number_of_bark_bands,
    previous_signal: previous_signal,
    prev_amplitude_spectrum: prev_amplitude_spectrum
  }

  feature_values, _cache = extract_with_cache(signal, features, **forwarded_options)
  feature_values
end
|
|
57
|
+
|
|
58
|
+
# Validates and normalises every input, builds the feature-computation
# context and delegates to Features.compute with return_cache: true.
#
# The signal (and previous_signal, when given) is validated, converted to
# floats, then truncated or zero-padded to exactly buffer_size samples.
# mel_bands / chroma_bands, when not nil, take precedence over
# number_of_mel_filters / number_of_chroma_bands. An explicit
# prev_amplitude_spectrum takes precedence over one derived from
# previous_signal.
#
# @param signal [Array<Numeric>, #to_a] samples of the frame to analyse
# @param features [Symbol, String, Array<Symbol, String>] feature name(s)
# @param buffer_size [Integer] frame length in samples (coerced with Integer())
# @param sample_rate [Numeric] sample rate (coerced with Float())
# @param windowing_function [Symbol, String] window name (coerced with to_sym)
# @param number_of_mfcc_coefficients [Integer]
# @param number_of_mel_filters [Integer]
# @param mel_bands [Integer, nil] overrides number_of_mel_filters when not nil
# @param number_of_chroma_bands [Integer]
# @param chroma_bands [Integer, nil] overrides number_of_chroma_bands when not nil
# @param number_of_bark_bands [Integer]
# @param previous_signal [Array<Numeric>, nil] samples of the preceding frame
# @param prev_amplitude_spectrum [Array<Numeric>, nil] amplitude spectrum of
#   the preceding frame
# @return [Object] whatever Features.compute returns with return_cache: true
#   (presumably a [results, cache] pair -- Sonus.extract keeps its first element)
# @raise [ArgumentError] when a size/rate/count option is not positive
# @raise [InvalidFeatureError] when the feature list is empty or malformed
# @raise [InvalidSignalError] when signal or previous_signal is invalid
def extract_with_cache(
  signal,
  features,
  buffer_size: DEFAULT_BUFFER_SIZE,
  sample_rate: DEFAULT_SAMPLE_RATE,
  windowing_function: DEFAULT_WINDOWING_FUNCTION,
  number_of_mfcc_coefficients: DEFAULT_MFCC_COEFFICIENTS,
  number_of_mel_filters: DEFAULT_MEL_FILTERS,
  mel_bands: nil,
  number_of_chroma_bands: DEFAULT_CHROMA_BANDS,
  chroma_bands: nil,
  number_of_bark_bands: DEFAULT_BARK_BANDS,
  previous_signal: nil,
  prev_amplitude_spectrum: nil
)
  normalized_features = normalize_features(features)
  normalized_buffer_size = Integer(buffer_size)
  normalized_sample_rate = Float(sample_rate)
  normalized_mel_filters = normalize_or_default(mel_bands, number_of_mel_filters)
  normalized_chroma_bands = normalize_or_default(chroma_bands, number_of_chroma_bands)

  validate_options!(
    buffer_size: normalized_buffer_size,
    sample_rate: normalized_sample_rate,
    number_of_mfcc_coefficients: number_of_mfcc_coefficients,
    number_of_mel_filters: normalized_mel_filters,
    number_of_chroma_bands: normalized_chroma_bands,
    number_of_bark_bands: number_of_bark_bands
  )

  validated_signal = validate_signal!(signal)
  frame = normalize_signal_to_frame(validated_signal, normalized_buffer_size)
  normalized_previous_signal = normalize_previous_signal(previous_signal, normalized_buffer_size)

  # Normalise the window name once so the previous frame is windowed with
  # exactly the same (Symbol) name that the context carries for the current
  # frame; previously the raw value (e.g. a String) was used for the previous
  # frame only.
  normalized_windowing_function = windowing_function.to_sym

  previous_amplitude = resolve_previous_amplitude_spectrum(
    prev_amplitude_spectrum: prev_amplitude_spectrum,
    previous_signal: normalized_previous_signal,
    windowing_function: normalized_windowing_function
  )

  context = {
    buffer: frame,
    buffer_size: normalized_buffer_size,
    sample_rate: normalized_sample_rate,
    windowing_function: normalized_windowing_function,
    number_of_mfcc_coefficients: Integer(number_of_mfcc_coefficients),
    number_of_mel_filters: Integer(normalized_mel_filters),
    number_of_chroma_bands: Integer(normalized_chroma_bands),
    number_of_bark_bands: Integer(number_of_bark_bands),
    previous_signal: normalized_previous_signal,
    prev_amplitude_spectrum: previous_amplitude
  }

  Features.compute(normalized_features, context, return_cache: true)
end
|
|
112
|
+
|
|
113
|
+
# Feature names that callers may request.
#
# @return [Object] the value of Features.public_feature_keys, passed through
#   untouched
def available_features = Features.public_feature_keys
|
|
116
|
+
|
|
117
|
+
# Alternate name for available_features; returns exactly what that method
# returns.
def list_available_feature_extractors = available_features
|
|
120
|
+
|
|
121
|
+
# Applies a window function to a signal.
#
# The signal is first run through validate_signal! (which converts it to an
# array of floats or raises), then handed to DSP::Windowing.apply together
# with function_name, which is passed through as given.
#
# @param signal [Array<Numeric>, #to_a] samples to window
# @param function_name [Object] window identifier understood by
#   DSP::Windowing.apply
# @return [Object] the result of DSP::Windowing.apply
def windowing(signal, function_name)
  samples = validate_signal!(signal)
  DSP::Windowing.apply(samples, function_name)
end
|
|
124
|
+
|
|
125
|
+
private
|
|
126
|
+
|
|
127
|
+
# Ensures every numeric option is strictly positive.
#
# buffer_size and sample_rate are checked as given (the caller has already
# coerced them); the four count options are coerced with Integer() before the
# check. Checks run in declaration order and the first failure raises.
#
# @return [nil]
# @raise [ArgumentError] naming the first option that is not positive
def validate_options!(
  buffer_size:,
  sample_rate:,
  number_of_mfcc_coefficients:,
  number_of_mel_filters:,
  number_of_chroma_bands:,
  number_of_bark_bands:
)
  raise ArgumentError, "buffer_size must be positive" unless buffer_size.positive?
  raise ArgumentError, "sample_rate must be positive" unless sample_rate.positive?

  # Integer() is applied inside the loop so that coercion errors surface in
  # the same order as the positivity checks.
  [
    [number_of_mfcc_coefficients, "number_of_mfcc_coefficients must be positive"],
    [number_of_mel_filters, "number_of_mel_filters must be positive"],
    [number_of_chroma_bands, "number_of_chroma_bands must be positive"],
    [number_of_bark_bands, "number_of_bark_bands must be positive"]
  ].each do |count, message|
    raise ArgumentError, message unless Integer(count).positive?
  end

  nil
end
|
|
142
|
+
|
|
143
|
+
# Turns the caller-supplied feature selection into a de-duplicated list of
# symbols.
#
# A single name or a collection is accepted (wrapped with Array()). Each
# entry must respond to to_sym; this is checked up front rather than by
# rescuing NoMethodError, so an unrelated NoMethodError raised inside a
# custom to_sym is no longer misreported as an invalid feature name.
#
# @param features [Symbol, String, Array<Symbol, String>]
# @return [Array<Symbol>] unique feature names in first-seen order
# @raise [InvalidFeatureError] when no feature is given or an entry cannot be
#   converted to a Symbol
def normalize_features(features)
  feature_list = Array(features)
  raise InvalidFeatureError, "At least one feature must be provided" if feature_list.empty?

  feature_list.map do |feature|
    unless feature.respond_to?(:to_sym)
      raise InvalidFeatureError, "Feature names must be symbols or strings"
    end

    feature.to_sym
  end.uniq
end
|
|
153
|
+
|
|
154
|
+
# Validates a signal and returns it as an Array of Floats.
#
# Every sample must be a Numeric that converts to a finite Float. The
# finiteness check is done on the converted value, so numbers that are
# "finite" in their own type but overflow Float (e.g. 10**400) are rejected
# instead of silently becoming Infinity. Complex samples that cannot be
# converted (non-zero or inexact imaginary part) are reported as an invalid
# signal rather than leaking a RangeError.
#
# @param signal [Array<Numeric>, #to_a]
# @return [Array<Float>]
# @raise [InvalidSignalError] when the signal is not array-like, is empty, or
#   contains a non-numeric, non-real, NaN or infinite sample
def validate_signal!(signal)
  unless signal.is_a?(Array) || signal.respond_to?(:to_a)
    raise InvalidSignalError, "Signal must be an Array of Numeric values"
  end

  signal_values = signal.to_a
  raise InvalidSignalError, "Signal cannot be empty" if signal_values.empty?

  signal_values.map do |value|
    raise InvalidSignalError, "Signal contains a non-numeric value" unless value.is_a?(Numeric)

    sample = value.to_f
    raise InvalidSignalError, "Signal contains NaN or Infinity" unless sample.finite?

    sample
  rescue RangeError
    # Complex#to_f raises RangeError when the imaginary part is not exactly zero.
    raise InvalidSignalError, "Signal contains a non-real (complex) value"
  end
end
|
|
177
|
+
|
|
178
|
+
# Fits a signal to exactly buffer_size samples.
#
# Longer signals are cut to their first buffer_size samples; shorter ones are
# right-padded with 0.0.
#
# @param signal [Array<Float>]
# @param buffer_size [Integer]
# @return [Array<Float>] a new array of length buffer_size
def normalize_signal_to_frame(signal, buffer_size)
  head = signal.first(buffer_size)
  shortfall = buffer_size - head.length
  return head if shortfall.zero?

  head + ([0.0] * shortfall)
end
|
|
184
|
+
|
|
185
|
+
# Prepares the optional previous frame.
#
# nil is passed through; anything else is validated with validate_signal!
# and then fitted to buffer_size samples with normalize_signal_to_frame.
#
# @param signal [Array<Numeric>, #to_a, nil]
# @param buffer_size [Integer]
# @return [Array<Float>, nil]
def normalize_previous_signal(signal, buffer_size)
  return if signal.nil?

  normalize_signal_to_frame(validate_signal!(signal), buffer_size)
end
|
|
191
|
+
|
|
192
|
+
# Determines the amplitude spectrum of the previous frame.
#
# An explicitly supplied spectrum wins and is returned with every element
# converted via to_f. Otherwise, if a previous signal is available, it is
# windowed with DSP::Windowing.apply and passed to
# DSP::FFT.amplitude_spectrum. With neither input the result is nil.
#
# @param prev_amplitude_spectrum [Array<#to_f>, nil]
# @param previous_signal [Array<Float>, nil]
# @param windowing_function [Object] window identifier for DSP::Windowing.apply
# @return [Array<Float>, Object, nil]
def resolve_previous_amplitude_spectrum(prev_amplitude_spectrum:, previous_signal:, windowing_function:)
  if prev_amplitude_spectrum
    prev_amplitude_spectrum.map(&:to_f)
  elsif previous_signal
    DSP::FFT.amplitude_spectrum(DSP::Windowing.apply(previous_signal, windowing_function))
  end
end
|
|
199
|
+
|
|
200
|
+
# Picks the alias option when it was supplied, the primary option otherwise.
#
# Only nil counts as "not supplied"; false is returned as given.
#
# @param alias_value [Object, nil]
# @param default_value [Object]
# @return [Object]
def normalize_or_default(alias_value, default_value)
  return default_value if alias_value.nil?

  alias_value
end
|
|
203
|
+
end
|
|
204
|
+
end
|
metadata
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: sonus
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Yudai Takada
|
|
8
|
+
bindir: exe
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: fftw3-ruby
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - "~>"
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: 0.1.0
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - "~>"
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: 0.1.0
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: benchmark-ips
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - "~>"
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '2.12'
|
|
33
|
+
type: :development
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - "~>"
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '2.12'
|
|
40
|
+
- !ruby/object:Gem::Dependency
|
|
41
|
+
name: rake
|
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - "~>"
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: '13.0'
|
|
47
|
+
type: :development
|
|
48
|
+
prerelease: false
|
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - "~>"
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: '13.0'
|
|
54
|
+
- !ruby/object:Gem::Dependency
|
|
55
|
+
name: rspec
|
|
56
|
+
requirement: !ruby/object:Gem::Requirement
|
|
57
|
+
requirements:
|
|
58
|
+
- - "~>"
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
version: '3.0'
|
|
61
|
+
type: :development
|
|
62
|
+
prerelease: false
|
|
63
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
64
|
+
requirements:
|
|
65
|
+
- - "~>"
|
|
66
|
+
- !ruby/object:Gem::Version
|
|
67
|
+
version: '3.0'
|
|
68
|
+
- !ruby/object:Gem::Dependency
|
|
69
|
+
name: yard
|
|
70
|
+
requirement: !ruby/object:Gem::Requirement
|
|
71
|
+
requirements:
|
|
72
|
+
- - "~>"
|
|
73
|
+
- !ruby/object:Gem::Version
|
|
74
|
+
version: '0.9'
|
|
75
|
+
type: :development
|
|
76
|
+
prerelease: false
|
|
77
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
78
|
+
requirements:
|
|
79
|
+
- - "~>"
|
|
80
|
+
- !ruby/object:Gem::Version
|
|
81
|
+
version: '0.9'
|
|
82
|
+
description: Sonus extracts time-domain, spectral, and perceptual audio features in
|
|
83
|
+
pure Ruby with optional FFTW acceleration.
|
|
84
|
+
email:
|
|
85
|
+
- t.yudai92@gmail.com
|
|
86
|
+
executables: []
|
|
87
|
+
extensions: []
|
|
88
|
+
extra_rdoc_files: []
|
|
89
|
+
files:
|
|
90
|
+
- CHANGELOG.md
|
|
91
|
+
- LICENSE.txt
|
|
92
|
+
- README.md
|
|
93
|
+
- Rakefile
|
|
94
|
+
- benchmark/extract_all_features.rb
|
|
95
|
+
- examples/analyze_wav.rb
|
|
96
|
+
- examples/example_helper.rb
|
|
97
|
+
- examples/extract_basic.rb
|
|
98
|
+
- examples/extract_spectral_flux.rb
|
|
99
|
+
- examples/read_wav.rb
|
|
100
|
+
- lib/sonus.rb
|
|
101
|
+
- lib/sonus/analyzer.rb
|
|
102
|
+
- lib/sonus/dsp/fft.rb
|
|
103
|
+
- lib/sonus/dsp/fft_backend/fftw.rb
|
|
104
|
+
- lib/sonus/dsp/fft_backend/ruby.rb
|
|
105
|
+
- lib/sonus/dsp/mel_filter_bank.rb
|
|
106
|
+
- lib/sonus/dsp/windowing.rb
|
|
107
|
+
- lib/sonus/error.rb
|
|
108
|
+
- lib/sonus/features.rb
|
|
109
|
+
- lib/sonus/features/amplitude_spectrum.rb
|
|
110
|
+
- lib/sonus/features/base.rb
|
|
111
|
+
- lib/sonus/features/buffer.rb
|
|
112
|
+
- lib/sonus/features/chroma.rb
|
|
113
|
+
- lib/sonus/features/complex_spectrum.rb
|
|
114
|
+
- lib/sonus/features/energy.rb
|
|
115
|
+
- lib/sonus/features/loudness.rb
|
|
116
|
+
- lib/sonus/features/mel_bands.rb
|
|
117
|
+
- lib/sonus/features/mfcc.rb
|
|
118
|
+
- lib/sonus/features/perceptual_sharpness.rb
|
|
119
|
+
- lib/sonus/features/perceptual_spread.rb
|
|
120
|
+
- lib/sonus/features/power_spectrum.rb
|
|
121
|
+
- lib/sonus/features/rms.rb
|
|
122
|
+
- lib/sonus/features/spectral_centroid.rb
|
|
123
|
+
- lib/sonus/features/spectral_crest.rb
|
|
124
|
+
- lib/sonus/features/spectral_flatness.rb
|
|
125
|
+
- lib/sonus/features/spectral_flux.rb
|
|
126
|
+
- lib/sonus/features/spectral_kurtosis.rb
|
|
127
|
+
- lib/sonus/features/spectral_rolloff.rb
|
|
128
|
+
- lib/sonus/features/spectral_skewness.rb
|
|
129
|
+
- lib/sonus/features/spectral_slope.rb
|
|
130
|
+
- lib/sonus/features/spectral_spread.rb
|
|
131
|
+
- lib/sonus/features/windowed_signal.rb
|
|
132
|
+
- lib/sonus/features/zcr.rb
|
|
133
|
+
- lib/sonus/version.rb
|
|
134
|
+
- lib/sonus/wav/format.rb
|
|
135
|
+
- lib/sonus/wav/reader.rb
|
|
136
|
+
homepage: https://github.com/ydah/sonus
|
|
137
|
+
licenses:
|
|
138
|
+
- MIT
|
|
139
|
+
metadata:
|
|
140
|
+
homepage_uri: https://github.com/ydah/sonus
|
|
141
|
+
source_code_uri: https://github.com/ydah/sonus/tree/main
|
|
142
|
+
changelog_uri: https://github.com/ydah/sonus/blob/main/CHANGELOG.md
|
|
143
|
+
rdoc_options: []
|
|
144
|
+
require_paths:
|
|
145
|
+
- lib
|
|
146
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
147
|
+
requirements:
|
|
148
|
+
- - ">="
|
|
149
|
+
- !ruby/object:Gem::Version
|
|
150
|
+
version: '3.1'
|
|
151
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
152
|
+
requirements:
|
|
153
|
+
- - ">="
|
|
154
|
+
- !ruby/object:Gem::Version
|
|
155
|
+
version: '0'
|
|
156
|
+
requirements: []
|
|
157
|
+
rubygems_version: 4.0.6
|
|
158
|
+
specification_version: 4
|
|
159
|
+
summary: Audio feature extraction library for Ruby
|
|
160
|
+
test_files: []
|