sonus 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48):
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +5 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +314 -0
  5. data/Rakefile +8 -0
  6. data/benchmark/extract_all_features.rb +17 -0
  7. data/examples/analyze_wav.rb +25 -0
  8. data/examples/example_helper.rb +84 -0
  9. data/examples/extract_basic.rb +23 -0
  10. data/examples/extract_spectral_flux.rb +37 -0
  11. data/examples/read_wav.rb +19 -0
  12. data/lib/sonus/analyzer.rb +109 -0
  13. data/lib/sonus/dsp/fft.rb +55 -0
  14. data/lib/sonus/dsp/fft_backend/fftw.rb +103 -0
  15. data/lib/sonus/dsp/fft_backend/ruby.rb +80 -0
  16. data/lib/sonus/dsp/mel_filter_bank.rb +98 -0
  17. data/lib/sonus/dsp/windowing.rb +47 -0
  18. data/lib/sonus/error.rb +9 -0
  19. data/lib/sonus/features/amplitude_spectrum.rb +14 -0
  20. data/lib/sonus/features/base.rb +27 -0
  21. data/lib/sonus/features/buffer.rb +13 -0
  22. data/lib/sonus/features/chroma.rb +35 -0
  23. data/lib/sonus/features/complex_spectrum.rb +13 -0
  24. data/lib/sonus/features/energy.rb +13 -0
  25. data/lib/sonus/features/loudness.rb +57 -0
  26. data/lib/sonus/features/mel_bands.rb +27 -0
  27. data/lib/sonus/features/mfcc.rb +30 -0
  28. data/lib/sonus/features/perceptual_sharpness.rb +26 -0
  29. data/lib/sonus/features/perceptual_spread.rb +18 -0
  30. data/lib/sonus/features/power_spectrum.rb +13 -0
  31. data/lib/sonus/features/rms.rb +16 -0
  32. data/lib/sonus/features/spectral_centroid.rb +30 -0
  33. data/lib/sonus/features/spectral_crest.rb +19 -0
  34. data/lib/sonus/features/spectral_flatness.rb +22 -0
  35. data/lib/sonus/features/spectral_flux.rb +24 -0
  36. data/lib/sonus/features/spectral_kurtosis.rb +34 -0
  37. data/lib/sonus/features/spectral_rolloff.rb +26 -0
  38. data/lib/sonus/features/spectral_skewness.rb +34 -0
  39. data/lib/sonus/features/spectral_slope.rb +25 -0
  40. data/lib/sonus/features/spectral_spread.rb +32 -0
  41. data/lib/sonus/features/windowed_signal.rb +16 -0
  42. data/lib/sonus/features/zcr.rb +17 -0
  43. data/lib/sonus/features.rb +109 -0
  44. data/lib/sonus/version.rb +5 -0
  45. data/lib/sonus/wav/format.rb +15 -0
  46. data/lib/sonus/wav/reader.rb +224 -0
  47. data/lib/sonus.rb +204 -0
  48. metadata +160 -0
data/lib/sonus.rb ADDED
@@ -0,0 +1,204 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "sonus/version"
4
+ require_relative "sonus/error"
5
+ require_relative "sonus/dsp/windowing"
6
+ require_relative "sonus/dsp/fft_backend/ruby"
7
+ require_relative "sonus/dsp/fft_backend/fftw"
8
+ require_relative "sonus/dsp/fft"
9
+ require_relative "sonus/dsp/mel_filter_bank"
10
+ require_relative "sonus/wav/format"
11
+ require_relative "sonus/wav/reader"
12
+ require_relative "sonus/features"
13
+ require_relative "sonus/analyzer"
14
+
15
# Top-level entry points for single-frame audio feature extraction.
#
# The module normalises and validates caller input (signal, feature names,
# numeric options), builds a context Hash and hands it to +Features.compute+.
module Sonus
  # Default number of signal values per analysed frame.
  DEFAULT_BUFFER_SIZE = 512
  # Default sample rate; converted to Float before use.
  DEFAULT_SAMPLE_RATE = 44_100
  # Default name of the windowing function.
  DEFAULT_WINDOWING_FUNCTION = :hanning
  # Default value for +number_of_mfcc_coefficients+.
  DEFAULT_MFCC_COEFFICIENTS = 13
  # Default value for +number_of_mel_filters+.
  DEFAULT_MEL_FILTERS = 26
  # Default value for +number_of_chroma_bands+.
  DEFAULT_CHROMA_BANDS = 12
  # Default value for +number_of_bark_bands+.
  DEFAULT_BARK_BANDS = 24

  class << self
    # Extracts the requested features from one frame of +signal+.
    #
    # Accepts exactly the same arguments as {extract_with_cache} and returns
    # only the first element of what that method returns.
    #
    # @param signal [#to_a] numeric samples; truncated or zero-padded to +buffer_size+
    # @param features [Symbol, String, Array<Symbol, String>] feature name(s)
    # @param mel_bands [Integer, nil] alias that overrides +number_of_mel_filters+ when not nil
    # @param chroma_bands [Integer, nil] alias that overrides +number_of_chroma_bands+ when not nil
    # @param previous_signal [#to_a, nil] optional earlier frame, validated like +signal+
    # @param prev_amplitude_spectrum [#map, nil] optional precomputed spectrum of the earlier frame
    # @return [Object] first element of the value returned by +Features.compute+
    # @raise [ArgumentError] if a numeric option is not positive (or the sample rate is not finite)
    # @raise [InvalidFeatureError] if no usable feature name is given
    # @raise [InvalidSignalError] if a signal is empty or holds unusable values
    def extract(
      signal,
      features,
      buffer_size: DEFAULT_BUFFER_SIZE,
      sample_rate: DEFAULT_SAMPLE_RATE,
      windowing_function: DEFAULT_WINDOWING_FUNCTION,
      number_of_mfcc_coefficients: DEFAULT_MFCC_COEFFICIENTS,
      number_of_mel_filters: DEFAULT_MEL_FILTERS,
      mel_bands: nil,
      number_of_chroma_bands: DEFAULT_CHROMA_BANDS,
      chroma_bands: nil,
      number_of_bark_bands: DEFAULT_BARK_BANDS,
      previous_signal: nil,
      prev_amplitude_spectrum: nil
    )
      result, = extract_with_cache(
        signal,
        features,
        buffer_size: buffer_size,
        sample_rate: sample_rate,
        windowing_function: windowing_function,
        number_of_mfcc_coefficients: number_of_mfcc_coefficients,
        number_of_mel_filters: number_of_mel_filters,
        mel_bands: mel_bands,
        number_of_chroma_bands: number_of_chroma_bands,
        chroma_bands: chroma_bands,
        number_of_bark_bands: number_of_bark_bands,
        previous_signal: previous_signal,
        prev_amplitude_spectrum: prev_amplitude_spectrum
      )
      result
    end

    # Validates the input, builds the extraction context and delegates to
    # +Features.compute+ with +return_cache: true+.
    #
    # @param (see #extract)
    # @return [Object] whatever +Features.compute+ returns; {extract}
    #   destructures it and keeps the first element, so presumably a
    #   +[result, cache]+ pair (confirm against +Features.compute+)
    # @raise (see #extract)
    def extract_with_cache(
      signal,
      features,
      buffer_size: DEFAULT_BUFFER_SIZE,
      sample_rate: DEFAULT_SAMPLE_RATE,
      windowing_function: DEFAULT_WINDOWING_FUNCTION,
      number_of_mfcc_coefficients: DEFAULT_MFCC_COEFFICIENTS,
      number_of_mel_filters: DEFAULT_MEL_FILTERS,
      mel_bands: nil,
      number_of_chroma_bands: DEFAULT_CHROMA_BANDS,
      chroma_bands: nil,
      number_of_bark_bands: DEFAULT_BARK_BANDS,
      previous_signal: nil,
      prev_amplitude_spectrum: nil
    )
      feature_keys = normalize_features(features)
      frame_size = Integer(buffer_size)
      rate = Float(sample_rate)
      mel_filter_count = normalize_or_default(mel_bands, number_of_mel_filters)
      chroma_band_count = normalize_or_default(chroma_bands, number_of_chroma_bands)

      validate_options!(
        buffer_size: frame_size,
        sample_rate: rate,
        number_of_mfcc_coefficients: number_of_mfcc_coefficients,
        number_of_mel_filters: mel_filter_count,
        number_of_chroma_bands: chroma_band_count,
        number_of_bark_bands: number_of_bark_bands
      )

      frame = normalize_signal_to_frame(validate_signal!(signal), frame_size)
      previous_frame = normalize_previous_signal(previous_signal, frame_size)

      # Normalise the window name once so the previous frame is windowed under
      # exactly the same name that is stored in the context for the current one.
      window_name = windowing_function.to_sym
      previous_amplitude = resolve_previous_amplitude_spectrum(
        prev_amplitude_spectrum: prev_amplitude_spectrum,
        previous_signal: previous_frame,
        windowing_function: window_name
      )

      context = {
        buffer: frame,
        buffer_size: frame_size,
        sample_rate: rate,
        windowing_function: window_name,
        number_of_mfcc_coefficients: Integer(number_of_mfcc_coefficients),
        number_of_mel_filters: Integer(mel_filter_count),
        number_of_chroma_bands: Integer(chroma_band_count),
        number_of_bark_bands: Integer(number_of_bark_bands),
        previous_signal: previous_frame,
        prev_amplitude_spectrum: previous_amplitude
      }

      Features.compute(feature_keys, context, return_cache: true)
    end

    # @return [Object] the value of +Features.public_feature_keys+
    def available_features
      Features.public_feature_keys
    end

    # Alias of {available_features}.
    #
    # @return (see #available_features)
    def list_available_feature_extractors
      available_features
    end

    # Validates +signal+ and applies the named windowing function to it.
    #
    # @param signal [#to_a] numeric samples
    # @param function_name [Object] passed unchanged to +DSP::Windowing.apply+
    # @return [Object] the value returned by +DSP::Windowing.apply+
    # @raise [InvalidSignalError] if the signal is empty or holds unusable values
    def windowing(signal, function_name)
      DSP::Windowing.apply(validate_signal!(signal), function_name)
    end

    private

    # Ensures every numeric option is strictly positive.
    #
    # +buffer_size+ and +sample_rate+ are expected to be already converted
    # numbers; the remaining counts are coerced with +Integer()+ for the check.
    #
    # @raise [ArgumentError] on the first option that fails its check
    def validate_options!(
      buffer_size:,
      sample_rate:,
      number_of_mfcc_coefficients:,
      number_of_mel_filters:,
      number_of_chroma_bands:,
      number_of_bark_bands:
    )
      raise ArgumentError, "buffer_size must be positive" unless buffer_size.positive?
      raise ArgumentError, "sample_rate must be positive" unless sample_rate.positive?
      # Infinity is "positive" but is not a usable sample rate.
      raise ArgumentError, "sample_rate must be finite" unless sample_rate.finite?

      {
        number_of_mfcc_coefficients: number_of_mfcc_coefficients,
        number_of_mel_filters: number_of_mel_filters,
        number_of_chroma_bands: number_of_chroma_bands,
        number_of_bark_bands: number_of_bark_bands
      }.each do |option_name, value|
        raise ArgumentError, "#{option_name} must be positive" unless Integer(value).positive?
      end
    end

    # Converts the requested feature name(s) to a duplicate-free Array of Symbols.
    #
    # @param features [Object] a single name or a collection of names
    # @return [Array<Symbol>]
    # @raise [InvalidFeatureError] if the list is empty or a name has no +to_sym+
    def normalize_features(features)
      feature_list = Array(features)
      raise InvalidFeatureError, "At least one feature must be provided" if feature_list.empty?

      feature_list.map do |feature|
        unless feature.respond_to?(:to_sym)
          raise InvalidFeatureError, "Feature names must be symbols or strings"
        end

        feature.to_sym
      end.uniq
    end

    # Checks that +signal+ is a non-empty collection of finite real numbers.
    #
    # @param signal [#to_a]
    # @return [Array<Float>] the samples converted to Float
    # @raise [InvalidSignalError] if the signal cannot be used
    def validate_signal!(signal)
      unless signal.respond_to?(:to_a)
        raise InvalidSignalError, "Signal must be an Array of Numeric values"
      end

      samples = signal.to_a
      raise InvalidSignalError, "Signal cannot be empty" if samples.empty?

      samples.map { |value| coerce_sample(value) }
    end

    # Converts one signal value to a finite Float.
    #
    # Finiteness is checked after the conversion because some Numeric values
    # report +finite?+ as true yet do not convert to a finite Float (for
    # example very large Integers).
    #
    # @param value [Object]
    # @return [Float]
    # @raise [InvalidSignalError] if +value+ is not a finite real number
    def coerce_sample(value)
      raise InvalidSignalError, "Signal contains a non-numeric value" unless value.is_a?(Numeric)

      sample =
        begin
          value.to_f
        rescue RangeError
          # Complex#to_f raises RangeError when the imaginary part is not an exact zero.
          raise InvalidSignalError, "Signal contains a non-real value"
        end
      raise InvalidSignalError, "Signal contains NaN or Infinity" unless sample.finite?

      sample
    end

    # Truncates +signal+ to +buffer_size+ values or right-pads it with 0.0.
    #
    # @param signal [Array<Float>]
    # @param buffer_size [Integer]
    # @return [Array<Float>] exactly +buffer_size+ values
    def normalize_signal_to_frame(signal, buffer_size)
      frame = signal.first(buffer_size)
      missing = buffer_size - frame.length
      return frame if missing.zero?

      frame + Array.new(missing, 0.0)
    end

    # Validates and frames the optional previous signal.
    #
    # @param signal [#to_a, nil]
    # @param buffer_size [Integer]
    # @return [Array<Float>, nil] nil when no previous signal was given
    def normalize_previous_signal(signal, buffer_size)
      return nil if signal.nil?

      normalize_signal_to_frame(validate_signal!(signal), buffer_size)
    end

    # Picks the amplitude spectrum of the previous frame.
    #
    # An explicitly supplied spectrum wins; otherwise it is computed from the
    # previous signal via +DSP::Windowing.apply+ and +DSP::FFT.amplitude_spectrum+.
    #
    # @return [Object, nil] nil when neither input is available
    def resolve_previous_amplitude_spectrum(prev_amplitude_spectrum:, previous_signal:, windowing_function:)
      return prev_amplitude_spectrum.map(&:to_f) if prev_amplitude_spectrum
      return nil unless previous_signal

      windowed = DSP::Windowing.apply(previous_signal, windowing_function)
      DSP::FFT.amplitude_spectrum(windowed)
    end

    # Returns +alias_value+ unless it is nil, in which case +default_value+.
    #
    # An explicit nil check is used so that only nil falls back to the default.
    def normalize_or_default(alias_value, default_value)
      alias_value.nil? ? default_value : alias_value
    end
  end
end
metadata ADDED
@@ -0,0 +1,160 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sonus
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Yudai Takada
8
+ bindir: exe
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: fftw3-ruby
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: 0.1.0
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: 0.1.0
26
+ - !ruby/object:Gem::Dependency
27
+ name: benchmark-ips
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '2.12'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '2.12'
40
+ - !ruby/object:Gem::Dependency
41
+ name: rake
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '13.0'
47
+ type: :development
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '13.0'
54
+ - !ruby/object:Gem::Dependency
55
+ name: rspec
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '3.0'
61
+ type: :development
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '3.0'
68
+ - !ruby/object:Gem::Dependency
69
+ name: yard
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '0.9'
75
+ type: :development
76
+ prerelease: false
77
+ version_requirements: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '0.9'
82
+ description: Sonus extracts time-domain, spectral, and perceptual audio features in
83
+ pure Ruby with optional FFTW acceleration.
84
+ email:
85
+ - t.yudai92@gmail.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - CHANGELOG.md
91
+ - LICENSE.txt
92
+ - README.md
93
+ - Rakefile
94
+ - benchmark/extract_all_features.rb
95
+ - examples/analyze_wav.rb
96
+ - examples/example_helper.rb
97
+ - examples/extract_basic.rb
98
+ - examples/extract_spectral_flux.rb
99
+ - examples/read_wav.rb
100
+ - lib/sonus.rb
101
+ - lib/sonus/analyzer.rb
102
+ - lib/sonus/dsp/fft.rb
103
+ - lib/sonus/dsp/fft_backend/fftw.rb
104
+ - lib/sonus/dsp/fft_backend/ruby.rb
105
+ - lib/sonus/dsp/mel_filter_bank.rb
106
+ - lib/sonus/dsp/windowing.rb
107
+ - lib/sonus/error.rb
108
+ - lib/sonus/features.rb
109
+ - lib/sonus/features/amplitude_spectrum.rb
110
+ - lib/sonus/features/base.rb
111
+ - lib/sonus/features/buffer.rb
112
+ - lib/sonus/features/chroma.rb
113
+ - lib/sonus/features/complex_spectrum.rb
114
+ - lib/sonus/features/energy.rb
115
+ - lib/sonus/features/loudness.rb
116
+ - lib/sonus/features/mel_bands.rb
117
+ - lib/sonus/features/mfcc.rb
118
+ - lib/sonus/features/perceptual_sharpness.rb
119
+ - lib/sonus/features/perceptual_spread.rb
120
+ - lib/sonus/features/power_spectrum.rb
121
+ - lib/sonus/features/rms.rb
122
+ - lib/sonus/features/spectral_centroid.rb
123
+ - lib/sonus/features/spectral_crest.rb
124
+ - lib/sonus/features/spectral_flatness.rb
125
+ - lib/sonus/features/spectral_flux.rb
126
+ - lib/sonus/features/spectral_kurtosis.rb
127
+ - lib/sonus/features/spectral_rolloff.rb
128
+ - lib/sonus/features/spectral_skewness.rb
129
+ - lib/sonus/features/spectral_slope.rb
130
+ - lib/sonus/features/spectral_spread.rb
131
+ - lib/sonus/features/windowed_signal.rb
132
+ - lib/sonus/features/zcr.rb
133
+ - lib/sonus/version.rb
134
+ - lib/sonus/wav/format.rb
135
+ - lib/sonus/wav/reader.rb
136
+ homepage: https://github.com/ydah/sonus
137
+ licenses:
138
+ - MIT
139
+ metadata:
140
+ homepage_uri: https://github.com/ydah/sonus
141
+ source_code_uri: https://github.com/ydah/sonus/tree/main
142
+ changelog_uri: https://github.com/ydah/sonus/blob/main/CHANGELOG.md
143
+ rdoc_options: []
144
+ require_paths:
145
+ - lib
146
+ required_ruby_version: !ruby/object:Gem::Requirement
147
+ requirements:
148
+ - - ">="
149
+ - !ruby/object:Gem::Version
150
+ version: '3.1'
151
+ required_rubygems_version: !ruby/object:Gem::Requirement
152
+ requirements:
153
+ - - ">="
154
+ - !ruby/object:Gem::Version
155
+ version: '0'
156
+ requirements: []
157
+ rubygems_version: 4.0.6
158
+ specification_version: 4
159
+ summary: Audio feature extraction library for Ruby
160
+ test_files: []