sonus 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +5 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +314 -0
  5. data/Rakefile +8 -0
  6. data/benchmark/extract_all_features.rb +17 -0
  7. data/examples/analyze_wav.rb +25 -0
  8. data/examples/example_helper.rb +84 -0
  9. data/examples/extract_basic.rb +23 -0
  10. data/examples/extract_spectral_flux.rb +37 -0
  11. data/examples/read_wav.rb +19 -0
  12. data/lib/sonus/analyzer.rb +109 -0
  13. data/lib/sonus/dsp/fft.rb +55 -0
  14. data/lib/sonus/dsp/fft_backend/fftw.rb +103 -0
  15. data/lib/sonus/dsp/fft_backend/ruby.rb +80 -0
  16. data/lib/sonus/dsp/mel_filter_bank.rb +98 -0
  17. data/lib/sonus/dsp/windowing.rb +47 -0
  18. data/lib/sonus/error.rb +9 -0
  19. data/lib/sonus/features/amplitude_spectrum.rb +14 -0
  20. data/lib/sonus/features/base.rb +27 -0
  21. data/lib/sonus/features/buffer.rb +13 -0
  22. data/lib/sonus/features/chroma.rb +35 -0
  23. data/lib/sonus/features/complex_spectrum.rb +13 -0
  24. data/lib/sonus/features/energy.rb +13 -0
  25. data/lib/sonus/features/loudness.rb +57 -0
  26. data/lib/sonus/features/mel_bands.rb +27 -0
  27. data/lib/sonus/features/mfcc.rb +30 -0
  28. data/lib/sonus/features/perceptual_sharpness.rb +26 -0
  29. data/lib/sonus/features/perceptual_spread.rb +18 -0
  30. data/lib/sonus/features/power_spectrum.rb +13 -0
  31. data/lib/sonus/features/rms.rb +16 -0
  32. data/lib/sonus/features/spectral_centroid.rb +30 -0
  33. data/lib/sonus/features/spectral_crest.rb +19 -0
  34. data/lib/sonus/features/spectral_flatness.rb +22 -0
  35. data/lib/sonus/features/spectral_flux.rb +24 -0
  36. data/lib/sonus/features/spectral_kurtosis.rb +34 -0
  37. data/lib/sonus/features/spectral_rolloff.rb +26 -0
  38. data/lib/sonus/features/spectral_skewness.rb +34 -0
  39. data/lib/sonus/features/spectral_slope.rb +25 -0
  40. data/lib/sonus/features/spectral_spread.rb +32 -0
  41. data/lib/sonus/features/windowed_signal.rb +16 -0
  42. data/lib/sonus/features/zcr.rb +17 -0
  43. data/lib/sonus/features.rb +109 -0
  44. data/lib/sonus/version.rb +5 -0
  45. data/lib/sonus/wav/format.rb +15 -0
  46. data/lib/sonus/wav/reader.rb +224 -0
  47. data/lib/sonus.rb +204 -0
  48. metadata +160 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: e8a628675bd2e5a3487629fefb877e0abbc037989f5deb2f0203be6b9c63d6ee
4
+ data.tar.gz: 915f50ac06a681866e77fbb74a08db79829f72625e7eb888de85c0aa7b9bdeaa
5
+ SHA512:
6
+ metadata.gz: 504fbab955ddef73e4f39b789edfea5ccd52fbf00061f1bc12b796560cde37911b5c10e223d7a736b8d38f55025ee0d1434f2b1e0798a3197ecded6273d9a0c0
7
+ data.tar.gz: 8fbc1a63264627458b5e18a5227b22ef998818e3da52edf92cbf3c40e1d20b26b551570fae7c1b4b12d3d272329074e09f10e05c28d1f618f674c0d8667d4e3b
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ # Changelog
2
+
3
+ ## [0.1.0] - 2026-03-07
4
+
5
+ - Initial release.
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2026 Yudai Takada
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,314 @@
1
+ # Sonus
2
+
3
+ Sonus is a Ruby audio feature extraction library for in-memory signals and WAV files.
4
+ It exposes a small public API around feature extraction, frame-based analysis, and WAV decoding.
5
+
6
+ Sonus always works with the built-in Ruby FFT backend, and automatically uses FFTW through `fftw3-ruby` when the system FFTW library is available.
7
+
8
+ ## Highlights
9
+
10
+ - Ruby `>= 3.1`
11
+ - Single-frame extraction with `Sonus.extract`
12
+ - Cached extraction with `Sonus.extract_with_cache`
13
+ - Frame-by-frame WAV analysis with `Sonus::Analyzer`
14
+ - WAV decoding and inspection with `Sonus::WAV::Reader`
15
+ - Pure Ruby FFT fallback with optional FFTW acceleration
16
+
17
+ ## Installation
18
+
19
+ Install the gem:
20
+
21
+ ```bash
22
+ gem install sonus
23
+ ```
24
+
25
+ Or add it to your Gemfile:
26
+
27
+ ```ruby
28
+ gem "sonus"
29
+ ```
30
+
31
+ ### Optional FFTW acceleration
32
+
33
+ `fftw3-ruby` is installed as a Sonus dependency.
34
+ To enable the FFTW backend, install the native FFTW shared library on your system:
35
+
36
+ ```bash
37
+ # macOS
38
+ brew install fftw
39
+
40
+ # Debian / Ubuntu
41
+ sudo apt-get install libfftw3-dev
42
+ ```
43
+
44
+ You can inspect or force the backend at runtime:
45
+
46
+ ```ruby
47
+ require "sonus"
48
+
49
+ Sonus::DSP::FFT.backend
50
+ # => :ruby or :fftw
51
+
52
+ Sonus::DSP::FFT.backend = :ruby
53
+ Sonus::DSP::FFT.backend = :fftw
54
+ ```
55
+
56
+ Setting `:fftw` raises `Sonus::FFTBackendError` when FFTW is unavailable.
57
+
58
+ ## Quick Start
59
+
60
+ Runnable scripts are available in `examples/`:
61
+
62
+ - `examples/extract_basic.rb`
63
+ - `examples/extract_spectral_flux.rb`
64
+ - `examples/analyze_wav.rb`
65
+ - `examples/read_wav.rb`
66
+
67
+ ### Single-frame extraction
68
+
69
+ ```ruby
70
+ require "sonus"
71
+
72
+ sample_rate = 44_100
73
+ buffer_size = 512
74
+
75
+ signal = Array.new(buffer_size) do |index|
76
+ Math.sin((2.0 * Math::PI * 440.0 * index) / sample_rate)
77
+ end
78
+
79
+ features = Sonus.extract(
80
+ signal,
81
+ %i[rms spectral_centroid mfcc],
82
+ buffer_size: buffer_size,
83
+ sample_rate: sample_rate
84
+ )
85
+
86
+ puts features[:rms]
87
+ puts features[:spectral_centroid]
88
+ puts features[:mfcc].length
89
+ ```
90
+
91
+ ### Frame-based WAV analysis
92
+
93
+ ```ruby
94
+ require "sonus"
95
+
96
+ analyzer = Sonus::Analyzer.new(
97
+ buffer_size: 1024,
98
+ hop_size: 512,
99
+ sample_rate: 44_100,
100
+ features: %i[rms spectral_flux mfcc]
101
+ )
102
+
103
+ results = analyzer.analyze_file("audio.wav")
104
+ puts results.first[:rms]
105
+
106
+ analyzer.each_frame("audio.wav").first(3).each do |frame|
107
+ puts frame[:spectral_flux]
108
+ end
109
+ ```
110
+
111
+ ### WAV reading
112
+
113
+ ```ruby
114
+ require "sonus"
115
+
116
+ reader = Sonus::WAV::Reader.new("audio.wav")
117
+
118
+ puts reader.sample_rate
119
+ puts reader.channels
120
+ puts reader.bit_depth
121
+ puts reader.duration
122
+
123
+ samples = reader.frames
124
+ reader.close
125
+ ```
126
+
127
+ ## Public API
128
+
129
+ ### `Sonus.extract`
130
+
131
+ Use `Sonus.extract(signal, features, **options)` for one analysis frame.
132
+ It returns a hash containing only the requested public features.
133
+
134
+ Important behavior:
135
+
136
+ - `signal` must be an `Array` or respond to `#to_a`
137
+ - signal values must be finite numeric values
138
+ - shorter input is zero-padded to `buffer_size`
139
+ - longer input is truncated to the first `buffer_size` samples
140
+ - feature dependencies are resolved automatically
141
+ - `features` can be a symbol, string, or array of them
142
+
143
+ For previous-frame features such as `:spectral_flux`, pass either `previous_signal` or `prev_amplitude_spectrum`:
144
+
145
+ ```ruby
146
+ previous = Array.new(512, 0.0)
147
+ current = Array.new(512) { |i| Math.sin((2.0 * Math::PI * 440.0 * i) / 44_100.0) }
148
+
149
+ result = Sonus.extract(
150
+ current,
151
+ :spectral_flux,
152
+ buffer_size: 512,
153
+ sample_rate: 44_100,
154
+ previous_signal: previous
155
+ )
156
+
157
+ puts result[:spectral_flux]
158
+ ```
159
+
160
+ ### `Sonus.extract_with_cache`
161
+
162
+ Use `Sonus.extract_with_cache` when you need both the requested result and intermediate feature values.
163
+ It returns `[result, cache]`, where `result` includes only requested public features and `cache` includes dependency outputs as well.
164
+
165
+ This is the API used internally by `Sonus::Analyzer`.
166
+
167
+ ### `Sonus::Analyzer`
168
+
169
+ Use `Sonus::Analyzer` for repeated frame analysis with stable configuration.
170
+
171
+ ```ruby
172
+ analyzer = Sonus::Analyzer.new(
173
+ buffer_size: 1024,
174
+ hop_size: 512,
175
+ sample_rate: 44_100,
176
+ features: %i[rms spectral_centroid]
177
+ )
178
+
179
+ analyzer.analyze(Array.new(1024, 1.0))
180
+ analyzer.analyze_file("audio.wav")
181
+ analyzer.each_frame("audio.wav")
182
+ ```
183
+
184
+ Important behavior:
185
+
186
+ - `features:` defaults to `[:rms]`
187
+ - `hop_size` defaults to `buffer_size`
188
+ - overlapping analysis is supported with `hop_size < buffer_size`
189
+ - `each_frame(path)` returns an enumerator when no block is given
190
+ - `sample_rate` must match the WAV file sample rate for `analyze_file` and `each_frame`
191
+ - previous-frame state is reset at the start of each `each_frame` run
192
+
193
+ ### `Sonus::WAV::Reader`
194
+
195
+ `Sonus::WAV::Reader` accepts a filesystem path or an IO object that responds to `#read` and `#seek`.
196
+
197
+ Supported WAV input:
198
+
199
+ - PCM: 8-bit, 16-bit, 24-bit, 32-bit
200
+ - IEEE float: 32-bit, 64-bit
201
+ - mono and multi-channel files
202
+
203
+ Important behavior:
204
+
205
+ - `frames` returns normalized floats
206
+ - for multi-channel files, Sonus decodes the first channel of each frame
207
+ - `each_buffer(size)` returns an enumerator when no block is given
208
+ - `each_buffer(size)` zero-pads the final buffer
209
+
210
+ ## Features
211
+
212
+ Inspect the public feature set at runtime:
213
+
214
+ ```ruby
215
+ Sonus.available_features
216
+ Sonus.list_available_feature_extractors
217
+ ```
218
+
219
+ Current public features:
220
+
221
+ ### Time-domain and raw outputs
222
+
223
+ - `:buffer`
224
+ - `:rms`
225
+ - `:zcr`
226
+ - `:energy`
227
+ - `:complex_spectrum`
228
+ - `:amplitude_spectrum`
229
+ - `:power_spectrum`
230
+
231
+ ### Spectral features
232
+
233
+ - `:spectral_centroid`
234
+ - `:spectral_flatness`
235
+ - `:spectral_flux`
236
+ - `:spectral_slope`
237
+ - `:spectral_rolloff`
238
+ - `:spectral_spread`
239
+ - `:spectral_skewness`
240
+ - `:spectral_kurtosis`
241
+ - `:spectral_crest`
242
+ - `:chroma`
243
+ - `:mel_bands`
244
+ - `:mfcc`
245
+
246
+ ### Perceptual features
247
+
248
+ - `:loudness`
249
+ - `:perceptual_spread`
250
+ - `:perceptual_sharpness`
251
+
252
+ ## Configuration
253
+
254
+ Default values:
255
+
256
+ - `buffer_size = 512`
257
+ - `sample_rate = 44_100`
258
+ - `windowing_function = :hanning`
259
+ - `number_of_mfcc_coefficients = 13`
260
+ - `number_of_mel_filters = 26`
261
+ - `number_of_chroma_bands = 12`
262
+ - `number_of_bark_bands = 24`
263
+
264
+ Supported windowing functions:
265
+
266
+ - `:rect`
267
+ - `:hann`
268
+ - `:hanning`
269
+ - `:hamming`
270
+ - `:blackman`
271
+ - `:sine`
272
+
273
+ Supported aliases:
274
+
275
+ - `mel_bands:` for `number_of_mel_filters:`
276
+ - `chroma_bands:` for `number_of_chroma_bands:`
277
+
278
+ ## Errors
279
+
280
+ Sonus raises domain-specific errors for common failure cases:
281
+
282
+ - `Sonus::InvalidSignalError`
283
+ - `Sonus::InvalidFeatureError`
284
+ - `Sonus::WAVFormatError`
285
+ - `Sonus::FFTBackendError`
286
+
287
+ ## Development
288
+
289
+ Install dependencies:
290
+
291
+ ```bash
292
+ bundle install
293
+ ```
294
+
295
+ Run tests:
296
+
297
+ ```bash
298
+ bundle exec rspec
299
+ bundle exec rake
300
+ ```
301
+
302
+ Run the benchmark:
303
+
304
+ ```bash
305
+ bundle exec ruby benchmark/extract_all_features.rb
306
+ ```
307
+
308
+ ## Contributing
309
+
310
+ Pull requests and issue reports are welcome.
311
+
312
+ ## License
313
+
314
+ MIT License.
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ task default: :spec
data/benchmark/extract_all_features.rb ADDED
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/setup"
4
+ require "benchmark/ips"
5
+ require "sonus"
6
+
7
+ sample_rate = 44_100
8
+ buffer = Array.new(1024) { |i| Math.sin((2.0 * Math::PI * 440.0 * i) / sample_rate) }
9
+ features = Sonus.available_features
10
+
11
+ Benchmark.ips do |x|
12
+ x.report("extract_all_features") do
13
+ Sonus.extract(buffer, features, buffer_size: 1024, sample_rate: sample_rate)
14
+ end
15
+
16
+ x.compare!
17
+ end
data/examples/analyze_wav.rb ADDED
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "example_helper"
4
+
5
+ wav_path = ExampleHelper.mono_wav_path
6
+
7
+ analyzer = Sonus::Analyzer.new(
8
+ buffer_size: 1024,
9
+ hop_size: 512,
10
+ sample_rate: 44_100,
11
+ features: %i[rms spectral_centroid spectral_flux]
12
+ )
13
+
14
+ results = analyzer.analyze_file(wav_path)
15
+ first_frame = results.first
16
+
17
+ puts "analyze_file_frame_count: #{results.length}"
18
+ puts "first_frame_rms: #{format("%.6f", first_frame[:rms])}"
19
+ puts "first_frame_centroid_hz: #{format("%.2f", first_frame[:spectral_centroid])}"
20
+ puts "first_frame_flux: #{format("%.6f", first_frame[:spectral_flux])}"
21
+
22
+ puts "first_three_rms_from_each_frame:"
23
+ analyzer.each_frame(wav_path).first(3).each_with_index do |frame, index|
24
+ puts " frame_#{index}: #{format("%.6f", frame[:rms])}"
25
+ end
data/examples/example_helper.rb ADDED
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "tempfile"
4
+
5
+ begin
6
+ require "bundler/setup"
7
+ rescue LoadError
8
+ nil
9
+ end
10
+
11
+ begin
12
+ require "sonus"
13
+ rescue LoadError
14
+ require_relative "../lib/sonus"
15
+ end
16
+
17
+ module ExampleHelper
18
+ module_function
19
+
20
+ TEMPFILES = []
21
+
22
+ def fixture_path(name)
23
+ File.expand_path("../spec/fixtures/#{name}", __dir__)
24
+ end
25
+
26
+ def mono_wav_path
27
+ wav_path("sine_440hz_16bit.wav", channels: 1)
28
+ end
29
+
30
+ def stereo_wav_path
31
+ wav_path("stereo_16bit.wav", channels: 2)
32
+ end
33
+
34
+ def wav_path(name, channels:)
35
+ path = fixture_path(name)
36
+ return path if File.exist?(path)
37
+
38
+ generate_wav(name, channels: channels)
39
+ end
40
+
41
+ def generate_wav(name, channels:, sample_rate: 44_100, frame_count: 44_100, frequency: 440.0)
42
+ tempfile = Tempfile.new([File.basename(name, ".wav"), ".wav"])
43
+ tempfile.binmode
44
+
45
+ data = Array.new(frame_count) do |index|
46
+ sample = Math.sin((2.0 * Math::PI * frequency * index) / sample_rate)
47
+ frame_bytes(sample, channels)
48
+ end.join
49
+
50
+ tempfile.write(wav_bytes(data, channels: channels, sample_rate: sample_rate))
51
+ tempfile.flush
52
+ TEMPFILES << tempfile
53
+ tempfile.path
54
+ end
55
+
56
+ def frame_bytes(sample, channels)
57
+ left = pcm16_bytes(sample)
58
+ return left if channels == 1
59
+
60
+ right = pcm16_bytes(sample * 0.5)
61
+ left + right
62
+ end
63
+
64
+ def pcm16_bytes(sample)
65
+ clipped = [[sample, -1.0].max, 1.0].min
66
+ [(clipped * 32_767).round].pack("s<")
67
+ end
68
+
69
+ def wav_bytes(data, channels:, sample_rate:)
70
+ bits_per_sample = 16
71
+ block_align = channels * (bits_per_sample / 8)
72
+ byte_rate = sample_rate * block_align
73
+ riff_size = 36 + data.bytesize
74
+
75
+ +"RIFF" \
76
+ << [riff_size].pack("V") \
77
+ << "WAVE" \
78
+ << "fmt " \
79
+ << [16, 1, channels, sample_rate, byte_rate, block_align, bits_per_sample].pack("VvvVVvv") \
80
+ << "data" \
81
+ << [data.bytesize].pack("V") \
82
+ << data
83
+ end
84
+ end
data/examples/extract_basic.rb ADDED
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "example_helper"
4
+
5
+ sample_rate = 44_100
6
+ buffer_size = 512
7
+
8
+ signal = Array.new(buffer_size) do |index|
9
+ Math.sin((2.0 * Math::PI * 440.0 * index) / sample_rate)
10
+ end
11
+
12
+ features = Sonus.extract(
13
+ signal,
14
+ %i[rms spectral_centroid mfcc],
15
+ buffer_size: buffer_size,
16
+ sample_rate: sample_rate,
17
+ number_of_mfcc_coefficients: 13
18
+ )
19
+
20
+ puts "rms: #{format("%.6f", features[:rms])}"
21
+ puts "spectral_centroid_hz: #{format("%.2f", features[:spectral_centroid])}"
22
+ puts "mfcc_count: #{features[:mfcc].length}"
23
+ puts "mfcc_first_3: #{features[:mfcc].first(3).map { |value| format("%.4f", value) }.join(", ")}"
data/examples/extract_spectral_flux.rb ADDED
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "example_helper"
4
+
5
+ sample_rate = 44_100
6
+ buffer_size = 512
7
+
8
+ previous_signal = Array.new(buffer_size, 0.0)
9
+ current_signal = Array.new(buffer_size) do |index|
10
+ Math.sin((2.0 * Math::PI * 440.0 * index) / sample_rate)
11
+ end
12
+
13
+ from_previous_signal = Sonus.extract(
14
+ current_signal,
15
+ :spectral_flux,
16
+ buffer_size: buffer_size,
17
+ sample_rate: sample_rate,
18
+ previous_signal: previous_signal
19
+ )
20
+
21
+ previous_spectrum = Sonus.extract(
22
+ previous_signal,
23
+ :amplitude_spectrum,
24
+ buffer_size: buffer_size,
25
+ sample_rate: sample_rate
26
+ )
27
+
28
+ from_previous_spectrum = Sonus.extract(
29
+ current_signal,
30
+ :spectral_flux,
31
+ buffer_size: buffer_size,
32
+ sample_rate: sample_rate,
33
+ prev_amplitude_spectrum: previous_spectrum[:amplitude_spectrum]
34
+ )
35
+
36
+ puts "spectral_flux_from_previous_signal: #{format("%.6f", from_previous_signal[:spectral_flux])}"
37
+ puts "spectral_flux_from_previous_spectrum: #{format("%.6f", from_previous_spectrum[:spectral_flux])}"
data/examples/read_wav.rb ADDED
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "example_helper"
4
+
5
+ wav_path = ExampleHelper.stereo_wav_path
6
+ reader = Sonus::WAV::Reader.new(wav_path)
7
+
8
+ begin
9
+ first_buffer = reader.each_buffer(2048).first
10
+
11
+ puts "sample_rate: #{reader.sample_rate}"
12
+ puts "channels: #{reader.channels}"
13
+ puts "bit_depth: #{reader.bit_depth}"
14
+ puts "duration_seconds: #{format("%.3f", reader.duration)}"
15
+ puts "decoded_frame_count: #{reader.frames.length}"
16
+ puts "first_buffer_first_5: #{first_buffer.first(5).map { |value| format("%.4f", value) }.join(", ")}"
17
+ ensure
18
+ reader.close
19
+ end
data/lib/sonus/analyzer.rb ADDED
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Sonus
4
+ class Analyzer
5
+ attr_reader :buffer_size, :sample_rate, :features, :hop_size, :windowing_function
6
+
7
+ def initialize(
8
+ buffer_size: Sonus::DEFAULT_BUFFER_SIZE,
9
+ sample_rate: Sonus::DEFAULT_SAMPLE_RATE,
10
+ features: [:rms],
11
+ hop_size: nil,
12
+ windowing_function: Sonus::DEFAULT_WINDOWING_FUNCTION,
13
+ number_of_mfcc_coefficients: Sonus::DEFAULT_MFCC_COEFFICIENTS,
14
+ number_of_mel_filters: Sonus::DEFAULT_MEL_FILTERS,
15
+ mel_bands: nil,
16
+ number_of_chroma_bands: Sonus::DEFAULT_CHROMA_BANDS,
17
+ chroma_bands: nil,
18
+ number_of_bark_bands: Sonus::DEFAULT_BARK_BANDS
19
+ )
20
+ @buffer_size = Integer(buffer_size)
21
+ @sample_rate = Float(sample_rate)
22
+ @features = Array(features).map(&:to_sym)
23
+ @hop_size = hop_size ? Integer(hop_size) : @buffer_size
24
+ @windowing_function = windowing_function.to_sym
25
+ @number_of_mfcc_coefficients = Integer(number_of_mfcc_coefficients)
26
+ @number_of_mel_filters = Integer(mel_bands || number_of_mel_filters)
27
+ @number_of_chroma_bands = Integer(chroma_bands || number_of_chroma_bands)
28
+ @number_of_bark_bands = Integer(number_of_bark_bands)
29
+ @prev_amplitude_spectrum = nil
30
+
31
+ raise ArgumentError, "buffer_size must be positive" unless @buffer_size.positive?
32
+ raise ArgumentError, "hop_size must be positive" unless @hop_size.positive?
33
+
34
+ preload_mel_filter_bank
35
+ end
36
+
37
+ def analyze(buffer)
38
+ result, cache = Sonus.extract_with_cache(
39
+ buffer,
40
+ @features,
41
+ buffer_size: @buffer_size,
42
+ sample_rate: @sample_rate,
43
+ windowing_function: @windowing_function,
44
+ number_of_mfcc_coefficients: @number_of_mfcc_coefficients,
45
+ number_of_mel_filters: @number_of_mel_filters,
46
+ number_of_chroma_bands: @number_of_chroma_bands,
47
+ number_of_bark_bands: @number_of_bark_bands,
48
+ prev_amplitude_spectrum: @prev_amplitude_spectrum
49
+ )
50
+
51
+ @prev_amplitude_spectrum = cache[:amplitude_spectrum] if cache.key?(:amplitude_spectrum)
52
+ result
53
+ end
54
+
55
+ def analyze_file(path)
56
+ frames = []
57
+ each_frame(path) { |features| frames << features }
58
+ frames
59
+ end
60
+
61
+ def each_frame(path)
62
+ return enum_for(:each_frame, path) unless block_given?
63
+
64
+ reader = Sonus::WAV::Reader.new(path)
65
+ begin
66
+ @prev_amplitude_spectrum = nil
67
+ validate_reader_sample_rate!(reader)
68
+ signal = reader.frames
69
+ each_signal_frame(signal) { |buffer| yield analyze(buffer) }
70
+ ensure
71
+ reader.close
72
+ end
73
+
74
+ nil
75
+ end
76
+
77
+ private
78
+
79
+ def each_signal_frame(signal)
80
+ total_samples = signal.length
81
+ frame_count = if total_samples <= @buffer_size
82
+ 1
83
+ else
84
+ ((total_samples - @buffer_size).fdiv(@hop_size).ceil) + 1
85
+ end
86
+
87
+ frame_count.times do |frame_index|
88
+ start_index = frame_index * @hop_size
89
+ frame = signal.slice(start_index, @buffer_size) || []
90
+ frame += Array.new(@buffer_size - frame.length, 0.0) if frame.length < @buffer_size
91
+ yield frame
92
+ end
93
+ end
94
+
95
+ def validate_reader_sample_rate!(reader)
96
+ return if reader.sample_rate.to_f == @sample_rate
97
+
98
+ raise ArgumentError, "Analyzer sample_rate (#{@sample_rate}) does not match WAV sample_rate (#{reader.sample_rate})"
99
+ end
100
+
101
+ def preload_mel_filter_bank
102
+ Sonus::DSP::MelFilterBank.create(
103
+ num_filters: @number_of_mel_filters,
104
+ fft_size: @buffer_size,
105
+ sample_rate: @sample_rate
106
+ )
107
+ end
108
+ end
109
+ end