sonus 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +314 -0
- data/Rakefile +8 -0
- data/benchmark/extract_all_features.rb +17 -0
- data/examples/analyze_wav.rb +25 -0
- data/examples/example_helper.rb +84 -0
- data/examples/extract_basic.rb +23 -0
- data/examples/extract_spectral_flux.rb +37 -0
- data/examples/read_wav.rb +19 -0
- data/lib/sonus/analyzer.rb +109 -0
- data/lib/sonus/dsp/fft.rb +55 -0
- data/lib/sonus/dsp/fft_backend/fftw.rb +103 -0
- data/lib/sonus/dsp/fft_backend/ruby.rb +80 -0
- data/lib/sonus/dsp/mel_filter_bank.rb +98 -0
- data/lib/sonus/dsp/windowing.rb +47 -0
- data/lib/sonus/error.rb +9 -0
- data/lib/sonus/features/amplitude_spectrum.rb +14 -0
- data/lib/sonus/features/base.rb +27 -0
- data/lib/sonus/features/buffer.rb +13 -0
- data/lib/sonus/features/chroma.rb +35 -0
- data/lib/sonus/features/complex_spectrum.rb +13 -0
- data/lib/sonus/features/energy.rb +13 -0
- data/lib/sonus/features/loudness.rb +57 -0
- data/lib/sonus/features/mel_bands.rb +27 -0
- data/lib/sonus/features/mfcc.rb +30 -0
- data/lib/sonus/features/perceptual_sharpness.rb +26 -0
- data/lib/sonus/features/perceptual_spread.rb +18 -0
- data/lib/sonus/features/power_spectrum.rb +13 -0
- data/lib/sonus/features/rms.rb +16 -0
- data/lib/sonus/features/spectral_centroid.rb +30 -0
- data/lib/sonus/features/spectral_crest.rb +19 -0
- data/lib/sonus/features/spectral_flatness.rb +22 -0
- data/lib/sonus/features/spectral_flux.rb +24 -0
- data/lib/sonus/features/spectral_kurtosis.rb +34 -0
- data/lib/sonus/features/spectral_rolloff.rb +26 -0
- data/lib/sonus/features/spectral_skewness.rb +34 -0
- data/lib/sonus/features/spectral_slope.rb +25 -0
- data/lib/sonus/features/spectral_spread.rb +32 -0
- data/lib/sonus/features/windowed_signal.rb +16 -0
- data/lib/sonus/features/zcr.rb +17 -0
- data/lib/sonus/features.rb +109 -0
- data/lib/sonus/version.rb +5 -0
- data/lib/sonus/wav/format.rb +15 -0
- data/lib/sonus/wav/reader.rb +224 -0
- data/lib/sonus.rb +204 -0
- metadata +160 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: e8a628675bd2e5a3487629fefb877e0abbc037989f5deb2f0203be6b9c63d6ee
|
|
4
|
+
data.tar.gz: 915f50ac06a681866e77fbb74a08db79829f72625e7eb888de85c0aa7b9bdeaa
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 504fbab955ddef73e4f39b789edfea5ccd52fbf00061f1bc12b796560cde37911b5c10e223d7a736b8d38f55025ee0d1434f2b1e0798a3197ecded6273d9a0c0
|
|
7
|
+
data.tar.gz: 8fbc1a63264627458b5e18a5227b22ef998818e3da52edf92cbf3c40e1d20b26b551570fae7c1b4b12d3d272329074e09f10e05c28d1f618f674c0d8667d4e3b
|
data/CHANGELOG.md
ADDED
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Yudai Takada
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
# Sonus
|
|
2
|
+
|
|
3
|
+
Sonus is a Ruby audio feature extraction library for in-memory signals and WAV files.
|
|
4
|
+
It exposes a small public API around feature extraction, frame-based analysis, and WAV decoding.
|
|
5
|
+
|
|
6
|
+
Sonus works out of the box with its built-in pure-Ruby FFT backend, and automatically switches to FFTW (through `fftw3-ruby`) when the system FFTW library is available.
|
|
7
|
+
|
|
8
|
+
## Highlights
|
|
9
|
+
|
|
10
|
+
- Ruby `>= 3.1`
|
|
11
|
+
- Single-frame extraction with `Sonus.extract`
|
|
12
|
+
- Cached extraction with `Sonus.extract_with_cache`
|
|
13
|
+
- Frame-by-frame WAV analysis with `Sonus::Analyzer`
|
|
14
|
+
- WAV decoding and inspection with `Sonus::WAV::Reader`
|
|
15
|
+
- Pure Ruby FFT fallback with optional FFTW acceleration
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
Install the gem:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
gem install sonus
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Or add it to your Gemfile:
|
|
26
|
+
|
|
27
|
+
```ruby
|
|
28
|
+
gem "sonus"
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
### Optional FFTW acceleration
|
|
32
|
+
|
|
33
|
+
`fftw3-ruby` is installed as a Sonus dependency.
|
|
34
|
+
To enable the FFTW backend, install the native FFTW shared library on your system:
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
# macOS
|
|
38
|
+
brew install fftw
|
|
39
|
+
|
|
40
|
+
# Debian / Ubuntu
|
|
41
|
+
sudo apt-get install libfftw3-dev
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
You can inspect or force the backend at runtime:
|
|
45
|
+
|
|
46
|
+
```ruby
|
|
47
|
+
require "sonus"
|
|
48
|
+
|
|
49
|
+
Sonus::DSP::FFT.backend
|
|
50
|
+
# => :ruby or :fftw
|
|
51
|
+
|
|
52
|
+
Sonus::DSP::FFT.backend = :ruby
|
|
53
|
+
Sonus::DSP::FFT.backend = :fftw
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Setting `:fftw` raises `Sonus::FFTBackendError` when FFTW is unavailable.
|
|
57
|
+
|
|
58
|
+
## Quick Start
|
|
59
|
+
|
|
60
|
+
Runnable scripts are available in `examples/`:
|
|
61
|
+
|
|
62
|
+
- `examples/extract_basic.rb`
|
|
63
|
+
- `examples/extract_spectral_flux.rb`
|
|
64
|
+
- `examples/analyze_wav.rb`
|
|
65
|
+
- `examples/read_wav.rb`
|
|
66
|
+
|
|
67
|
+
### Single-frame extraction
|
|
68
|
+
|
|
69
|
+
```ruby
|
|
70
|
+
require "sonus"
|
|
71
|
+
|
|
72
|
+
sample_rate = 44_100
|
|
73
|
+
buffer_size = 512
|
|
74
|
+
|
|
75
|
+
signal = Array.new(buffer_size) do |index|
|
|
76
|
+
Math.sin((2.0 * Math::PI * 440.0 * index) / sample_rate)
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
features = Sonus.extract(
|
|
80
|
+
signal,
|
|
81
|
+
%i[rms spectral_centroid mfcc],
|
|
82
|
+
buffer_size: buffer_size,
|
|
83
|
+
sample_rate: sample_rate
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
puts features[:rms]
|
|
87
|
+
puts features[:spectral_centroid]
|
|
88
|
+
puts features[:mfcc].length
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Frame-based WAV analysis
|
|
92
|
+
|
|
93
|
+
```ruby
|
|
94
|
+
require "sonus"
|
|
95
|
+
|
|
96
|
+
analyzer = Sonus::Analyzer.new(
|
|
97
|
+
buffer_size: 1024,
|
|
98
|
+
hop_size: 512,
|
|
99
|
+
sample_rate: 44_100,
|
|
100
|
+
features: %i[rms spectral_flux mfcc]
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
results = analyzer.analyze_file("audio.wav")
|
|
104
|
+
puts results.first[:rms]
|
|
105
|
+
|
|
106
|
+
analyzer.each_frame("audio.wav").first(3).each do |frame|
|
|
107
|
+
puts frame[:spectral_flux]
|
|
108
|
+
end
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### WAV reading
|
|
112
|
+
|
|
113
|
+
```ruby
|
|
114
|
+
require "sonus"
|
|
115
|
+
|
|
116
|
+
reader = Sonus::WAV::Reader.new("audio.wav")
|
|
117
|
+
|
|
118
|
+
puts reader.sample_rate
|
|
119
|
+
puts reader.channels
|
|
120
|
+
puts reader.bit_depth
|
|
121
|
+
puts reader.duration
|
|
122
|
+
|
|
123
|
+
samples = reader.frames
|
|
124
|
+
reader.close
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## Public API
|
|
128
|
+
|
|
129
|
+
### `Sonus.extract`
|
|
130
|
+
|
|
131
|
+
Use `Sonus.extract(signal, features, **options)` for one analysis frame.
|
|
132
|
+
It returns a hash containing only the requested public features.
|
|
133
|
+
|
|
134
|
+
Important behavior:
|
|
135
|
+
|
|
136
|
+
- `signal` must be an `Array` or respond to `#to_a`
|
|
137
|
+
- signal values must be finite numeric values
|
|
138
|
+
- shorter input is zero-padded to `buffer_size`
|
|
139
|
+
- longer input is truncated to the first `buffer_size` samples
|
|
140
|
+
- feature dependencies are resolved automatically
|
|
141
|
+
- `features` can be a symbol, string, or array of them
|
|
142
|
+
|
|
143
|
+
For previous-frame features such as `:spectral_flux`, pass either `previous_signal` or `prev_amplitude_spectrum`:
|
|
144
|
+
|
|
145
|
+
```ruby
|
|
146
|
+
previous = Array.new(512, 0.0)
|
|
147
|
+
current = Array.new(512) { |i| Math.sin((2.0 * Math::PI * 440.0 * i) / 44_100.0) }
|
|
148
|
+
|
|
149
|
+
result = Sonus.extract(
|
|
150
|
+
current,
|
|
151
|
+
:spectral_flux,
|
|
152
|
+
buffer_size: 512,
|
|
153
|
+
sample_rate: 44_100,
|
|
154
|
+
previous_signal: previous
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
puts result[:spectral_flux]
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### `Sonus.extract_with_cache`
|
|
161
|
+
|
|
162
|
+
Use `Sonus.extract_with_cache` when you need both the requested result and intermediate feature values.
|
|
163
|
+
It returns `[result, cache]`, where `result` includes only requested public features and `cache` includes dependency outputs as well.
|
|
164
|
+
|
|
165
|
+
This is the API used internally by `Sonus::Analyzer`.
|
|
166
|
+
|
|
167
|
+
### `Sonus::Analyzer`
|
|
168
|
+
|
|
169
|
+
Use `Sonus::Analyzer` for repeated frame analysis with stable configuration.
|
|
170
|
+
|
|
171
|
+
```ruby
|
|
172
|
+
analyzer = Sonus::Analyzer.new(
|
|
173
|
+
buffer_size: 1024,
|
|
174
|
+
hop_size: 512,
|
|
175
|
+
sample_rate: 44_100,
|
|
176
|
+
features: %i[rms spectral_centroid]
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
analyzer.analyze(Array.new(1024, 1.0))
|
|
180
|
+
analyzer.analyze_file("audio.wav")
|
|
181
|
+
analyzer.each_frame("audio.wav")
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
Important behavior:
|
|
185
|
+
|
|
186
|
+
- `features:` defaults to `[:rms]`
|
|
187
|
+
- `hop_size` defaults to `buffer_size`
|
|
188
|
+
- overlapping analysis is supported with `hop_size < buffer_size`
|
|
189
|
+
- `each_frame(path)` returns an enumerator when no block is given
|
|
190
|
+
- `sample_rate` must match the WAV file sample rate for `analyze_file` and `each_frame`
|
|
191
|
+
- previous-frame state is reset at the start of each `each_frame` run
|
|
192
|
+
|
|
193
|
+
### `Sonus::WAV::Reader`
|
|
194
|
+
|
|
195
|
+
`Sonus::WAV::Reader` accepts a filesystem path or an IO object that responds to `#read` and `#seek`.
|
|
196
|
+
|
|
197
|
+
Supported WAV input:
|
|
198
|
+
|
|
199
|
+
- PCM: 8-bit, 16-bit, 24-bit, 32-bit
|
|
200
|
+
- IEEE float: 32-bit, 64-bit
|
|
201
|
+
- mono and multi-channel files
|
|
202
|
+
|
|
203
|
+
Important behavior:
|
|
204
|
+
|
|
205
|
+
- `frames` returns normalized floats
|
|
206
|
+
- for multi-channel files, Sonus decodes the first channel of each frame
|
|
207
|
+
- `each_buffer(size)` returns an enumerator when no block is given
|
|
208
|
+
- `each_buffer(size)` zero-pads the final buffer
|
|
209
|
+
|
|
210
|
+
## Features
|
|
211
|
+
|
|
212
|
+
Inspect the public feature set at runtime:
|
|
213
|
+
|
|
214
|
+
```ruby
|
|
215
|
+
Sonus.available_features
|
|
216
|
+
Sonus.list_available_feature_extractors
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
Current public features:
|
|
220
|
+
|
|
221
|
+
### Time-domain and raw outputs
|
|
222
|
+
|
|
223
|
+
- `:buffer`
|
|
224
|
+
- `:rms`
|
|
225
|
+
- `:zcr`
|
|
226
|
+
- `:energy`
|
|
227
|
+
- `:complex_spectrum`
|
|
228
|
+
- `:amplitude_spectrum`
|
|
229
|
+
- `:power_spectrum`
|
|
230
|
+
|
|
231
|
+
### Spectral features
|
|
232
|
+
|
|
233
|
+
- `:spectral_centroid`
|
|
234
|
+
- `:spectral_flatness`
|
|
235
|
+
- `:spectral_flux`
|
|
236
|
+
- `:spectral_slope`
|
|
237
|
+
- `:spectral_rolloff`
|
|
238
|
+
- `:spectral_spread`
|
|
239
|
+
- `:spectral_skewness`
|
|
240
|
+
- `:spectral_kurtosis`
|
|
241
|
+
- `:spectral_crest`
|
|
242
|
+
- `:chroma`
|
|
243
|
+
- `:mel_bands`
|
|
244
|
+
- `:mfcc`
|
|
245
|
+
|
|
246
|
+
### Perceptual features
|
|
247
|
+
|
|
248
|
+
- `:loudness`
|
|
249
|
+
- `:perceptual_spread`
|
|
250
|
+
- `:perceptual_sharpness`
|
|
251
|
+
|
|
252
|
+
## Configuration
|
|
253
|
+
|
|
254
|
+
Default values:
|
|
255
|
+
|
|
256
|
+
- `buffer_size = 512`
|
|
257
|
+
- `sample_rate = 44_100`
|
|
258
|
+
- `windowing_function = :hanning`
|
|
259
|
+
- `number_of_mfcc_coefficients = 13`
|
|
260
|
+
- `number_of_mel_filters = 26`
|
|
261
|
+
- `number_of_chroma_bands = 12`
|
|
262
|
+
- `number_of_bark_bands = 24`
|
|
263
|
+
|
|
264
|
+
Supported windowing functions:
|
|
265
|
+
|
|
266
|
+
- `:rect`
|
|
267
|
+
- `:hann`
|
|
268
|
+
- `:hanning`
|
|
269
|
+
- `:hamming`
|
|
270
|
+
- `:blackman`
|
|
271
|
+
- `:sine`
|
|
272
|
+
|
|
273
|
+
Supported aliases:
|
|
274
|
+
|
|
275
|
+
- `mel_bands:` for `number_of_mel_filters:`
|
|
276
|
+
- `chroma_bands:` for `number_of_chroma_bands:`
|
|
277
|
+
|
|
278
|
+
## Errors
|
|
279
|
+
|
|
280
|
+
Sonus raises domain-specific errors for common failure cases:
|
|
281
|
+
|
|
282
|
+
- `Sonus::InvalidSignalError`
|
|
283
|
+
- `Sonus::InvalidFeatureError`
|
|
284
|
+
- `Sonus::WAVFormatError`
|
|
285
|
+
- `Sonus::FFTBackendError`
|
|
286
|
+
|
|
287
|
+
## Development
|
|
288
|
+
|
|
289
|
+
Install dependencies:
|
|
290
|
+
|
|
291
|
+
```bash
|
|
292
|
+
bundle install
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
Run tests:
|
|
296
|
+
|
|
297
|
+
```bash
|
|
298
|
+
bundle exec rspec
|
|
299
|
+
bundle exec rake
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
Run the benchmark:
|
|
303
|
+
|
|
304
|
+
```bash
|
|
305
|
+
bundle exec ruby benchmark/extract_all_features.rb
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
## Contributing
|
|
309
|
+
|
|
310
|
+
Pull requests and issue reports are welcome.
|
|
311
|
+
|
|
312
|
+
## License
|
|
313
|
+
|
|
314
|
+
MIT License.
|
data/Rakefile
ADDED
data/benchmark/extract_all_features.rb
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "benchmark/ips"
|
|
5
|
+
require "sonus"
|
|
6
|
+
|
|
7
|
+
# Benchmark script: measures how fast Sonus extracts every public feature
# from a single 1024-sample frame containing a 440 Hz sine wave.
sample_rate = 44_100
frame_length = 1024

# Synthetic input frame (one sample per index, 440 Hz tone).
tone = (0...frame_length).map do |n|
  Math.sin((2.0 * Math::PI * 440.0 * n) / sample_rate)
end

# Feature list is looked up once so the lookup is not part of the timing.
all_features = Sonus.available_features

Benchmark.ips do |bench|
  bench.report("extract_all_features") do
    Sonus.extract(tone, all_features, buffer_size: frame_length, sample_rate: sample_rate)
  end

  bench.compare!
end
|
|
data/examples/analyze_wav.rb
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "example_helper"
|
|
4
|
+
|
|
5
|
+
# Example: run frame-based analysis over a mono WAV file and print a few
# values from the first frame, then the RMS of the first three frames.
source_path = ExampleHelper.mono_wav_path

analyzer_settings = {
  buffer_size: 1024,
  hop_size: 512,
  sample_rate: 44_100,
  features: %i[rms spectral_centroid spectral_flux]
}
analyzer = Sonus::Analyzer.new(**analyzer_settings)

# analyze_file returns one feature hash per analysis frame.
all_frames = analyzer.analyze_file(source_path)
head = all_frames.first

puts "analyze_file_frame_count: #{all_frames.length}"
puts "first_frame_rms: #{format("%.6f", head[:rms])}"
puts "first_frame_centroid_hz: #{format("%.2f", head[:spectral_centroid])}"
puts "first_frame_flux: #{format("%.6f", head[:spectral_flux])}"

puts "first_three_rms_from_each_frame:"
# Without a block, each_frame hands back an enumerator we can take from.
analyzer.each_frame(source_path).take(3).each.with_index do |frame, index|
  puts " frame_#{index}: #{format("%.6f", frame[:rms])}"
end
|
|
data/examples/example_helper.rb
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "tempfile"
|
|
4
|
+
|
|
5
|
+
begin
|
|
6
|
+
require "bundler/setup"
|
|
7
|
+
rescue LoadError
|
|
8
|
+
nil
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
begin
|
|
12
|
+
require "sonus"
|
|
13
|
+
rescue LoadError
|
|
14
|
+
require_relative "../lib/sonus"
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
# Helpers shared by the example scripts: they locate WAV fixtures under
# spec/fixtures or, when a fixture is missing, synthesize an equivalent
# 16-bit PCM sine-wave WAV file in a temporary location.
module ExampleHelper
  # Generated Tempfile objects are kept referenced here for the lifetime of
  # the process; a Tempfile's backing file is removed once the object is
  # garbage collected, which would invalidate the returned paths.
  TEMPFILES = []

  module_function

  # Absolute path of +name+ inside the repository's spec/fixtures directory
  # (resolved relative to this file's directory).
  def fixture_path(name)
    File.expand_path("../spec/fixtures/#{name}", __dir__)
  end

  # Path to a mono 16-bit WAV (fixture if present, otherwise generated).
  def mono_wav_path
    wav_path("sine_440hz_16bit.wav", channels: 1)
  end

  # Path to a stereo 16-bit WAV (fixture if present, otherwise generated).
  def stereo_wav_path
    wav_path("stereo_16bit.wav", channels: 2)
  end

  # Returns the fixture path for +name+ when that file exists; otherwise
  # generates a temporary WAV with the requested channel count.
  def wav_path(name, channels:)
    path = fixture_path(name)
    return path if File.exist?(path)

    generate_wav(name, channels: channels)
  end

  # Writes a sine-wave WAV file to a Tempfile and returns its path.
  #
  # @param name [String] used only to derive the temp file's basename
  # @param channels [Integer] number of interleaved channels (>= 1)
  # @param sample_rate [Integer] samples per second written to the header
  # @param frame_count [Integer] number of sample frames to generate
  # @param frequency [Float] sine frequency in Hz
  # @return [String] filesystem path of the generated file
  # @raise [ArgumentError] when +channels+ is not a positive integer
  def generate_wav(name, channels:, sample_rate: 44_100, frame_count: 44_100, frequency: 440.0)
    unless channels.is_a?(Integer) && channels.positive?
      raise ArgumentError, "channels must be a positive integer"
    end

    data = Array.new(frame_count) do |index|
      sample = Math.sin((2.0 * Math::PI * frequency * index) / sample_rate)
      frame_bytes(sample, channels)
    end.join

    tempfile = Tempfile.new([File.basename(name, ".wav"), ".wav"])
    tempfile.binmode
    tempfile.write(wav_bytes(data, channels: channels, sample_rate: sample_rate))
    # Close (without unlinking) so the descriptor is released and the data is
    # flushed; the file itself stays on disk while TEMPFILES references it.
    tempfile.close
    TEMPFILES << tempfile
    tempfile.path
  end

  # Encodes one interleaved sample frame as little-endian 16-bit PCM.
  # The first channel carries +sample+ at full scale; every additional
  # channel carries it at half amplitude, so the frame always contains
  # exactly +channels+ samples and matches the header from #wav_bytes.
  def frame_bytes(sample, channels)
    left = pcm16_bytes(sample)
    return left if channels == 1

    left + (pcm16_bytes(sample * 0.5) * (channels - 1))
  end

  # Clips +sample+ to [-1.0, 1.0] and packs it as a signed little-endian
  # 16-bit integer scaled by 32767.
  def pcm16_bytes(sample)
    clipped = [[sample, -1.0].max, 1.0].min
    [(clipped * 32_767).round].pack("s<")
  end

  # Wraps raw PCM +data+ in a canonical 44-byte RIFF/WAVE header
  # (format tag 1 = PCM, 16 bits per sample) and returns the full byte string.
  def wav_bytes(data, channels:, sample_rate:)
    bits_per_sample = 16
    block_align = channels * (bits_per_sample / 8)
    byte_rate = sample_rate * block_align
    riff_size = 36 + data.bytesize # header bytes after the size field + payload

    header = [
      "RIFF", riff_size, "WAVE",
      "fmt ", 16, 1, channels, sample_rate, byte_rate, block_align, bits_per_sample,
      "data", data.bytesize
    ].pack("a4Va4a4VvvVVvva4V")

    header << data
  end
end
|
|
data/examples/extract_basic.rb
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "example_helper"
|
|
4
|
+
|
|
5
|
+
# Example: extract RMS, spectral centroid and MFCCs from one 512-sample
# frame of a 440 Hz sine wave and print the results.
rate = 44_100
frame_size = 512

tone = (0...frame_size).map do |n|
  Math.sin((2.0 * Math::PI * 440.0 * n) / rate)
end

extraction_options = {
  buffer_size: frame_size,
  sample_rate: rate,
  number_of_mfcc_coefficients: 13
}
extracted = Sonus.extract(tone, %i[rms spectral_centroid mfcc], **extraction_options)

puts "rms: #{format("%.6f", extracted[:rms])}"
puts "spectral_centroid_hz: #{format("%.2f", extracted[:spectral_centroid])}"
puts "mfcc_count: #{extracted[:mfcc].length}"
puts "mfcc_first_3: #{extracted[:mfcc].first(3).map { |value| format("%.4f", value) }.join(", ")}"
|
|
data/examples/extract_spectral_flux.rb
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "example_helper"
|
|
4
|
+
|
|
5
|
+
# Example: compute spectral flux for a sine frame against a silent previous
# frame in two ways -- by passing the previous signal, and by passing the
# previous frame's amplitude spectrum -- and print both values.
frame_options = { buffer_size: 512, sample_rate: 44_100 }

silence = Array.new(frame_options[:buffer_size], 0.0)
tone = (0...frame_options[:buffer_size]).map do |n|
  Math.sin((2.0 * Math::PI * 440.0 * n) / frame_options[:sample_rate])
end

# Variant 1: hand over the raw previous frame.
from_previous_signal = Sonus.extract(
  tone, :spectral_flux, **frame_options, previous_signal: silence
)

# Variant 2: compute the previous frame's amplitude spectrum first, then
# hand that over instead of the raw signal.
previous_spectrum = Sonus.extract(silence, :amplitude_spectrum, **frame_options)

from_previous_spectrum = Sonus.extract(
  tone, :spectral_flux, **frame_options,
  prev_amplitude_spectrum: previous_spectrum[:amplitude_spectrum]
)

puts "spectral_flux_from_previous_signal: #{format("%.6f", from_previous_signal[:spectral_flux])}"
puts "spectral_flux_from_previous_spectrum: #{format("%.6f", from_previous_spectrum[:spectral_flux])}"
|
|
data/examples/read_wav.rb
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "example_helper"
|
|
4
|
+
|
|
5
|
+
# Example: open a stereo WAV with Sonus::WAV::Reader, print its metadata and
# the first few decoded samples, and always close the reader afterwards.
stereo_path = ExampleHelper.stereo_wav_path
wav = Sonus::WAV::Reader.new(stereo_path)

begin
  # Pull the first 2048-sample buffer before printing anything, matching the
  # order in which the reader is exercised.
  leading_buffer = wav.each_buffer(2048).first

  puts "sample_rate: #{wav.sample_rate}"
  puts "channels: #{wav.channels}"
  puts "bit_depth: #{wav.bit_depth}"
  puts "duration_seconds: #{format("%.3f", wav.duration)}"
  puts "decoded_frame_count: #{wav.frames.length}"
  puts "first_buffer_first_5: #{leading_buffer.first(5).map { |value| format("%.4f", value) }.join(", ")}"
ensure
  wav.close
end
|
|
data/lib/sonus/analyzer.rb
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  # Frame-based feature analyzer with a fixed configuration.
  #
  # An Analyzer holds extraction settings (buffer size, hop size, sample rate,
  # window, band counts) plus the amplitude spectrum of the previously
  # analyzed frame, which it forwards to Sonus.extract_with_cache as
  # +prev_amplitude_spectrum+ on the next call.
  class Analyzer
    attr_reader :buffer_size, :sample_rate, :features, :hop_size, :windowing_function

    # @param buffer_size [Integer] samples per analysis frame (must be > 0)
    # @param sample_rate [Numeric] sample rate in Hz (must be > 0)
    # @param features [Symbol, String, Array] feature name(s) to extract
    # @param hop_size [Integer, nil] samples between frame starts; defaults
    #   to +buffer_size+ when nil (must be > 0)
    # @param windowing_function [Symbol, String] window name
    # @param number_of_mfcc_coefficients [Integer]
    # @param number_of_mel_filters [Integer]
    # @param mel_bands [Integer, nil] alias; overrides +number_of_mel_filters+ when given
    # @param number_of_chroma_bands [Integer]
    # @param chroma_bands [Integer, nil] alias; overrides +number_of_chroma_bands+ when given
    # @param number_of_bark_bands [Integer]
    # @raise [ArgumentError] when buffer_size, hop_size or sample_rate is not positive
    def initialize(
      buffer_size: Sonus::DEFAULT_BUFFER_SIZE,
      sample_rate: Sonus::DEFAULT_SAMPLE_RATE,
      features: [:rms],
      hop_size: nil,
      windowing_function: Sonus::DEFAULT_WINDOWING_FUNCTION,
      number_of_mfcc_coefficients: Sonus::DEFAULT_MFCC_COEFFICIENTS,
      number_of_mel_filters: Sonus::DEFAULT_MEL_FILTERS,
      mel_bands: nil,
      number_of_chroma_bands: Sonus::DEFAULT_CHROMA_BANDS,
      chroma_bands: nil,
      number_of_bark_bands: Sonus::DEFAULT_BARK_BANDS
    )
      @buffer_size = Integer(buffer_size)
      @sample_rate = Float(sample_rate)
      @features = Array(features).map(&:to_sym)
      @hop_size = hop_size ? Integer(hop_size) : @buffer_size
      @windowing_function = windowing_function.to_sym
      @number_of_mfcc_coefficients = Integer(number_of_mfcc_coefficients)
      @number_of_mel_filters = Integer(mel_bands || number_of_mel_filters)
      @number_of_chroma_bands = Integer(chroma_bands || number_of_chroma_bands)
      @number_of_bark_bands = Integer(number_of_bark_bands)
      @prev_amplitude_spectrum = nil

      raise ArgumentError, "buffer_size must be positive" unless @buffer_size.positive?
      raise ArgumentError, "hop_size must be positive" unless @hop_size.positive?
      # Validated like the sizes above so a zero/negative/NaN rate fails here
      # instead of reaching the filter bank and feature math.
      raise ArgumentError, "sample_rate must be positive" unless @sample_rate.positive?

      preload_mel_filter_bank
    end

    # Extracts the configured features from a single frame.
    #
    # The amplitude spectrum of the previous call (if any) is passed along as
    # +prev_amplitude_spectrum+, and this call's amplitude spectrum is stored
    # for the next one whenever the extraction cache contains it.
    #
    # @param buffer [Array<Numeric>] samples of one frame
    # @return the +result+ element returned by Sonus.extract_with_cache
    def analyze(buffer)
      result, cache = Sonus.extract_with_cache(
        buffer,
        @features,
        buffer_size: @buffer_size,
        sample_rate: @sample_rate,
        windowing_function: @windowing_function,
        number_of_mfcc_coefficients: @number_of_mfcc_coefficients,
        number_of_mel_filters: @number_of_mel_filters,
        number_of_chroma_bands: @number_of_chroma_bands,
        number_of_bark_bands: @number_of_bark_bands,
        prev_amplitude_spectrum: @prev_amplitude_spectrum
      )

      @prev_amplitude_spectrum = cache[:amplitude_spectrum] if cache.key?(:amplitude_spectrum)
      result
    end

    # Analyzes every frame of the WAV at +path+.
    #
    # @return [Array] one #analyze result per frame, in order
    def analyze_file(path)
      each_frame(path).to_a
    end

    # Yields the #analyze result for each frame of the WAV at +path+.
    # Returns an enumerator when no block is given.
    #
    # The stored previous-frame spectrum is cleared before the first frame,
    # and the reader is closed even when analysis raises.
    #
    # @raise [ArgumentError] when the file's sample rate differs from the analyzer's
    # @return [nil, Enumerator]
    def each_frame(path)
      return enum_for(:each_frame, path) unless block_given?

      reader = Sonus::WAV::Reader.new(path)
      begin
        @prev_amplitude_spectrum = nil
        validate_reader_sample_rate!(reader)
        each_signal_frame(reader.frames) { |frame| yield analyze(frame) }
      ensure
        reader.close
      end

      nil
    end

    private

    # Splits +signal+ into frames of +@buffer_size+ samples whose starts are
    # +@hop_size+ apart, zero-padding the last frame to full length.
    # A signal no longer than one buffer (including an empty one) yields
    # exactly one frame.
    def each_signal_frame(signal)
      overflow = signal.length - @buffer_size
      frame_count = overflow.positive? ? overflow.fdiv(@hop_size).ceil + 1 : 1

      frame_count.times do |frame_index|
        frame = signal.slice(frame_index * @hop_size, @buffer_size) || []
        missing = @buffer_size - frame.length
        frame += Array.new(missing, 0.0) if missing.positive?
        yield frame
      end
    end

    # Raises ArgumentError unless the reader's sample rate equals the
    # analyzer's configured rate.
    def validate_reader_sample_rate!(reader)
      return if reader.sample_rate.to_f == @sample_rate

      raise ArgumentError, "Analyzer sample_rate (#{@sample_rate}) does not match WAV sample_rate (#{reader.sample_rate})"
    end

    # Builds the mel filter bank for the current configuration and discards
    # the return value. NOTE(review): presumably this warms a cache inside
    # Sonus::DSP::MelFilterBank -- confirm against that class.
    def preload_mel_filter_bank
      Sonus::DSP::MelFilterBank.create(
        num_filters: @number_of_mel_filters,
        fft_size: @buffer_size,
        sample_rate: @sample_rate
      )
    end
  end
end
|