sonus 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +314 -0
- data/Rakefile +8 -0
- data/benchmark/extract_all_features.rb +17 -0
- data/examples/analyze_wav.rb +25 -0
- data/examples/example_helper.rb +84 -0
- data/examples/extract_basic.rb +23 -0
- data/examples/extract_spectral_flux.rb +37 -0
- data/examples/read_wav.rb +19 -0
- data/lib/sonus/analyzer.rb +109 -0
- data/lib/sonus/dsp/fft.rb +55 -0
- data/lib/sonus/dsp/fft_backend/fftw.rb +103 -0
- data/lib/sonus/dsp/fft_backend/ruby.rb +80 -0
- data/lib/sonus/dsp/mel_filter_bank.rb +98 -0
- data/lib/sonus/dsp/windowing.rb +47 -0
- data/lib/sonus/error.rb +9 -0
- data/lib/sonus/features/amplitude_spectrum.rb +14 -0
- data/lib/sonus/features/base.rb +27 -0
- data/lib/sonus/features/buffer.rb +13 -0
- data/lib/sonus/features/chroma.rb +35 -0
- data/lib/sonus/features/complex_spectrum.rb +13 -0
- data/lib/sonus/features/energy.rb +13 -0
- data/lib/sonus/features/loudness.rb +57 -0
- data/lib/sonus/features/mel_bands.rb +27 -0
- data/lib/sonus/features/mfcc.rb +30 -0
- data/lib/sonus/features/perceptual_sharpness.rb +26 -0
- data/lib/sonus/features/perceptual_spread.rb +18 -0
- data/lib/sonus/features/power_spectrum.rb +13 -0
- data/lib/sonus/features/rms.rb +16 -0
- data/lib/sonus/features/spectral_centroid.rb +30 -0
- data/lib/sonus/features/spectral_crest.rb +19 -0
- data/lib/sonus/features/spectral_flatness.rb +22 -0
- data/lib/sonus/features/spectral_flux.rb +24 -0
- data/lib/sonus/features/spectral_kurtosis.rb +34 -0
- data/lib/sonus/features/spectral_rolloff.rb +26 -0
- data/lib/sonus/features/spectral_skewness.rb +34 -0
- data/lib/sonus/features/spectral_slope.rb +25 -0
- data/lib/sonus/features/spectral_spread.rb +32 -0
- data/lib/sonus/features/windowed_signal.rb +16 -0
- data/lib/sonus/features/zcr.rb +17 -0
- data/lib/sonus/features.rb +109 -0
- data/lib/sonus/version.rb +5 -0
- data/lib/sonus/wav/format.rb +15 -0
- data/lib/sonus/wav/reader.rb +224 -0
- data/lib/sonus.rb +204 -0
- metadata +160 -0
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Root-mean-square level of the cached sample buffer.
    class RMS < Base
      feature :rms, dependencies: [:buffer]

      # @param context [Hash] extraction context; the samples are read from
      #   `context[:cache][:buffer]`
      # @return [Float] square root of the mean of the squared samples, or
      #   0.0 when the buffer is empty
      def self.extract(context)
        samples = context[:cache].fetch(:buffer)

        if samples.empty?
          0.0
        else
          squared_total = samples.sum { |value| value * value }
          Math.sqrt(squared_total / samples.length.to_f)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Amplitude-weighted mean frequency of the amplitude spectrum.
    class SpectralCentroid < Base
      feature :spectral_centroid, dependencies: [:amplitude_spectrum]

      # @param context [Hash] extraction context; reads
      #   `context[:cache][:amplitude_spectrum]` and `context[:sample_rate]`
      # @return [Float] sum(amplitude * bin frequency) / sum(amplitude), or
      #   0.0 when the amplitudes sum to zero
      def self.extract(context)
        spectrum = context[:cache].fetch(:amplitude_spectrum)
        magnitude_total = spectrum.sum
        return 0.0 if magnitude_total.zero?

        bin_width = frequency_step_for(spectrum.length, context.fetch(:sample_rate))
        weighted_total = spectrum.each_with_index.sum { |magnitude, bin| magnitude * bin * bin_width }

        weighted_total / magnitude_total
      end

      # Frequency distance between adjacent bins. The FFT size is derived as
      # (length - 1) * 2; spectra with at most one bin get a step of 0.0.
      def self.frequency_step_for(length, sample_rate)
        length <= 1 ? 0.0 : sample_rate.to_f / ((length - 1) * 2)
      end
      private_class_method :frequency_step_for
    end
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Ratio of the largest amplitude-spectrum value to the spectrum's RMS.
    class SpectralCrest < Base
      feature :spectral_crest, dependencies: [:amplitude_spectrum]

      # @param context [Hash] extraction context; reads
      #   `context[:cache][:amplitude_spectrum]`
      # @return [Float] max / RMS of the spectrum, or 0.0 when the spectrum
      #   is empty or its RMS is zero
      def self.extract(context)
        spectrum = context[:cache].fetch(:amplitude_spectrum)
        return 0.0 if spectrum.empty?

        mean_square = spectrum.sum { |magnitude| magnitude * magnitude } / spectrum.length.to_f
        root_mean_square = Math.sqrt(mean_square)

        root_mean_square.zero? ? 0.0 : spectrum.max / root_mean_square
      end
    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Geometric mean of the amplitude spectrum divided by its arithmetic mean.
    class SpectralFlatness < Base
      # Added to every amplitude before taking its logarithm so that a zero
      # bin does not produce log(0).
      EPSILON = 1e-12

      feature :spectral_flatness, dependencies: [:amplitude_spectrum]

      # @param context [Hash] extraction context; reads
      #   `context[:cache][:amplitude_spectrum]`
      # @return [Float] geometric mean / arithmetic mean, or 0.0 when the
      #   spectrum is empty or its arithmetic mean is zero
      def self.extract(context)
        spectrum = context[:cache].fetch(:amplitude_spectrum)
        return 0.0 if spectrum.empty?

        bin_count = spectrum.length
        mean = spectrum.sum / bin_count.to_f
        return 0.0 if mean.zero?

        # Geometric mean computed in the log domain: exp(mean(log(x + eps))).
        log_total = spectrum.sum { |magnitude| Math.log(magnitude + EPSILON) }
        Math.exp(log_total / bin_count) / mean
      end
    end
  end
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Sum of squared bin-by-bin differences between the current amplitude
    # spectrum and the previous one.
    class SpectralFlux < Base
      feature :spectral_flux, dependencies: [:amplitude_spectrum]

      # @param context [Hash] extraction context; reads
      #   `context[:cache][:amplitude_spectrum]` and the optional
      #   `context[:prev_amplitude_spectrum]`
      # @return [Float] sum of (current - previous)**2 over all bins, or 0.0
      #   when no previous spectrum is supplied
      def self.extract(context)
        current = context[:cache].fetch(:amplitude_spectrum)
        previous = context[:prev_amplitude_spectrum]
        return 0.0 unless previous

        # Spectra of different lengths are compared over the longer one; bins
        # missing from the shorter spectrum count as 0.0.
        bin_count = [current.length, previous.length].max

        # Seed the sum with 0.0 so the result is always a Float, matching the
        # early return above, even when both spectra are empty.
        bin_count.times.sum(0.0) do |index|
          delta = (current[index] || 0.0) - (previous[index] || 0.0)
          delta * delta
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Fourth standardized moment of the amplitude spectrum around its
    # centroid.
    class SpectralKurtosis < Base
      feature :spectral_kurtosis, dependencies: [:amplitude_spectrum, :spectral_centroid, :spectral_spread]

      # @param context [Hash] extraction context; reads the amplitude
      #   spectrum, spectral centroid and spectral spread from
      #   `context[:cache]`, plus `context[:sample_rate]`
      # @return [Float] sum(amplitude * (f - centroid)**4) divided by
      #   (sum(amplitude) * spread**4), or 0.0 when the amplitude sum or the
      #   spread is zero
      def self.extract(context)
        cache = context[:cache]
        spectrum = cache.fetch(:amplitude_spectrum)
        centroid = cache.fetch(:spectral_centroid)
        spread = cache.fetch(:spectral_spread)
        magnitude_total = spectrum.sum
        return 0.0 if magnitude_total.zero? || spread.zero?

        bin_width = frequency_step_for(spectrum.length, context.fetch(:sample_rate))

        fourth_moment = spectrum.each_with_index.sum do |magnitude, bin|
          deviation = (bin * bin_width) - centroid
          magnitude * (deviation**4)
        end

        fourth_moment / (magnitude_total * (spread**4))
      end

      # Frequency distance between adjacent bins. The FFT size is derived as
      # (length - 1) * 2; spectra with at most one bin get a step of 0.0.
      def self.frequency_step_for(length, sample_rate)
        length <= 1 ? 0.0 : sample_rate.to_f / ((length - 1) * 2)
      end
      private_class_method :frequency_step_for
    end
  end
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Frequency of the first bin at which the running amplitude total reaches
    # 99% of the spectrum's total amplitude.
    class SpectralRolloff < Base
      feature :spectral_rolloff, dependencies: [:amplitude_spectrum]

      # @param context [Hash] extraction context; reads
      #   `context[:cache][:amplitude_spectrum]` and `context[:sample_rate]`
      # @return [Float] rolloff bin index * sample_rate / fft_size, or 0.0
      #   when the amplitudes sum to zero
      def self.extract(context)
        spectrum = context[:cache].fetch(:amplitude_spectrum)
        magnitude_total = spectrum.sum
        return 0.0 if magnitude_total.zero?

        cutoff = magnitude_total * 0.99
        running_total = 0.0

        match = spectrum.each_with_index.find do |magnitude, _bin|
          running_total += magnitude
          running_total >= cutoff
        end
        # Fall back to bin 0 when no bin reaches the cutoff.
        rolloff_bin = match ? match.last : 0

        # Clamp to 1 so a single-bin spectrum does not divide by zero.
        fft_size = [(spectrum.length - 1) * 2, 1].max
        (rolloff_bin * context.fetch(:sample_rate).to_f) / fft_size
      end
    end
  end
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Third standardized moment of the amplitude spectrum around its
    # centroid.
    class SpectralSkewness < Base
      feature :spectral_skewness, dependencies: [:amplitude_spectrum, :spectral_centroid, :spectral_spread]

      # @param context [Hash] extraction context; reads the amplitude
      #   spectrum, spectral centroid and spectral spread from
      #   `context[:cache]`, plus `context[:sample_rate]`
      # @return [Float] sum(amplitude * (f - centroid)**3) divided by
      #   (sum(amplitude) * spread**3), or 0.0 when the amplitude sum or the
      #   spread is zero
      def self.extract(context)
        cache = context[:cache]
        spectrum = cache.fetch(:amplitude_spectrum)
        centroid = cache.fetch(:spectral_centroid)
        spread = cache.fetch(:spectral_spread)
        magnitude_total = spectrum.sum
        return 0.0 if magnitude_total.zero? || spread.zero?

        bin_width = frequency_step_for(spectrum.length, context.fetch(:sample_rate))

        third_moment = spectrum.each_with_index.sum do |magnitude, bin|
          deviation = (bin * bin_width) - centroid
          magnitude * (deviation**3)
        end

        third_moment / (magnitude_total * (spread**3))
      end

      # Frequency distance between adjacent bins. The FFT size is derived as
      # (length - 1) * 2; spectra with at most one bin get a step of 0.0.
      def self.frequency_step_for(length, sample_rate)
        length <= 1 ? 0.0 : sample_rate.to_f / ((length - 1) * 2)
      end
      private_class_method :frequency_step_for
    end
  end
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Least-squares slope of amplitude against bin index.
    class SpectralSlope < Base
      feature :spectral_slope, dependencies: [:amplitude_spectrum]

      # @param context [Hash] extraction context; reads
      #   `context[:cache][:amplitude_spectrum]`
      # @return [Float] (n * sum(xy) - sum(x) * sum(y)) divided by
      #   (n * sum(x^2) - sum(x)^2), where x is the bin index and y the
      #   amplitude; 0.0 when there are fewer than two bins or the
      #   denominator is zero
      def self.extract(context)
        spectrum = context[:cache].fetch(:amplitude_spectrum)
        bin_count = spectrum.length
        return 0.0 if bin_count <= 1

        bins = 0...bin_count
        index_total = bins.sum
        index_square_total = bins.sum { |bin| bin * bin }
        magnitude_total = spectrum.sum
        cross_total = spectrum.each_with_index.sum { |magnitude, bin| bin * magnitude }

        divisor = (bin_count * index_square_total) - (index_total * index_total)
        return 0.0 if divisor.zero?

        dividend = (bin_count * cross_total) - (index_total * magnitude_total)
        dividend / divisor.to_f
      end
    end
  end
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Amplitude-weighted standard deviation of frequency around the spectral
    # centroid.
    class SpectralSpread < Base
      feature :spectral_spread, dependencies: [:amplitude_spectrum, :spectral_centroid]

      # @param context [Hash] extraction context; reads the amplitude
      #   spectrum and spectral centroid from `context[:cache]`, plus
      #   `context[:sample_rate]`
      # @return [Float] sqrt(sum(amplitude * (f - centroid)**2) / sum(amplitude)),
      #   or 0.0 when the amplitudes sum to zero
      def self.extract(context)
        cache = context[:cache]
        spectrum = cache.fetch(:amplitude_spectrum)
        centroid = cache.fetch(:spectral_centroid)
        magnitude_total = spectrum.sum
        return 0.0 if magnitude_total.zero?

        bin_width = frequency_step_for(spectrum.length, context.fetch(:sample_rate))

        second_moment = spectrum.each_with_index.sum do |magnitude, bin|
          deviation = (bin * bin_width) - centroid
          magnitude * (deviation**2)
        end

        Math.sqrt(second_moment / magnitude_total)
      end

      # Frequency distance between adjacent bins. The FFT size is derived as
      # (length - 1) * 2; spectra with at most one bin get a step of 0.0.
      def self.frequency_step_for(length, sample_rate)
        length <= 1 ? 0.0 : sample_rate.to_f / ((length - 1) * 2)
      end
      private_class_method :frequency_step_for
    end
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Internal (non-public) feature: the sample buffer after the configured
    # window has been applied.
    class WindowedSignal < Base
      feature :windowed_signal, dependencies: [:buffer], public_feature: false

      # @param context [Hash] extraction context; reads
      #   `context[:cache][:buffer]` and `context[:windowing_function]`
      # @return whatever `Sonus::DSP::Windowing.apply` returns for that
      #   buffer and windowing function
      def self.extract(context)
        samples = context[:cache].fetch(:buffer)
        window = context.fetch(:windowing_function)

        Sonus::DSP::Windowing.apply(samples, window)
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module Features
    # Zero-crossing count of the sample buffer.
    class ZCR < Base
      feature :zcr, dependencies: [:buffer]

      # Counts adjacent sample pairs that lie on opposite sides of zero,
      # where 0.0 itself is treated as non-negative.
      #
      # @param context [Hash] extraction context; reads
      #   `context[:cache][:buffer]`
      # @return [Integer] number of sign changes between consecutive samples
      def self.extract(context)
        samples = context[:cache].fetch(:buffer)

        samples.each_cons(2).count do |left, right|
          if left >= 0.0
            right < 0.0
          else
            left < 0.0 && right >= 0.0
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  # Registry of feature extractor classes plus the dependency-aware evaluator
  # that runs them in a valid order.
  module Features
    @registry = {}

    class << self
      # @return [Hash] feature key => extractor class
      attr_reader :registry

      # Adds an extractor class to the registry under its `feature_key`.
      # Classes whose `feature_key` is nil are ignored.
      #
      # @param klass [#feature_key] extractor class to register
      def register(klass)
        key = klass.feature_key
        return if key.nil?

        @registry[key] = klass
      end

      # @return [Array] sorted keys of the registered extractors whose
      #   `public_feature?` is truthy
      def public_feature_keys
        @registry.select { |_key, klass| klass.public_feature? }.keys.sort
      end

      # Computes the requested features, first evaluating every dependency.
      #
      # @param feature_keys [Array, #to_sym] one key or a collection of keys
      #   (anything responding to `to_sym`)
      # @param context [Hash] base context handed to each extractor; a
      #   `:cache` entry holding the results computed so far is merged in
      # @param return_cache [Boolean] when true, also return the full cache
      #   (including dependency results)
      # @return [Hash, Array(Hash, Hash)] requested key => value, or
      #   `[result, cache]` when `return_cache` is true
      # @raise [Sonus::InvalidFeatureError] for unknown keys or circular
      #   dependencies
      def compute(feature_keys, context, return_cache: false)
        requested = normalize_feature_keys(feature_keys)

        cache = {}
        resolve_order(requested).each do |key|
          cache[key] = @registry.fetch(key).extract(context.merge(cache: cache))
        end

        result = requested.each_with_object({}) { |key, hash| hash[key] = cache[key] }
        return_cache ? [result, cache] : result
      end

      # Orders the given keys and their transitive dependencies so that each
      # dependency precedes the features that need it.
      #
      # @param feature_keys [Array<Symbol>] registered feature keys
      # @return [Array<Symbol>] evaluation order without duplicates
      # @raise [Sonus::InvalidFeatureError] for unknown keys or cycles
      def resolve_order(feature_keys)
        state = {}
        order = []
        feature_keys.each { |key| dfs_visit(key, state, order) }
        order
      end

      private

      # Symbolizes each requested key and checks that it is registered.
      # `Array()` lets callers pass a single key instead of a collection.
      def normalize_feature_keys(feature_keys)
        Array(feature_keys).map do |feature_key|
          # Report keys that cannot be symbolized as invalid features rather
          # than letting NoMethodError escape.
          unless feature_key.respond_to?(:to_sym)
            raise Sonus::InvalidFeatureError, "Unsupported feature: #{feature_key.inspect}"
          end

          normalized = feature_key.to_sym
          next normalized if @registry.key?(normalized)

          raise Sonus::InvalidFeatureError, "Unsupported feature: #{feature_key}"
        end
      end

      # Depth-first post-order visit. `state` marks keys as :visiting while
      # their dependencies are being walked and :done afterwards; meeting a
      # :visiting key again means the dependency graph has a cycle.
      def dfs_visit(feature_key, state, order)
        case state[feature_key]
        when :done
          return
        when :visiting
          raise Sonus::InvalidFeatureError, "Circular dependency detected at #{feature_key}"
        end

        state[feature_key] = :visiting

        klass = @registry[feature_key]
        raise Sonus::InvalidFeatureError, "Unsupported feature: #{feature_key}" unless klass

        klass.dependencies.each { |dependency| dfs_visit(dependency, state, order) }

        state[feature_key] = :done
        order << feature_key
      end
    end
  end
end
|
|
85
|
+
|
|
86
|
+
require_relative "features/base"
|
|
87
|
+
require_relative "features/buffer"
|
|
88
|
+
require_relative "features/windowed_signal"
|
|
89
|
+
require_relative "features/complex_spectrum"
|
|
90
|
+
require_relative "features/rms"
|
|
91
|
+
require_relative "features/zcr"
|
|
92
|
+
require_relative "features/energy"
|
|
93
|
+
require_relative "features/amplitude_spectrum"
|
|
94
|
+
require_relative "features/power_spectrum"
|
|
95
|
+
require_relative "features/spectral_centroid"
|
|
96
|
+
require_relative "features/spectral_flatness"
|
|
97
|
+
require_relative "features/spectral_flux"
|
|
98
|
+
require_relative "features/spectral_slope"
|
|
99
|
+
require_relative "features/spectral_rolloff"
|
|
100
|
+
require_relative "features/spectral_spread"
|
|
101
|
+
require_relative "features/spectral_skewness"
|
|
102
|
+
require_relative "features/spectral_kurtosis"
|
|
103
|
+
require_relative "features/spectral_crest"
|
|
104
|
+
require_relative "features/chroma"
|
|
105
|
+
require_relative "features/loudness"
|
|
106
|
+
require_relative "features/perceptual_spread"
|
|
107
|
+
require_relative "features/perceptual_sharpness"
|
|
108
|
+
require_relative "features/mfcc"
|
|
109
|
+
require_relative "features/mel_bands"
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sonus
  module WAV
    # Reads RIFF/WAVE audio with integer PCM (format 1; 8/16/24/32-bit) or
    # IEEE float (format 3; 32/64-bit) samples from a file path or a seekable
    # IO object.
    #
    # Only the first channel of every frame is decoded, so `frames` holds one
    # Float per frame regardless of the channel count.
    class Reader
      attr_reader :format, :data_size

      # @param path_or_io [String, #read] a path to open, or an object that
      #   responds to both `read` and `seek` (used as-is and never closed by
      #   this reader)
      # @raise [Sonus::WAVFormatError] when the header is malformed or the
      #   encoding is unsupported
      def initialize(path_or_io)
        @io, @close_io = io_for(path_or_io)
        @format, @data_offset, @data_size = parse_header
        @frames = nil
      rescue StandardError
        # Do not leak a file handle this reader opened itself.
        close
        raise
      end

      def sample_rate
        @format.sample_rate
      end

      def channels
        @format.channels
      end

      def bit_depth
        @format.bit_depth
      end

      # @return [Float] frame count divided by sample rate (0.0 when the
      #   sample rate is zero); decodes the frames if not yet decoded
      def duration
        return 0.0 if sample_rate.zero?

        frames.length / sample_rate.to_f
      end

      # Decodes the data chunk on first use and memoizes the result.
      #
      # @return [Array<Float>] first-channel sample of each frame
      def frames
        return @frames if @frames

        @io.seek(@data_offset, IO::SEEK_SET)
        # IO#read(n) returns nil at EOF; a file whose data chunk is declared
        # but entirely missing therefore decodes to no frames.
        data = @io.read(@data_size) || ""
        @frames = decode_frames(data)
      end

      # Yields consecutive buffers of exactly `size` frames; the last buffer
      # is right-padded with 0.0. Returns an Enumerator when no block is
      # given.
      #
      # @param size [#to_i] frames per buffer; coerced to an Integer once so
      #   the slicing and the advance through the frames always agree
      # @raise [ArgumentError] when size is not positive
      def each_buffer(size)
        size = size.to_i
        raise ArgumentError, "size must be positive" unless size.positive?

        return enum_for(:each_buffer, size) unless block_given?

        frames.each_slice(size) do |buffer|
          buffer += Array.new(size - buffer.length, 0.0) if buffer.length < size
          yield buffer
        end

        nil
      end

      # Closes the underlying IO, but only when this reader opened it.
      def close
        return unless @close_io

        @io.close unless @io.closed?
      end

      private

      # @return [Array(Object, Boolean)] the IO to read from and whether this
      #   reader owns it (and so must close it)
      def io_for(path_or_io)
        if path_or_io.respond_to?(:read) && path_or_io.respond_to?(:seek)
          [path_or_io, false]
        else
          [File.open(path_or_io, "rb"), true]
        end
      end

      # Validates the RIFF/WAVE preamble, then walks the chunk list recording
      # the format description and the location of the sample data.
      #
      # @return [Array] `[format, data_offset, data_size]`
      def parse_header
        @io.seek(0, IO::SEEK_SET)

        riff = @io.read(4)
        _riff_size = read_uint32
        wave = @io.read(4)

        unless riff == "RIFF" && wave == "WAVE"
          raise Sonus::WAVFormatError, "Invalid WAV header"
        end

        format = nil
        data_offset = nil
        data_size = nil

        until @io.eof?
          chunk_id = @io.read(4)
          break unless chunk_id && chunk_id.length == 4

          chunk_size = read_uint32
          chunk_start = @io.pos

          case chunk_id
          when "fmt "
            format = parse_fmt_chunk(chunk_size)
          when "data"
            data_offset = chunk_start
            data_size = chunk_size
          end

          # Jump to the next chunk from the recorded start so every branch
          # lands in the same place; odd-sized chunks carry one pad byte.
          @io.seek(chunk_start + chunk_size + (chunk_size % 2), IO::SEEK_SET)
        end

        raise Sonus::WAVFormatError, "Missing fmt chunk" unless format
        raise Sonus::WAVFormatError, "Missing data chunk" unless data_offset && data_size

        [format, data_offset, data_size]
      end

      # Parses the first 16 bytes of the "fmt " chunk into a Format.
      #
      # @raise [Sonus::WAVFormatError] when the chunk is shorter than 16
      #   bytes, the audio format or bit depth is unsupported, or the channel
      #   count is zero
      def parse_fmt_chunk(chunk_size)
        raw = @io.read(chunk_size)
        if raw.nil? || raw.length < 16
          raise Sonus::WAVFormatError, "fmt chunk is too short"
        end

        audio_format, channels, sample_rate, byte_rate, block_align, bit_depth = raw.unpack("v v V V v v")

        unless [1, 3].include?(audio_format)
          raise Sonus::WAVFormatError, "Unsupported WAV audio format: #{audio_format}"
        end

        validate_bit_depth!(audio_format, bit_depth)

        # A zero channel count would make the frame size zero and surface
        # later as ZeroDivisionError while decoding.
        unless channels.positive?
          raise Sonus::WAVFormatError, "Invalid channel count: #{channels}"
        end

        Sonus::WAV::Format.new(
          audio_format: audio_format,
          channels: channels,
          sample_rate: sample_rate,
          byte_rate: byte_rate,
          block_align: block_align,
          bit_depth: bit_depth
        )
      end

      # Integer PCM (1) accepts 8/16/24/32 bits; float (3) accepts 32/64.
      def validate_bit_depth!(audio_format, bit_depth)
        supported = audio_format == 1 ? [8, 16, 24, 32] : [32, 64]
        return if supported.include?(bit_depth)

        raise Sonus::WAVFormatError, "Unsupported bit depth #{bit_depth} for audio format #{audio_format}"
      end

      # Decodes one sample per frame: the first `bytes_per_sample` bytes of
      # each frame, i.e. the first channel. A trailing partial frame is
      # dropped by the integer division.
      def decode_frames(data)
        bytes_per_sample = @format.bit_depth / 8
        frame_size = bytes_per_sample * @format.channels
        frame_count = data.length / frame_size

        Array.new(frame_count) do |index|
          decode_sample(data.byteslice(index * frame_size, bytes_per_sample))
        end
      end

      def decode_sample(bytes)
        case @format.audio_format
        when 1
          decode_pcm_sample(bytes)
        when 3
          decode_float_sample(bytes)
        else
          raise Sonus::WAVFormatError, "Unsupported audio format #{@format.audio_format}"
        end
      end

      # Scales integer PCM by 2**(bits - 1). 8-bit samples are unsigned and
      # are re-centred by subtracting 128 first.
      def decode_pcm_sample(bytes)
        case @format.bit_depth
        when 8
          (bytes.unpack1("C") - 128) / 128.0
        when 16
          bytes.unpack1("s<") / 32_768.0
        when 24
          decode_int24(bytes) / 8_388_608.0
        when 32
          bytes.unpack1("l<") / 2_147_483_648.0
        else
          raise Sonus::WAVFormatError, "Unsupported PCM bit depth #{@format.bit_depth}"
        end
      end

      # Little-endian IEEE float samples are returned without scaling.
      def decode_float_sample(bytes)
        case @format.bit_depth
        when 32
          bytes.unpack1("e")
        when 64
          bytes.unpack1("E")
        else
          raise Sonus::WAVFormatError, "Unsupported float bit depth #{@format.bit_depth}"
        end
      end

      # Assembles a little-endian 24-bit value and sign-extends it.
      def decode_int24(bytes)
        byte0, byte1, byte2 = bytes.unpack("C3")
        value = byte0 | (byte1 << 8) | (byte2 << 16)
        value -= 1 << 24 if (value & (1 << 23)) != 0
        value
      end

      # Reads a little-endian unsigned 32-bit integer.
      #
      # @raise [Sonus::WAVFormatError] when fewer than 4 bytes remain
      def read_uint32
        bytes = @io.read(4)
        raise Sonus::WAVFormatError, "Unexpected EOF while reading chunk size" unless bytes && bytes.length == 4

        bytes.unpack1("V")
      end
    end
  end
end
|