torchaudio 0.1.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/LICENSE.txt +1 -1
- data/README.md +18 -8
- data/ext/torchaudio/ext.cpp +18 -60
- data/ext/torchaudio/extconf.rb +2 -14
- data/lib/torchaudio/datasets/utils.rb +4 -2
- data/lib/torchaudio/functional.rb +38 -9
- data/lib/torchaudio/transforms/compute_deltas.rb +15 -0
- data/lib/torchaudio/transforms/fade.rb +74 -0
- data/lib/torchaudio/transforms/mel_spectrogram.rb +2 -0
- data/lib/torchaudio/transforms/mfcc.rb +43 -0
- data/lib/torchaudio/transforms/spectrogram.rb +9 -2
- data/lib/torchaudio/transforms/vol.rb +31 -0
- data/lib/torchaudio/version.rb +1 -1
- data/lib/torchaudio.rb +5 -1
- metadata +13 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f35bd53e4a972f24d5d0a0bfbe8ee45a76a203a535a96043608fa768fee71bf4
|
4
|
+
data.tar.gz: 2a6061fa3aa68c13352ec7c12a79faf85a332163f92b976614fb2f59a1c53d68
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3f3566e6d1cc9bf1467c39350a4c317c2e192d75851c96d725f4e128be42b9fe9ab9be9c3b6ff33a4ab23f38d7d63b820f001f5b9e4af888f8e8b9b98fe795a5
|
7
|
+
data.tar.gz: bde46760427829d4c9955eba0da00f900425e44cb1fe93c6788691c964949d30d9a47ee3e896df226327294018ea4d4149acc25102e85598394644d5f0e3b568
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,18 @@
|
|
1
|
+
## 0.3.0 (2022-07-06)
|
2
|
+
|
3
|
+
- Added `center`, `pad_mode`, and `onesided` options to `Spectrogram` transform
|
4
|
+
- Dropped support for Ruby < 2.7
|
5
|
+
|
6
|
+
## 0.2.1 (2021-07-16)
|
7
|
+
|
8
|
+
- Added `create_dct` method
|
9
|
+
- Added `ComputeDeltas`, `Fade`, `MFCC`, and `Vol` transforms
|
10
|
+
|
11
|
+
## 0.2.0 (2021-05-23)
|
12
|
+
|
13
|
+
- Updated to Rice 4
|
14
|
+
- Dropped support for Ruby < 2.6
|
15
|
+
|
1
16
|
## 0.1.2 (2021-02-06)
|
2
17
|
|
3
18
|
- Added `amplitude_to_DB` and `DB_to_amplitude` methods
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
# TorchAudio
|
1
|
+
# TorchAudio Ruby
|
2
2
|
|
3
3
|
:fire: An audio library for Torch.rb
|
4
4
|
|
5
|
-
[Build Status](https://github.com/ankane/torchaudio/actions)
|
5
|
+
[Build Status](https://github.com/ankane/torchaudio-ruby/actions)
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -15,7 +15,7 @@ brew install sox
|
|
15
15
|
Add this line to your application’s Gemfile:
|
16
16
|
|
17
17
|
```ruby
|
18
|
-
gem
|
18
|
+
gem "torchaudio"
|
19
19
|
```
|
20
20
|
|
21
21
|
## Getting Started
|
@@ -51,10 +51,16 @@ TorchAudio::Transforms::Spectrogram.new.call(waveform)
|
|
51
51
|
|
52
52
|
Supported transforms are:
|
53
53
|
|
54
|
+
- AmplitudeToDB
|
55
|
+
- ComputeDeltas
|
56
|
+
- Fade
|
57
|
+
- MelScale
|
54
58
|
- MelSpectrogram
|
59
|
+
- MFCC
|
55
60
|
- MuLawDecoding
|
56
61
|
- MuLawEncoding
|
57
62
|
- Spectrogram
|
63
|
+
- Vol
|
58
64
|
|
59
65
|
## Functional
|
60
66
|
|
@@ -64,7 +70,11 @@ TorchAudio::Functional.lowpass_biquad(waveform, sample_rate, cutoff_freq)
|
|
64
70
|
|
65
71
|
Supported functions are:
|
66
72
|
|
73
|
+
- amplitude_to_DB
|
67
74
|
- compute_deltas
|
75
|
+
- create_dct
|
76
|
+
- create_fb_matrix
|
77
|
+
- DB_to_amplitude
|
68
78
|
- dither
|
69
79
|
- gain
|
70
80
|
- highpass_biquad
|
@@ -124,22 +134,22 @@ addons:
|
|
124
134
|
|
125
135
|
## History
|
126
136
|
|
127
|
-
View the [changelog](https://github.com/ankane/torchaudio/blob/master/CHANGELOG.md)
|
137
|
+
View the [changelog](https://github.com/ankane/torchaudio-ruby/blob/master/CHANGELOG.md)
|
128
138
|
|
129
139
|
## Contributing
|
130
140
|
|
131
141
|
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
132
142
|
|
133
|
-
- [Report bugs](https://github.com/ankane/torchaudio/issues)
|
134
|
-
- Fix bugs and [submit pull requests](https://github.com/ankane/torchaudio/pulls)
|
143
|
+
- [Report bugs](https://github.com/ankane/torchaudio-ruby/issues)
|
144
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/torchaudio-ruby/pulls)
|
135
145
|
- Write, clarify, or fix documentation
|
136
146
|
- Suggest or add new features
|
137
147
|
|
138
148
|
To get started with development:
|
139
149
|
|
140
150
|
```sh
|
141
|
-
git clone https://github.com/ankane/torchaudio.git
|
142
|
-
cd torchaudio
|
151
|
+
git clone https://github.com/ankane/torchaudio-ruby.git
|
152
|
+
cd torchaudio-ruby
|
143
153
|
bundle install
|
144
154
|
bundle exec rake compile
|
145
155
|
bundle exec rake test
|
data/ext/torchaudio/ext.cpp
CHANGED
@@ -1,75 +1,33 @@
|
|
1
1
|
#include <torchaudio/csrc/sox.h>
|
2
2
|
|
3
|
-
#include <rice/
|
4
|
-
#include <rice/
|
5
|
-
|
6
|
-
using namespace Rice;
|
7
|
-
|
8
|
-
class SignalInfo {
|
9
|
-
sox_signalinfo_t* value = nullptr;
|
10
|
-
public:
|
11
|
-
SignalInfo(Object o) {
|
12
|
-
if (!o.is_nil()) {
|
13
|
-
value = from_ruby<sox_signalinfo_t*>(o);
|
14
|
-
}
|
15
|
-
}
|
16
|
-
operator sox_signalinfo_t*() {
|
17
|
-
return value;
|
18
|
-
}
|
19
|
-
};
|
20
|
-
|
21
|
-
template<>
|
22
|
-
inline
|
23
|
-
SignalInfo from_ruby<SignalInfo>(Object x)
|
24
|
-
{
|
25
|
-
return SignalInfo(x);
|
26
|
-
}
|
27
|
-
|
28
|
-
class EncodingInfo {
|
29
|
-
sox_encodinginfo_t* value = nullptr;
|
30
|
-
public:
|
31
|
-
EncodingInfo(Object o) {
|
32
|
-
if (!o.is_nil()) {
|
33
|
-
value = from_ruby<sox_encodinginfo_t*>(o);
|
34
|
-
}
|
35
|
-
}
|
36
|
-
operator sox_encodinginfo_t*() {
|
37
|
-
return value;
|
38
|
-
}
|
39
|
-
};
|
40
|
-
|
41
|
-
template<>
|
42
|
-
inline
|
43
|
-
EncodingInfo from_ruby<EncodingInfo>(Object x)
|
44
|
-
{
|
45
|
-
return EncodingInfo(x);
|
46
|
-
}
|
3
|
+
#include <rice/rice.hpp>
|
4
|
+
#include <rice/stl.hpp>
|
47
5
|
|
48
6
|
extern "C"
|
49
7
|
void Init_ext()
|
50
8
|
{
|
51
|
-
|
9
|
+
auto rb_mTorchAudio = Rice::define_module("TorchAudio");
|
52
10
|
|
53
|
-
|
54
|
-
.
|
11
|
+
auto rb_mExt = Rice::define_module_under(rb_mTorchAudio, "Ext")
|
12
|
+
.define_singleton_function(
|
55
13
|
"read_audio_file",
|
56
|
-
|
14
|
+
[](const std::string& file_name, at::Tensor output, bool ch_first, int64_t nframes, int64_t offset, sox_signalinfo_t* si, sox_encodinginfo_t* ei, const char* ft) {
|
57
15
|
return torch::audio::read_audio_file(file_name, output, ch_first, nframes, offset, si, ei, ft);
|
58
16
|
})
|
59
|
-
.
|
17
|
+
.define_singleton_function(
|
60
18
|
"write_audio_file",
|
61
|
-
|
19
|
+
[](const std::string& file_name, const at::Tensor& tensor, sox_signalinfo_t* si, sox_encodinginfo_t* ei, const char* file_type) {
|
62
20
|
return torch::audio::write_audio_file(file_name, tensor, si, ei, file_type);
|
63
21
|
});
|
64
22
|
|
65
|
-
|
66
|
-
.define_constructor(Constructor<sox_signalinfo_t>())
|
67
|
-
.define_method("rate",
|
68
|
-
.define_method("channels",
|
69
|
-
.define_method("precision",
|
70
|
-
.define_method("length",
|
71
|
-
.define_method("rate=",
|
72
|
-
.define_method("channels=",
|
73
|
-
.define_method("precision=",
|
74
|
-
.define_method("length=",
|
23
|
+
auto rb_cSignalInfo = Rice::define_class_under<sox_signalinfo_t>(rb_mExt, "SignalInfo")
|
24
|
+
.define_constructor(Rice::Constructor<sox_signalinfo_t>())
|
25
|
+
.define_method("rate", [](sox_signalinfo_t& self) { return self.rate; })
|
26
|
+
.define_method("channels", [](sox_signalinfo_t& self) { return self.channels; })
|
27
|
+
.define_method("precision", [](sox_signalinfo_t& self) { return self.precision; })
|
28
|
+
.define_method("length", [](sox_signalinfo_t& self) { return self.length; })
|
29
|
+
.define_method("rate=", [](sox_signalinfo_t& self, sox_rate_t rate) { self.rate = rate; })
|
30
|
+
.define_method("channels=", [](sox_signalinfo_t& self, unsigned channels) { self.channels = channels; })
|
31
|
+
.define_method("precision=", [](sox_signalinfo_t& self, unsigned precision) { self.precision = precision; })
|
32
|
+
.define_method("length=", [](sox_signalinfo_t& self, sox_uint64_t length) { self.length = length; });
|
75
33
|
}
|
data/ext/torchaudio/extconf.rb
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
require "mkmf-rice"
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
$CXXFLAGS += " -std=c++14"
|
3
|
+
$CXXFLAGS += " -std=c++17 $(optflags)"
|
6
4
|
|
7
5
|
abort "SoX not found" unless have_library("sox")
|
8
6
|
|
@@ -22,19 +20,9 @@ $CXXFLAGS += " -D_GLIBCXX_USE_CXX11_ABI=1"
|
|
22
20
|
|
23
21
|
apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
|
24
22
|
|
25
|
-
# check omp first
|
26
|
-
if have_library("omp") || have_library("gomp")
|
27
|
-
$CXXFLAGS += " -DAT_PARALLEL_OPENMP=1"
|
28
|
-
$CXXFLAGS += " -Xclang" if apple_clang
|
29
|
-
$CXXFLAGS += " -fopenmp"
|
30
|
-
end
|
31
|
-
|
32
23
|
if apple_clang
|
33
|
-
# silence ruby/intern.h warning
|
34
|
-
$CXXFLAGS += " -Wno-deprecated-register"
|
35
|
-
|
36
24
|
# silence torch warnings
|
37
|
-
$CXXFLAGS += " -Wno-
|
25
|
+
$CXXFLAGS += " -Wno-deprecated-declarations"
|
38
26
|
else
|
39
27
|
# silence rice warnings
|
40
28
|
$CXXFLAGS += " -Wno-noexcept-type"
|
@@ -15,7 +15,9 @@ module TorchAudio
|
|
15
15
|
end
|
16
16
|
|
17
17
|
# follows redirects
|
18
|
-
def download_url_to_file(url, dst, hash_value, hash_type)
|
18
|
+
def download_url_to_file(url, dst, hash_value, hash_type, redirects = 0)
|
19
|
+
raise "Too many redirects" if redirects > 10
|
20
|
+
|
19
21
|
uri = URI(url)
|
20
22
|
tmp = nil
|
21
23
|
location = nil
|
@@ -41,7 +43,7 @@ module TorchAudio
|
|
41
43
|
end
|
42
44
|
|
43
45
|
if location
|
44
|
-
download_url_to_file(location, dst)
|
46
|
+
download_url_to_file(location, dst, hash_value, hash_type, redirects + 1)
|
45
47
|
else
|
46
48
|
# check hash
|
47
49
|
# TODO use hash_type
|
@@ -1,7 +1,7 @@
|
|
1
1
|
module TorchAudio
|
2
2
|
module Functional
|
3
3
|
class << self
|
4
|
-
def spectrogram(waveform, pad, window, n_fft, hop_length, win_length, power, normalized)
|
4
|
+
def spectrogram(waveform, pad, window, n_fft, hop_length, win_length, power, normalized, center: true, pad_mode: "reflect", onesided: true)
|
5
5
|
if pad > 0
|
6
6
|
# TODO add "with torch.no_grad():" back when JIT supports it
|
7
7
|
waveform = Torch::NN::Functional.pad(waveform, [pad, pad], "constant")
|
@@ -12,20 +12,32 @@ module TorchAudio
|
|
12
12
|
waveform = waveform.reshape(-1, shape[-1])
|
13
13
|
|
14
14
|
# default values are consistent with librosa.core.spectrum._spectrogram
|
15
|
-
spec_f =
|
16
|
-
|
17
|
-
|
15
|
+
spec_f =
|
16
|
+
Torch.stft(
|
17
|
+
waveform,
|
18
|
+
n_fft,
|
19
|
+
hop_length: hop_length,
|
20
|
+
win_length: win_length,
|
21
|
+
window: window,
|
22
|
+
center: center,
|
23
|
+
pad_mode: pad_mode,
|
24
|
+
normalized: false,
|
25
|
+
onesided: onesided,
|
26
|
+
return_complex: true
|
27
|
+
)
|
18
28
|
|
19
29
|
# unpack batch
|
20
|
-
spec_f = spec_f.reshape(shape[0..-2] + spec_f.shape[-
|
30
|
+
spec_f = spec_f.reshape(shape[0..-2] + spec_f.shape[-2..-1])
|
21
31
|
|
22
32
|
if normalized
|
23
|
-
spec_f
|
33
|
+
spec_f /= window.pow(2.0).sum.sqrt
|
24
34
|
end
|
25
|
-
if power
|
26
|
-
|
35
|
+
if !power.nil?
|
36
|
+
if power == 1
|
37
|
+
return spec_f.abs
|
38
|
+
end
|
39
|
+
return spec_f.abs.pow(power)
|
27
40
|
end
|
28
|
-
|
29
41
|
spec_f
|
30
42
|
end
|
31
43
|
|
@@ -240,6 +252,23 @@ module TorchAudio
|
|
240
252
|
Torch.pow(Torch.pow(10.0, db * 0.1), power) * ref
|
241
253
|
end
|
242
254
|
|
255
|
+
def create_dct(n_mfcc, n_mels, norm: nil)
|
256
|
+
n = Torch.arange(n_mels.to_f)
|
257
|
+
k = Torch.arange(n_mfcc.to_f).unsqueeze!(1)
|
258
|
+
dct = Torch.cos((n + 0.5) * k * Math::PI / n_mels.to_f)
|
259
|
+
|
260
|
+
if norm.nil?
|
261
|
+
dct *= 2.0
|
262
|
+
else
|
263
|
+
raise ArgumentError, "Invalid DCT norm value" unless norm == :ortho
|
264
|
+
|
265
|
+
dct[0] *= 1.0 / Math.sqrt(2.0)
|
266
|
+
dct *= Math.sqrt(2.0 / n_mels)
|
267
|
+
end
|
268
|
+
|
269
|
+
dct.t
|
270
|
+
end
|
271
|
+
|
243
272
|
private
|
244
273
|
|
245
274
|
def _apply_probability_distribution(waveform, density_function: "TPDF")
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module TorchAudio
|
2
|
+
module Transforms
|
3
|
+
class ComputeDeltas < Torch::NN::Module
|
4
|
+
def initialize(win_length: 5, mode: "replicate")
|
5
|
+
super()
|
6
|
+
@win_length = win_length
|
7
|
+
@mode = mode
|
8
|
+
end
|
9
|
+
|
10
|
+
def forward(specgram)
|
11
|
+
F.compute_deltas(specgram, win_length: @win_length, mode: @mode)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module TorchAudio
|
2
|
+
module Transforms
|
3
|
+
class Fade < Torch::NN::Module
|
4
|
+
def initialize(fade_in_len: 0, fade_out_len: 0, fade_shape: "linear")
|
5
|
+
super()
|
6
|
+
@fade_in_len = fade_in_len
|
7
|
+
@fade_out_len = fade_out_len
|
8
|
+
@fade_shape = fade_shape
|
9
|
+
end
|
10
|
+
|
11
|
+
def forward(waveform)
|
12
|
+
waveform_length = waveform.size[-1]
|
13
|
+
device = waveform.device
|
14
|
+
fade_in(waveform_length).to(device) * fade_out(waveform_length).to(device) * waveform
|
15
|
+
end
|
16
|
+
|
17
|
+
private
|
18
|
+
|
19
|
+
def fade_in(waveform_length)
|
20
|
+
fade = Torch.linspace(0, 1, @fade_in_len)
|
21
|
+
ones = Torch.ones(waveform_length - @fade_in_len)
|
22
|
+
|
23
|
+
if @fade_shape == "linear"
|
24
|
+
fade = fade
|
25
|
+
end
|
26
|
+
|
27
|
+
if @fade_shape == "exponential"
|
28
|
+
fade = Torch.pow(2, (fade - 1)) * fade
|
29
|
+
end
|
30
|
+
|
31
|
+
if @fade_shape == "logarithmic"
|
32
|
+
fade = Torch.log10(0.1 + fade) + 1
|
33
|
+
end
|
34
|
+
|
35
|
+
if @fade_shape == "quarter_sine"
|
36
|
+
fade = Torch.sin(fade * Math::PI / 2)
|
37
|
+
end
|
38
|
+
|
39
|
+
if @fade_shape == "half_sine"
|
40
|
+
fade = Torch.sin(fade * Math::PI - Math::PI / 2) / 2 + 0.5
|
41
|
+
end
|
42
|
+
|
43
|
+
Torch.cat([fade, ones]).clamp!(0, 1)
|
44
|
+
end
|
45
|
+
|
46
|
+
def fade_out(waveform_length)
|
47
|
+
fade = Torch.linspace(0, 1, @fade_out_len)
|
48
|
+
ones = Torch.ones(waveform_length - @fade_out_len)
|
49
|
+
|
50
|
+
if @fade_shape == "linear"
|
51
|
+
fade = - fade + 1
|
52
|
+
end
|
53
|
+
|
54
|
+
if @fade_shape == "exponential"
|
55
|
+
fade = Torch.pow(2, - fade) * (1 - fade)
|
56
|
+
end
|
57
|
+
|
58
|
+
if @fade_shape == "logarithmic"
|
59
|
+
fade = Torch.log10(1.1 - fade) + 1
|
60
|
+
end
|
61
|
+
|
62
|
+
if @fade_shape == "quarter_sine"
|
63
|
+
fade = Torch.sin(fade * Math::PI / 2 + Math::PI / 2)
|
64
|
+
end
|
65
|
+
|
66
|
+
if @fade_shape == "half_sine"
|
67
|
+
fade = Torch.sin(fade * Math::PI + Math::PI / 2) / 2 + 0.5
|
68
|
+
end
|
69
|
+
|
70
|
+
Torch.cat([ones, fade]).clamp!(0, 1)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
@@ -1,6 +1,8 @@
|
|
1
1
|
module TorchAudio
|
2
2
|
module Transforms
|
3
3
|
class MelSpectrogram < Torch::NN::Module
|
4
|
+
attr_reader :n_mels
|
5
|
+
|
4
6
|
def initialize(
|
5
7
|
sample_rate: 16000, n_fft: 400, win_length: nil, hop_length: nil, f_min: 0.0,
|
6
8
|
f_max: nil, pad: 0, n_mels: 128, window_fn: Torch.method(:hann_window),
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module TorchAudio
|
2
|
+
module Transforms
|
3
|
+
class MFCC < Torch::NN::Module
|
4
|
+
|
5
|
+
SUPPORTED_DCT_TYPES = [2]
|
6
|
+
|
7
|
+
def initialize(sample_rate: 16000, n_mfcc: 40, dct_type: 2, norm: :ortho, log_mels: false, melkwargs: {})
|
8
|
+
super()
|
9
|
+
|
10
|
+
raise ArgumentError, "DCT type not supported: #{dct_type}" unless SUPPORTED_DCT_TYPES.include?(dct_type)
|
11
|
+
|
12
|
+
@sample_rate = sample_rate
|
13
|
+
@n_mfcc = n_mfcc
|
14
|
+
@dct_type = dct_type
|
15
|
+
@norm = norm
|
16
|
+
@top_db = 80.0
|
17
|
+
@amplitude_to_db = TorchAudio::Transforms::AmplitudeToDB.new(stype: :power, top_db: @top_db)
|
18
|
+
|
19
|
+
@melspectrogram = TorchAudio::Transforms::MelSpectrogram.new(sample_rate: @sample_rate, **melkwargs)
|
20
|
+
|
21
|
+
raise ArgumentError, "Cannot select more MFCC coefficients than # mel bins" if @n_mfcc > @melspectrogram.n_mels
|
22
|
+
|
23
|
+
dct_mat = F.create_dct(@n_mfcc, @melspectrogram.n_mels, norm: @norm)
|
24
|
+
register_buffer('dct_mat', dct_mat)
|
25
|
+
|
26
|
+
@log_mels = log_mels
|
27
|
+
end
|
28
|
+
|
29
|
+
def forward(waveform)
|
30
|
+
mel_specgram = @melspectrogram.(waveform)
|
31
|
+
if @log_mels
|
32
|
+
mel_specgram = Torch.log(mel_specgram + 1e-6)
|
33
|
+
else
|
34
|
+
mel_specgram = @amplitude_to_db.(mel_specgram)
|
35
|
+
end
|
36
|
+
|
37
|
+
Torch
|
38
|
+
.matmul(mel_specgram.transpose(-2, -1), @dct_mat)
|
39
|
+
.transpose(-2, -1)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -3,7 +3,8 @@ module TorchAudio
|
|
3
3
|
class Spectrogram < Torch::NN::Module
|
4
4
|
def initialize(
|
5
5
|
n_fft: 400, win_length: nil, hop_length: nil, pad: 0,
|
6
|
-
window_fn: Torch.method(:hann_window), power: 2.0, normalized: false, wkwargs: nil
|
6
|
+
window_fn: Torch.method(:hann_window), power: 2.0, normalized: false, wkwargs: nil,
|
7
|
+
center: true, pad_mode: "reflect", onesided: true
|
7
8
|
)
|
8
9
|
|
9
10
|
super()
|
@@ -17,10 +18,16 @@ module TorchAudio
|
|
17
18
|
@pad = pad
|
18
19
|
@power = power
|
19
20
|
@normalized = normalized
|
21
|
+
@center = center
|
22
|
+
@pad_mode = pad_mode
|
23
|
+
@onesided = onesided
|
20
24
|
end
|
21
25
|
|
22
26
|
def forward(waveform)
|
23
|
-
F.spectrogram(
|
27
|
+
F.spectrogram(
|
28
|
+
waveform, @pad, @window, @n_fft, @hop_length, @win_length, @power, @normalized,
|
29
|
+
center: @center, pad_mode: @pad_mode, onesided: @onesided
|
30
|
+
)
|
24
31
|
end
|
25
32
|
end
|
26
33
|
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module TorchAudio
|
2
|
+
module Transforms
|
3
|
+
class Vol < Torch::NN::Module
|
4
|
+
def initialize(gain, gain_type: "amplitude")
|
5
|
+
super()
|
6
|
+
@gain = gain
|
7
|
+
@gain_type = gain_type
|
8
|
+
|
9
|
+
if ["amplitude", "power"].include?(gain_type) && gain < 0
|
10
|
+
raise ArgumentError, "If gain_type = amplitude or power, gain must be positive."
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def forward(waveform)
|
15
|
+
if @gain_type == "amplitude"
|
16
|
+
waveform = waveform * @gain
|
17
|
+
end
|
18
|
+
|
19
|
+
if @gain_type == "db"
|
20
|
+
waveform = F.gain(waveform, @gain)
|
21
|
+
end
|
22
|
+
|
23
|
+
if @gain_type == "power"
|
24
|
+
waveform = F.gain(waveform, 10 * Math.log10(@gain))
|
25
|
+
end
|
26
|
+
|
27
|
+
Torch.clamp(waveform, -1, 1)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
data/lib/torchaudio/version.rb
CHANGED
data/lib/torchaudio.rb
CHANGED
@@ -15,12 +15,16 @@ require "set"
|
|
15
15
|
require "torchaudio/datasets/utils"
|
16
16
|
require "torchaudio/datasets/yesno"
|
17
17
|
require "torchaudio/functional"
|
18
|
+
require "torchaudio/transforms/compute_deltas"
|
19
|
+
require "torchaudio/transforms/fade"
|
18
20
|
require "torchaudio/transforms/mel_scale"
|
19
21
|
require "torchaudio/transforms/mel_spectrogram"
|
20
22
|
require "torchaudio/transforms/mu_law_encoding"
|
21
23
|
require "torchaudio/transforms/mu_law_decoding"
|
22
24
|
require "torchaudio/transforms/spectrogram"
|
23
25
|
require "torchaudio/transforms/amplitude_to_db"
|
26
|
+
require "torchaudio/transforms/mfcc"
|
27
|
+
require "torchaudio/transforms/vol"
|
24
28
|
require "torchaudio/version"
|
25
29
|
|
26
30
|
module TorchAudio
|
@@ -91,7 +95,7 @@ module TorchAudio
|
|
91
95
|
end
|
92
96
|
|
93
97
|
def save_encinfo(filepath, src, channels_first: true, signalinfo: nil, encodinginfo: nil, filetype: nil)
|
94
|
-
ch_idx,
|
98
|
+
ch_idx, _len_idx = channels_first ? [0, 1] : [1, 0]
|
95
99
|
|
96
100
|
# check if save directory exists
|
97
101
|
abs_dirpath = File.dirname(File.expand_path(filepath))
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: torchaudio
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-07-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: torch-rb
|
@@ -16,28 +16,28 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.
|
19
|
+
version: 0.11.1
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.
|
26
|
+
version: 0.11.1
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rice
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 4.0.2
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 4.0.2
|
41
41
|
description:
|
42
42
|
email: andrew@ankane.org
|
43
43
|
executables: []
|
@@ -64,13 +64,17 @@ files:
|
|
64
64
|
- lib/torchaudio/datasets/yesno.rb
|
65
65
|
- lib/torchaudio/functional.rb
|
66
66
|
- lib/torchaudio/transforms/amplitude_to_db.rb
|
67
|
+
- lib/torchaudio/transforms/compute_deltas.rb
|
68
|
+
- lib/torchaudio/transforms/fade.rb
|
67
69
|
- lib/torchaudio/transforms/mel_scale.rb
|
68
70
|
- lib/torchaudio/transforms/mel_spectrogram.rb
|
71
|
+
- lib/torchaudio/transforms/mfcc.rb
|
69
72
|
- lib/torchaudio/transforms/mu_law_decoding.rb
|
70
73
|
- lib/torchaudio/transforms/mu_law_encoding.rb
|
71
74
|
- lib/torchaudio/transforms/spectrogram.rb
|
75
|
+
- lib/torchaudio/transforms/vol.rb
|
72
76
|
- lib/torchaudio/version.rb
|
73
|
-
homepage: https://github.com/ankane/torchaudio
|
77
|
+
homepage: https://github.com/ankane/torchaudio-ruby
|
74
78
|
licenses:
|
75
79
|
- BSD-2-Clause
|
76
80
|
metadata: {}
|
@@ -82,14 +86,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
82
86
|
requirements:
|
83
87
|
- - ">="
|
84
88
|
- !ruby/object:Gem::Version
|
85
|
-
version: '2.
|
89
|
+
version: '2.7'
|
86
90
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
87
91
|
requirements:
|
88
92
|
- - ">="
|
89
93
|
- !ruby/object:Gem::Version
|
90
94
|
version: '0'
|
91
95
|
requirements: []
|
92
|
-
rubygems_version: 3.
|
96
|
+
rubygems_version: 3.3.7
|
93
97
|
signing_key:
|
94
98
|
specification_version: 4
|
95
99
|
summary: Data manipulation and transformation for audio signal processing
|