torchaudio 0.1.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/LICENSE.txt +1 -1
- data/README.md +18 -8
- data/ext/torchaudio/ext.cpp +18 -60
- data/ext/torchaudio/extconf.rb +2 -14
- data/lib/torchaudio/datasets/utils.rb +4 -2
- data/lib/torchaudio/functional.rb +38 -9
- data/lib/torchaudio/transforms/compute_deltas.rb +15 -0
- data/lib/torchaudio/transforms/fade.rb +74 -0
- data/lib/torchaudio/transforms/mel_spectrogram.rb +2 -0
- data/lib/torchaudio/transforms/mfcc.rb +43 -0
- data/lib/torchaudio/transforms/spectrogram.rb +9 -2
- data/lib/torchaudio/transforms/vol.rb +31 -0
- data/lib/torchaudio/version.rb +1 -1
- data/lib/torchaudio.rb +5 -1
- metadata +13 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f35bd53e4a972f24d5d0a0bfbe8ee45a76a203a535a96043608fa768fee71bf4
|
4
|
+
data.tar.gz: 2a6061fa3aa68c13352ec7c12a79faf85a332163f92b976614fb2f59a1c53d68
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3f3566e6d1cc9bf1467c39350a4c317c2e192d75851c96d725f4e128be42b9fe9ab9be9c3b6ff33a4ab23f38d7d63b820f001f5b9e4af888f8e8b9b98fe795a5
|
7
|
+
data.tar.gz: bde46760427829d4c9955eba0da00f900425e44cb1fe93c6788691c964949d30d9a47ee3e896df226327294018ea4d4149acc25102e85598394644d5f0e3b568
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,18 @@
|
|
1
|
+
## 0.3.0 (2022-07-06)
|
2
|
+
|
3
|
+
- Added `center`, `pad_mode`, and `onesided` options to `Spectrogram` transform
|
4
|
+
- Dropped support for Ruby < 2.7
|
5
|
+
|
6
|
+
## 0.2.1 (2021-07-16)
|
7
|
+
|
8
|
+
- Added `create_dct` method
|
9
|
+
- Added `ComputeDeltas`, `Fade`, `MFCC`, and `Vol` transforms
|
10
|
+
|
11
|
+
## 0.2.0 (2021-05-23)
|
12
|
+
|
13
|
+
- Updated to Rice 4
|
14
|
+
- Dropped support for Ruby < 2.6
|
15
|
+
|
1
16
|
## 0.1.2 (2021-02-06)
|
2
17
|
|
3
18
|
- Added `amplitude_to_DB` and `DB_to_amplitude` methods
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
# TorchAudio
|
1
|
+
# TorchAudio Ruby
|
2
2
|
|
3
3
|
:fire: An audio library for Torch.rb
|
4
4
|
|
5
|
-
[![Build Status](https://github.com/ankane/torchaudio/workflows/build/badge.svg?branch=master)](https://github.com/ankane/torchaudio/actions)
|
5
|
+
[![Build Status](https://github.com/ankane/torchaudio-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/torchaudio-ruby/actions)
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -15,7 +15,7 @@ brew install sox
|
|
15
15
|
Add this line to your application’s Gemfile:
|
16
16
|
|
17
17
|
```ruby
|
18
|
-
gem
|
18
|
+
gem "torchaudio"
|
19
19
|
```
|
20
20
|
|
21
21
|
## Getting Started
|
@@ -51,10 +51,16 @@ TorchAudio::Transforms::Spectrogram.new.call(waveform)
|
|
51
51
|
|
52
52
|
Supported transforms are:
|
53
53
|
|
54
|
+
- AmplitudeToDB
|
55
|
+
- ComputeDeltas
|
56
|
+
- Fade
|
57
|
+
- MelScale
|
54
58
|
- MelSpectrogram
|
59
|
+
- MFCC
|
55
60
|
- MuLawDecoding
|
56
61
|
- MuLawEncoding
|
57
62
|
- Spectrogram
|
63
|
+
- Vol
|
58
64
|
|
59
65
|
## Functional
|
60
66
|
|
@@ -64,7 +70,11 @@ TorchAudio::Functional.lowpass_biquad(waveform, sample_rate, cutoff_freq)
|
|
64
70
|
|
65
71
|
Supported functions are:
|
66
72
|
|
73
|
+
- amplitude_to_DB
|
67
74
|
- compute_deltas
|
75
|
+
- create_dct
|
76
|
+
- create_fb_matrix
|
77
|
+
- DB_to_amplitude
|
68
78
|
- dither
|
69
79
|
- gain
|
70
80
|
- highpass_biquad
|
@@ -124,22 +134,22 @@ addons:
|
|
124
134
|
|
125
135
|
## History
|
126
136
|
|
127
|
-
View the [changelog](https://github.com/ankane/torchaudio/blob/master/CHANGELOG.md)
|
137
|
+
View the [changelog](https://github.com/ankane/torchaudio-ruby/blob/master/CHANGELOG.md)
|
128
138
|
|
129
139
|
## Contributing
|
130
140
|
|
131
141
|
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
132
142
|
|
133
|
-
- [Report bugs](https://github.com/ankane/torchaudio/issues)
|
134
|
-
- Fix bugs and [submit pull requests](https://github.com/ankane/torchaudio/pulls)
|
143
|
+
- [Report bugs](https://github.com/ankane/torchaudio-ruby/issues)
|
144
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/torchaudio-ruby/pulls)
|
135
145
|
- Write, clarify, or fix documentation
|
136
146
|
- Suggest or add new features
|
137
147
|
|
138
148
|
To get started with development:
|
139
149
|
|
140
150
|
```sh
|
141
|
-
git clone https://github.com/ankane/torchaudio.git
|
142
|
-
cd torchaudio
|
151
|
+
git clone https://github.com/ankane/torchaudio-ruby.git
|
152
|
+
cd torchaudio-ruby
|
143
153
|
bundle install
|
144
154
|
bundle exec rake compile
|
145
155
|
bundle exec rake test
|
data/ext/torchaudio/ext.cpp
CHANGED
@@ -1,75 +1,33 @@
|
|
1
1
|
#include <torchaudio/csrc/sox.h>
|
2
2
|
|
3
|
-
#include <rice/
|
4
|
-
#include <rice/
|
5
|
-
|
6
|
-
using namespace Rice;
|
7
|
-
|
8
|
-
class SignalInfo {
|
9
|
-
sox_signalinfo_t* value = nullptr;
|
10
|
-
public:
|
11
|
-
SignalInfo(Object o) {
|
12
|
-
if (!o.is_nil()) {
|
13
|
-
value = from_ruby<sox_signalinfo_t*>(o);
|
14
|
-
}
|
15
|
-
}
|
16
|
-
operator sox_signalinfo_t*() {
|
17
|
-
return value;
|
18
|
-
}
|
19
|
-
};
|
20
|
-
|
21
|
-
template<>
|
22
|
-
inline
|
23
|
-
SignalInfo from_ruby<SignalInfo>(Object x)
|
24
|
-
{
|
25
|
-
return SignalInfo(x);
|
26
|
-
}
|
27
|
-
|
28
|
-
class EncodingInfo {
|
29
|
-
sox_encodinginfo_t* value = nullptr;
|
30
|
-
public:
|
31
|
-
EncodingInfo(Object o) {
|
32
|
-
if (!o.is_nil()) {
|
33
|
-
value = from_ruby<sox_encodinginfo_t*>(o);
|
34
|
-
}
|
35
|
-
}
|
36
|
-
operator sox_encodinginfo_t*() {
|
37
|
-
return value;
|
38
|
-
}
|
39
|
-
};
|
40
|
-
|
41
|
-
template<>
|
42
|
-
inline
|
43
|
-
EncodingInfo from_ruby<EncodingInfo>(Object x)
|
44
|
-
{
|
45
|
-
return EncodingInfo(x);
|
46
|
-
}
|
3
|
+
#include <rice/rice.hpp>
|
4
|
+
#include <rice/stl.hpp>
|
47
5
|
|
48
6
|
extern "C"
|
49
7
|
void Init_ext()
|
50
8
|
{
|
51
|
-
|
9
|
+
auto rb_mTorchAudio = Rice::define_module("TorchAudio");
|
52
10
|
|
53
|
-
|
54
|
-
.
|
11
|
+
auto rb_mExt = Rice::define_module_under(rb_mTorchAudio, "Ext")
|
12
|
+
.define_singleton_function(
|
55
13
|
"read_audio_file",
|
56
|
-
|
14
|
+
[](const std::string& file_name, at::Tensor output, bool ch_first, int64_t nframes, int64_t offset, sox_signalinfo_t* si, sox_encodinginfo_t* ei, const char* ft) {
|
57
15
|
return torch::audio::read_audio_file(file_name, output, ch_first, nframes, offset, si, ei, ft);
|
58
16
|
})
|
59
|
-
.
|
17
|
+
.define_singleton_function(
|
60
18
|
"write_audio_file",
|
61
|
-
|
19
|
+
[](const std::string& file_name, const at::Tensor& tensor, sox_signalinfo_t* si, sox_encodinginfo_t* ei, const char* file_type) {
|
62
20
|
return torch::audio::write_audio_file(file_name, tensor, si, ei, file_type);
|
63
21
|
});
|
64
22
|
|
65
|
-
|
66
|
-
.define_constructor(Constructor<sox_signalinfo_t>())
|
67
|
-
.define_method("rate",
|
68
|
-
.define_method("channels",
|
69
|
-
.define_method("precision",
|
70
|
-
.define_method("length",
|
71
|
-
.define_method("rate=",
|
72
|
-
.define_method("channels=",
|
73
|
-
.define_method("precision=",
|
74
|
-
.define_method("length=",
|
23
|
+
auto rb_cSignalInfo = Rice::define_class_under<sox_signalinfo_t>(rb_mExt, "SignalInfo")
|
24
|
+
.define_constructor(Rice::Constructor<sox_signalinfo_t>())
|
25
|
+
.define_method("rate", [](sox_signalinfo_t& self) { return self.rate; })
|
26
|
+
.define_method("channels", [](sox_signalinfo_t& self) { return self.channels; })
|
27
|
+
.define_method("precision", [](sox_signalinfo_t& self) { return self.precision; })
|
28
|
+
.define_method("length", [](sox_signalinfo_t& self) { return self.length; })
|
29
|
+
.define_method("rate=", [](sox_signalinfo_t& self, sox_rate_t rate) { self.rate = rate; })
|
30
|
+
.define_method("channels=", [](sox_signalinfo_t& self, unsigned channels) { self.channels = channels; })
|
31
|
+
.define_method("precision=", [](sox_signalinfo_t& self, unsigned precision) { self.precision = precision; })
|
32
|
+
.define_method("length=", [](sox_signalinfo_t& self, sox_uint64_t length) { self.length = length; });
|
75
33
|
}
|
data/ext/torchaudio/extconf.rb
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
require "mkmf-rice"
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
$CXXFLAGS += " -std=c++14"
|
3
|
+
$CXXFLAGS += " -std=c++17 $(optflags)"
|
6
4
|
|
7
5
|
abort "SoX not found" unless have_library("sox")
|
8
6
|
|
@@ -22,19 +20,9 @@ $CXXFLAGS += " -D_GLIBCXX_USE_CXX11_ABI=1"
|
|
22
20
|
|
23
21
|
apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
|
24
22
|
|
25
|
-
# check omp first
|
26
|
-
if have_library("omp") || have_library("gomp")
|
27
|
-
$CXXFLAGS += " -DAT_PARALLEL_OPENMP=1"
|
28
|
-
$CXXFLAGS += " -Xclang" if apple_clang
|
29
|
-
$CXXFLAGS += " -fopenmp"
|
30
|
-
end
|
31
|
-
|
32
23
|
if apple_clang
|
33
|
-
# silence ruby/intern.h warning
|
34
|
-
$CXXFLAGS += " -Wno-deprecated-register"
|
35
|
-
|
36
24
|
# silence torch warnings
|
37
|
-
$CXXFLAGS += " -Wno-
|
25
|
+
$CXXFLAGS += " -Wno-deprecated-declarations"
|
38
26
|
else
|
39
27
|
# silence rice warnings
|
40
28
|
$CXXFLAGS += " -Wno-noexcept-type"
|
@@ -15,7 +15,9 @@ module TorchAudio
|
|
15
15
|
end
|
16
16
|
|
17
17
|
# follows redirects
|
18
|
-
def download_url_to_file(url, dst, hash_value, hash_type)
|
18
|
+
def download_url_to_file(url, dst, hash_value, hash_type, redirects = 0)
|
19
|
+
raise "Too many redirects" if redirects > 10
|
20
|
+
|
19
21
|
uri = URI(url)
|
20
22
|
tmp = nil
|
21
23
|
location = nil
|
@@ -41,7 +43,7 @@ module TorchAudio
|
|
41
43
|
end
|
42
44
|
|
43
45
|
if location
|
44
|
-
download_url_to_file(location, dst)
|
46
|
+
download_url_to_file(location, dst, hash_value, hash_type, redirects + 1)
|
45
47
|
else
|
46
48
|
# check hash
|
47
49
|
# TODO use hash_type
|
@@ -1,7 +1,7 @@
|
|
1
1
|
module TorchAudio
|
2
2
|
module Functional
|
3
3
|
class << self
|
4
|
-
def spectrogram(waveform, pad, window, n_fft, hop_length, win_length, power, normalized)
|
4
|
+
def spectrogram(waveform, pad, window, n_fft, hop_length, win_length, power, normalized, center: true, pad_mode: "reflect", onesided: true)
|
5
5
|
if pad > 0
|
6
6
|
# TODO add "with torch.no_grad():" back when JIT supports it
|
7
7
|
waveform = Torch::NN::Functional.pad(waveform, [pad, pad], "constant")
|
@@ -12,20 +12,32 @@ module TorchAudio
|
|
12
12
|
waveform = waveform.reshape(-1, shape[-1])
|
13
13
|
|
14
14
|
# default values are consistent with librosa.core.spectrum._spectrogram
|
15
|
-
spec_f =
|
16
|
-
|
17
|
-
|
15
|
+
spec_f =
|
16
|
+
Torch.stft(
|
17
|
+
waveform,
|
18
|
+
n_fft,
|
19
|
+
hop_length: hop_length,
|
20
|
+
win_length: win_length,
|
21
|
+
window: window,
|
22
|
+
center: center,
|
23
|
+
pad_mode: pad_mode,
|
24
|
+
normalized: false,
|
25
|
+
onesided: onesided,
|
26
|
+
return_complex: true
|
27
|
+
)
|
18
28
|
|
19
29
|
# unpack batch
|
20
|
-
spec_f = spec_f.reshape(shape[0..-2] + spec_f.shape[-
|
30
|
+
spec_f = spec_f.reshape(shape[0..-2] + spec_f.shape[-2..-1])
|
21
31
|
|
22
32
|
if normalized
|
23
|
-
spec_f
|
33
|
+
spec_f /= window.pow(2.0).sum.sqrt
|
24
34
|
end
|
25
|
-
if power
|
26
|
-
|
35
|
+
if !power.nil?
|
36
|
+
if power == 1
|
37
|
+
return spec_f.abs
|
38
|
+
end
|
39
|
+
return spec_f.abs.pow(power)
|
27
40
|
end
|
28
|
-
|
29
41
|
spec_f
|
30
42
|
end
|
31
43
|
|
@@ -240,6 +252,23 @@ module TorchAudio
|
|
240
252
|
Torch.pow(Torch.pow(10.0, db * 0.1), power) * ref
|
241
253
|
end
|
242
254
|
|
255
|
+
def create_dct(n_mfcc, n_mels, norm: nil)
|
256
|
+
n = Torch.arange(n_mels.to_f)
|
257
|
+
k = Torch.arange(n_mfcc.to_f).unsqueeze!(1)
|
258
|
+
dct = Torch.cos((n + 0.5) * k * Math::PI / n_mels.to_f)
|
259
|
+
|
260
|
+
if norm.nil?
|
261
|
+
dct *= 2.0
|
262
|
+
else
|
263
|
+
raise ArgumentError, "Invalid DCT norm value" unless norm == :ortho
|
264
|
+
|
265
|
+
dct[0] *= 1.0 / Math.sqrt(2.0)
|
266
|
+
dct *= Math.sqrt(2.0 / n_mels)
|
267
|
+
end
|
268
|
+
|
269
|
+
dct.t
|
270
|
+
end
|
271
|
+
|
243
272
|
private
|
244
273
|
|
245
274
|
def _apply_probability_distribution(waveform, density_function: "TPDF")
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module TorchAudio
|
2
|
+
module Transforms
|
3
|
+
class ComputeDeltas < Torch::NN::Module
|
4
|
+
def initialize(win_length: 5, mode: "replicate")
|
5
|
+
super()
|
6
|
+
@win_length = win_length
|
7
|
+
@mode = mode
|
8
|
+
end
|
9
|
+
|
10
|
+
def forward(specgram)
|
11
|
+
F.compute_deltas(specgram, win_length: @win_length, mode: @mode)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module TorchAudio
|
2
|
+
module Transforms
|
3
|
+
class Fade < Torch::NN::Module
|
4
|
+
def initialize(fade_in_len: 0, fade_out_len: 0, fade_shape: "linear")
|
5
|
+
super()
|
6
|
+
@fade_in_len = fade_in_len
|
7
|
+
@fade_out_len = fade_out_len
|
8
|
+
@fade_shape = fade_shape
|
9
|
+
end
|
10
|
+
|
11
|
+
def forward(waveform)
|
12
|
+
waveform_length = waveform.size[-1]
|
13
|
+
device = waveform.device
|
14
|
+
fade_in(waveform_length).to(device) * fade_out(waveform_length).to(device) * waveform
|
15
|
+
end
|
16
|
+
|
17
|
+
private
|
18
|
+
|
19
|
+
def fade_in(waveform_length)
|
20
|
+
fade = Torch.linspace(0, 1, @fade_in_len)
|
21
|
+
ones = Torch.ones(waveform_length - @fade_in_len)
|
22
|
+
|
23
|
+
if @fade_shape == "linear"
|
24
|
+
fade = fade
|
25
|
+
end
|
26
|
+
|
27
|
+
if @fade_shape == "exponential"
|
28
|
+
fade = Torch.pow(2, (fade - 1)) * fade
|
29
|
+
end
|
30
|
+
|
31
|
+
if @fade_shape == "logarithmic"
|
32
|
+
fade = Torch.log10(0.1 + fade) + 1
|
33
|
+
end
|
34
|
+
|
35
|
+
if @fade_shape == "quarter_sine"
|
36
|
+
fade = Torch.sin(fade * Math::PI / 2)
|
37
|
+
end
|
38
|
+
|
39
|
+
if @fade_shape == "half_sine"
|
40
|
+
fade = Torch.sin(fade * Math::PI - Math::PI / 2) / 2 + 0.5
|
41
|
+
end
|
42
|
+
|
43
|
+
Torch.cat([fade, ones]).clamp!(0, 1)
|
44
|
+
end
|
45
|
+
|
46
|
+
def fade_out(waveform_length)
|
47
|
+
fade = Torch.linspace(0, 1, @fade_out_len)
|
48
|
+
ones = Torch.ones(waveform_length - @fade_out_len)
|
49
|
+
|
50
|
+
if @fade_shape == "linear"
|
51
|
+
fade = - fade + 1
|
52
|
+
end
|
53
|
+
|
54
|
+
if @fade_shape == "exponential"
|
55
|
+
fade = Torch.pow(2, - fade) * (1 - fade)
|
56
|
+
end
|
57
|
+
|
58
|
+
if @fade_shape == "logarithmic"
|
59
|
+
fade = Torch.log10(1.1 - fade) + 1
|
60
|
+
end
|
61
|
+
|
62
|
+
if @fade_shape == "quarter_sine"
|
63
|
+
fade = Torch.sin(fade * Math::PI / 2 + Math::PI / 2)
|
64
|
+
end
|
65
|
+
|
66
|
+
if @fade_shape == "half_sine"
|
67
|
+
fade = Torch.sin(fade * Math::PI + Math::PI / 2) / 2 + 0.5
|
68
|
+
end
|
69
|
+
|
70
|
+
Torch.cat([ones, fade]).clamp!(0, 1)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
@@ -1,6 +1,8 @@
|
|
1
1
|
module TorchAudio
|
2
2
|
module Transforms
|
3
3
|
class MelSpectrogram < Torch::NN::Module
|
4
|
+
attr_reader :n_mels
|
5
|
+
|
4
6
|
def initialize(
|
5
7
|
sample_rate: 16000, n_fft: 400, win_length: nil, hop_length: nil, f_min: 0.0,
|
6
8
|
f_max: nil, pad: 0, n_mels: 128, window_fn: Torch.method(:hann_window),
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module TorchAudio
|
2
|
+
module Transforms
|
3
|
+
class MFCC < Torch::NN::Module
|
4
|
+
|
5
|
+
SUPPORTED_DCT_TYPES = [2]
|
6
|
+
|
7
|
+
def initialize(sample_rate: 16000, n_mfcc: 40, dct_type: 2, norm: :ortho, log_mels: false, melkwargs: {})
|
8
|
+
super()
|
9
|
+
|
10
|
+
raise ArgumentError, "DCT type not supported: #{dct_type}" unless SUPPORTED_DCT_TYPES.include?(dct_type)
|
11
|
+
|
12
|
+
@sample_rate = sample_rate
|
13
|
+
@n_mfcc = n_mfcc
|
14
|
+
@dct_type = dct_type
|
15
|
+
@norm = norm
|
16
|
+
@top_db = 80.0
|
17
|
+
@amplitude_to_db = TorchAudio::Transforms::AmplitudeToDB.new(stype: :power, top_db: @top_db)
|
18
|
+
|
19
|
+
@melspectrogram = TorchAudio::Transforms::MelSpectrogram.new(sample_rate: @sample_rate, **melkwargs)
|
20
|
+
|
21
|
+
raise ArgumentError, "Cannot select more MFCC coefficients than # mel bins" if @n_mfcc > @melspectrogram.n_mels
|
22
|
+
|
23
|
+
dct_mat = F.create_dct(@n_mfcc, @melspectrogram.n_mels, norm: @norm)
|
24
|
+
register_buffer('dct_mat', dct_mat)
|
25
|
+
|
26
|
+
@log_mels = log_mels
|
27
|
+
end
|
28
|
+
|
29
|
+
def forward(waveform)
|
30
|
+
mel_specgram = @melspectrogram.(waveform)
|
31
|
+
if @log_mels
|
32
|
+
mel_specgram = Torch.log(mel_specgram + 1e-6)
|
33
|
+
else
|
34
|
+
mel_specgram = @amplitude_to_db.(mel_specgram)
|
35
|
+
end
|
36
|
+
|
37
|
+
Torch
|
38
|
+
.matmul(mel_specgram.transpose(-2, -1), @dct_mat)
|
39
|
+
.transpose(-2, -1)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -3,7 +3,8 @@ module TorchAudio
|
|
3
3
|
class Spectrogram < Torch::NN::Module
|
4
4
|
def initialize(
|
5
5
|
n_fft: 400, win_length: nil, hop_length: nil, pad: 0,
|
6
|
-
window_fn: Torch.method(:hann_window), power: 2.0, normalized: false, wkwargs: nil
|
6
|
+
window_fn: Torch.method(:hann_window), power: 2.0, normalized: false, wkwargs: nil,
|
7
|
+
center: true, pad_mode: "reflect", onesided: true
|
7
8
|
)
|
8
9
|
|
9
10
|
super()
|
@@ -17,10 +18,16 @@ module TorchAudio
|
|
17
18
|
@pad = pad
|
18
19
|
@power = power
|
19
20
|
@normalized = normalized
|
21
|
+
@center = center
|
22
|
+
@pad_mode = pad_mode
|
23
|
+
@onesided = onesided
|
20
24
|
end
|
21
25
|
|
22
26
|
def forward(waveform)
|
23
|
-
F.spectrogram(
|
27
|
+
F.spectrogram(
|
28
|
+
waveform, @pad, @window, @n_fft, @hop_length, @win_length, @power, @normalized,
|
29
|
+
center: @center, pad_mode: @pad_mode, onesided: @onesided
|
30
|
+
)
|
24
31
|
end
|
25
32
|
end
|
26
33
|
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module TorchAudio
|
2
|
+
module Transforms
|
3
|
+
class Vol < Torch::NN::Module
|
4
|
+
def initialize(gain, gain_type: "amplitude")
|
5
|
+
super()
|
6
|
+
@gain = gain
|
7
|
+
@gain_type = gain_type
|
8
|
+
|
9
|
+
if ["amplitude", "power"].include?(gain_type) && gain < 0
|
10
|
+
raise ArgumentError, "If gain_type = amplitude or power, gain must be positive."
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def forward(waveform)
|
15
|
+
if @gain_type == "amplitude"
|
16
|
+
waveform = waveform * @gain
|
17
|
+
end
|
18
|
+
|
19
|
+
if @gain_type == "db"
|
20
|
+
waveform = F.gain(waveform, @gain)
|
21
|
+
end
|
22
|
+
|
23
|
+
if @gain_type == "power"
|
24
|
+
waveform = F.gain(waveform, 10 * Math.log10(@gain))
|
25
|
+
end
|
26
|
+
|
27
|
+
Torch.clamp(waveform, -1, 1)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
data/lib/torchaudio/version.rb
CHANGED
data/lib/torchaudio.rb
CHANGED
@@ -15,12 +15,16 @@ require "set"
|
|
15
15
|
require "torchaudio/datasets/utils"
|
16
16
|
require "torchaudio/datasets/yesno"
|
17
17
|
require "torchaudio/functional"
|
18
|
+
require "torchaudio/transforms/compute_deltas"
|
19
|
+
require "torchaudio/transforms/fade"
|
18
20
|
require "torchaudio/transforms/mel_scale"
|
19
21
|
require "torchaudio/transforms/mel_spectrogram"
|
20
22
|
require "torchaudio/transforms/mu_law_encoding"
|
21
23
|
require "torchaudio/transforms/mu_law_decoding"
|
22
24
|
require "torchaudio/transforms/spectrogram"
|
23
25
|
require "torchaudio/transforms/amplitude_to_db"
|
26
|
+
require "torchaudio/transforms/mfcc"
|
27
|
+
require "torchaudio/transforms/vol"
|
24
28
|
require "torchaudio/version"
|
25
29
|
|
26
30
|
module TorchAudio
|
@@ -91,7 +95,7 @@ module TorchAudio
|
|
91
95
|
end
|
92
96
|
|
93
97
|
def save_encinfo(filepath, src, channels_first: true, signalinfo: nil, encodinginfo: nil, filetype: nil)
|
94
|
-
ch_idx,
|
98
|
+
ch_idx, _len_idx = channels_first ? [0, 1] : [1, 0]
|
95
99
|
|
96
100
|
# check if save directory exists
|
97
101
|
abs_dirpath = File.dirname(File.expand_path(filepath))
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: torchaudio
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-07-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: torch-rb
|
@@ -16,28 +16,28 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.
|
19
|
+
version: 0.11.1
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.
|
26
|
+
version: 0.11.1
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rice
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 4.0.2
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 4.0.2
|
41
41
|
description:
|
42
42
|
email: andrew@ankane.org
|
43
43
|
executables: []
|
@@ -64,13 +64,17 @@ files:
|
|
64
64
|
- lib/torchaudio/datasets/yesno.rb
|
65
65
|
- lib/torchaudio/functional.rb
|
66
66
|
- lib/torchaudio/transforms/amplitude_to_db.rb
|
67
|
+
- lib/torchaudio/transforms/compute_deltas.rb
|
68
|
+
- lib/torchaudio/transforms/fade.rb
|
67
69
|
- lib/torchaudio/transforms/mel_scale.rb
|
68
70
|
- lib/torchaudio/transforms/mel_spectrogram.rb
|
71
|
+
- lib/torchaudio/transforms/mfcc.rb
|
69
72
|
- lib/torchaudio/transforms/mu_law_decoding.rb
|
70
73
|
- lib/torchaudio/transforms/mu_law_encoding.rb
|
71
74
|
- lib/torchaudio/transforms/spectrogram.rb
|
75
|
+
- lib/torchaudio/transforms/vol.rb
|
72
76
|
- lib/torchaudio/version.rb
|
73
|
-
homepage: https://github.com/ankane/torchaudio
|
77
|
+
homepage: https://github.com/ankane/torchaudio-ruby
|
74
78
|
licenses:
|
75
79
|
- BSD-2-Clause
|
76
80
|
metadata: {}
|
@@ -82,14 +86,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
82
86
|
requirements:
|
83
87
|
- - ">="
|
84
88
|
- !ruby/object:Gem::Version
|
85
|
-
version: '2.
|
89
|
+
version: '2.7'
|
86
90
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
87
91
|
requirements:
|
88
92
|
- - ">="
|
89
93
|
- !ruby/object:Gem::Version
|
90
94
|
version: '0'
|
91
95
|
requirements: []
|
92
|
-
rubygems_version: 3.
|
96
|
+
rubygems_version: 3.3.7
|
93
97
|
signing_key:
|
94
98
|
specification_version: 4
|
95
99
|
summary: Data manipulation and transformation for audio signal processing
|