torchaudio 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +26 -0
- data/README.md +93 -0
- data/ext/torchaudio/csrc/register.cpp +65 -0
- data/ext/torchaudio/csrc/sox.cpp +361 -0
- data/ext/torchaudio/csrc/sox.h +71 -0
- data/ext/torchaudio/csrc/sox_effects.cpp +54 -0
- data/ext/torchaudio/csrc/sox_effects.h +18 -0
- data/ext/torchaudio/csrc/sox_io.cpp +170 -0
- data/ext/torchaudio/csrc/sox_io.h +41 -0
- data/ext/torchaudio/csrc/sox_utils.cpp +245 -0
- data/ext/torchaudio/csrc/sox_utils.h +100 -0
- data/ext/torchaudio/ext.cpp +33 -0
- data/ext/torchaudio/extconf.rb +81 -0
- data/lib/torchaudio.rb +95 -0
- data/lib/torchaudio/datasets/utils.rb +92 -0
- data/lib/torchaudio/datasets/yesno.rb +59 -0
- data/lib/torchaudio/version.rb +3 -0
- metadata +145 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: b527976494325cc12e81342c25d318204d2d7c75bfba7036be4296769cdb30a0
|
4
|
+
data.tar.gz: 2cfde7bd1b0e7a1628818d5bd74657cfbfba6dfa83ef42897f3ad0f98e77f739
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8e6f34b014340b5ace3193ab589dae75ed0869ab7606402bd4b09de6042299e6f3a118d439dd381491f489ce9552bca4376a7d5b4693dddc3d1c5f5b26540900
|
7
|
+
data.tar.gz: d651c46f5185ceb70ae3d9c90154c77afe29a5c35854d1a9d98913096b7ab9ba39a745242dd268548ca87f9e109b56c96dee9dc5539cf066f9ad0f773eddbdcd
|
data/CHANGELOG.md
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
BSD 2-Clause License
|
2
|
+
|
3
|
+
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
|
4
|
+
Copyright (c) 2020 Andrew Kane,
|
5
|
+
All rights reserved.
|
6
|
+
|
7
|
+
Redistribution and use in source and binary forms, with or without
|
8
|
+
modification, are permitted provided that the following conditions are met:
|
9
|
+
|
10
|
+
* Redistributions of source code must retain the above copyright notice, this
|
11
|
+
list of conditions and the following disclaimer.
|
12
|
+
|
13
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
14
|
+
this list of conditions and the following disclaimer in the documentation
|
15
|
+
and/or other materials provided with the distribution.
|
16
|
+
|
17
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
18
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
19
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
20
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
21
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
22
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
23
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
24
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
25
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
26
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
# TorchAudio
|
2
|
+
|
3
|
+
:fire: An audio library for Torch.rb
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
First, [install SoX](#sox-installation). For Homebrew, use:
|
8
|
+
|
9
|
+
```sh
|
10
|
+
brew install sox
|
11
|
+
```
|
12
|
+
|
13
|
+
Add this line to your application’s Gemfile:
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
gem 'torchaudio'
|
17
|
+
```
|
18
|
+
|
19
|
+
## Getting Started
|
20
|
+
|
21
|
+
This library follows the [Python API](https://pytorch.org/audio/). Many methods and options are missing at the moment. PRs welcome!
|
22
|
+
|
23
|
+
## Datasets
|
24
|
+
|
25
|
+
Load a dataset
|
26
|
+
|
27
|
+
```ruby
|
28
|
+
TorchAudio::Datasets::YESNO.new(".", download: true)
|
29
|
+
```
|
30
|
+
|
31
|
+
Supported datasets are:
|
32
|
+
|
33
|
+
- [YESNO](http://www.openslr.org/1/)
|
34
|
+
|
35
|
+
## Disclaimer
|
36
|
+
|
37
|
+
This library downloads and prepares public datasets. We don’t host any datasets. Be sure to adhere to the license for each dataset.
|
38
|
+
|
39
|
+
If you’re a dataset owner and wish to update any details or remove it from this project, let us know.
|
40
|
+
|
41
|
+
## SoX Installation
|
42
|
+
|
43
|
+
### Mac
|
44
|
+
|
45
|
+
```sh
|
46
|
+
brew install sox
|
47
|
+
```
|
48
|
+
|
49
|
+
### Windows
|
50
|
+
|
51
|
+
TODO: Windows installation instructions have not been written yet.
|
52
|
+
|
53
|
+
### Ubuntu
|
54
|
+
|
55
|
+
```sh
|
56
|
+
sudo apt install sox libsox-dev libsox-fmt-all
|
57
|
+
```
|
58
|
+
|
59
|
+
### Travis CI
|
60
|
+
|
61
|
+
Add to `.travis.yml`:
|
62
|
+
|
63
|
+
```yml
|
64
|
+
addons:
|
65
|
+
apt:
|
66
|
+
packages:
|
67
|
+
- sox
|
68
|
+
- libsox-dev
|
69
|
+
- libsox-fmt-all
|
70
|
+
```
|
71
|
+
|
72
|
+
## History
|
73
|
+
|
74
|
+
View the [changelog](https://github.com/ankane/torchaudio/blob/master/CHANGELOG.md)
|
75
|
+
|
76
|
+
## Contributing
|
77
|
+
|
78
|
+
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
79
|
+
|
80
|
+
- [Report bugs](https://github.com/ankane/torchaudio/issues)
|
81
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/torchaudio/pulls)
|
82
|
+
- Write, clarify, or fix documentation
|
83
|
+
- Suggest or add new features
|
84
|
+
|
85
|
+
To get started with development:
|
86
|
+
|
87
|
+
```sh
|
88
|
+
git clone https://github.com/ankane/torchaudio.git
|
89
|
+
cd torchaudio
|
90
|
+
bundle install
|
91
|
+
bundle exec rake compile
|
92
|
+
bundle exec rake test
|
93
|
+
```
|
@@ -0,0 +1,65 @@
|
|
1
|
+
#ifndef TORCHAUDIO_REGISTER_H
|
2
|
+
#define TORCHAUDIO_REGISTER_H
|
3
|
+
|
4
|
+
#include <torchaudio/csrc/sox_effects.h>
|
5
|
+
#include <torchaudio/csrc/sox_io.h>
|
6
|
+
#include <torchaudio/csrc/sox_utils.h>
|
7
|
+
|
8
|
+
namespace torchaudio {
|
9
|
+
namespace {
|
10
|
+
|
11
|
+
////////////////////////////////////////////////////////////////////////////////
|
12
|
+
// sox_utils.h
|
13
|
+
////////////////////////////////////////////////////////////////////////////////
|
14
|
+
static auto registerTensorSignal =
|
15
|
+
torch::class_<sox_utils::TensorSignal>("torchaudio", "TensorSignal")
|
16
|
+
.def(torch::init<torch::Tensor, int64_t, bool>())
|
17
|
+
.def("get_tensor", &sox_utils::TensorSignal::getTensor)
|
18
|
+
.def("get_sample_rate", &sox_utils::TensorSignal::getSampleRate)
|
19
|
+
.def("get_channels_first", &sox_utils::TensorSignal::getChannelsFirst);
|
20
|
+
|
21
|
+
////////////////////////////////////////////////////////////////////////////////
|
22
|
+
// sox_io.h
|
23
|
+
////////////////////////////////////////////////////////////////////////////////
|
24
|
+
static auto registerSignalInfo =
|
25
|
+
torch::class_<sox_io::SignalInfo>("torchaudio", "SignalInfo")
|
26
|
+
.def("get_sample_rate", &sox_io::SignalInfo::getSampleRate)
|
27
|
+
.def("get_num_channels", &sox_io::SignalInfo::getNumChannels)
|
28
|
+
.def("get_num_frames", &sox_io::SignalInfo::getNumFrames);
|
29
|
+
|
30
|
+
static auto registerGetInfo = torch::RegisterOperators().op(
|
31
|
+
torch::RegisterOperators::options()
|
32
|
+
.schema(
|
33
|
+
"torchaudio::sox_io_get_info(str path) -> __torch__.torch.classes.torchaudio.SignalInfo info")
|
34
|
+
.catchAllKernel<decltype(sox_io::get_info), &sox_io::get_info>());
|
35
|
+
|
36
|
+
static auto registerLoadAudioFile = torch::RegisterOperators().op(
|
37
|
+
torch::RegisterOperators::options()
|
38
|
+
.schema(
|
39
|
+
"torchaudio::sox_io_load_audio_file(str path, int frame_offset, int num_frames, bool normalize, bool channels_first) -> __torch__.torch.classes.torchaudio.TensorSignal signal")
|
40
|
+
.catchAllKernel<
|
41
|
+
decltype(sox_io::load_audio_file),
|
42
|
+
&sox_io::load_audio_file>());
|
43
|
+
|
44
|
+
static auto registerSaveAudioFile = torch::RegisterOperators().op(
|
45
|
+
torch::RegisterOperators::options()
|
46
|
+
.schema(
|
47
|
+
"torchaudio::sox_io_save_audio_file(str path, __torch__.torch.classes.torchaudio.TensorSignal signal, float compression) -> ()")
|
48
|
+
.catchAllKernel<
|
49
|
+
decltype(sox_io::save_audio_file),
|
50
|
+
&sox_io::save_audio_file>());
|
51
|
+
|
52
|
+
////////////////////////////////////////////////////////////////////////////////
|
53
|
+
// sox_effects.h
|
54
|
+
////////////////////////////////////////////////////////////////////////////////
|
55
|
+
static auto registerSoxEffects =
|
56
|
+
torch::RegisterOperators(
|
57
|
+
"torchaudio::sox_effects_initialize_sox_effects",
|
58
|
+
&sox_effects::initialize_sox_effects)
|
59
|
+
.op("torchaudio::sox_effects_shutdown_sox_effects",
|
60
|
+
&sox_effects::shutdown_sox_effects)
|
61
|
+
.op("torchaudio::sox_effects_list_effects", &sox_effects::list_effects);
|
62
|
+
|
63
|
+
} // namespace
|
64
|
+
} // namespace torchaudio
|
65
|
+
#endif
|
@@ -0,0 +1,361 @@
|
|
1
|
+
#include <torchaudio/csrc/sox.h>
|
2
|
+
|
3
|
+
#include <algorithm>
|
4
|
+
#include <cstdint>
|
5
|
+
#include <stdexcept>
|
6
|
+
#include <vector>
|
7
|
+
|
8
|
+
namespace torch {
|
9
|
+
namespace audio {
|
10
|
+
namespace {
|
11
|
+
/// Helper struct to safely close the sox_format_t descriptor.
|
12
|
+
struct SoxDescriptor {
|
13
|
+
explicit SoxDescriptor(sox_format_t* fd) noexcept : fd_(fd) {}
|
14
|
+
SoxDescriptor(const SoxDescriptor& other) = delete;
|
15
|
+
SoxDescriptor(SoxDescriptor&& other) = delete;
|
16
|
+
SoxDescriptor& operator=(const SoxDescriptor& other) = delete;
|
17
|
+
SoxDescriptor& operator=(SoxDescriptor&& other) = delete;
|
18
|
+
~SoxDescriptor() {
|
19
|
+
if (fd_ != nullptr) {
|
20
|
+
sox_close(fd_);
|
21
|
+
}
|
22
|
+
}
|
23
|
+
sox_format_t* operator->() noexcept {
|
24
|
+
return fd_;
|
25
|
+
}
|
26
|
+
sox_format_t* get() noexcept {
|
27
|
+
return fd_;
|
28
|
+
}
|
29
|
+
|
30
|
+
private:
|
31
|
+
sox_format_t* fd_;
|
32
|
+
};
|
33
|
+
|
34
|
+
/// Converts all elements of `tensor` to sox_sample_t and hands them to
/// sox_write() in a single call.
///
/// The elements are read linearly from the tensor's data pointer.
///
/// @param fd      open SoX handle to write to
/// @param tensor  samples to write
/// @return the count reported by sox_write()
int64_t write_audio(SoxDescriptor& fd, at::Tensor tensor) {
  const auto sample_count = tensor.numel();
  std::vector<sox_sample_t> samples(sample_count);

  // Dispatch on the tensor's element type so each value is converted to
  // sox_sample_t while it is copied.
  AT_DISPATCH_ALL_TYPES(tensor.scalar_type(), "write_audio_buffer", [&] {
    const scalar_t* src = tensor.data_ptr<scalar_t>();
    std::copy_n(src, sample_count, samples.begin());
  });

  return sox_write(fd.get(), samples.data(), samples.size());
}
|
47
|
+
|
48
|
+
/// Reads up to `buffer_length` samples from `fd` and stores them in `output`.
///
/// `output` is resized in place to (samples_read / channels, channels) and
/// filled with the samples converted to the tensor's element type.
///
/// @throws std::runtime_error if sox_read() returns no samples
void read_audio(SoxDescriptor& fd, at::Tensor output, int64_t buffer_length) {
  const int channel_count = fd->signal.channels;
  std::vector<sox_sample_t> samples(buffer_length);

  const int64_t read_count = sox_read(fd.get(), samples.data(), buffer_length);
  if (read_count == 0) {
    throw std::runtime_error(
        "Error reading audio file: empty file or read failed in sox_read");
  }

  output.resize_({read_count / channel_count, channel_count});
  output = output.contiguous();

  // Dispatch on the tensor's element type so each sample is converted while
  // it is copied.
  AT_DISPATCH_ALL_TYPES(output.scalar_type(), "read_audio_buffer", [&] {
    scalar_t* dst = output.data_ptr<scalar_t>();
    std::copy_n(samples.begin(), read_count, dst);
  });
}
|
69
|
+
} // namespace
|
70
|
+
|
71
|
+
std::tuple<sox_signalinfo_t, sox_encodinginfo_t> get_info(
|
72
|
+
const std::string& file_name
|
73
|
+
) {
|
74
|
+
SoxDescriptor fd(sox_open_read(
|
75
|
+
file_name.c_str(),
|
76
|
+
/*signal=*/nullptr,
|
77
|
+
/*encoding=*/nullptr,
|
78
|
+
/*filetype=*/nullptr));
|
79
|
+
if (fd.get() == nullptr) {
|
80
|
+
throw std::runtime_error("Error opening audio file");
|
81
|
+
}
|
82
|
+
return std::make_tuple(fd->signal, fd->encoding);
|
83
|
+
}
|
84
|
+
|
85
|
+
int read_audio_file(
|
86
|
+
const std::string& file_name,
|
87
|
+
at::Tensor output,
|
88
|
+
bool ch_first,
|
89
|
+
int64_t nframes,
|
90
|
+
int64_t offset,
|
91
|
+
sox_signalinfo_t* si,
|
92
|
+
sox_encodinginfo_t* ei,
|
93
|
+
const char* ft) {
|
94
|
+
|
95
|
+
SoxDescriptor fd(sox_open_read(file_name.c_str(), si, ei, ft));
|
96
|
+
if (fd.get() == nullptr) {
|
97
|
+
throw std::runtime_error("Error opening audio file");
|
98
|
+
}
|
99
|
+
|
100
|
+
// signal info
|
101
|
+
|
102
|
+
const int number_of_channels = fd->signal.channels;
|
103
|
+
const int sample_rate = fd->signal.rate;
|
104
|
+
const int64_t total_length = fd->signal.length;
|
105
|
+
|
106
|
+
// multiply offset and number of frames by number of channels
|
107
|
+
offset *= number_of_channels;
|
108
|
+
nframes *= number_of_channels;
|
109
|
+
|
110
|
+
if (total_length == 0) {
|
111
|
+
throw std::runtime_error("Error reading audio file: unknown length");
|
112
|
+
}
|
113
|
+
if (offset > total_length) {
|
114
|
+
throw std::runtime_error("Offset past EOF");
|
115
|
+
}
|
116
|
+
|
117
|
+
// calculate buffer length
|
118
|
+
int64_t buffer_length = total_length;
|
119
|
+
if (offset > 0) {
|
120
|
+
buffer_length -= offset;
|
121
|
+
}
|
122
|
+
if (nframes > 0 && buffer_length > nframes) {
|
123
|
+
buffer_length = nframes;
|
124
|
+
}
|
125
|
+
|
126
|
+
// seek to offset point before reading data
|
127
|
+
if (sox_seek(fd.get(), offset, 0) == SOX_EOF) {
|
128
|
+
throw std::runtime_error("sox_seek reached EOF, try reducing offset or num_samples");
|
129
|
+
}
|
130
|
+
|
131
|
+
// read data and fill output tensor
|
132
|
+
read_audio(fd, output, buffer_length);
|
133
|
+
|
134
|
+
// L x C -> C x L, if desired
|
135
|
+
if (ch_first) {
|
136
|
+
output.transpose_(1, 0);
|
137
|
+
}
|
138
|
+
|
139
|
+
return sample_rate;
|
140
|
+
}
|
141
|
+
|
142
|
+
void write_audio_file(
|
143
|
+
const std::string& file_name,
|
144
|
+
const at::Tensor& tensor,
|
145
|
+
sox_signalinfo_t* si,
|
146
|
+
sox_encodinginfo_t* ei,
|
147
|
+
const char* file_type) {
|
148
|
+
if (!tensor.is_contiguous()) {
|
149
|
+
throw std::runtime_error(
|
150
|
+
"Error writing audio file: input tensor must be contiguous");
|
151
|
+
}
|
152
|
+
|
153
|
+
#if SOX_LIB_VERSION_CODE >= 918272 // >= 14.3.0
|
154
|
+
si->mult = nullptr;
|
155
|
+
#endif
|
156
|
+
|
157
|
+
SoxDescriptor fd(sox_open_write(
|
158
|
+
file_name.c_str(),
|
159
|
+
si,
|
160
|
+
ei,
|
161
|
+
file_type,
|
162
|
+
/*oob=*/nullptr,
|
163
|
+
/*overwrite=*/nullptr));
|
164
|
+
|
165
|
+
if (fd.get() == nullptr) {
|
166
|
+
throw std::runtime_error(
|
167
|
+
"Error writing audio file: could not open file for writing");
|
168
|
+
}
|
169
|
+
|
170
|
+
const auto samples_written = write_audio(fd, tensor);
|
171
|
+
|
172
|
+
if (samples_written != tensor.numel()) {
|
173
|
+
throw std::runtime_error(
|
174
|
+
"Error writing audio file: could not write entire buffer");
|
175
|
+
}
|
176
|
+
}
|
177
|
+
|
178
|
+
int build_flow_effects(const std::string& file_name,
|
179
|
+
at::Tensor otensor,
|
180
|
+
bool ch_first,
|
181
|
+
sox_signalinfo_t* target_signal,
|
182
|
+
sox_encodinginfo_t* target_encoding,
|
183
|
+
const char* file_type,
|
184
|
+
std::vector<SoxEffect> pyeffs,
|
185
|
+
int max_num_eopts) {
|
186
|
+
|
187
|
+
/* This function builds an effects flow and puts the results into a tensor.
|
188
|
+
It can also be used to re-encode audio using any of the available encoding
|
189
|
+
options in SoX including sample rate and channel re-encoding. */
|
190
|
+
|
191
|
+
// open input
|
192
|
+
sox_format_t* input = sox_open_read(file_name.c_str(), nullptr, nullptr, nullptr);
|
193
|
+
if (input == nullptr) {
|
194
|
+
throw std::runtime_error("Error opening audio file");
|
195
|
+
}
|
196
|
+
|
197
|
+
// only used if target signal or encoding are null
|
198
|
+
sox_signalinfo_t empty_signal;
|
199
|
+
sox_encodinginfo_t empty_encoding;
|
200
|
+
|
201
|
+
// set signalinfo and encodinginfo if blank
|
202
|
+
if(target_signal == nullptr) {
|
203
|
+
target_signal = &empty_signal;
|
204
|
+
target_signal->rate = input->signal.rate;
|
205
|
+
target_signal->channels = input->signal.channels;
|
206
|
+
target_signal->length = SOX_UNSPEC;
|
207
|
+
target_signal->precision = input->signal.precision;
|
208
|
+
#if SOX_LIB_VERSION_CODE >= 918272 // >= 14.3.0
|
209
|
+
target_signal->mult = nullptr;
|
210
|
+
#endif
|
211
|
+
}
|
212
|
+
if(target_encoding == nullptr) {
|
213
|
+
target_encoding = &empty_encoding;
|
214
|
+
target_encoding->encoding = SOX_ENCODING_SIGN2; // Sample format
|
215
|
+
target_encoding->bits_per_sample = input->signal.precision; // Bits per sample
|
216
|
+
target_encoding->compression = 0.0; // Compression factor
|
217
|
+
target_encoding->reverse_bytes = sox_option_default; // Should bytes be reversed
|
218
|
+
target_encoding->reverse_nibbles = sox_option_default; // Should nibbles be reversed
|
219
|
+
target_encoding->reverse_bits = sox_option_default; // Should bits be reversed (pairs of bits?)
|
220
|
+
target_encoding->opposite_endian = sox_false; // Reverse endianness
|
221
|
+
}
|
222
|
+
|
223
|
+
// check for rate or channels effect and change the output signalinfo accordingly
|
224
|
+
for (SoxEffect se : pyeffs) {
|
225
|
+
if (se.ename == "rate") {
|
226
|
+
target_signal->rate = std::stod(se.eopts[0]);
|
227
|
+
} else if (se.ename == "channels") {
|
228
|
+
target_signal->channels = std::stoi(se.eopts[0]);
|
229
|
+
}
|
230
|
+
}
|
231
|
+
|
232
|
+
// create interm_signal for effects, intermediate steps change this in-place
|
233
|
+
sox_signalinfo_t interm_signal = input->signal;
|
234
|
+
|
235
|
+
#ifdef __APPLE__
|
236
|
+
// According to Mozilla Deepspeech sox_open_memstream_write doesn't work
|
237
|
+
// with OSX
|
238
|
+
char tmp_name[] = "/tmp/fileXXXXXX";
|
239
|
+
int tmp_fd = mkstemp(tmp_name);
|
240
|
+
close(tmp_fd);
|
241
|
+
sox_format_t* output = sox_open_write(tmp_name, target_signal,
|
242
|
+
target_encoding, "wav", nullptr, nullptr);
|
243
|
+
#else
|
244
|
+
// create buffer and buffer_size for output in memwrite
|
245
|
+
char* buffer;
|
246
|
+
size_t buffer_size;
|
247
|
+
// in-memory descriptor (this may not work for OSX)
|
248
|
+
sox_format_t* output = sox_open_memstream_write(&buffer,
|
249
|
+
&buffer_size,
|
250
|
+
target_signal,
|
251
|
+
target_encoding,
|
252
|
+
file_type, nullptr);
|
253
|
+
#endif
|
254
|
+
if (output == nullptr) {
|
255
|
+
throw std::runtime_error("Error opening output memstream/temporary file");
|
256
|
+
}
|
257
|
+
// Setup the effects chain to decode/resample
|
258
|
+
sox_effects_chain_t* chain =
|
259
|
+
sox_create_effects_chain(&input->encoding, &output->encoding);
|
260
|
+
|
261
|
+
sox_effect_t* e = sox_create_effect(sox_find_effect("input"));
|
262
|
+
char* io_args[1];
|
263
|
+
io_args[0] = (char*)input;
|
264
|
+
sox_effect_options(e, 1, io_args);
|
265
|
+
sox_add_effect(chain, e, &interm_signal, &input->signal);
|
266
|
+
free(e);
|
267
|
+
|
268
|
+
for(SoxEffect tae : pyeffs) {
|
269
|
+
if(tae.ename == "no_effects") break;
|
270
|
+
e = sox_create_effect(sox_find_effect(tae.ename.c_str()));
|
271
|
+
e->global_info->global_info->verbosity = 1;
|
272
|
+
if(tae.eopts[0] == "") {
|
273
|
+
sox_effect_options(e, 0, nullptr);
|
274
|
+
} else {
|
275
|
+
int num_opts = tae.eopts.size();
|
276
|
+
char* sox_args[max_num_eopts];
|
277
|
+
for(std::vector<std::string>::size_type i = 0; i != tae.eopts.size(); i++) {
|
278
|
+
sox_args[i] = (char*) tae.eopts[i].c_str();
|
279
|
+
}
|
280
|
+
if(sox_effect_options(e, num_opts, sox_args) != SOX_SUCCESS) {
|
281
|
+
#ifdef __APPLE__
|
282
|
+
unlink(tmp_name);
|
283
|
+
#endif
|
284
|
+
throw std::runtime_error("invalid effect options, see SoX docs for details");
|
285
|
+
}
|
286
|
+
}
|
287
|
+
sox_add_effect(chain, e, &interm_signal, &output->signal);
|
288
|
+
free(e);
|
289
|
+
}
|
290
|
+
|
291
|
+
e = sox_create_effect(sox_find_effect("output"));
|
292
|
+
io_args[0] = (char*)output;
|
293
|
+
sox_effect_options(e, 1, io_args);
|
294
|
+
sox_add_effect(chain, e, &interm_signal, &output->signal);
|
295
|
+
free(e);
|
296
|
+
|
297
|
+
// Finally run the effects chain
|
298
|
+
sox_flow_effects(chain, nullptr, nullptr);
|
299
|
+
sox_delete_effects_chain(chain);
|
300
|
+
|
301
|
+
// Close sox handles, buffer does not get properly sized until these are closed
|
302
|
+
sox_close(output);
|
303
|
+
sox_close(input);
|
304
|
+
|
305
|
+
int sr;
|
306
|
+
// Read the in-memory audio buffer or temp file that we just wrote.
|
307
|
+
#ifdef __APPLE__
|
308
|
+
/*
|
309
|
+
Temporary filetype must have a valid header. Wav seems to work here while
|
310
|
+
raw does not. Certain effects like chorus caused strange behavior on the mac.
|
311
|
+
*/
|
312
|
+
// read_audio_file reads the temporary file and returns the sr and otensor
|
313
|
+
sr = read_audio_file(tmp_name, otensor, ch_first, 0, 0,
|
314
|
+
target_signal, target_encoding, "wav");
|
315
|
+
// delete temporary audio file
|
316
|
+
unlink(tmp_name);
|
317
|
+
#else
|
318
|
+
// Resize output tensor to desired dimensions, different effects result in output->signal.length,
|
319
|
+
// interm_signal.length and buffer size being inconsistent with the result of the file output.
|
320
|
+
// We prioritize in the order: output->signal.length > interm_signal.length > buffer_size
|
321
|
+
// Could be related to: https://sourceforge.net/p/sox/bugs/314/
|
322
|
+
int nc, ns;
|
323
|
+
if (output->signal.length == 0) {
|
324
|
+
// sometimes interm_signal length is extremely large, but the buffer_size
|
325
|
+
// is double the length of the output signal
|
326
|
+
if (interm_signal.length > (buffer_size * 10)) {
|
327
|
+
ns = buffer_size / 2;
|
328
|
+
} else {
|
329
|
+
ns = interm_signal.length;
|
330
|
+
}
|
331
|
+
nc = interm_signal.channels;
|
332
|
+
} else {
|
333
|
+
nc = output->signal.channels;
|
334
|
+
ns = output->signal.length;
|
335
|
+
}
|
336
|
+
otensor.resize_({ns/nc, nc});
|
337
|
+
otensor = otensor.contiguous();
|
338
|
+
|
339
|
+
input = sox_open_mem_read(buffer, buffer_size, target_signal, target_encoding, file_type);
|
340
|
+
std::vector<sox_sample_t> samples(buffer_size);
|
341
|
+
const int64_t samples_read = sox_read(input, samples.data(), buffer_size);
|
342
|
+
assert(samples_read != nc * ns && samples_read != 0);
|
343
|
+
AT_DISPATCH_ALL_TYPES(otensor.scalar_type(), "effects_buffer", [&] {
|
344
|
+
auto* data = otensor.data_ptr<scalar_t>();
|
345
|
+
std::copy(samples.begin(), samples.begin() + samples_read, data);
|
346
|
+
});
|
347
|
+
// free buffer and close mem_read
|
348
|
+
sox_close(input);
|
349
|
+
free(buffer);
|
350
|
+
|
351
|
+
if (ch_first) {
|
352
|
+
otensor.transpose_(1, 0);
|
353
|
+
}
|
354
|
+
sr = target_signal->rate;
|
355
|
+
|
356
|
+
#endif
|
357
|
+
// return sample rate, output tensor modified in-place
|
358
|
+
return sr;
|
359
|
+
}
|
360
|
+
} // namespace audio
|
361
|
+
} // namespace torch
|