torchaudio 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,361 +0,0 @@
1
- #include <torchaudio/csrc/sox.h>
2
-
3
- #include <algorithm>
4
- #include <cstdint>
5
- #include <stdexcept>
6
- #include <vector>
7
-
8
- namespace torch {
9
- namespace audio {
10
- namespace {
11
- /// Helper struct to safely close the sox_format_t descriptor.
12
- struct SoxDescriptor {
13
- explicit SoxDescriptor(sox_format_t* fd) noexcept : fd_(fd) {}
14
- SoxDescriptor(const SoxDescriptor& other) = delete;
15
- SoxDescriptor(SoxDescriptor&& other) = delete;
16
- SoxDescriptor& operator=(const SoxDescriptor& other) = delete;
17
- SoxDescriptor& operator=(SoxDescriptor&& other) = delete;
18
- ~SoxDescriptor() {
19
- if (fd_ != nullptr) {
20
- sox_close(fd_);
21
- }
22
- }
23
- sox_format_t* operator->() noexcept {
24
- return fd_;
25
- }
26
- sox_format_t* get() noexcept {
27
- return fd_;
28
- }
29
-
30
- private:
31
- sox_format_t* fd_;
32
- };
33
-
34
- int64_t write_audio(SoxDescriptor& fd, at::Tensor tensor) {
35
- std::vector<sox_sample_t> buffer(tensor.numel());
36
-
37
- AT_DISPATCH_ALL_TYPES(tensor.scalar_type(), "write_audio_buffer", [&] {
38
- auto* data = tensor.data_ptr<scalar_t>();
39
- std::copy(data, data + tensor.numel(), buffer.begin());
40
- });
41
-
42
- const auto samples_written =
43
- sox_write(fd.get(), buffer.data(), buffer.size());
44
-
45
- return samples_written;
46
- }
47
-
48
- void read_audio(
49
- SoxDescriptor& fd,
50
- at::Tensor output,
51
- int64_t buffer_length) {
52
- std::vector<sox_sample_t> buffer(buffer_length);
53
-
54
- int number_of_channels = fd->signal.channels;
55
- const int64_t samples_read = sox_read(fd.get(), buffer.data(), buffer_length);
56
- if (samples_read == 0) {
57
- throw std::runtime_error(
58
- "Error reading audio file: empty file or read failed in sox_read");
59
- }
60
-
61
- output.resize_({samples_read / number_of_channels, number_of_channels});
62
- output = output.contiguous();
63
-
64
- AT_DISPATCH_ALL_TYPES(output.scalar_type(), "read_audio_buffer", [&] {
65
- auto* data = output.data_ptr<scalar_t>();
66
- std::copy(buffer.begin(), buffer.begin() + samples_read, data);
67
- });
68
- }
69
- } // namespace
70
-
71
- std::tuple<sox_signalinfo_t, sox_encodinginfo_t> get_info(
72
- const std::string& file_name
73
- ) {
74
- SoxDescriptor fd(sox_open_read(
75
- file_name.c_str(),
76
- /*signal=*/nullptr,
77
- /*encoding=*/nullptr,
78
- /*filetype=*/nullptr));
79
- if (fd.get() == nullptr) {
80
- throw std::runtime_error("Error opening audio file");
81
- }
82
- return std::make_tuple(fd->signal, fd->encoding);
83
- }
84
-
85
- int read_audio_file(
86
- const std::string& file_name,
87
- at::Tensor output,
88
- bool ch_first,
89
- int64_t nframes,
90
- int64_t offset,
91
- sox_signalinfo_t* si,
92
- sox_encodinginfo_t* ei,
93
- const char* ft) {
94
-
95
- SoxDescriptor fd(sox_open_read(file_name.c_str(), si, ei, ft));
96
- if (fd.get() == nullptr) {
97
- throw std::runtime_error("Error opening audio file");
98
- }
99
-
100
- // signal info
101
-
102
- const int number_of_channels = fd->signal.channels;
103
- const int sample_rate = fd->signal.rate;
104
- const int64_t total_length = fd->signal.length;
105
-
106
- // multiply offset and number of frames by number of channels
107
- offset *= number_of_channels;
108
- nframes *= number_of_channels;
109
-
110
- if (total_length == 0) {
111
- throw std::runtime_error("Error reading audio file: unknown length");
112
- }
113
- if (offset > total_length) {
114
- throw std::runtime_error("Offset past EOF");
115
- }
116
-
117
- // calculate buffer length
118
- int64_t buffer_length = total_length;
119
- if (offset > 0) {
120
- buffer_length -= offset;
121
- }
122
- if (nframes > 0 && buffer_length > nframes) {
123
- buffer_length = nframes;
124
- }
125
-
126
- // seek to offset point before reading data
127
- if (sox_seek(fd.get(), offset, 0) == SOX_EOF) {
128
- throw std::runtime_error("sox_seek reached EOF, try reducing offset or num_samples");
129
- }
130
-
131
- // read data and fill output tensor
132
- read_audio(fd, output, buffer_length);
133
-
134
- // L x C -> C x L, if desired
135
- if (ch_first) {
136
- output.transpose_(1, 0);
137
- }
138
-
139
- return sample_rate;
140
- }
141
-
142
- void write_audio_file(
143
- const std::string& file_name,
144
- const at::Tensor& tensor,
145
- sox_signalinfo_t* si,
146
- sox_encodinginfo_t* ei,
147
- const char* file_type) {
148
- if (!tensor.is_contiguous()) {
149
- throw std::runtime_error(
150
- "Error writing audio file: input tensor must be contiguous");
151
- }
152
-
153
- #if SOX_LIB_VERSION_CODE >= 918272 // >= 14.3.0
154
- si->mult = nullptr;
155
- #endif
156
-
157
- SoxDescriptor fd(sox_open_write(
158
- file_name.c_str(),
159
- si,
160
- ei,
161
- file_type,
162
- /*oob=*/nullptr,
163
- /*overwrite=*/nullptr));
164
-
165
- if (fd.get() == nullptr) {
166
- throw std::runtime_error(
167
- "Error writing audio file: could not open file for writing");
168
- }
169
-
170
- const auto samples_written = write_audio(fd, tensor);
171
-
172
- if (samples_written != tensor.numel()) {
173
- throw std::runtime_error(
174
- "Error writing audio file: could not write entire buffer");
175
- }
176
- }
177
-
178
- int build_flow_effects(const std::string& file_name,
179
- at::Tensor otensor,
180
- bool ch_first,
181
- sox_signalinfo_t* target_signal,
182
- sox_encodinginfo_t* target_encoding,
183
- const char* file_type,
184
- std::vector<SoxEffect> pyeffs,
185
- int max_num_eopts) {
186
-
187
- /* This function builds an effects flow and puts the results into a tensor.
188
- It can also be used to re-encode audio using any of the available encoding
189
- options in SoX including sample rate and channel re-encoding. */
190
-
191
- // open input
192
- sox_format_t* input = sox_open_read(file_name.c_str(), nullptr, nullptr, nullptr);
193
- if (input == nullptr) {
194
- throw std::runtime_error("Error opening audio file");
195
- }
196
-
197
- // only used if target signal or encoding are null
198
- sox_signalinfo_t empty_signal;
199
- sox_encodinginfo_t empty_encoding;
200
-
201
- // set signalinfo and encodinginfo if blank
202
- if(target_signal == nullptr) {
203
- target_signal = &empty_signal;
204
- target_signal->rate = input->signal.rate;
205
- target_signal->channels = input->signal.channels;
206
- target_signal->length = SOX_UNSPEC;
207
- target_signal->precision = input->signal.precision;
208
- #if SOX_LIB_VERSION_CODE >= 918272 // >= 14.3.0
209
- target_signal->mult = nullptr;
210
- #endif
211
- }
212
- if(target_encoding == nullptr) {
213
- target_encoding = &empty_encoding;
214
- target_encoding->encoding = SOX_ENCODING_SIGN2; // Sample format
215
- target_encoding->bits_per_sample = input->signal.precision; // Bits per sample
216
- target_encoding->compression = 0.0; // Compression factor
217
- target_encoding->reverse_bytes = sox_option_default; // Should bytes be reversed
218
- target_encoding->reverse_nibbles = sox_option_default; // Should nibbles be reversed
219
- target_encoding->reverse_bits = sox_option_default; // Should bits be reversed (pairs of bits?)
220
- target_encoding->opposite_endian = sox_false; // Reverse endianness
221
- }
222
-
223
- // check for rate or channels effect and change the output signalinfo accordingly
224
- for (SoxEffect se : pyeffs) {
225
- if (se.ename == "rate") {
226
- target_signal->rate = std::stod(se.eopts[0]);
227
- } else if (se.ename == "channels") {
228
- target_signal->channels = std::stoi(se.eopts[0]);
229
- }
230
- }
231
-
232
- // create interm_signal for effects, intermediate steps change this in-place
233
- sox_signalinfo_t interm_signal = input->signal;
234
-
235
- #ifdef __APPLE__
236
- // According to Mozilla Deepspeech sox_open_memstream_write doesn't work
237
- // with OSX
238
- char tmp_name[] = "/tmp/fileXXXXXX";
239
- int tmp_fd = mkstemp(tmp_name);
240
- close(tmp_fd);
241
- sox_format_t* output = sox_open_write(tmp_name, target_signal,
242
- target_encoding, "wav", nullptr, nullptr);
243
- #else
244
- // create buffer and buffer_size for output in memwrite
245
- char* buffer;
246
- size_t buffer_size;
247
- // in-memory descriptor (this may not work for OSX)
248
- sox_format_t* output = sox_open_memstream_write(&buffer,
249
- &buffer_size,
250
- target_signal,
251
- target_encoding,
252
- file_type, nullptr);
253
- #endif
254
- if (output == nullptr) {
255
- throw std::runtime_error("Error opening output memstream/temporary file");
256
- }
257
- // Setup the effects chain to decode/resample
258
- sox_effects_chain_t* chain =
259
- sox_create_effects_chain(&input->encoding, &output->encoding);
260
-
261
- sox_effect_t* e = sox_create_effect(sox_find_effect("input"));
262
- char* io_args[1];
263
- io_args[0] = (char*)input;
264
- sox_effect_options(e, 1, io_args);
265
- sox_add_effect(chain, e, &interm_signal, &input->signal);
266
- free(e);
267
-
268
- for(SoxEffect tae : pyeffs) {
269
- if(tae.ename == "no_effects") break;
270
- e = sox_create_effect(sox_find_effect(tae.ename.c_str()));
271
- e->global_info->global_info->verbosity = 1;
272
- if(tae.eopts[0] == "") {
273
- sox_effect_options(e, 0, nullptr);
274
- } else {
275
- int num_opts = tae.eopts.size();
276
- char* sox_args[max_num_eopts];
277
- for(std::vector<std::string>::size_type i = 0; i != tae.eopts.size(); i++) {
278
- sox_args[i] = (char*) tae.eopts[i].c_str();
279
- }
280
- if(sox_effect_options(e, num_opts, sox_args) != SOX_SUCCESS) {
281
- #ifdef __APPLE__
282
- unlink(tmp_name);
283
- #endif
284
- throw std::runtime_error("invalid effect options, see SoX docs for details");
285
- }
286
- }
287
- sox_add_effect(chain, e, &interm_signal, &output->signal);
288
- free(e);
289
- }
290
-
291
- e = sox_create_effect(sox_find_effect("output"));
292
- io_args[0] = (char*)output;
293
- sox_effect_options(e, 1, io_args);
294
- sox_add_effect(chain, e, &interm_signal, &output->signal);
295
- free(e);
296
-
297
- // Finally run the effects chain
298
- sox_flow_effects(chain, nullptr, nullptr);
299
- sox_delete_effects_chain(chain);
300
-
301
- // Close sox handles, buffer does not get properly sized until these are closed
302
- sox_close(output);
303
- sox_close(input);
304
-
305
- int sr;
306
- // Read the in-memory audio buffer or temp file that we just wrote.
307
- #ifdef __APPLE__
308
- /*
309
- Temporary filetype must have a valid header. Wav seems to work here while
310
- raw does not. Certain effects like chorus caused strange behavior on the mac.
311
- */
312
- // read_audio_file reads the temporary file and returns the sr and otensor
313
- sr = read_audio_file(tmp_name, otensor, ch_first, 0, 0,
314
- target_signal, target_encoding, "wav");
315
- // delete temporary audio file
316
- unlink(tmp_name);
317
- #else
318
- // Resize output tensor to desired dimensions, different effects result in output->signal.length,
319
- // interm_signal.length and buffer size being inconsistent with the result of the file output.
320
- // We prioritize in the order: output->signal.length > interm_signal.length > buffer_size
321
- // Could be related to: https://sourceforge.net/p/sox/bugs/314/
322
- int nc, ns;
323
- if (output->signal.length == 0) {
324
- // sometimes interm_signal length is extremely large, but the buffer_size
325
- // is double the length of the output signal
326
- if (interm_signal.length > (buffer_size * 10)) {
327
- ns = buffer_size / 2;
328
- } else {
329
- ns = interm_signal.length;
330
- }
331
- nc = interm_signal.channels;
332
- } else {
333
- nc = output->signal.channels;
334
- ns = output->signal.length;
335
- }
336
- otensor.resize_({ns/nc, nc});
337
- otensor = otensor.contiguous();
338
-
339
- input = sox_open_mem_read(buffer, buffer_size, target_signal, target_encoding, file_type);
340
- std::vector<sox_sample_t> samples(buffer_size);
341
- const int64_t samples_read = sox_read(input, samples.data(), buffer_size);
342
- assert(samples_read != nc * ns && samples_read != 0);
343
- AT_DISPATCH_ALL_TYPES(otensor.scalar_type(), "effects_buffer", [&] {
344
- auto* data = otensor.data_ptr<scalar_t>();
345
- std::copy(samples.begin(), samples.begin() + samples_read, data);
346
- });
347
- // free buffer and close mem_read
348
- sox_close(input);
349
- free(buffer);
350
-
351
- if (ch_first) {
352
- otensor.transpose_(1, 0);
353
- }
354
- sr = target_signal->rate;
355
-
356
- #endif
357
- // return sample rate, output tensor modified in-place
358
- return sr;
359
- }
360
- } // namespace audio
361
- } // namespace torch
@@ -1,71 +0,0 @@
1
- #include <sox.h>
2
-
3
- #include <string>
4
- #include <tuple>
5
- #include <vector>
6
- #include <unistd.h>
7
-
8
- // same as <torch/extension.h> without <torch/python.h>
9
- #include <torch/all.h>
10
-
11
- namespace at {
12
- struct Tensor;
13
- } // namespace at
14
-
15
- namespace torch { namespace audio {
16
-
17
- /// Reads an audio file from the given `file_name` into the `output` `Tensor` and
18
- /// returns the sample rate of the audio file.
19
- /// Throws `std::runtime_error` if the audio file could not be opened, or an
20
- /// error occurred during reading of the audio data.
21
- int read_audio_file(
22
- const std::string& file_name,
23
- at::Tensor output,
24
- bool ch_first,
25
- int64_t nframes,
26
- int64_t offset,
27
- sox_signalinfo_t* si,
28
- sox_encodinginfo_t* ei,
29
- const char* ft);
30
-
31
- /// Writes the data of a `Tensor` into an audio file at the given `file_name`, with
32
- /// a certain extension (e.g. `wav` or `mp3`) and sample rate.
33
- /// Throws `std::runtime_error` when the audio file could not be opened for
34
- /// writing, or an error occurred during writing of the audio data.
35
- void write_audio_file(
36
- const std::string& file_name,
37
- const at::Tensor& tensor,
38
- sox_signalinfo_t* si,
39
- sox_encodinginfo_t* ei,
40
- const char* file_type);
41
-
42
- /// Reads an audio file from the given `file_name` and returns a tuple of
43
- /// sox_signalinfo_t and sox_encodinginfo_t, which contain information about
44
- /// the audio file such as sample rate, length, bit precision, encoding and more.
45
- /// Throws `std::runtime_error` if the audio file could not be opened, or an
46
- /// error occurred during reading of the audio data.
47
- std::tuple<sox_signalinfo_t, sox_encodinginfo_t> get_info(
48
- const std::string& file_name);
49
-
50
/// One entry of the effects list handed to build_flow_effects.
/// A default-constructed effect has an empty name and a single empty option.
struct SoxEffect {
  SoxEffect() : ename(), eopts(1, std::string()) {}

  /// Effect name.
  std::string ename;
  /// Option strings for the effect.
  std::vector<std::string> eopts;
};
56
-
57
- /// Build a SoX chain, flow the effects, and capture the results in a tensor.
58
- /// An audio file from the given `file_name` flows through an effects chain given
59
- /// by a list of effects and effect options to an output buffer which is encoded
60
- /// into memory to a target signal type and target signal encoding. The resulting
61
- /// buffer is then placed into `otensor`, which is modified in place. This
61
- /// function returns the sample rate of the output tensor.
63
- int build_flow_effects(const std::string& file_name,
64
- at::Tensor otensor,
65
- bool ch_first,
66
- sox_signalinfo_t* target_signal,
67
- sox_encodinginfo_t* target_encoding,
68
- const char* file_type,
69
- std::vector<SoxEffect> pyeffs,
70
- int max_num_eopts);
71
- }} // namespace torch::audio
@@ -1,54 +0,0 @@
1
- #include <sox.h>
2
- #include <torchaudio/csrc/sox_effects.h>
3
-
4
- using namespace torch::indexing;
5
-
6
- namespace torchaudio {
7
- namespace sox_effects {
8
-
9
namespace {

/// States the SoX effects resources move through in this file.
enum SoxEffectsResourceState {
  NotInitialized,
  Initialized,
  ShutDown,
};

/// Current state; starts out as NotInitialized.
SoxEffectsResourceState SOX_RESOURCE_STATE = NotInitialized;

} // namespace
15
-
16
- void initialize_sox_effects() {
17
- if (SOX_RESOURCE_STATE == ShutDown) {
18
- throw std::runtime_error(
19
- "SoX Effects has been shut down. Cannot initialize again.");
20
- }
21
- if (SOX_RESOURCE_STATE == NotInitialized) {
22
- if (sox_init() != SOX_SUCCESS) {
23
- throw std::runtime_error("Failed to initialize sox effects.");
24
- };
25
- SOX_RESOURCE_STATE = Initialized;
26
- }
27
- };
28
-
29
- void shutdown_sox_effects() {
30
- if (SOX_RESOURCE_STATE == NotInitialized) {
31
- throw std::runtime_error(
32
- "SoX Effects is not initialized. Cannot shutdown.");
33
- }
34
- if (SOX_RESOURCE_STATE == Initialized) {
35
- if (sox_quit() != SOX_SUCCESS) {
36
- throw std::runtime_error("Failed to initialize sox effects.");
37
- };
38
- SOX_RESOURCE_STATE = ShutDown;
39
- }
40
- }
41
-
42
- std::vector<std::string> list_effects() {
43
- std::vector<std::string> names;
44
- const sox_effect_fn_t* fns = sox_get_effect_fns();
45
- for (int i = 0; fns[i]; ++i) {
46
- const sox_effect_handler_t* handler = fns[i]();
47
- if (handler && handler->name)
48
- names.push_back(handler->name);
49
- }
50
- return names;
51
- }
52
-
53
- } // namespace sox_effects
54
- } // namespace torchaudio
@@ -1,18 +0,0 @@
1
- #ifndef TORCHAUDIO_SOX_EFFECTS_H
2
- #define TORCHAUDIO_SOX_EFFECTS_H
3
-
4
- #include <torch/script.h>
5
-
6
- namespace torchaudio {
7
- namespace sox_effects {
8
-
9
- void initialize_sox_effects();
10
-
11
- void shutdown_sox_effects();
12
-
13
- std::vector<std::string> list_effects();
14
-
15
- } // namespace sox_effects
16
- } // namespace torchaudio
17
-
18
- #endif
@@ -1,170 +0,0 @@
1
- #include <sox.h>
2
- #include <torchaudio/csrc/sox_io.h>
3
- #include <torchaudio/csrc/sox_utils.h>
4
-
5
- using namespace torch::indexing;
6
- using namespace torchaudio::sox_utils;
7
-
8
- namespace torchaudio {
9
- namespace sox_io {
10
-
11
- SignalInfo::SignalInfo(
12
- const int64_t sample_rate_,
13
- const int64_t num_channels_,
14
- const int64_t num_frames_)
15
- : sample_rate(sample_rate_),
16
- num_channels(num_channels_),
17
- num_frames(num_frames_){};
18
-
19
- int64_t SignalInfo::getSampleRate() const {
20
- return sample_rate;
21
- }
22
-
23
- int64_t SignalInfo::getNumChannels() const {
24
- return num_channels;
25
- }
26
-
27
- int64_t SignalInfo::getNumFrames() const {
28
- return num_frames;
29
- }
30
-
31
- c10::intrusive_ptr<SignalInfo> get_info(const std::string& path) {
32
- SoxFormat sf(sox_open_read(
33
- path.c_str(),
34
- /*signal=*/nullptr,
35
- /*encoding=*/nullptr,
36
- /*filetype=*/nullptr));
37
-
38
- if (static_cast<sox_format_t*>(sf) == nullptr) {
39
- throw std::runtime_error("Error opening audio file");
40
- }
41
-
42
- return c10::make_intrusive<SignalInfo>(
43
- static_cast<int64_t>(sf->signal.rate),
44
- static_cast<int64_t>(sf->signal.channels),
45
- static_cast<int64_t>(sf->signal.length / sf->signal.channels));
46
- }
47
-
48
- c10::intrusive_ptr<TensorSignal> load_audio_file(
49
- const std::string& path,
50
- const int64_t frame_offset,
51
- const int64_t num_frames,
52
- const bool normalize,
53
- const bool channels_first) {
54
- if (frame_offset < 0) {
55
- throw std::runtime_error(
56
- "Invalid argument: frame_offset must be non-negative.");
57
- }
58
- if (num_frames == 0 || num_frames < -1) {
59
- throw std::runtime_error(
60
- "Invalid argument: num_frames must be -1 or greater than 0.");
61
- }
62
-
63
- SoxFormat sf(sox_open_read(
64
- path.c_str(),
65
- /*signal=*/nullptr,
66
- /*encoding=*/nullptr,
67
- /*filetype=*/nullptr));
68
-
69
- validate_input_file(sf);
70
-
71
- const int64_t num_channels = sf->signal.channels;
72
- const int64_t num_total_samples = sf->signal.length;
73
- const int64_t sample_start = sf->signal.channels * frame_offset;
74
-
75
- if (sox_seek(sf, sample_start, 0) == SOX_EOF) {
76
- throw std::runtime_error("Error reading audio file: offset past EOF.");
77
- }
78
-
79
- const int64_t sample_end = [&]() {
80
- if (num_frames == -1)
81
- return num_total_samples;
82
- const int64_t sample_end_ = num_channels * num_frames + sample_start;
83
- if (num_total_samples < sample_end_) {
84
- // For lossy encoding, it is difficult to predict exact size of buffer for
85
- // reading the number of samples required.
86
- // So we allocate buffer size of given `num_frames` and ask sox to read as
87
- // much as possible. For lossless format, sox reads exact number of
88
- // samples, but for lossy encoding, sox can end up reading less. (i.e.
89
- // mp3) For the consistent behavior specification between lossy/lossless
90
- // format, we allow users to provide `num_frames` value that exceeds #of
91
- // available samples, and we adjust it here.
92
- return num_total_samples;
93
- }
94
- return sample_end_;
95
- }();
96
-
97
- const int64_t max_samples = sample_end - sample_start;
98
-
99
- // Read samples into buffer
100
- std::vector<sox_sample_t> buffer;
101
- buffer.reserve(max_samples);
102
- const int64_t num_samples = sox_read(sf, buffer.data(), max_samples);
103
- if (num_samples == 0) {
104
- throw std::runtime_error(
105
- "Error reading audio file: empty file or read operation failed.");
106
- }
107
- // NOTE: num_samples may be smaller than max_samples if the input
108
- // format is compressed (i.e. mp3).
109
-
110
- // Convert to Tensor
111
- auto tensor = convert_to_tensor(
112
- buffer.data(),
113
- num_samples,
114
- num_channels,
115
- get_dtype(sf->encoding.encoding, sf->signal.precision),
116
- normalize,
117
- channels_first);
118
-
119
- return c10::make_intrusive<TensorSignal>(
120
- tensor, static_cast<int64_t>(sf->signal.rate), channels_first);
121
- }
122
-
123
- void save_audio_file(
124
- const std::string& file_name,
125
- const c10::intrusive_ptr<TensorSignal>& signal,
126
- const double compression) {
127
- const auto tensor = signal->getTensor();
128
- const auto sample_rate = signal->getSampleRate();
129
- const auto channels_first = signal->getChannelsFirst();
130
-
131
- validate_input_tensor(tensor);
132
-
133
- const auto filetype = get_filetype(file_name);
134
- const auto signal_info =
135
- get_signalinfo(tensor, sample_rate, channels_first, filetype);
136
- const auto encoding_info =
137
- get_encodinginfo(filetype, tensor.dtype(), compression);
138
-
139
- SoxFormat sf(sox_open_write(
140
- file_name.c_str(),
141
- &signal_info,
142
- &encoding_info,
143
- /*filetype=*/filetype.c_str(),
144
- /*oob=*/nullptr,
145
- /*overwrite_permitted=*/nullptr));
146
-
147
- if (static_cast<sox_format_t*>(sf) == nullptr) {
148
- throw std::runtime_error("Error saving audio file: failed to open file.");
149
- }
150
-
151
- auto tensor_ = tensor;
152
- if (channels_first) {
153
- tensor_ = tensor_.t();
154
- }
155
-
156
- const int64_t frames_per_chunk = 65536;
157
- for (int64_t i = 0; i < tensor_.size(0); i += frames_per_chunk) {
158
- auto chunk = tensor_.index({Slice(i, i + frames_per_chunk), Slice()});
159
- chunk = unnormalize_wav(chunk).contiguous();
160
-
161
- const size_t numel = chunk.numel();
162
- if (sox_write(sf, chunk.data_ptr<int32_t>(), numel) != numel) {
163
- throw std::runtime_error(
164
- "Error saving audio file: failed to write the entier buffer.");
165
- }
166
- }
167
- }
168
-
169
- } // namespace sox_io
170
- } // namespace torchaudio