RubyGems - torchaudio - Versions diffs - 0.4.0 → 0.5.0 - Mend

torchaudio 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +11 -0
data/LICENSE.txt +1 -1
data/README.md +3 -47
data/lib/torchaudio/transforms/mel_spectrogram.rb +12 -4
data/lib/torchaudio/transforms/spectrogram.rb +11 -4
data/lib/torchaudio/version.rb +1 -1
data/lib/torchaudio.rb +113 -99
metadata +5 -35
data/ext/torchaudio/csrc/register.cpp +0 -65
data/ext/torchaudio/csrc/sox.cpp +0 -361
data/ext/torchaudio/csrc/sox.h +0 -71
data/ext/torchaudio/csrc/sox_effects.cpp +0 -54
data/ext/torchaudio/csrc/sox_effects.h +0 -18
data/ext/torchaudio/csrc/sox_io.cpp +0 -170
data/ext/torchaudio/csrc/sox_io.h +0 -41
data/ext/torchaudio/csrc/sox_utils.cpp +0 -245
data/ext/torchaudio/csrc/sox_utils.h +0 -100
data/ext/torchaudio/ext.cpp +0 -33
data/ext/torchaudio/extconf.rb +0 -79

data/ext/torchaudio/csrc/sox_io.h DELETED Viewed

@@ -1,41 +0,0 @@
-#ifndef TORCHAUDIO_SOX_IO_H
-#define TORCHAUDIO_SOX_IO_H
-#include <torch/script.h>
-#include <torchaudio/csrc/sox_utils.h>
-namespace torchaudio {
-namespace sox_io {
-struct SignalInfo : torch::CustomClassHolder { // Holds three integers: sample rate, channel count and frame count.
-  int64_t sample_rate;
-  int64_t num_channels;
-  int64_t num_frames;
-  SignalInfo( // One argument per field; the definition is not part of this header.
-      const int64_t sample_rate_,
-      const int64_t num_channels_,
-      const int64_t num_frames_);
-  int64_t getSampleRate() const; // Accessors are only declared here; presumably each returns the matching field.
-  int64_t getNumChannels() const;
-  int64_t getNumFrames() const;
-};
-c10::intrusive_ptr<SignalInfo> get_info(const std::string& path); // Presumably describes the audio file at `path`; definition not shown here.
-c10::intrusive_ptr<torchaudio::sox_utils::TensorSignal> load_audio_file( // Returns a ref-counted TensorSignal; definition not shown here.
-    const std::string& path,
-    const int64_t frame_offset = 0,
-    const int64_t num_frames = -1, // NOTE(review): -1 presumably means "read to the end" -- confirm in sox_io.cpp
-    const bool normalize = true,
-    const bool channels_first = true);
-void save_audio_file( // Takes the signal by const reference to its intrusive_ptr; definition not shown here.
-    const std::string& file_name,
-    const c10::intrusive_ptr<torchaudio::sox_utils::TensorSignal>& signal,
-    const double compression = 0.);
-} // namespace sox_io
-} // namespace torchaudio
-#endif

data/ext/torchaudio/csrc/sox_utils.cpp DELETED Viewed

@@ -1,245 +0,0 @@
-#include <c10/core/ScalarType.h>
-#include <sox.h>
-#include <torchaudio/csrc/sox_utils.h>
-namespace torchaudio {
-namespace sox_utils {
-TensorSignal::TensorSignal(
-    torch::Tensor tensor_,
-    int64_t sample_rate_,
-    bool channels_first_)
-    : tensor(tensor_),
-      sample_rate(sample_rate_),
-      channels_first(channels_first_){};
-/// Returns a copy of the stored tensor handle.
-torch::Tensor TensorSignal::getTensor() const {
-  return this->tensor;
-}
-/// Returns the stored sample rate.
-int64_t TensorSignal::getSampleRate() const {
-  return this->sample_rate;
-}
-/// Returns the stored channel-layout flag.
-bool TensorSignal::getChannelsFirst() const {
-  return this->channels_first;
-}
-SoxFormat::SoxFormat(sox_format_t* fd) noexcept : fd_(fd) {}
-/// Closes the wrapped handle with sox_close; a null handle is left alone.
-SoxFormat::~SoxFormat() {
-  if (fd_ == nullptr) {
-    return;
-  }
-  sox_close(fd_);
-}
-/// Member access on the wrapped sox_format_t*; no null check is performed.
-sox_format_t* SoxFormat::operator->() const noexcept {
-  return this->fd_;
-}
-/// Converts to the wrapped raw pointer without giving up ownership.
-SoxFormat::operator sox_format_t*() const noexcept {
-  return this->fd_;
-}
-/// Checks that `sf` wraps a usable input file.
-/// @param sf Handle wrapping the sox_format_t* to validate.
-/// @throws std::runtime_error if the wrapped pointer is null, if the encoding
-/// is SOX_ENCODING_UNKNOWN, or if the reported signal length is 0.
-void validate_input_file(const SoxFormat& sf) {
-  // Check the raw pointer first: the two checks below dereference it.
-  if (static_cast<sox_format_t*>(sf) == nullptr) {
-    throw std::runtime_error("Error loading audio file: failed to open file.");
-  }
-  if (sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
-    throw std::runtime_error("Error loading audio file: unknown encoding.");
-  }
-  if (sf->signal.length == 0) {
-    throw std::runtime_error("Error reading audio file: unknown length.");
-  }
-}
-/// Validates the device, rank and dtype of `tensor`, in that order.
-/// @throws std::runtime_error if the tensor is not on CPU, is not 2D, or is
-/// not float32, int32, int16 or uint8.
-void validate_input_tensor(const torch::Tensor tensor) {
-  const auto device = tensor.device();
-  if (!device.is_cpu()) {
-    throw std::runtime_error("Input tensor has to be on CPU.");
-  }
-  const auto rank = tensor.ndimension();
-  if (rank != 2) {
-    throw std::runtime_error("Input tensor has to be 2D.");
-  }
-  const auto dtype = tensor.dtype();
-  const bool supported = dtype == torch::kFloat32 ||
-      dtype == torch::kInt32 || dtype == torch::kInt16 ||
-      dtype == torch::kUInt8;
-  if (!supported) {
-    throw std::runtime_error(
-        "Input tensor has to be one of float32, int32, int16 or uint8 type.");
-  }
-}
-/// Picks the tensor dtype for a SoX encoding / precision pair.
-/// @param encoding SoX encoding of the audio data.
-/// @param precision Bits per sample; only consulted for signed PCM.
-/// @return uint8 for unsigned PCM, int16/int32 for signed PCM, float32 for
-/// every other encoding.
-/// @throws std::runtime_error for signed PCM whose precision is not 16 or 32.
-caffe2::TypeMeta get_dtype(
-    const sox_encoding_t encoding,
-    const unsigned precision) {
-  // Start from float32, which covers the other formats, including
-  // 32-bit floating-point WAV, MP3, FLAC, VORBIS etc...
-  auto scalar_type = torch::kFloat32;
-  if (encoding == SOX_ENCODING_UNSIGNED) {
-    // 8-bit PCM WAV
-    scalar_type = torch::kUInt8;
-  } else if (encoding == SOX_ENCODING_SIGN2) {
-    // 16-bit or 32-bit PCM WAV
-    if (precision == 16) {
-      scalar_type = torch::kInt16;
-    } else if (precision == 32) {
-      scalar_type = torch::kInt32;
-    } else {
-      throw std::runtime_error(
-          "Only 16 and 32 bits are supported for signed PCM.");
-    }
-  }
-  return c10::scalarTypeToTypeMeta(scalar_type);
-}
-torch::Tensor convert_to_tensor( // Wraps a sox_sample_t buffer as a tensor and converts it to the requested dtype and layout.
-    sox_sample_t* buffer,
-    const int32_t num_samples,
-    const int32_t num_channels,
-    const caffe2::TypeMeta dtype,
-    const bool normalize,
-    const bool channels_first) {
-  auto t = torch::from_blob(
-      buffer, {num_samples / num_channels, num_channels}, torch::kInt32); // buffer viewed as int32 of shape [num_samples / num_channels, num_channels]
-  // Note: Tensor created from_blob does not own data but borrows
-  // So make sure to create a new copy after processing samples.
-  if (normalize || dtype == torch::kFloat32) {
-    t = t.to(torch::kFloat32);
-    t *= (t > 0) / 2147483647. + (t < 0) / 2147483648.; // positives scaled by 1/(2^31 - 1), negatives by 1/2^31
-  } else if (dtype == torch::kInt32) {
-    t = t.clone(); // copy so the result no longer aliases `buffer`
-  } else if (dtype == torch::kInt16) {
-    t.floor_divide_(1 << 16); // in-place, so this writes into `buffer` before the cast below
-    t = t.to(torch::kInt16);
-  } else if (dtype == torch::kUInt8) {
-    t.floor_divide_(1 << 24); // in-place on `buffer`, as above
-    t += 128; // shift the signed values into the unsigned range
-    t = t.to(torch::kUInt8);
-  } else {
-    throw std::runtime_error("Unsupported dtype.");
-  }
-  if (channels_first) {
-    t = t.transpose(1, 0); // swap the two dimensions so channels come first
-  }
-  return t.contiguous();
-}
-torch::Tensor unnormalize_wav(const torch::Tensor input_tensor) { // Converts a float32/int32/int16/uint8 tensor to int32; throws std::runtime_error for any other dtype.
-  const auto dtype = input_tensor.dtype();
-  auto tensor = input_tensor;
-  if (dtype == torch::kFloat32) {
-    double multi_pos = 2147483647.;
-    double multi_neg = -2147483648.;
-    auto mult = (tensor > 0) * multi_pos - (tensor < 0) * multi_neg; // multi_neg is negative, so the factor for negative samples is +2^31
-    tensor = tensor.to(torch::dtype(torch::kFloat64)); // scale in float64 before the int32 cast
-    tensor *= mult;
-    tensor.clamp_(multi_neg, multi_pos); // keep values inside the int32 range before casting
-    tensor = tensor.to(torch::dtype(torch::kInt32));
-  } else if (dtype == torch::kInt32) {
-    // already denormalized
-  } else if (dtype == torch::kInt16) {
-    tensor = tensor.to(torch::dtype(torch::kInt32));
-    tensor *= ((tensor != 0) * 65536); // multiply by 2^16
-  } else if (dtype == torch::kUInt8) {
-    tensor = tensor.to(torch::dtype(torch::kInt32));
-    tensor -= 128; // re-centre around zero
-    tensor *= 16777216; // multiply by 2^24
-  } else {
-    throw std::runtime_error("Unexpected dtype.");
-  }
-  return tensor;
-}
-/// Extracts the lower-cased extension (without the dot) from a file path.
-/// @param path File path to inspect.
-/// @return The text after the last '.', lower-cased. Returns an empty string
-/// when the path has no '.', or when the last '.' belongs to a directory
-/// component (a '/' follows it), since neither case names a file extension.
-const std::string get_filetype(const std::string path) {
-  const auto dot = path.find_last_of('.');
-  if (dot == std::string::npos || path.find('/', dot) != std::string::npos) {
-    return std::string();
-  }
-  std::string ext = path.substr(dot + 1);
-  // Go through unsigned char: calling tolower with a negative char value is
-  // undefined behaviour.
-  std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c) {
-    return static_cast<char>(::tolower(c));
-  });
-  return ext;
-}
-/// Chooses the SoX encoding for a file type and tensor dtype.
-/// @param filetype Lower-case file type: "wav", "mp3", "flac", "ogg" or
-/// "vorbis".
-/// @param dtype Tensor dtype; only consulted for "wav".
-/// @throws std::runtime_error for an unsupported file type, or for a "wav"
-/// dtype other than uint8, int16, int32 or float32.
-sox_encoding_t get_encoding(
-    const std::string filetype,
-    const caffe2::TypeMeta dtype) {
-  if (filetype == "wav") {
-    if (dtype == torch::kUInt8) {
-      return SOX_ENCODING_UNSIGNED;
-    }
-    // Both signed integer widths share the same encoding.
-    if (dtype == torch::kInt16 || dtype == torch::kInt32) {
-      return SOX_ENCODING_SIGN2;
-    }
-    if (dtype == torch::kFloat32) {
-      return SOX_ENCODING_FLOAT;
-    }
-    throw std::runtime_error("Unsupported dtype.");
-  }
-  if (filetype == "mp3") {
-    return SOX_ENCODING_MP3;
-  }
-  if (filetype == "flac") {
-    return SOX_ENCODING_FLAC;
-  }
-  if (filetype == "ogg" || filetype == "vorbis") {
-    return SOX_ENCODING_VORBIS;
-  }
-  throw std::runtime_error("Unsupported file type.");
-}
-/// Chooses the bits-per-sample value for a file type and tensor dtype.
-/// @param filetype Lower-case file type: "wav", "mp3", "flac", "ogg" or
-/// "vorbis".
-/// @param dtype Tensor dtype; only consulted for "wav".
-/// @return 8, 16 or 32 for "wav" depending on dtype, 24 for "flac", and
-/// SOX_UNSPEC for "mp3", "ogg" and "vorbis".
-/// @throws std::runtime_error for an unsupported file type, or for a "wav"
-/// dtype other than uint8, int16, int32 or float32.
-unsigned get_precision(
-    const std::string filetype,
-    const caffe2::TypeMeta dtype) {
-  if (filetype == "wav") {
-    if (dtype == torch::kUInt8) {
-      return 8;
-    }
-    if (dtype == torch::kInt16) {
-      return 16;
-    }
-    if (dtype == torch::kInt32 || dtype == torch::kFloat32) {
-      return 32;
-    }
-    throw std::runtime_error("Unsupported dtype.");
-  }
-  if (filetype == "flac") {
-    return 24;
-  }
-  if (filetype == "mp3" || filetype == "ogg" || filetype == "vorbis") {
-    return SOX_UNSPEC;
-  }
-  throw std::runtime_error("Unsupported file type.");
-}
-/// Builds a sox_signalinfo_t from a tensor and its sample rate.
-/// @param tensor Source tensor; its size along dimension 0 (channels_first)
-/// or 1 (otherwise) becomes the channel count, and numel() the length.
-/// @param sample_rate Value stored as the rate.
-/// @param channels_first Selects which dimension holds the channels.
-/// @param filetype Passed to get_precision together with the tensor dtype.
-sox_signalinfo_t get_signalinfo(
-    const torch::Tensor& tensor,
-    const int64_t sample_rate,
-    const bool channels_first,
-    const std::string filetype) {
-  const auto rate = static_cast<sox_rate_t>(sample_rate);
-  const auto channels =
-      static_cast<unsigned>(tensor.size(channels_first ? 0 : 1));
-  const auto precision = get_precision(filetype, tensor.dtype());
-  const auto length = static_cast<uint64_t>(tensor.numel());
-  return sox_signalinfo_t{rate, channels, precision, length};
-}
-/// Builds a sox_encodinginfo_t for the given file type and dtype.
-/// @param filetype Lower-case file type: "wav", "mp3", "flac", "ogg" or
-/// "vorbis".
-/// @param dtype Tensor dtype, forwarded to get_encoding and get_precision.
-/// @param compression Used as-is for "mp3", "flac", "ogg" and "vorbis";
-/// replaced by 0 for "wav".
-/// @throws std::runtime_error for an unsupported file type (checked before
-/// the encoding and precision are looked up).
-sox_encodinginfo_t get_encodinginfo(
-    const std::string filetype,
-    const caffe2::TypeMeta dtype,
-    const double compression) {
-  const bool keeps_compression = filetype == "mp3" || filetype == "flac" ||
-      filetype == "ogg" || filetype == "vorbis";
-  if (!keeps_compression && filetype != "wav") {
-    throw std::runtime_error("Unsupported file type.");
-  }
-  const double effective_compression = keeps_compression ? compression : 0.;
-  const auto encoding = get_encoding(filetype, dtype);
-  const auto bits_per_sample = get_precision(filetype, dtype);
-  return sox_encodinginfo_t{/*encoding=*/encoding,
-                            /*bits_per_sample=*/bits_per_sample,
-                            /*compression=*/effective_compression,
-                            /*reverse_bytes=*/sox_option_default,
-                            /*reverse_nibbles=*/sox_option_default,
-                            /*reverse_bits=*/sox_option_default,
-                            /*opposite_endian=*/sox_false};
-}
-} // namespace sox_utils
-} // namespace torchaudio

data/ext/torchaudio/csrc/sox_utils.h DELETED Viewed

@@ -1,100 +0,0 @@
-#ifndef TORCHAUDIO_SOX_UTILS_H
-#define TORCHAUDIO_SOX_UTILS_H
-#include <sox.h>
-#include <torch/script.h>
-namespace torchaudio {
-namespace sox_utils {
-struct TensorSignal : torch::CustomClassHolder { // Bundles a tensor with its sample rate and a channel-layout flag.
-  torch::Tensor tensor;
-  int64_t sample_rate;
-  bool channels_first;
-  TensorSignal( // One argument per field.
-      torch::Tensor tensor_,
-      int64_t sample_rate_,
-      bool channels_first_);
-  torch::Tensor getTensor() const; // Each getter returns the member of the matching name.
-  int64_t getSampleRate() const;
-  bool getChannelsFirst() const;
-};
-/// Helper class that automatically closes the wrapped sox_format_t*.
-struct SoxFormat {
-  explicit SoxFormat(sox_format_t* fd) noexcept;
-  SoxFormat(const SoxFormat& other) = delete; // copy and move are all deleted, so the handle cannot be duplicated or transferred
-  SoxFormat(SoxFormat&& other) = delete;
-  SoxFormat& operator=(const SoxFormat& other) = delete;
-  SoxFormat& operator=(SoxFormat&& other) = delete;
-  ~SoxFormat();
-  sox_format_t* operator->() const noexcept; // member access on the wrapped pointer
-  operator sox_format_t*() const noexcept; // implicit conversion to the raw pointer
- private:
-  sox_format_t* fd_;
-};
-///
-/// Verify that input file is found, has known encoding, and not empty
-void validate_input_file(const SoxFormat& sf);
-///
-/// Verify that input Tensor is 2D, CPU and either uint8, int16, int32 or float32
-void validate_input_tensor(const torch::Tensor);
-///
-/// Get target dtype for the given encoding and precision.
-caffe2::TypeMeta get_dtype(
-    const sox_encoding_t encoding,
-    const unsigned precision);
-///
-/// Convert sox_sample_t buffer to uint8/int16/int32/float32 Tensor
-/// NOTE: This function might modify the values in the input buffer to
-/// reduce the number of memory copies.
-/// @param buffer Pointer to buffer that contains audio data.
-/// @param num_samples The number of samples to read.
-/// @param num_channels The number of channels. Used to reshape the resulting
-/// Tensor.
-/// @param dtype Target dtype. Determines the output dtype and value range in
-/// conjunction with normalization.
-/// @param normalize Perform normalization. Only effective when dtype is not
-/// kFloat32. When effective, the output tensor is kFloat32 type and value range
-/// is [-1.0, 1.0]
-/// @param channels_first When True, output Tensor has shape of [num_channels,
-/// num_frames].
-torch::Tensor convert_to_tensor(
-    sox_sample_t* buffer,
-    const int32_t num_samples,
-    const int32_t num_channels,
-    const caffe2::TypeMeta dtype,
-    const bool normalize,
-    const bool channels_first);
-///
-/// Convert float32/int32/int16/uint8 Tensor to int32 for Torch -> Sox
-/// conversion.
-torch::Tensor unnormalize_wav(const torch::Tensor);
-/// Extract extension from file path
-const std::string get_filetype(const std::string path);
-/// Get sox_signalinfo_t for passing a torch::Tensor object.
-sox_signalinfo_t get_signalinfo(
-    const torch::Tensor& tensor,
-    const int64_t sample_rate,
-    const bool channels_first,
-    const std::string filetype);
-/// Get sox_encodinginfo_t for saving audio file
-sox_encodinginfo_t get_encodinginfo(
-    const std::string filetype,
-    const caffe2::TypeMeta dtype,
-    const double compression);
-} // namespace sox_utils
-} // namespace torchaudio
-#endif

data/ext/torchaudio/ext.cpp DELETED Viewed

@@ -1,33 +0,0 @@
-#include <torchaudio/csrc/sox.h>
-#include <rice/rice.hpp>
-#include <rice/stl.hpp>
-extern "C"
-void Init_ext() // Defines the TorchAudio::Ext Ruby module and the SignalInfo class under it.
-{
-  auto rb_mTorchAudio = Rice::define_module("TorchAudio");
-  auto rb_mExt = Rice::define_module_under(rb_mTorchAudio, "Ext")
-    .define_singleton_function(
-      "read_audio_file", // thin forwarder to torch::audio::read_audio_file
-      [](const std::string& file_name, at::Tensor output, bool ch_first, int64_t nframes, int64_t offset, sox_signalinfo_t* si, sox_encodinginfo_t* ei, const char* ft) {
-        return torch::audio::read_audio_file(file_name, output, ch_first, nframes, offset, si, ei, ft);
-      })
-    .define_singleton_function(
-      "write_audio_file", // thin forwarder to torch::audio::write_audio_file
-      [](const std::string& file_name, const at::Tensor& tensor, sox_signalinfo_t* si, sox_encodinginfo_t* ei, const char* file_type) {
-        return torch::audio::write_audio_file(file_name, tensor, si, ei, file_type);
-      });
-  auto rb_cSignalInfo = Rice::define_class_under<sox_signalinfo_t>(rb_mExt, "SignalInfo") // exposes sox_signalinfo_t with a getter and a setter per field
-    .define_constructor(Rice::Constructor<sox_signalinfo_t>())
-    .define_method("rate", [](sox_signalinfo_t& self) { return self.rate; })
-    .define_method("channels", [](sox_signalinfo_t& self) { return self.channels; })
-    .define_method("precision", [](sox_signalinfo_t& self) { return self.precision; })
-    .define_method("length", [](sox_signalinfo_t& self) { return self.length; })
-    .define_method("rate=", [](sox_signalinfo_t& self, sox_rate_t rate) { self.rate = rate; })
-    .define_method("channels=", [](sox_signalinfo_t& self, unsigned channels) { self.channels = channels; })
-    .define_method("precision=", [](sox_signalinfo_t& self, unsigned precision) { self.precision = precision; })
-    .define_method("length=", [](sox_signalinfo_t& self, sox_uint64_t length) { self.length = length; });
-}

data/ext/torchaudio/extconf.rb DELETED Viewed

@@ -1,79 +0,0 @@
-require "mkmf-rice"
-$CXXFLAGS += " -std=c++17 $(optflags)"
-ext = File.expand_path(".", __dir__)
-csrc = File.expand_path("csrc", __dir__)
-$srcs = Dir["{#{ext},#{csrc}}/*.cpp"] # every .cpp in this directory and in csrc/
-$INCFLAGS << " -I#{File.expand_path("..", __dir__)}" # parent directory goes on the include path
-$VPATH << csrc
-#
-# keep rest synced with Torch
-#
-# change to 0 for Linux pre-cxx11 ABI version
-$CXXFLAGS += " -D_GLIBCXX_USE_CXX11_ABI=1"
-apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
-if apple_clang
-  # silence torch warnings
-  $CXXFLAGS += " -Wno-deprecated-declarations"
-else
-  # silence rice warnings
-  $CXXFLAGS += " -Wno-noexcept-type"
-  # silence torch warnings
-  $CXXFLAGS += " -Wno-duplicated-cond -Wno-suggest-attribute=noreturn"
-end
-paths = [
-  "/usr/local",
-  "/opt/homebrew",
-  "/home/linuxbrew/.linuxbrew"
-]
-inc, lib = dir_config("torch")
-inc ||= paths.map { |v| "#{v}/include" }.find { |v| Dir.exist?("#{v}/torch") } # fall back to the first listed prefix whose include/ has a torch directory
-lib ||= paths.map { |v| "#{v}/lib" }.find { |v| Dir["#{v}/*torch_cpu*"].any? } # fall back to the first listed prefix whose lib/ has a *torch_cpu* file
-unless inc && lib
-  abort "LibTorch not found"
-end
-cuda_inc, cuda_lib = dir_config("cuda")
-cuda_inc ||= "/usr/local/cuda/include"
-cuda_lib ||= "/usr/local/cuda/lib64"
-$LDFLAGS += " -L#{lib}" if Dir.exist?(lib)
-abort "LibTorch not found" unless have_library("torch")
-have_library("mkldnn") # result is not checked
-have_library("nnpack") # result is not checked
-with_cuda = false
-if Dir["#{lib}/*torch_cuda*"].any? # only probe CUDA when a *torch_cuda* file sits in the LibTorch lib directory
-  $LDFLAGS += " -L#{cuda_lib}" if Dir.exist?(cuda_lib)
-  with_cuda = have_library("cuda") && have_library("cudnn")
-end
-$INCFLAGS += " -I#{inc}"
-$INCFLAGS += " -I#{inc}/torch/csrc/api/include"
-$LDFLAGS += " -Wl,-rpath,#{lib}"
-$LDFLAGS += ":#{cuda_lib}/stubs:#{cuda_lib}" if with_cuda # appended to the -rpath value added on the previous line
-# https://github.com/pytorch/pytorch/blob/v1.5.0/torch/utils/cpp_extension.py#L1232-L1238
-$LDFLAGS += " -lc10 -ltorch_cpu -ltorch"
-if with_cuda
-  $LDFLAGS += " -lcuda -lnvrtc -lnvToolsExt -lcudart -lc10_cuda -ltorch_cuda -lcufft -lcurand -lcublas -lcudnn"
-  # TODO figure out why this is needed
-  $LDFLAGS += " -Wl,--no-as-needed,#{lib}/libtorch.so"
-end
-abort "SoX not found" unless have_library("sox")
-# create makefile
-create_makefile("torchaudio/ext")