RubyGems - torchaudio - Versions diffs - 0.1.0 - Mend

torchaudio 0.1.0

Files changed (20) hide show

checksums.yaml +7 -0
data/CHANGELOG.md +3 -0
data/LICENSE.txt +26 -0
data/README.md +93 -0
data/ext/torchaudio/csrc/register.cpp +65 -0
data/ext/torchaudio/csrc/sox.cpp +361 -0
data/ext/torchaudio/csrc/sox.h +71 -0
data/ext/torchaudio/csrc/sox_effects.cpp +54 -0
data/ext/torchaudio/csrc/sox_effects.h +18 -0
data/ext/torchaudio/csrc/sox_io.cpp +170 -0
data/ext/torchaudio/csrc/sox_io.h +41 -0
data/ext/torchaudio/csrc/sox_utils.cpp +245 -0
data/ext/torchaudio/csrc/sox_utils.h +100 -0
data/ext/torchaudio/ext.cpp +33 -0
data/ext/torchaudio/extconf.rb +81 -0
data/lib/torchaudio.rb +95 -0
data/lib/torchaudio/datasets/utils.rb +92 -0
data/lib/torchaudio/datasets/yesno.rb +59 -0
data/lib/torchaudio/version.rb +3 -0
metadata +145 -0

data/ext/torchaudio/csrc/sox_utils.h ADDED

@@ -0,0 +1,100 @@
+#ifndef TORCHAUDIO_SOX_UTILS_H
+#define TORCHAUDIO_SOX_UTILS_H
+#include <sox.h>
+#include <torch/script.h>
+namespace torchaudio {
+namespace sox_utils {
+/// Value holder that bundles a Tensor with its sample rate and a
+/// channels-first flag. Derives from torch::CustomClassHolder.
+struct TensorSignal : torch::CustomClassHolder {
+  /// Audio data.
+  torch::Tensor tensor;
+  /// Sample rate associated with `tensor`.
+  /// NOTE(review): presumably in Hz — confirm against the callers.
+  int64_t sample_rate;
+  /// Layout flag for `tensor`.
+  /// NOTE(review): presumably true means [num_channels, num_frames], as
+  /// described for convert_to_tensor — confirm in sox_utils.cpp.
+  bool channels_first;
+  /// Construct from a tensor, its sample rate and its layout flag.
+  TensorSignal(
+      torch::Tensor tensor_,
+      int64_t sample_rate_,
+      bool channels_first_);
+  /// Accessors for the three members above (defined out of line).
+  torch::Tensor getTensor() const;
+  int64_t getSampleRate() const;
+  bool getChannelsFirst() const;
+};
+/// helper class to automatically close sox_format_t*
+///
+/// Wraps a raw sox_format_t* handle. Copy and move construction/assignment
+/// are all deleted, so a wrapper cannot be duplicated or transferred.
+/// NOTE(review): the closing itself happens in the destructor defined in
+/// sox_utils.cpp — confirm how a null handle is treated there.
+struct SoxFormat {
+  /// Wrap `fd`; explicit so a raw pointer is never converted implicitly.
+  explicit SoxFormat(sox_format_t* fd) noexcept;
+  SoxFormat(const SoxFormat& other) = delete;
+  SoxFormat(SoxFormat&& other) = delete;
+  SoxFormat& operator=(const SoxFormat& other) = delete;
+  SoxFormat& operator=(SoxFormat&& other) = delete;
+  ~SoxFormat();
+  /// Member access on the wrapped handle (e.g. `sf->signal`).
+  sox_format_t* operator->() const noexcept;
+  /// Implicit conversion so the wrapper can be passed where a raw
+  /// sox_format_t* is expected.
+  operator sox_format_t*() const noexcept;
+ private:
+  /// The wrapped handle.
+  sox_format_t* fd_;
+};
+///
+/// Verify that input file is found, has known encoding, and not empty
+/// NOTE(review): how a failed check is reported is defined in
+/// sox_utils.cpp and is not visible from this header — confirm there.
+void validate_input_file(const SoxFormat& sf);
+///
+/// Verify that input Tensor is 2D, CPU and either uint8, int16, int32 or float32
+void validate_input_tensor(const torch::Tensor);
+///
+/// Get target dtype for the given encoding and precision.
+/// @param encoding SoX encoding of the audio data.
+/// @param precision Precision of the audio data.
+/// NOTE(review): presumably bits per sample — confirm in sox_utils.cpp.
+caffe2::TypeMeta get_dtype(
+    const sox_encoding_t encoding,
+    const unsigned precision);
+///
+/// Convert sox_sample_t buffer to uint8/int16/int32/float32 Tensor
+/// NOTE: This function might modify the values in the input buffer to
+/// reduce the number of memory copies.
+/// @param buffer Pointer to buffer that contains audio data.
+/// @param num_samples The number of samples to read.
+/// @param num_channels The number of channels. Used to reshape the resulting
+/// Tensor.
+/// @param dtype Target dtype. Determines the output dtype and value range in
+/// conjunction with normalization.
+/// @param normalize Perform normalization. Only effective when dtype is not
+/// kFloat32. When effective, the output tensor is kFloat32 type and value range
+/// is [-1.0, 1.0]
+/// @param channels_first When True, output Tensor has shape of [num_channels,
+/// num_frames].
+torch::Tensor convert_to_tensor(
+    sox_sample_t* buffer,
+    const int32_t num_samples,
+    const int32_t num_channels,
+    const caffe2::TypeMeta dtype,
+    const bool normalize,
+    const bool channels_first);
+///
+/// Convert float32/int32/int16/uint8 Tensor to int32 for Torch -> Sox
+/// conversion.
+torch::Tensor unnormalize_wav(const torch::Tensor);
+/// Extract extension from file path
+/// NOTE(review): behavior for a path without an extension is defined in
+/// sox_utils.cpp — confirm there.
+const std::string get_filetype(const std::string path);
+/// Get sox_signalinfo_t for passing a torch::Tensor object.
+/// @param tensor Tensor holding the audio data.
+/// @param sample_rate Sample rate of the audio data.
+/// @param channels_first Layout flag of `tensor`.
+/// @param filetype File type the signal info is built for.
+sox_signalinfo_t get_signalinfo(
+    const torch::Tensor& tensor,
+    const int64_t sample_rate,
+    const bool channels_first,
+    const std::string filetype);
+/// Get sox_encodinginfo_t for saving audio file
+/// @param filetype File type of the file being saved.
+/// @param dtype dtype of the Tensor being saved.
+/// @param compression Compression setting.
+/// NOTE(review): its meaning per file type is defined in sox_utils.cpp.
+sox_encodinginfo_t get_encodinginfo(
+    const std::string filetype,
+    const caffe2::TypeMeta dtype,
+    const double compression);
+} // namespace sox_utils
+} // namespace torchaudio
+#endif

data/ext/torchaudio/ext.cpp ADDED

@@ -0,0 +1,33 @@
+#include <torchaudio/csrc/sox.h>
+#include <rice/Module.hpp>
+using namespace Rice;
+// Rice conversion for sox_signalinfo_t* arguments: Ruby nil maps to
+// nullptr; every other Ruby object is rejected with std::runtime_error.
+template<>
+inline
+sox_signalinfo_t* from_ruby<sox_signalinfo_t*>(Object x)
+{
+  if (!x.is_nil()) {
+    throw std::runtime_error("Unsupported signalinfo");
+  }
+  return nullptr;
+}
+// Rice conversion for sox_encodinginfo_t* arguments: Ruby nil maps to
+// nullptr; every other Ruby object is rejected with std::runtime_error.
+template<>
+inline
+sox_encodinginfo_t* from_ruby<sox_encodinginfo_t*>(Object x)
+{
+  if (!x.is_nil()) {
+    throw std::runtime_error("Unsupported encodinginfo");
+  }
+  return nullptr;
+}
+// Extension entry point: defines TorchAudio::Ext and exposes
+// torch::audio::read_audio_file as its singleton method read_audio_file.
+extern "C"
+void Init_ext()
+{
+  Module rb_mTorchAudio = define_module("TorchAudio");
+  Module rb_mExt = define_module_under(rb_mTorchAudio, "Ext");
+  rb_mExt.define_singleton_method("read_audio_file", &torch::audio::read_audio_file);
+}

data/ext/torchaudio/extconf.rb ADDED

@@ -0,0 +1,81 @@
+# Build configuration for the torchaudio native extension (mkmf + Rice).
+# Aborts when stdc++, SoX or LibTorch cannot be linked, then writes the
+# Makefile for "torchaudio/ext". Statement order matters: the have_library
+# probes and the flag appends below build on each other.
+require "mkmf-rice"
+abort "Missing stdc++" unless have_library("stdc++")
+$CXXFLAGS += " -std=c++14"
+abort "SoX not found" unless have_library("sox")
+# compile every .cpp file in this directory and in csrc/
+ext = File.expand_path(".", __dir__)
+csrc = File.expand_path("csrc", __dir__)
+$srcs = Dir["{#{ext},#{csrc}}/*.cpp"]
+# put the parent directory on the include path so that includes of the
+# form <torchaudio/csrc/...> resolve
+$INCFLAGS << " -I#{File.expand_path("..", __dir__)}"
+$VPATH << csrc
+#
+# keep rest synced with Torch
+#
+# change to 0 for Linux pre-cxx11 ABI version
+$CXXFLAGS += " -D_GLIBCXX_USE_CXX11_ABI=1"
+# non-nil when the configured compiler identifies itself as Apple clang
+apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
+# check omp first
+if have_library("omp") || have_library("gomp")
+  $CXXFLAGS += " -DAT_PARALLEL_OPENMP=1"
+  # on Apple clang, -Xclang is placed directly before -fopenmp
+  $CXXFLAGS += " -Xclang" if apple_clang
+  $CXXFLAGS += " -fopenmp"
+end
+if apple_clang
+  # silence ruby/intern.h warning
+  $CXXFLAGS += " -Wno-deprecated-register"
+  # silence torch warnings
+  $CXXFLAGS += " -Wno-shorten-64-to-32 -Wno-missing-noreturn"
+else
+  # silence rice warnings
+  $CXXFLAGS += " -Wno-noexcept-type"
+  # silence torch warnings
+  $CXXFLAGS += " -Wno-duplicated-cond -Wno-suggest-attribute=noreturn"
+end
+# LibTorch include/lib directories from dir_config("torch"), falling back
+# to /usr/local when dir_config returns nil for either
+inc, lib = dir_config("torch")
+inc ||= "/usr/local/include"
+lib ||= "/usr/local/lib"
+# CUDA include/lib directories from dir_config("cuda"), falling back to
+# /usr/local/cuda (cuda_inc is not used further in this script)
+cuda_inc, cuda_lib = dir_config("cuda")
+cuda_inc ||= "/usr/local/cuda/include"
+cuda_lib ||= "/usr/local/cuda/lib64"
+$LDFLAGS += " -L#{lib}" if Dir.exist?(lib)
+abort "LibTorch not found" unless have_library("torch")
+# optional libraries: the results are ignored, so a miss is not fatal
+have_library("mkldnn")
+have_library("nnpack")
+# CUDA is enabled only when the LibTorch lib directory contains a
+# *torch_cuda* file and both cuda and cudnn can be linked
+with_cuda = false
+if Dir["#{lib}/*torch_cuda*"].any?
+  $LDFLAGS += " -L#{cuda_lib}" if Dir.exist?(cuda_lib)
+  with_cuda = have_library("cuda") && have_library("cudnn")
+end
+$INCFLAGS += " -I#{inc}"
+$INCFLAGS += " -I#{inc}/torch/csrc/api/include"
+$LDFLAGS += " -Wl,-rpath,#{lib}"
+# no leading space: this extends the -rpath value appended on the previous
+# line, so the two lines must stay adjacent
+$LDFLAGS += ":#{cuda_lib}/stubs:#{cuda_lib}" if with_cuda
+# https://github.com/pytorch/pytorch/blob/v1.5.0/torch/utils/cpp_extension.py#L1232-L1238
+$LDFLAGS += " -lc10 -ltorch_cpu -ltorch"
+if with_cuda
+  $LDFLAGS += " -lcuda -lnvrtc -lnvToolsExt -lcudart -lc10_cuda -ltorch_cuda -lcufft -lcurand -lcublas -lcudnn"
+  # TODO figure out why this is needed
+  $LDFLAGS += " -Wl,--no-as-needed,#{lib}/libtorch.so"
+end
+# create makefile
+create_makefile("torchaudio/ext")

data/lib/torchaudio.rb ADDED

@@ -0,0 +1,95 @@
+# dependencies
+require "torch"
+# ext
+require "torchaudio/ext"
+# stdlib
+require "csv"
+require "digest"
+require "fileutils"
+require "net/http"
+require "rubygems/package"
+require "set"
+require "tmpdir"
+require "uri"
+# modules
+require "torchaudio/datasets/utils"
+require "torchaudio/datasets/yesno"
+require "torchaudio/version"
+module TorchAudio
+  # Base error class for this library.
+  class Error < StandardError; end
+  class << self
+    # Reads an audio file into a tensor via Ext.read_audio_file.
+    #
+    # @param filepath [#to_s] path of the audio file; must be a regular file
+    # @param out [Torch::Tensor, nil] CPU tensor to fill; a new
+    #   Torch::FloatTensor is used when nil
+    # @param normalization [Boolean, Numeric, #call] true divides the signal
+    #   by 1 << 31, a number divides by that number, a callable divides by
+    #   the value it returns for the signal; false/nil leaves it untouched
+    # @param channels_first [Boolean] forwarded to Ext.read_audio_file
+    # @param num_frames [Integer] forwarded to Ext.read_audio_file; must be >= -1
+    # @param offset [Integer] forwarded to Ext.read_audio_file; must be >= 0
+    # @param signalinfo [nil] forwarded to Ext.read_audio_file
+    # @param encodinginfo [nil] forwarded to Ext.read_audio_file
+    # @param filetype [String, nil] defaults to the extension of filepath
+    #   without the leading dot
+    # @return [Array] the output tensor and the value returned by
+    #   Ext.read_audio_file (the sample rate)
+    # @raise [ArgumentError] when filepath is not a regular file, out is not
+    #   a CPU tensor, num_frames < -1 or offset < 0
+    def load(
+      filepath, out: nil, normalization: true, channels_first: true, num_frames: 0,
+      offset: 0, signalinfo: nil, encodinginfo: nil, filetype: nil
+    )
+      filepath = filepath.to_s
+      # File.file? (unlike File.exist?) is false for directories, which is
+      # what the error message promises
+      unless File.file?(filepath)
+        raise ArgumentError, "#{filepath} not found or is a directory"
+      end
+      # initialize output tensor
+      if out.nil?
+        out = Torch::FloatTensor.new
+      else
+        check_input(out)
+      end
+      if num_frames < -1
+        raise ArgumentError, "Expected value for num_frames -1 (entire file) or >=0"
+      end
+      if offset < 0
+        raise ArgumentError, "Expected positive offset value"
+      end
+      # same logic as C++
+      # could also make read_audio_file work with nil
+      filetype ||= File.extname(filepath)[1..-1]
+      sample_rate =
+        Ext.read_audio_file(
+          filepath,
+          out,
+          channels_first,
+          num_frames,
+          offset,
+          signalinfo,
+          encodinginfo,
+          filetype
+        )
+      # normalize if needed
+      normalize_audio(out, normalization)
+      [out, sample_rate]
+    end
+    # Same as #load, but always normalizes by 1 << 16; any :normalization
+    # passed by the caller is overridden.
+    #
+    # @param filepath [#to_s] path of the audio file
+    # @param kwargs [Hash] remaining keyword arguments of #load
+    # @return [Array] see #load
+    def load_wav(filepath, **kwargs)
+      kwargs[:normalization] = 1 << 16
+      load(filepath, **kwargs)
+    end
+    private
+    # Ensures src is a tensor that does not live on a CUDA device.
+    #
+    # @raise [ArgumentError] otherwise
+    def check_input(src)
+      raise ArgumentError, "Expected a tensor, got #{src.class.name}" unless Torch.tensor?(src)
+      raise ArgumentError, "Expected a CPU based tensor, got #{src.class.name}" if src.cuda?
+    end
+    # Divides signal in place (div!) according to normalization:
+    # true => 1 << 31, Numeric => that value, callable => its result for
+    # the signal. Does nothing for false/nil or any other kind of value.
+    def normalize_audio(signal, normalization)
+      return unless normalization
+      normalization = 1 << 31 if normalization == true
+      if normalization.is_a?(Numeric)
+        signal.div!(normalization)
+      elsif normalization.respond_to?(:call)
+        signal.div!(normalization.call(signal))
+      end
+    end
+  end
+end

data/lib/torchaudio/datasets/utils.rb ADDED

@@ -0,0 +1,92 @@
+module TorchAudio
+  module Datasets
+    # Download / archive / file-walking helpers used by the datasets.
+    module Utils
+      class << self
+        # Downloads url into download_folder and returns the file path.
+        #
+        # @param url [String] URL to fetch
+        # @param download_folder [String] destination directory
+        # @param filename [String, nil] destination name; defaults to the
+        #   basename of url
+        # @param hash_value [String, nil] expected hex digest; the check is
+        #   skipped when nil
+        # @param hash_type [String] digest algorithm name, e.g. "sha256" or "md5"
+        # @return [String] path of the downloaded file
+        # @raise [RuntimeError] when the destination already exists or the
+        #   digest does not match
+        def download_url(url, download_folder, filename: nil, hash_value: nil, hash_type: "sha256")
+          filename ||= File.basename(url)
+          filepath = File.join(download_folder, filename)
+          if File.exist?(filepath)
+            raise "#{filepath} already exists. Delete the file manually and retry."
+          end
+          puts "Downloading #{url}..."
+          download_url_to_file(url, filepath, hash_value, hash_type)
+        end
+        # Streams url to a temporary file, verifies its digest and moves it
+        # to dst. Follows redirects.
+        #
+        # @param url [String] URL to fetch
+        # @param dst [String] final destination path
+        # @param hash_value [String, nil] expected hex digest; nil skips the check
+        # @param hash_type [String] digest algorithm name understood by Digest()
+        # @return [String] dst
+        # @raise [TorchAudio::Error] on a response that is neither a success
+        #   nor a redirect
+        # @raise [RuntimeError] when the digest does not match
+        def download_url_to_file(url, dst, hash_value, hash_type)
+          uri = URI(url)
+          tmp = nil
+          location = nil
+          Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
+            request = Net::HTTP::Get.new(uri)
+            http.request(request) do |response|
+              case response
+              when Net::HTTPRedirection
+                location = response["location"]
+              when Net::HTTPSuccess
+                tmp = "#{Dir.tmpdir}/#{Time.now.to_f}" # TODO better name
+                File.open(tmp, "wb") do |f|
+                  response.read_body do |chunk|
+                    f.write(chunk)
+                  end
+                end
+              else
+                raise Error, "Bad response"
+              end
+            end
+          end
+          if location
+            # the Location header may be relative, so resolve it against the
+            # current URL; the hash arguments must travel with the redirect
+            return download_url_to_file(URI.join(url, location).to_s, dst, hash_value, hash_type)
+          end
+          # check hash with the requested algorithm (only when one is expected)
+          if hash_value && Digest(hash_type.to_s.upcase).file(tmp).hexdigest != hash_value
+            # do not leave the rejected download behind in the temp directory
+            FileUtils.rm_f(tmp)
+            raise "The hash of #{dst} does not match. Delete the file manually and retry."
+          end
+          FileUtils.mv(tmp, dst)
+          dst
+        end
+        # Extracts a .tar.gz / .tgz archive.
+        # extract_tar_gz doesn't list files, so just return to_path
+        #
+        # @param from_path [String] archive path
+        # @param to_path [String, nil] target directory; defaults to the
+        #   directory containing the archive
+        # @param overwrite [Object] accepted but currently unused
+        # @return [String] to_path
+        # @raise [RuntimeError] for any other archive type
+        def extract_archive(from_path, to_path: nil, overwrite: nil)
+          to_path ||= File.dirname(from_path)
+          unless from_path.end_with?(".tar.gz", ".tgz")
+            raise "We currently only support tar.gz and tgz archives."
+          end
+          File.open(from_path, "rb") do |io|
+            Gem::Package.new("").extract_tar_gz(io, to_path)
+          end
+          to_path
+        end
+        # Yields, in sorted order, every path under root (relative to root)
+        # that ends with suffix. Returns an Enumerator when no block is given.
+        #
+        # @param root [String] directory to search recursively
+        # @param suffix [String] required file name ending
+        # @param prefix [Boolean] not implemented; raises when truthy and a
+        #   match is found
+        # @param remove_suffix [Boolean] strip suffix from the yielded paths
+        def walk_files(root, suffix, prefix: false, remove_suffix: false)
+          return enum_for(:walk_files, root, suffix, prefix: prefix, remove_suffix: remove_suffix) unless block_given?
+          Dir.glob("**/*", base: root).sort.each do |f|
+            next unless f.end_with?(suffix)
+            f = f.delete_suffix(suffix) if remove_suffix
+            if prefix
+              raise "Not implemented yet"
+              # f = File.join(dirpath, f)
+            end
+            yield f
+          end
+        end
+      end
+    end
+  end
+end

data/lib/torchaudio/datasets/yesno.rb ADDED

@@ -0,0 +1,59 @@
+module TorchAudio
+  module Datasets
+    # The "yesno" speech dataset. Each item is
+    # [waveform, sample_rate, labels], where labels are the integers encoded
+    # in the file name, separated by underscores.
+    class YESNO < Torch::Utils::Data::Dataset
+      URL = "http://www.openslr.org/resources/1/waves_yesno.tar.gz"
+      FOLDER_IN_ARCHIVE = "waves_yesno"
+      CHECKSUMS = {
+        "http://www.openslr.org/resources/1/waves_yesno.tar.gz" => "962ff6e904d2df1126132ecec6978786"
+      }
+      # @param root [String] directory holding (or receiving) the dataset
+      # @param url [String] archive URL; must be a key of CHECKSUMS when a
+      #   download is actually performed
+      # @param folder_in_archive [String] dataset directory name inside root
+      # @param download [Boolean] fetch and extract the archive when the
+      #   dataset directory is missing
+      # @raise [RuntimeError] when the dataset directory does not exist
+      def initialize(root, url: URL, folder_in_archive: FOLDER_IN_ARCHIVE, download: false)
+        archive = File.join(root, File.basename(url))
+        @path = File.join(root, folder_in_archive)
+        if download && !Dir.exist?(@path)
+          # reuse an archive that is already on disk instead of re-downloading
+          unless File.exist?(archive)
+            Utils.download_url(url, root, hash_value: CHECKSUMS.fetch(url), hash_type: "md5")
+          end
+          Utils.extract_archive(archive)
+        end
+        raise "Dataset not found. Please use `download: true` to download it." unless Dir.exist?(@path)
+        # file ids: matching paths relative to @path, extension removed
+        @walker = Utils.walk_files(@path, ext_audio, prefix: false, remove_suffix: true).to_a
+      end
+      # @param n [Integer] item index
+      # @return [Array] waveform, sample rate and labels of the n-th item
+      def [](n)
+        load_yesno_item(@walker[n], @path, ext_audio)
+      end
+      # @return [Integer] number of items
+      def length
+        @walker.size
+      end
+      alias_method :size, :length
+      private
+      # Loads one recording and derives its labels from the file id.
+      def load_yesno_item(fileid, path, ext_audio)
+        labels = fileid.split("_").map { |label| label.to_i }
+        waveform, sample_rate = TorchAudio.load(File.join(path, fileid + ext_audio))
+        [waveform, sample_rate, labels]
+      end
+      # Extension of the audio files in the dataset.
+      def ext_audio
+        ".wav"
+      end
+    end
+  end
+end