torch-rb 0.3.3 → 0.4.0

ext/torch/ext.cpp:
@@ -7,13 +7,14 @@
  #include <rice/Constructor.hpp>
  #include <rice/Hash.hpp>

- #include "templates.hpp"
+ #include "templates.h"
+ #include "utils.h"

  // generated with:
  // rake generate:functions
- #include "torch_functions.hpp"
- #include "tensor_functions.hpp"
- #include "nn_functions.hpp"
+ #include "torch_functions.h"
+ #include "tensor_functions.h"
+ #include "nn_functions.h"

  using namespace Rice;
  using torch::indexing::TensorIndex;
@@ -29,11 +30,47 @@ void handle_error(torch::Error const & ex)
    throw Exception(rb_eRuntimeError, ex.what_without_backtrace());
  }

+ Class rb_cTensor;
+
  std::vector<TensorIndex> index_vector(Array a) {
-   auto indices = std::vector<TensorIndex>();
+   Object obj;
+
+   std::vector<TensorIndex> indices;
    indices.reserve(a.size());
+
    for (size_t i = 0; i < a.size(); i++) {
-     indices.push_back(from_ruby<TensorIndex>(a[i]));
+     obj = a[i];
+
+     if (obj.is_instance_of(rb_cInteger)) {
+       indices.push_back(from_ruby<int64_t>(obj));
+     } else if (obj.is_instance_of(rb_cRange)) {
+       torch::optional<int64_t> start_index = from_ruby<int64_t>(obj.call("begin"));
+       torch::optional<int64_t> stop_index = -1;
+
+       Object end = obj.call("end");
+       if (!end.is_nil()) {
+         stop_index = from_ruby<int64_t>(end);
+       }
+
+       Object exclude_end = obj.call("exclude_end?");
+       if (!exclude_end) {
+         if (stop_index.value() == -1) {
+           stop_index = torch::nullopt;
+         } else {
+           stop_index = stop_index.value() + 1;
+         }
+       }
+
+       indices.push_back(torch::indexing::Slice(start_index, stop_index));
+     } else if (obj.is_instance_of(rb_cTensor)) {
+       indices.push_back(from_ruby<Tensor>(obj));
+     } else if (obj.is_nil()) {
+       indices.push_back(torch::indexing::None);
+     } else if (obj == True || obj == False) {
+       indices.push_back(from_ruby<bool>(obj));
+     } else {
+       throw Exception(rb_eArgError, "Unsupported index type: %s", rb_obj_classname(obj));
+     }
    }
    return indices;
  }
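
Note: index conversion now happens entirely in C++. Ruby Integers, Ranges, nil, booleans, and Tensors are translated directly into TensorIndex values (the Ruby-side TensorIndex wrapper class is removed further down). A minimal sketch of the indexing behavior this supports, assuming Tensor#[] dispatches through _index and index_vector:

    x = Torch.arange(0, 12).reshape(3, 4)

    x[0]          # Integer -> single row
    x[0, 1..2]    # inclusive Range -> Slice(1, 3)
    x[0, 1...3]   # exclusive Range -> Slice(1, 3)
    x[0, 1..]     # endless Range -> slice to the end (end is nil, exclude_end? is false)
    x[nil, 0]     # nil -> None, inserts a dimension
    x[x > 5]      # boolean Tensor index
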
@@ -45,9 +82,10 @@ void Init_ext()
    rb_mTorch.add_handler<torch::Error>(handle_error);
    add_torch_functions(rb_mTorch);

-   Class rb_cTensor = define_class_under<torch::Tensor>(rb_mTorch, "Tensor");
+   rb_cTensor = define_class_under<torch::Tensor>(rb_mTorch, "Tensor");
    rb_cTensor.add_handler<torch::Error>(handle_error);
    add_tensor_functions(rb_cTensor);
+   THPVariableClass = rb_cTensor.value();

    Module rb_mNN = define_module_under(rb_mTorch, "NN");
    rb_mNN.add_handler<torch::Error>(handle_error);
@@ -68,13 +106,6 @@ void Init_ext()
      return generator.seed();
    })

-   Class rb_cTensorIndex = define_class_under<TensorIndex>(rb_mTorch, "TensorIndex")
-     .define_singleton_method("boolean", *[](bool value) { return TensorIndex(value); })
-     .define_singleton_method("integer", *[](int64_t value) { return TensorIndex(value); })
-     .define_singleton_method("tensor", *[](torch::Tensor& value) { return TensorIndex(value); })
-     .define_singleton_method("slice", *[](torch::optional<int64_t> start_index, torch::optional<int64_t> stop_index) { return TensorIndex(torch::indexing::Slice(start_index, stop_index)); })
-     .define_singleton_method("none", *[]() { return TensorIndex(torch::indexing::None); });
-
    // https://pytorch.org/cppdocs/api/structc10_1_1_i_value.html
    Class rb_cIValue = define_class_under<torch::IValue>(rb_mTorch, "IValue")
      .add_handler<torch::Error>(handle_error)
@@ -224,67 +255,6 @@ void Init_ext()
      *[] {
        return torch::get_parallel_info();
      })
-   // begin tensor creation
-   .define_singleton_method(
-     "_arange",
-     *[](Scalar start, Scalar end, Scalar step, const torch::TensorOptions &options) {
-       return torch::arange(start, end, step, options);
-     })
-   .define_singleton_method(
-     "_empty",
-     *[](IntArrayRef size, const torch::TensorOptions &options) {
-       return torch::empty(size, options);
-     })
-   .define_singleton_method(
-     "_eye",
-     *[](int64_t m, int64_t n, const torch::TensorOptions &options) {
-       return torch::eye(m, n, options);
-     })
-   .define_singleton_method(
-     "_full",
-     *[](IntArrayRef size, Scalar fill_value, const torch::TensorOptions& options) {
-       return torch::full(size, fill_value, options);
-     })
-   .define_singleton_method(
-     "_linspace",
-     *[](Scalar start, Scalar end, int64_t steps, const torch::TensorOptions& options) {
-       return torch::linspace(start, end, steps, options);
-     })
-   .define_singleton_method(
-     "_logspace",
-     *[](Scalar start, Scalar end, int64_t steps, double base, const torch::TensorOptions& options) {
-       return torch::logspace(start, end, steps, base, options);
-     })
-   .define_singleton_method(
-     "_ones",
-     *[](IntArrayRef size, const torch::TensorOptions &options) {
-       return torch::ones(size, options);
-     })
-   .define_singleton_method(
-     "_rand",
-     *[](IntArrayRef size, const torch::TensorOptions &options) {
-       return torch::rand(size, options);
-     })
-   .define_singleton_method(
-     "_randint",
-     *[](int64_t low, int64_t high, IntArrayRef size, const torch::TensorOptions &options) {
-       return torch::randint(low, high, size, options);
-     })
-   .define_singleton_method(
-     "_randn",
-     *[](IntArrayRef size, const torch::TensorOptions &options) {
-       return torch::randn(size, options);
-     })
-   .define_singleton_method(
-     "_randperm",
-     *[](int64_t n, const torch::TensorOptions &options) {
-       return torch::randperm(n, options);
-     })
-   .define_singleton_method(
-     "_zeros",
-     *[](IntArrayRef size, const torch::TensorOptions &options) {
-       return torch::zeros(size, options);
-     })
    // begin operations
    .define_singleton_method(
      "_save",
@@ -301,20 +271,15 @@ void Init_ext()
        // https://github.com/pytorch/pytorch/issues/20356#issuecomment-567663701
        return torch::pickle_load(v);
      })
-   .define_singleton_method(
-     "_binary_cross_entropy_with_logits",
-     *[](const Tensor &input, const Tensor &target, OptionalTensor weight, OptionalTensor pos_weight, MyReduction reduction) {
-       return torch::binary_cross_entropy_with_logits(input, target, weight, pos_weight, reduction);
-     })
    .define_singleton_method(
      "_from_blob",
-     *[](String s, IntArrayRef size, const torch::TensorOptions &options) {
+     *[](String s, std::vector<int64_t> size, const torch::TensorOptions &options) {
        void *data = const_cast<char *>(s.c_str());
        return torch::from_blob(data, size, options);
      })
    .define_singleton_method(
      "_tensor",
-     *[](Array a, IntArrayRef size, const torch::TensorOptions &options) {
+     *[](Array a, std::vector<int64_t> size, const torch::TensorOptions &options) {
        auto dtype = options.dtype();
        torch::Tensor t;
        if (dtype == torch::kBool) {
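
Note: switching _from_blob and _tensor from IntArrayRef to std::vector<int64_t> is likely more than cosmetic: c10::IntArrayRef is a non-owning view over someone else's storage, so binding one to a value converted from a Ruby Array risks a dangling reference, whereas std::vector<int64_t> owns its elements for the duration of the call. The user-facing API is unchanged:

    Torch.tensor([[1, 2], [3, 4]], dtype: :int64)
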
@@ -347,6 +312,25 @@ void Init_ext()
    .define_method("numel", &torch::Tensor::numel)
    .define_method("element_size", &torch::Tensor::element_size)
    .define_method("requires_grad", &torch::Tensor::requires_grad)
+   // in C++ for performance
+   .define_method(
+     "shape",
+     *[](Tensor& self) {
+       Array a;
+       for (auto &size : self.sizes()) {
+         a.push(size);
+       }
+       return a;
+     })
+   .define_method(
+     "_strides",
+     *[](Tensor& self) {
+       Array a;
+       for (auto &stride : self.strides()) {
+         a.push(stride);
+       }
+       return a;
+     })
    .define_method(
      "_index",
      *[](Tensor& self, Array indices) {
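
Note: shape and _strides are implemented in C++ so a single call builds the whole Ruby Array, rather than crossing the extension boundary once per dimension. For example:

    x = Torch.rand(2, 3)
    x.shape             # => [2, 3], a plain Ruby Array
    x.shape.reduce(:*)  # => 6, same as x.numel
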
@@ -379,11 +363,6 @@ void Init_ext()
      *[](Tensor& self, bool requires_grad) {
        return self.set_requires_grad(requires_grad);
      })
-   .define_method(
-     "_backward",
-     *[](Tensor& self, OptionalTensor gradient, bool create_graph, bool retain_graph) {
-       return self.backward(gradient, create_graph, retain_graph);
-     })
    .define_method(
      "grad",
      *[](Tensor& self) {
@@ -430,9 +409,19 @@ void Init_ext()
          tensor = tensor.to(device);
        }

+       if (!tensor.is_contiguous()) {
+         tensor = tensor.contiguous();
+       }
+
        auto data_ptr = (const char *) tensor.data_ptr();
        return std::string(data_ptr, tensor.numel() * tensor.element_size());
      })
+   // for TorchVision
+   .define_method(
+     "_data_ptr",
+     *[](Tensor& self) {
+       return reinterpret_cast<uintptr_t>(self.data_ptr());
+     })
    // TODO figure out a better way to do this
    .define_method(
      "_flat_data",
ext/torch/extconf.rb:
@@ -17,6 +17,9 @@ if have_library("omp") || have_library("gomp")
  end

  if apple_clang
+   # silence rice warnings
+   $CXXFLAGS += " -Wno-deprecated-declarations"
+
    # silence ruby/intern.h warning
    $CXXFLAGS += " -Wno-deprecated-register"

@@ -66,8 +69,8 @@ end

  # generate C++ functions
  puts "Generating C++ functions..."
- require_relative "../../lib/torch/native/generator"
- Torch::Native::Generator.generate_cpp_functions
+ require_relative "../../codegen/generate_functions"
+ generate_functions

  # create makefile
  create_makefile("torch/ext")
ext/torch/nn_functions.h (new file):
@@ -0,0 +1,6 @@
+ // generated by rake generate:functions
+ // do not edit by hand
+
+ #pragma once
+
+ void add_nn_functions(Module m);
ext/torch/ruby_arg_parser.cpp (new file):
@@ -0,0 +1,593 @@
+ // adapted from PyTorch - python_arg_parser.cpp
+
+ #include "ruby_arg_parser.h"
+
+ VALUE THPVariableClass = Qnil;
+
+ static std::unordered_map<std::string, ParameterType> type_map = {
+   {"Tensor", ParameterType::TENSOR},
+   {"Scalar", ParameterType::SCALAR},
+   {"int64_t", ParameterType::INT64},
+   {"double", ParameterType::DOUBLE},
+   {"complex", ParameterType::COMPLEX},
+   {"TensorList", ParameterType::TENSOR_LIST},
+   {"IntArrayRef", ParameterType::INT_LIST},
+   {"ArrayRef<double>", ParameterType::FLOAT_LIST},
+   {"Generator", ParameterType::GENERATOR},
+   {"bool", ParameterType::BOOL},
+   {"Storage", ParameterType::STORAGE},
+   // {"PyObject*", ParameterType::PYOBJECT},
+   {"ScalarType", ParameterType::SCALARTYPE},
+   {"Layout", ParameterType::LAYOUT},
+   {"MemoryFormat", ParameterType::MEMORY_FORMAT},
+   {"QScheme", ParameterType::QSCHEME},
+   {"Device", ParameterType::DEVICE},
+   {"std::string", ParameterType::STRING},
+   {"Dimname", ParameterType::DIMNAME},
+   {"DimnameList", ParameterType::DIMNAME_LIST},
+ };
+
+ static const std::unordered_map<std::string, std::vector<std::string>> numpy_compatibility_arg_names = {
+   {"dim", {"axis"}},
+   {"keepdim", {"keepdims"}},
+   {"input", {"x", "a", "x1"}},
+   {"other", {"x2"}},
+ };
+
+ static bool should_allow_numbers_as_tensors(const std::string& name) {
+   static std::unordered_set<std::string> allowed = {
+     "add", "add_", "add_out",
+     "div", "div_", "div_out",
+     "mul", "mul_", "mul_out",
+     "sub", "sub_", "sub_out",
+     "true_divide", "true_divide_", "true_divide_out",
+     "floor_divide", "floor_divide_", "floor_divide_out"
+   };
+   return allowed.find(name) != allowed.end();
+ }
+
+ FunctionParameter::FunctionParameter(const std::string& fmt, bool keyword_only)
+   : optional(false)
+   , allow_none(false)
+   , keyword_only(keyword_only)
+   , size(0)
+   , default_scalar(0)
+ {
+   auto space = fmt.find(' ');
+   if (space == std::string::npos) {
+     throw std::runtime_error("FunctionParameter(): missing type: " + fmt);
+   }
+
+   auto type_str = fmt.substr(0, space);
+
+   auto question = type_str.find('?');
+   if (question != std::string::npos) {
+     allow_none = true;
+     type_str = type_str.substr(0, question);
+   }
+
+   // Parse and remove brackets from type_str
+   auto bracket = type_str.find('[');
+   if (bracket != std::string::npos) {
+     auto size_str = type_str.substr(bracket + 1, type_str.length() - bracket - 2);
+     size = atoi(size_str.c_str());
+     type_str = type_str.substr(0, bracket);
+   }
+
+   auto name_str = fmt.substr(space + 1);
+   auto it = type_map.find(type_str);
+   if (it == type_map.end()) {
+     throw std::runtime_error("FunctionParameter(): invalid type string: " + type_str);
+   }
+   type_ = it->second;
+
+   auto eq = name_str.find('=');
+   if (eq != std::string::npos) {
+     name = name_str.substr(0, eq);
+     optional = true;
+     set_default_str(name_str.substr(eq + 1));
+   } else {
+     name = name_str;
+   }
+   ruby_name = THPUtils_internSymbol(name);
+   auto np_compat_it = numpy_compatibility_arg_names.find(name);
+   if (np_compat_it != numpy_compatibility_arg_names.end()) {
+     for (const auto& str: np_compat_it->second) {
+       numpy_python_names.push_back(THPUtils_internSymbol(str));
+     }
+   }
+ }
+
+ bool is_tensor_list(VALUE obj, int argnum, bool throw_error) {
+   if (!RB_TYPE_P(obj, T_ARRAY)) {
+     return false;
+   }
+   auto size = RARRAY_LEN(obj);
+   for (int idx = 0; idx < size; idx++) {
+     VALUE iobj = rb_ary_entry(obj, idx);
+     if (!THPVariable_Check(iobj)) {
+       if (throw_error) {
+         // report the class of the offending element, not the enclosing array
+         rb_raise(rb_eArgError, "expected Tensor as element %d in argument %d, but got %s",
+             static_cast<int>(idx), argnum, rb_obj_classname(iobj));
+       }
+       return false;
+     }
+   }
+   return true;
+ }
+
+ // argnum is needed for raising the TypeError, it's used in the error message.
+ auto FunctionParameter::check(VALUE obj, int argnum) -> bool
+ {
+   switch (type_) {
+     case ParameterType::TENSOR: {
+       if (THPVariable_Check(obj)) {
+         return true;
+       }
+       return allow_numbers_as_tensors && THPUtils_checkScalar(obj);
+     }
+     case ParameterType::SCALAR:
+     case ParameterType::COMPLEX:
+       if (RB_TYPE_P(obj, T_COMPLEX)) {
+         return true;
+       }
+       // fallthrough
+     case ParameterType::DOUBLE: {
+       if (RB_FLOAT_TYPE_P(obj) || FIXNUM_P(obj)) {
+         return true;
+       }
+       if (THPVariable_Check(obj)) {
+         auto var = from_ruby<torch::Tensor>(obj);
+         return !var.requires_grad() && var.dim() == 0;
+       }
+       return false;
+     }
+     case ParameterType::INT64: {
+       if (FIXNUM_P(obj)) {
+         return true;
+       }
+       if (THPVariable_Check(obj)) {
+         auto var = from_ruby<torch::Tensor>(obj);
+         return at::isIntegralType(var.scalar_type(), /*includeBool=*/false) && !var.requires_grad() && var.dim() == 0;
+       }
+       return false;
+     }
+     case ParameterType::DIMNAME: return false; // return THPUtils_checkDimname(obj);
+     case ParameterType::DIMNAME_LIST: {
+       return false;
+       // if (THPUtils_checkDimnameList(obj)) {
+       //   return true;
+       // }
+       // // if a size is specified (e.g. DimnameList[1]) we also allow passing a single Dimname
+       // return size == 1 && THPUtils_checkDimname(obj);
+     }
+     case ParameterType::TENSOR_LIST: {
+       return is_tensor_list(obj, argnum, true /* throw_error */);
+     }
+     case ParameterType::INT_LIST: {
+       if (RB_TYPE_P(obj, T_ARRAY)) {
+         return true;
+       }
+       // if a size is specified (e.g. IntArrayRef[2]) we also allow passing a single int
+       return size > 0 && FIXNUM_P(obj);
+     }
+     case ParameterType::FLOAT_LIST: return (RB_TYPE_P(obj, T_ARRAY));
+     case ParameterType::GENERATOR: return false; // return THPGenerator_Check(obj);
+     case ParameterType::BOOL: return obj == Qtrue || obj == Qfalse;
+     case ParameterType::STORAGE: return false; // return isStorage(obj);
+     // case ParameterType::PYOBJECT: return true;
+     case ParameterType::SCALARTYPE: return SYMBOL_P(obj);
+     case ParameterType::LAYOUT: return SYMBOL_P(obj);
+     case ParameterType::MEMORY_FORMAT: return false; // return THPMemoryFormat_Check(obj);
+     case ParameterType::QSCHEME: return false; // return THPQScheme_Check(obj);
+     case ParameterType::DEVICE: return RB_TYPE_P(obj, T_STRING); // TODO check device
+     case ParameterType::STRING: return RB_TYPE_P(obj, T_STRING);
+     default: throw std::runtime_error("unknown parameter type");
+   }
+ }
+
+ std::string FunctionParameter::type_name() const {
+   switch (type_) {
+     case ParameterType::TENSOR: return "Tensor";
+     case ParameterType::SCALAR: return "Number";
+     case ParameterType::INT64: return "int";
+     case ParameterType::DOUBLE: return "float";
+     case ParameterType::COMPLEX: return "complex";
+     case ParameterType::TENSOR_LIST: return "array of Tensors";
+     case ParameterType::INT_LIST: return "array of ints";
+     case ParameterType::FLOAT_LIST: return "array of floats";
+     case ParameterType::GENERATOR: return "torch.Generator";
+     case ParameterType::BOOL: return "bool";
+     case ParameterType::STORAGE: return "torch.Storage";
+     // case ParameterType::PYOBJECT: return "object";
+     case ParameterType::SCALARTYPE: return "torch.dtype";
+     case ParameterType::LAYOUT: return "torch.layout";
+     case ParameterType::MEMORY_FORMAT: return "torch.memory_format";
+     case ParameterType::QSCHEME: return "torch.qscheme";
+     case ParameterType::DEVICE: return "torch.device";
+     case ParameterType::STRING: return "str";
+     case ParameterType::DIMNAME: return "name";
+     case ParameterType::DIMNAME_LIST: return "array of names";
+     default: throw std::runtime_error("unknown parameter type");
+   }
+ }
+
+ static inline c10::optional<int64_t> parse_as_integer(const std::string& s) {
+   if (s.empty())
+     return c10::nullopt;
+   char *str_end;
+   long ans = strtol(s.c_str(), &str_end, 0);
+   // *str_end == 0 if the entire string was parsed as an integer.
+   return (*str_end == 0) ? c10::optional<int64_t>(ans) : c10::nullopt;
+ }
+
+ /*
+ Parse default value of IntArrayRef declared in native_functions.yaml
+
+ There are two kinds of default values:
+ 1. IntArrayRef[2] x=1 (where size=2, value={1,1})
+ 2. IntArrayRef x={1,2,3} (where size=3, value={1,2,3}, note that there cannot be space after comma since native_parse.py uses ', ' to split args)
+ */
+ static inline std::vector<int64_t> parse_intlist_args(const std::string& s, int64_t size) {
+   size_t n = s.size();
+
+   if (s.empty()) return std::vector<int64_t>();
+
+   // case 1. s is an int (e.g., s=2)
+   if (s[0] != '{') {
+     return std::vector<int64_t>(size, std::stol(s));
+   }
+
+   // case 2. s is a list of dims (e.g., s={1,2})
+
+   // since already checked left brace '{' above, here only checks right brace '}'
+   TORCH_CHECK(s[n - 1] == '}', "Default value of IntArrayRef is missing right brace '}', found ", s[n - 1]);
+
+   auto args = std::vector<int64_t>();
+   std::istringstream ss(s.substr(1, s.length() - 2)); // exclude '{' and '}'
+   std::string tok;
+
+   while (std::getline(ss, tok, ',')) {
+     args.emplace_back(std::stol(tok));
+   }
+   return args;
+ }
+
+ void FunctionParameter::set_default_str(const std::string& str) {
+   if (str == "None") {
+     allow_none = true;
+   }
+   if (type_ == ParameterType::TENSOR) {
+     if (str != "None") {
+       throw std::runtime_error("default value for Tensor must be none, got: " + str);
+     }
+   } else if (type_ == ParameterType::INT64) {
+     default_int = atol(str.c_str());
+   } else if (type_ == ParameterType::BOOL) {
+     default_bool = (str == "True" || str == "true");
+   } else if (type_ == ParameterType::DOUBLE) {
+     default_double = atof(str.c_str());
+   } else if (type_ == ParameterType::COMPLEX) {
+     default_complex[0] = atof(str.c_str()); // TODO: parse "x + xj"?
+     default_complex[1] = 0;
+   } else if (type_ == ParameterType::SCALAR) {
+     if (str != "None") {
+       // we sometimes rely on integer-vs-float values, e.g. with arange.
+       const auto as_integer = parse_as_integer(str);
+       default_scalar = as_integer.has_value() ? at::Scalar(as_integer.value())
+                                               : at::Scalar(atof(str.c_str()));
+     }
+   } else if (type_ == ParameterType::INT_LIST) {
+     if (str != "None") {
+       default_intlist = parse_intlist_args(str, size);
+     }
+   } else if (type_ == ParameterType::FLOAT_LIST) {
+     if (str != "None") {
+       throw std::runtime_error("Defaults not supported for float[]");
+     }
+   } else if (type_ == ParameterType::SCALARTYPE) {
+     if (str == "None") {
+       default_scalartype = at::ScalarType::Undefined;
+     } else if (str == "torch.int64") {
+       default_scalartype = at::ScalarType::Long;
+     } else {
+       throw std::runtime_error("invalid default value for ScalarType: " + str);
+     }
+   } else if (type_ == ParameterType::LAYOUT) {
+     if (str == "None") {
+       TORCH_INTERNAL_ASSERT_DEBUG_ONLY(allow_none);
+     } else if (str == "torch.strided") {
+       default_layout = at::Layout::Strided;
+     } else if (str == "torch.sparse_coo") {
+       default_layout = at::Layout::Sparse;
+     } else {
+       throw std::runtime_error("invalid default value for layout: " + str);
+     }
+   } else if (type_ == ParameterType::DEVICE) {
+     if (str != "None") {
+       throw std::runtime_error("invalid device: " + str);
+     }
+   } else if (type_ == ParameterType::STRING) {
+     if (str != "None" && str != "") {
+       throw std::runtime_error("invalid default string: " + str);
+     }
+   }
+ }
+
+ FunctionSignature::FunctionSignature(const std::string& fmt, int index)
+   : min_args(0)
+   , max_args(0)
+   , max_pos_args(0)
+   , index(index)
+   , hidden(false)
+   , deprecated(false)
+ {
+   auto open_paren = fmt.find('(');
+   if (open_paren == std::string::npos) {
+     throw std::runtime_error("missing opening parenthesis: " + fmt);
+   }
+   name = fmt.substr(0, open_paren);
+
+   bool allow_numbers_as_tensors = should_allow_numbers_as_tensors(name);
+
+   auto last_offset = open_paren + 1;
+   auto next_offset = last_offset;
+   bool keyword_only = false;
+   bool done = false;
+   while (!done) {
+     auto offset = fmt.find(", ", last_offset);
+     if (offset == std::string::npos) {
+       offset = fmt.find(')', last_offset);
+       done = true;
+       next_offset = offset + 1;
+       // this 'if' happens for an empty parameter list, i.e. fn().
+       if (offset == last_offset) {
+         last_offset = next_offset;
+         break;
+       }
+     } else {
+       next_offset = offset + 2;
+     }
+     if (offset == std::string::npos) {
+       throw std::runtime_error("missing closing parenthesis: " + fmt);
+     }
+     if (offset == last_offset) {
+       throw std::runtime_error("malformed signature: " + fmt);
+     }
+
+     auto param_str = fmt.substr(last_offset, offset - last_offset);
+     last_offset = next_offset;
+     if (param_str == "*") {
+       keyword_only = true;
+     } else {
+       params.emplace_back(param_str, keyword_only);
+       params.back().allow_numbers_as_tensors = allow_numbers_as_tensors;
+     }
+   }
+
+   if (fmt.substr(last_offset) == "|deprecated") {
+     hidden = true;
+     // TODO: raise warning when parsing deprecated signatures
+     deprecated = true;
+   } else if (fmt.substr(last_offset) == "|hidden") {
+     hidden = true;
+   }
+
+   max_args = params.size();
+
+   // count the number of non-optional args
+   for (auto& param : params) {
+     if (!param.optional) {
+       min_args++;
+     }
+     if (!param.keyword_only) {
+       max_pos_args++;
+     }
+   }
+ }
+
+ std::string FunctionSignature::toString() const {
+   // TODO: consider printing more proper schema strings with defaults, optionals, etc.
+   std::ostringstream ss;
+   bool keyword_already = false;
+   ss << "(";
+   int i = 0;
+   for (auto& param : params) {
+     if (i != 0) {
+       ss << ", ";
+     }
+     if (param.keyword_only && !keyword_already) {
+       ss << "*, ";
+       keyword_already = true;
+     }
+     ss << param.type_name() << " " << param.name;
+     i++;
+   }
+   ss << ")";
+   return ss.str();
+ }
+
+ [[noreturn]]
+ static void extra_args(const FunctionSignature& signature, ssize_t nargs) {
+   const long max_pos_args = signature.max_pos_args;
+   const long min_args = signature.min_args;
+   const long nargs_ = nargs;
+   if (min_args != max_pos_args) {
+     rb_raise(rb_eArgError, "%s() takes from %ld to %ld positional arguments but %ld were given",
+         signature.name.c_str(), min_args, max_pos_args, nargs_);
+   }
+   rb_raise(rb_eArgError, "%s() takes %ld positional argument%s but %ld %s given",
+       signature.name.c_str(),
+       max_pos_args, max_pos_args == 1 ? "" : "s",
+       nargs_, nargs == 1 ? "was" : "were");
+ }
+
+ [[noreturn]]
+ static void missing_args(const FunctionSignature& signature, int idx) {
+   int num_missing = 0;
+   std::stringstream ss;
+
+   auto& params = signature.params;
+   for (auto it = params.begin() + idx; it != params.end(); ++it) {
+     if (!it->optional) {
+       if (num_missing > 0) {
+         ss << ", ";
+       }
+       ss << '"' << it->name << '"';
+       num_missing++;
+     }
+   }
+
+   // pluralize only when more than one argument is missing
+   rb_raise(rb_eArgError, "%s() missing %d required positional argument%s: %s",
+       signature.name.c_str(),
+       num_missing,
+       num_missing == 1 ? "" : "s",
+       ss.str().c_str());
+ }
+
+ static ssize_t find_param(FunctionSignature& signature, VALUE name) {
+   ssize_t i = 0;
+   for (auto& param : signature.params) {
+     bool cmp = name == param.ruby_name;
+     if (cmp) {
+       return i;
+     }
+     i++;
+   }
+   return -1;
+ }
+
+ [[noreturn]]
+ static void extra_kwargs(FunctionSignature& signature, VALUE kwargs, ssize_t num_pos_args) {
+   VALUE key;
+
+   VALUE keys = rb_funcall(kwargs, rb_intern("keys"), 0);
+   if (RARRAY_LEN(keys) > 0) {
+     key = rb_ary_entry(keys, 0);
+
+     if (!THPUtils_checkSymbol(key)) {
+       rb_raise(rb_eArgError, "keywords must be symbols, not %s", rb_obj_classname(key));
+     }
+
+     auto param_idx = find_param(signature, key);
+     if (param_idx < 0) {
+       rb_raise(rb_eArgError, "%s() got an unexpected keyword argument '%s'",
+           signature.name.c_str(), THPUtils_unpackSymbol(key).c_str());
+     }
+
+     if (param_idx < num_pos_args) {
+       rb_raise(rb_eArgError, "%s() got multiple values for argument '%s'",
+           signature.name.c_str(), THPUtils_unpackSymbol(key).c_str());
+     }
+   }
+
+   // this should never be hit
+   rb_raise(rb_eArgError, "invalid keyword arguments");
+ }
+
+ VALUE missing = Qundef;
+
+ bool FunctionSignature::parse(VALUE self, VALUE args, VALUE kwargs, std::vector<VALUE> &dst, // NOLINT
+     bool raise_exception) {
+   auto nargs = NIL_P(args) ? 0 : RARRAY_LEN(args);
+   ssize_t remaining_kwargs = NIL_P(kwargs) ? 0 : RHASH_SIZE(kwargs);
+   ssize_t arg_pos = 0;
+   bool allow_varargs_intlist = false;
+
+   // if there is a single positional IntArrayRef argument, i.e. expand(..), view(...),
+   // allow a var-args style IntArrayRef, so expand(5,3) behaves as expand((5,3))
+   if (max_pos_args == 1 && params[0].type_ == ParameterType::INT_LIST) {
+     allow_varargs_intlist = true;
+   }
+
+   if (nargs > max_pos_args && !allow_varargs_intlist) {
+     if (raise_exception) {
+       // foo() takes 2 positional arguments but 3 were given
+       extra_args(*this, nargs);
+     }
+     return false;
+   }
+
+   // if (!overloaded_args.empty()) {
+   //   overloaded_args.clear();
+   // }
+
+   int i = 0;
+   // if (self != nullptr && !THPVariable_CheckExact(self) && check_has_torch_function(self)) {
+   //   append_overloaded_arg(&this->overloaded_args, self);
+   // }
+   for (auto& param : params) {
+     VALUE obj = missing;
+     bool is_kwd = false;
+     if (arg_pos < nargs) {
+       // extra positional args given after single positional IntArrayRef arg
+       if (param.keyword_only) {
+         if (raise_exception) {
+           extra_args(*this, nargs);
+         }
+         return false;
+       }
+       obj = rb_ary_entry(args, arg_pos);
+     } else if (!NIL_P(kwargs)) {
+       obj = rb_hash_lookup2(kwargs, param.ruby_name, missing);
+       // for (VALUE numpy_name: param.numpy_python_names) {
+       //   if (obj) {
+       //     break;
+       //   }
+       //   obj = rb_hash_aref(kwargs, numpy_name);
+       // }
+       is_kwd = true;
+     }
+
+     if ((obj == missing && param.optional) || (NIL_P(obj) && param.allow_none)) {
+       dst[i++] = Qnil;
+     } else if (obj == missing) {
+       if (raise_exception) {
+         // foo() missing 1 required positional argument: "b"
+         missing_args(*this, i);
+       }
+       return false;
+     } else if (param.check(obj, i)) {
+       dst[i++] = obj;
+     // XXX: the Variable check is necessary because sizes become tensors when
+     // tracer is enabled. This behavior easily leads to ambiguities, and we
+     // should avoid having complex signatures that make use of it...
+     } else if (allow_varargs_intlist && arg_pos == 0 && !is_kwd &&
+                THPUtils_checkIndex(obj)) {
+       // take all positional arguments as this parameter
+       // e.g. permute(1, 2, 3) -> permute((1, 2, 3))
+       dst[i++] = args;
+       arg_pos = nargs;
+       continue;
+     } else if (raise_exception) {
+       if (is_kwd) {
+         // foo(): argument 'other' must be str, not int
+         rb_raise(rb_eArgError, "%s(): argument '%s' must be %s, not %s",
+             name.c_str(), param.name.c_str(), param.type_name().c_str(),
+             rb_obj_classname(obj));
+       } else {
+         // foo(): argument 'other' (position 2) must be str, not int
+         rb_raise(rb_eArgError, "%s(): argument '%s' (position %ld) must be %s, not %s",
+             name.c_str(), param.name.c_str(), static_cast<long>(arg_pos + 1),
+             param.type_name().c_str(), rb_obj_classname(obj));
+       }
+     } else {
+       return false;
+     }
+
+     if (!is_kwd) {
+       arg_pos++;
+     } else if (obj != missing) {
+       remaining_kwargs--;
+     }
+   }
+
+   if (remaining_kwargs > 0) {
+     if (raise_exception) {
+       // foo() got an unexpected keyword argument "b"
+       extra_kwargs(*this, kwargs, nargs);
+     }
+     return false;
+   }
+   return true;
+ }
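
Note: FunctionSignature::parse mirrors PyTorch's Python argument parser: positional arguments are matched in order, keywords by symbol, defaults fill the gaps, a lone IntArrayRef parameter may be given var-args style, and mismatches raise the same shaped errors as in Python. Assuming the generated functions dispatch through this parser, the user-visible behavior looks like:

    x = Torch.rand(5, 3)

    x.sum             # all parameters take their defaults
    x.sum(1)          # dim passed positionally
    x.sum(dim: 1)     # same parameter by keyword (matched via ruby_name)
    x.view(3, 5)      # lone IntArrayRef parameter, var-args style
    x.view([3, 5])    # equivalent explicit array
    # x.sum(foo: 1)   # would raise ArgumentError:
    #                 #   sum() got an unexpected keyword argument 'foo'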