torch-rb 0.1.0 → 0.1.5

Files changed (94)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +40 -0
  3. data/LICENSE.txt +46 -22
  4. data/README.md +85 -19
  5. data/ext/torch/ext.cpp +274 -256
  6. data/ext/torch/extconf.rb +9 -0
  7. data/ext/torch/nn_functions.cpp +595 -0
  8. data/ext/torch/nn_functions.hpp +6 -0
  9. data/ext/torch/templates.hpp +250 -0
  10. data/ext/torch/tensor_functions.cpp +1860 -0
  11. data/ext/torch/tensor_functions.hpp +6 -0
  12. data/ext/torch/torch_functions.cpp +2875 -0
  13. data/ext/torch/torch_functions.hpp +6 -0
  14. data/lib/torch.rb +199 -84
  15. data/lib/torch/ext.bundle +0 -0
  16. data/lib/torch/inspector.rb +52 -25
  17. data/lib/torch/native/dispatcher.rb +48 -0
  18. data/lib/torch/native/function.rb +78 -0
  19. data/lib/torch/native/generator.rb +149 -0
  20. data/lib/torch/native/native_functions.yaml +6837 -0
  21. data/lib/torch/native/parser.rb +97 -0
  22. data/lib/torch/nn/alpha_dropout.rb +9 -0
  23. data/lib/torch/nn/avg_pool2d.rb +14 -0
  24. data/lib/torch/nn/avg_poolnd.rb +9 -0
  25. data/lib/torch/nn/bce_loss.rb +13 -0
  26. data/lib/torch/nn/bce_with_logits_loss.rb +15 -0
  27. data/lib/torch/nn/bilinear.rb +38 -0
  28. data/lib/torch/nn/conv2d.rb +14 -29
  29. data/lib/torch/nn/convnd.rb +41 -0
  30. data/lib/torch/nn/cosine_embedding_loss.rb +14 -0
  31. data/lib/torch/nn/cosine_similarity.rb +15 -0
  32. data/lib/torch/nn/cross_entropy_loss.rb +14 -0
  33. data/lib/torch/nn/ctc_loss.rb +15 -0
  34. data/lib/torch/nn/dropout.rb +9 -0
  35. data/lib/torch/nn/dropout2d.rb +9 -0
  36. data/lib/torch/nn/dropout3d.rb +9 -0
  37. data/lib/torch/nn/dropoutnd.rb +15 -0
  38. data/lib/torch/nn/embedding.rb +52 -0
  39. data/lib/torch/nn/embedding_bag.rb +34 -0
  40. data/lib/torch/nn/feature_alpha_dropout.rb +9 -0
  41. data/lib/torch/nn/functional.rb +194 -11
  42. data/lib/torch/nn/hinge_embedding_loss.rb +14 -0
  43. data/lib/torch/nn/identity.rb +14 -0
  44. data/lib/torch/nn/init.rb +58 -1
  45. data/lib/torch/nn/kl_div_loss.rb +13 -0
  46. data/lib/torch/nn/l1_loss.rb +13 -0
  47. data/lib/torch/nn/leaky_relu.rb +20 -0
  48. data/lib/torch/nn/linear.rb +12 -11
  49. data/lib/torch/nn/log_softmax.rb +14 -0
  50. data/lib/torch/nn/loss.rb +10 -0
  51. data/lib/torch/nn/margin_ranking_loss.rb +14 -0
  52. data/lib/torch/nn/max_pool2d.rb +9 -0
  53. data/lib/torch/nn/max_poolnd.rb +19 -0
  54. data/lib/torch/nn/module.rb +184 -19
  55. data/lib/torch/nn/mse_loss.rb +2 -2
  56. data/lib/torch/nn/multi_label_margin_loss.rb +13 -0
  57. data/lib/torch/nn/multi_label_soft_margin_loss.rb +13 -0
  58. data/lib/torch/nn/multi_margin_loss.rb +17 -0
  59. data/lib/torch/nn/nll_loss.rb +14 -0
  60. data/lib/torch/nn/pairwise_distance.rb +16 -0
  61. data/lib/torch/nn/parameter.rb +4 -0
  62. data/lib/torch/nn/poisson_nll_loss.rb +16 -0
  63. data/lib/torch/nn/prelu.rb +19 -0
  64. data/lib/torch/nn/relu.rb +8 -3
  65. data/lib/torch/nn/rnn.rb +22 -0
  66. data/lib/torch/nn/rnn_base.rb +154 -0
  67. data/lib/torch/nn/sequential.rb +1 -10
  68. data/lib/torch/nn/sigmoid.rb +9 -0
  69. data/lib/torch/nn/smooth_l1_loss.rb +13 -0
  70. data/lib/torch/nn/soft_margin_loss.rb +13 -0
  71. data/lib/torch/nn/softmax.rb +18 -0
  72. data/lib/torch/nn/softmax2d.rb +10 -0
  73. data/lib/torch/nn/softmin.rb +14 -0
  74. data/lib/torch/nn/softplus.rb +19 -0
  75. data/lib/torch/nn/triplet_margin_loss.rb +18 -0
  76. data/lib/torch/nn/weighted_loss.rb +10 -0
  77. data/lib/torch/optim/adadelta.rb +57 -0
  78. data/lib/torch/optim/adagrad.rb +71 -0
  79. data/lib/torch/optim/adam.rb +81 -0
  80. data/lib/torch/optim/adamax.rb +68 -0
  81. data/lib/torch/optim/adamw.rb +82 -0
  82. data/lib/torch/optim/asgd.rb +65 -0
  83. data/lib/torch/optim/lr_scheduler/lr_scheduler.rb +33 -0
  84. data/lib/torch/optim/lr_scheduler/step_lr.rb +17 -0
  85. data/lib/torch/optim/optimizer.rb +62 -0
  86. data/lib/torch/optim/rmsprop.rb +76 -0
  87. data/lib/torch/optim/rprop.rb +68 -0
  88. data/lib/torch/optim/sgd.rb +60 -0
  89. data/lib/torch/random.rb +10 -0
  90. data/lib/torch/tensor.rb +92 -21
  91. data/lib/torch/utils/data/data_loader.rb +15 -0
  92. data/lib/torch/utils/data/tensor_dataset.rb +8 -1
  93. data/lib/torch/version.rb +1 -1
  94. metadata +74 -3
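
Two of the new files add data utilities (`data_loader.rb` and `tensor_dataset.rb`). A minimal sketch of how they might be combined, assuming constructors that mirror PyTorch's (the `batch_size:` keyword and block-iteration style are assumptions):

```ruby
require "torch"

x = Torch.randn(100, 10)
y = Torch.randn(100, 1)

# TensorDataset pairs inputs with targets; DataLoader yields batches
dataset = Torch::Utils::Data::TensorDataset.new(x, y)
loader = Torch::Utils::Data::DataLoader.new(dataset, batch_size: 32)

loader.each do |xb, yb|
  # xb is a batch of inputs, yb the matching targets
end
```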
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 38e16e7f07d004fd9625f168694356d551c79cbc62b0131fe1403e4c0995f296
- data.tar.gz: 66bf6ae0e4dd373a7542fbfb1cfb9dbd89fc455e16166e6a76d0945b32fecf38
+ metadata.gz: 6b47306ed525e1a20d25cb8324d4658f750c18afa5704c9b7bafc215d8f568c1
+ data.tar.gz: dad6ddf955b111989b061e5af146006a32c83dc1ea1ca5005a6b6e34bc9a4892
  SHA512:
- metadata.gz: d100e3a21ac877fe93ac61e9b5e0d8a5e61126684fc037dda3e9f703b040188b1e1523aa4111dff4aaf92ada1001597c5f60674b9583b14d31afd18dbf1ff18d
- data.tar.gz: c234dee79e26d3ee25ade2aaddd75f155dea6d59d8b9c5af2c571423a7aaa8a6489f5cfce89f09f390468a951b1644a4212c19525a79816be09214f0938860a8
+ metadata.gz: 5d26e3642bf7cd921b9b570052df353d4c32b1bd955a6fbbf5f30249631fa4c0d4624f4fa91a1c06f61b3b0d6461cd117ab4df185cf013e915d2f63e52dbcf7c
+ data.tar.gz: 1728ce9b579f41f7a567e63d7256c82bb352840b67f16d88aac930a99e5abbf5a5f4061c5f9da16fb47d1664567e7956d276a8b2b44f13d2263032486afb53e8
data/CHANGELOG.md CHANGED
@@ -1,3 +1,43 @@
+ ## 0.1.5 (2019-12-06)
+
+ - Added many more functions
+ - Added tensor classes - `FloatTensor`, `LongTensor`, etc
+ - Improved modules
+
+ ## 0.1.4 (2019-12-01)
+
+ - Added distance functions
+ - Added more activations
+ - Added more linear layers
+ - Added more loss functions
+ - Added more init methods
+ - Added support for tensor assignment
+
+ ## 0.1.3 (2019-11-30)
+
+ - Changed to BSD 3-Clause license to match PyTorch
+ - Added many optimizers
+ - Added `StepLR` learning rate scheduler
+ - Added dropout
+ - Added embedding
+ - Added support for `bool` type
+ - Improved performance of `from_numo`
+
+ ## 0.1.2 (2019-11-27)
+
+ - Added SGD optimizer
+ - Added support for gradient to `backward` method
+ - Added `argmax`, `eq`, `leaky_relu`, `prelu`, and `reshape` methods
+ - Improved indexing
+ - Fixed `zero_grad`
+ - Fixed error with infinite values
+
+ ## 0.1.1 (2019-11-26)
+
+ - Added support for `uint8` and `int8` types
+ - Fixed `undefined symbol` error on Linux
+ - Fixed C++ error messages
+
  ## 0.1.0 (2019-11-26)

  - First release
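
The optimizers and scheduler added in 0.1.3 combine with the module and loss classes listed above. A minimal training sketch, assuming a PyTorch-style `StepLR` constructor (the `step_size:` and `gamma:` keywords are assumptions; the `SGD` call matches the README diff below):

```ruby
require "torch"

net = Torch::NN::Linear.new(10, 1)
criterion = Torch::NN::MSELoss.new
optimizer = Torch::Optim::SGD.new(net.parameters, lr: 0.1)
scheduler = Torch::Optim::LRScheduler::StepLR.new(optimizer, step_size: 30, gamma: 0.1)

x = Torch.randn(8, 10)
y = Torch.randn(8, 1)

5.times do
  optimizer.zero_grad                    # clear old gradients
  loss = criterion.call(net.call(x), y)  # forward pass + loss
  loss.backward                          # backprop
  optimizer.step                         # update weights
  scheduler.step                         # decay the learning rate on schedule
end
```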
data/LICENSE.txt CHANGED
@@ -1,22 +1,46 @@
- Copyright (c) 2019 Andrew Kane
-
- MIT License
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ BSD 3-Clause License
+
+ From Torch-rb:
+
+ Copyright (c) 2019- Andrew Kane
+
+ From PyTorch (for ported code):
+
+ Copyright (c) 2016- Facebook, Inc (Adam Paszke)
+ Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
+ Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
+ Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
+ Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
+ Copyright (c) 2011-2013 NYU (Clement Farabet)
+ Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)
+ Copyright (c) 2006 Idiap Research Institute (Samy Bengio)
+ Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)
+
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America
+ and IDIAP Research Institute nor the names of its contributors may be
+ used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
data/README.md CHANGED
@@ -2,14 +2,16 @@

  :fire: Deep learning for Ruby, powered by [LibTorch](https://pytorch.org)

- **Note:** This gem is currently experimental. There may be breaking changes between each release.
+ This gem is currently experimental. There may be breaking changes between each release. Please report any issues you experience.
+
+ [![Build Status](https://travis-ci.org/ankane/torch-rb.svg?branch=master)](https://travis-ci.org/ankane/torch-rb)

  ## Installation

  First, [install LibTorch](#libtorch-installation). For Homebrew, use:

  ```sh
- brew install ankane/brew/libtorch
+ brew install libtorch
  ```

  Add this line to your application’s Gemfile:
@@ -18,6 +20,8 @@ Add this line to your application’s Gemfile:
  gem 'torch-rb'
  ```

+ It can take a few minutes to compile the extension.
+
  ## Getting Started

  This library follows the [PyTorch API](https://pytorch.org/docs/stable/torch.html). There are a few changes to make it more Ruby-like:
@@ -26,9 +30,11 @@ This library follows the [PyTorch API](https://pytorch.org/docs/stable/torch.htm
  - Methods that return booleans use `?` instead of `is_` (`tensor?` instead of `is_tensor`)
  - Numo is used instead of NumPy (`x.numo` instead of `x.numpy()`)

- Many methods and options are missing at the moment. PRs welcome!
+ Some methods and options are missing at the moment. PRs welcome!

- Some examples below are from [Deep Learning with PyTorch: A 60 Minutes Blitz](https://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html).
+ ## Tutorial
+
+ Some examples below are from [Deep Learning with PyTorch: A 60 Minutes Blitz](https://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html)

  ### Tensors

@@ -143,7 +149,7 @@ Convert a Numo array to a tensor

  ```ruby
  b = Numo::NArray.cast([1, 2, 3])
- Torch.from_numpy(b)
+ Torch.from_numo(b)
  ```

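This rename pairs with the Numo-for-NumPy substitution noted in the API changes above. A quick round-trip sketch (assumes the numo-narray gem is installed; `numo` is the conversion method from the same list):

```ruby
require "torch"
require "numo/narray"

a = Numo::NArray.cast([1, 2, 3])  # the array from the example above
t = Torch.from_numo(a)            # Numo array -> tensor
t.numo                            # tensor -> Numo array
```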
  ### Autograd
@@ -171,17 +177,17 @@ out.backward
  Get gradients

  ```ruby
- x.grad
+ x.grad # tensor([[4.5, 4.5], [4.5, 4.5]])
  ```

  Stop autograd from tracking history

  ```ruby
  x.requires_grad # true
- (x ** 2).requires_grad # true
+ (x**2).requires_grad # true

  Torch.no_grad do
- (x ** 2).requires_grad # false
+ (x**2).requires_grad # false
  end
  ```

@@ -221,7 +227,7 @@ class Net < Torch::NN::Module
  end
  ```

- And run
+ Create an instance of it

  ```ruby
  net = Net.new
@@ -229,6 +235,58 @@ input = Torch.randn(1, 1, 32, 32)
  net.call(input)
  ```

+ Get trainable parameters
+
+ ```ruby
+ net.parameters
+ ```
+
+ Zero the gradient buffers and backprop with random gradients
+
+ ```ruby
+ net.zero_grad
+ out.backward(Torch.randn(1, 10))
+ ```
+
+ Define a loss function
+
+ ```ruby
+ output = net.call(input)
+ target = Torch.randn(10)
+ target = target.view(1, -1)
+ criterion = Torch::NN::MSELoss.new
+ loss = criterion.call(output, target)
+ ```
+
+ Backprop
+
+ ```ruby
+ net.zero_grad
+ p net.conv1.bias.grad
+ loss.backward
+ p net.conv1.bias.grad
+ ```
+
+ Update the weights
+
+ ```ruby
+ learning_rate = 0.01
+ net.parameters.each do |f|
+ f.data.sub!(f.grad.data * learning_rate)
+ end
+ ```
+
+ Use an optimizer
+
+ ```ruby
+ optimizer = Torch::Optim::SGD.new(net.parameters, lr: 0.01)
+ optimizer.zero_grad
+ output = net.call(input)
+ loss = criterion.call(output, target)
+ loss.backward
+ optimizer.step
+ ```
+
  ### Tensor Creation

  Here’s a list of functions to create tensors (descriptions from the [C++ docs](https://pytorch.org/cppdocs/notes/tensor_creation.html)):
@@ -242,7 +300,7 @@ Here’s a list of functions to create tensors (descriptions from the [C++ docs]
  - `empty` returns a tensor with uninitialized values

  ```ruby
- Torch.empty(3)
+ Torch.empty(3) # tensor([7.0054e-45, 0.0000e+00, 0.0000e+00])
  ```

  - `eye` returns an identity matrix
@@ -278,19 +336,19 @@ Here’s a list of functions to create tensors (descriptions from the [C++ docs]
  - `rand` returns a tensor filled with values drawn from a uniform distribution on [0, 1)

  ```ruby
- Torch.rand(3)
+ Torch.rand(3) # tensor([0.5444, 0.8799, 0.5571])
  ```

  - `randint` returns a tensor with integers randomly drawn from an interval

  ```ruby
- Torch.randint(1, 10, [3])
+ Torch.randint(1, 10, [3]) # tensor([7, 6, 4])
  ```

  - `randn` returns a tensor filled with values drawn from a unit normal distribution

  ```ruby
- Torch.randn(3)
+ Torch.randn(3) # tensor([-0.7147, 0.6614, 1.1453])
  ```

  - `randperm` returns a tensor filled with a random permutation of integers in some interval
@@ -305,12 +363,20 @@ Here’s a list of functions to create tensors (descriptions from the [C++ docs]
  Torch.zeros(3) # tensor([0, 0, 0])
  ```

+ ## Examples
+
+ Here are a few full examples:
+
+ - [Image classification with MNIST](examples/mnist) ([日本語版](https://qiita.com/kojix2/items/c19c36dc1bf73ea93409))
+ - [Collaborative filtering with MovieLens](examples/movielens)
+ - [Word embeddings](examples/nlp)
+
  ## LibTorch Installation

- [Download LibTorch](https://pytorch.org/) and run:
+ [Download LibTorch](https://pytorch.org/). For Linux, use the `cxx11 ABI` version. Then run:

  ```sh
- gem install torch-rb -- --with-torch-dir=/path/to/libtorch
+ bundle config build.torch-rb --with-torch-dir=/path/to/libtorch
  ```

  ### Homebrew
@@ -318,10 +384,10 @@ gem install torch-rb -- --with-torch-dir=/path/to/libtorch
  For Mac, you can use Homebrew.

  ```sh
- brew install ankane/brew/libtorch
+ brew install libtorch
  ```

- Then install the gem (no need for `--with-torch-dir`).
+ Then install the gem (no need for `bundle config`).

  ## rbenv

@@ -349,9 +415,9 @@ To get started with development:

  ```sh
  git clone https://github.com/ankane/torch-rb.git
- cd torch
+ cd torch-rb
  bundle install
- bundle exec rake compile
+ bundle exec rake compile -- --with-torch-dir=/path/to/libtorch
  bundle exec rake test
  ```

data/ext/torch/ext.cpp CHANGED
@@ -6,95 +6,36 @@
  #include <rice/Class.hpp>
  #include <rice/Constructor.hpp>

- using namespace Rice;
+ #include "templates.hpp"

- template<>
- inline
- long long from_ruby<long long>(Object x)
- {
- return NUM2LL(x);
- }
-
- template<>
- inline
- Object to_ruby<long long>(long long const & x)
- {
- return LL2NUM(x);
- }
-
- template<>
- inline
- unsigned long long from_ruby<unsigned long long>(Object x)
- {
- return NUM2ULL(x);
- }
+ // generated with:
+ // rake generate:functions
+ #include "torch_functions.hpp"
+ #include "tensor_functions.hpp"
+ #include "nn_functions.hpp"

- template<>
- inline
- Object to_ruby<unsigned long long>(unsigned long long const & x)
- {
- return ULL2NUM(x);
- }
-
- template<>
- inline
- short from_ruby<short>(Object x)
- {
- return NUM2SHORT(x);
- }
-
- template<>
- inline
- Object to_ruby<short>(short const & x)
- {
- return INT2NUM(x);
- }
+ using namespace Rice;

- template<>
- inline
- unsigned short from_ruby<unsigned short>(Object x)
- {
- return NUM2USHORT(x);
+ Object tensor_array(std::tuple<torch::Tensor, torch::Tensor> x) {
+ Array a;
+ a.push(to_ruby<torch::Tensor>(std::get<0>(x)));
+ a.push(to_ruby<torch::Tensor>(std::get<1>(x)));
+ return Object(a);
  }

- template<>
- inline
- Object to_ruby<unsigned short>(unsigned short const & x)
+ extern "C"
+ void Init_ext()
  {
- return UINT2NUM(x);
- }
+ Module rb_mTorch = define_module("Torch");
+ add_torch_functions(rb_mTorch);

- // need to wrap torch::IntArrayRef() since
- // it doesn't own underlying data
- class IntArrayRef {
- std::vector<int64_t> vec;
- public:
- IntArrayRef(Object o) {
- Array a = Array(o);
- for (size_t i = 0; i < a.size(); i++) {
- vec.push_back(from_ruby<int64_t>(a[i]));
- }
- }
- operator torch::IntArrayRef() {
- return torch::IntArrayRef(vec);
- }
- };
+ Class rb_cTensor = define_class_under<torch::Tensor>(rb_mTorch, "Tensor");
+ add_tensor_functions(rb_cTensor);

- template<>
- inline
- IntArrayRef from_ruby<IntArrayRef>(Object x)
- {
- return IntArrayRef(x);
- }
-
- // for now
- typedef float Scalar;
+ Module rb_mNN = define_module_under(rb_mTorch, "NN");
+ add_nn_functions(rb_mNN);

- extern "C"
- void Init_ext()
- {
- Module rb_mTorch = define_module("Torch")
- .define_singleton_method(
+ rb_mTorch.define_singleton_method(
  "grad_enabled?",
  *[]() {
  return torch::GradMode::is_enabled();
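
The new `tensor_array` helper in the hunk above converts a C++ `(Tensor, Tensor)` tuple into a two-element Ruby array, which lets bindings like `_topk` (later in this file) return values and indices together. A hypothetical Ruby-side view, assuming `lib/torch.rb` exposes `_topk` as `Torch.topk`:

```ruby
values, indices = Torch.topk(Torch.tensor([1.0, 3.0, 2.0]), 2)
values  # tensor([3.0, 2.0]) - the two largest entries
indices # tensor([1, 2])    - their positions in the input
```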
@@ -104,11 +45,6 @@ void Init_ext()
  *[](bool enabled) {
  torch::GradMode::set_enabled(enabled);
  })
- .define_singleton_method(
- "floating_point?",
- *[](torch::Tensor& input) {
- return torch::is_floating_point(input);
- })
  .define_singleton_method(
  "manual_seed",
  *[](uint64_t seed) {
@@ -178,172 +114,117 @@ void Init_ext()
  // begin operations
  .define_singleton_method(
  "_mean",
- *[](torch::Tensor& input) {
+ *[](Tensor& input) {
  return torch::mean(input);
  })
  .define_singleton_method(
  "_mean_dim",
- *[](torch::Tensor& input, int64_t dim, bool keepdim) {
+ *[](Tensor& input, int64_t dim, bool keepdim) {
  return torch::mean(input, dim, keepdim);
  })
  .define_singleton_method(
  "_sum",
- *[](torch::Tensor& input) {
+ *[](Tensor& input) {
  return torch::sum(input);
  })
  .define_singleton_method(
  "_sum_dim",
- *[](torch::Tensor& input, int64_t dim, bool keepdim) {
+ *[](Tensor& input, int64_t dim, bool keepdim) {
  return torch::sum(input, dim, keepdim);
  })
  .define_singleton_method(
- "_norm",
- *[](torch::Tensor& input) {
- return torch::norm(input);
- })
- .define_singleton_method(
- "_min",
- *[](torch::Tensor& input) {
- return torch::min(input);
+ "_max_out",
+ *[](Tensor &max, Tensor &max_indices, const Tensor &input, int64_t dim, bool keepdim) {
+ return tensor_array(torch::_max_out(max, max_indices, input, dim, keepdim));
  })
  .define_singleton_method(
- "_max",
- *[](torch::Tensor& input) {
- return torch::max(input);
+ "_topk",
+ *[](Tensor& input, int64_t k) {
+ return tensor_array(torch::topk(input, k));
  })
  .define_singleton_method(
- "_exp",
- *[](torch::Tensor& input) {
- return torch::exp(input);
+ "_softmax",
+ *[](const Tensor &input, int64_t dim) {
+ return torch::softmax(input, dim);
  })
  .define_singleton_method(
- "_log",
- *[](torch::Tensor& input) {
- return torch::log(input);
+ "_log_softmax",
+ *[](Tensor& input, int64_t dim) {
+ return torch::log_softmax(input, dim);
  })
  .define_singleton_method(
- "_unsqueeze",
- *[](torch::Tensor& input, int64_t dim) {
- return torch::unsqueeze(input, dim);
- })
- .define_singleton_method(
- "_dot",
- *[](torch::Tensor& input, torch::Tensor& tensor) {
- return torch::dot(input, tensor);
- })
- .define_singleton_method(
- "_matmul",
- *[](torch::Tensor& input, torch::Tensor& other) {
- return torch::matmul(input, other);
- })
- .define_singleton_method(
- "_add",
- *[](torch::Tensor& input, torch::Tensor& other) {
- return torch::add(input, other);
- })
- .define_singleton_method(
- "_add_scalar",
- *[](torch::Tensor& input, float other) {
- return torch::add(input, other);
- })
- .define_singleton_method(
- "_add_out",
- *[](torch::Tensor& out, torch::Tensor& input, torch::Tensor& other) {
- return torch::add_out(out, input, other);
- })
- .define_singleton_method(
- "_sub",
- *[](torch::Tensor& input, torch::Tensor& other) {
- return torch::sub(input, other);
- })
- .define_singleton_method(
- "_sub_scalar",
- *[](torch::Tensor& input, float other) {
- return torch::sub(input, other);
- })
- .define_singleton_method(
- "_mul",
- *[](torch::Tensor& input, torch::Tensor& other) {
- return torch::mul(input, other);
- })
- .define_singleton_method(
- "_mul_scalar",
- *[](torch::Tensor& input, float other) {
- return torch::mul(input, other);
- })
- .define_singleton_method(
- "_div",
- *[](torch::Tensor& input, torch::Tensor& other) {
- return torch::div(input, other);
- })
- .define_singleton_method(
- "_div_scalar",
- *[](torch::Tensor& input, float other) {
- return torch::div(input, other);
- })
- .define_singleton_method(
- "_remainder",
- *[](torch::Tensor& input, torch::Tensor& other) {
- return torch::remainder(input, other);
- })
- .define_singleton_method(
- "_remainder_scalar",
- *[](torch::Tensor& input, float other) {
- return torch::remainder(input, other);
+ "relu",
+ *[](Tensor& input) {
+ return torch::relu(input);
  })
  .define_singleton_method(
- "_pow",
- *[](torch::Tensor& input, Scalar exponent) {
- return torch::pow(input, exponent);
+ "prelu",
+ *[](torch::Tensor& input, torch::Tensor& weight) {
+ return torch::prelu(input, weight);
  })
  .define_singleton_method(
- "_neg",
- *[](torch::Tensor& input) {
- return torch::neg(input);
+ "leaky_relu",
+ *[](torch::Tensor& input, Scalar negative_slope) {
+ return torch::leaky_relu(input, negative_slope);
  })
  .define_singleton_method(
- "relu",
- *[](torch::Tensor& input) {
- return torch::relu(input);
+ "conv2d",
+ *[](Tensor& input, Tensor& weight, Tensor& bias, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, int64_t groups) {
+ return torch::conv2d(input, weight, bias, stride, padding, dilation, groups);
  })
+ // linear layers
  .define_singleton_method(
- "conv2d",
- *[](torch::Tensor& input, torch::Tensor& weight, torch::Tensor& bias) {
- return torch::conv2d(input, weight, bias);
+ "bilinear",
+ *[](const Tensor &input1, const Tensor &input2, const Tensor &weight, const Tensor &bias) {
+ return torch::bilinear(input1, input2, weight, bias);
  })
  .define_singleton_method(
  "linear",
- *[](torch::Tensor& input, torch::Tensor& weight, torch::Tensor& bias) {
+ *[](Tensor& input, Tensor& weight, Tensor& bias) {
  return torch::linear(input, weight, bias);
  })
+ // pooling layers
  .define_singleton_method(
  "max_pool2d",
- *[](torch::Tensor& input, IntArrayRef kernel_size) {
+ *[](Tensor& input, IntArrayRef kernel_size) {
  return torch::max_pool2d(input, kernel_size);
  })
  .define_singleton_method(
- "mse_loss",
- *[](torch::Tensor& input, torch::Tensor& target, std::string reduction) {
- auto red = reduction == "mean" ? Reduction::Mean : Reduction::Sum;
- return torch::mse_loss(input, target, red);
+ "avg_pool2d",
+ *[](Tensor& input, IntArrayRef kernel_size) {
+ return torch::avg_pool2d(input, kernel_size);
+ })
+ .define_singleton_method(
+ "_binary_cross_entropy_with_logits",
+ *[](const Tensor &input, const Tensor &target, OptionalTensor weight, OptionalTensor pos_weight, MyReduction reduction) {
+ return torch::binary_cross_entropy_with_logits(input, target, weight, pos_weight, reduction);
  })
+ .define_singleton_method("numel", &torch::numel)
  .define_singleton_method(
- "nll_loss",
- *[](torch::Tensor& input, torch::Tensor& target) {
- return torch::nll_loss(input, target);
+ "_from_blob",
+ *[](String s, IntArrayRef size, const torch::TensorOptions &options) {
+ void *data = const_cast<char *>(s.c_str());
+ return torch::from_blob(data, size, options);
  })
  .define_singleton_method(
  "_tensor",
  *[](Object o, IntArrayRef size, const torch::TensorOptions &options) {
  Array a = Array(o);
- std::vector<float> vec;
- for (size_t i = 0; i < a.size(); i++) {
- vec.push_back(from_ruby<float>(a[i]));
+ auto dtype = options.dtype();
+ torch::Tensor t;
+ if (dtype == torch::kBool) {
+ throw std::runtime_error("Cannot create bool from tensor method yet");
+ } else {
+ std::vector<float> vec;
+ for (size_t i = 0; i < a.size(); i++) {
+ vec.push_back(from_ruby<float>(a[i]));
+ }
+ t = torch::tensor(vec, options);
  }
- return torch::tensor(vec, options).reshape(size);
+ return t.reshape(size);
  });

- Class rb_cTensor = define_class_under<torch::Tensor>(rb_mTorch, "Tensor")
+ rb_cTensor
  .define_method("cuda?", &torch::Tensor::is_cuda)
  .define_method("distributed?", &torch::Tensor::is_distributed)
  .define_method("complex?", &torch::Tensor::is_complex)
@@ -352,108 +233,162 @@ void Init_ext()
  .define_method("sparse?", &torch::Tensor::is_sparse)
  .define_method("quantized?", &torch::Tensor::is_quantized)
  .define_method("dim", &torch::Tensor::dim)
- .define_method("numel", &torch::Tensor::numel)
  .define_method("element_size", &torch::Tensor::element_size)
  .define_method("requires_grad", &torch::Tensor::requires_grad)
+ .define_method("view_as", &torch::Tensor::view_as)
+ .define_method(
+ "addcmul!",
+ *[](Tensor& self, Scalar value, const Tensor & tensor1, const Tensor & tensor2) {
+ return self.addcmul_(tensor1, tensor2, value);
+ })
+ .define_method(
+ "addcdiv!",
+ *[](Tensor& self, Scalar value, const Tensor & tensor1, const Tensor & tensor2) {
+ return self.addcdiv_(tensor1, tensor2, value);
+ })
  .define_method(
  "zero!",
- *[](torch::Tensor& self) {
+ *[](Tensor& self) {
  return self.zero_();
  })
  .define_method(
- "detach!",
- *[](torch::Tensor& self) {
- return self.detach_();
+ "detach",
+ *[](Tensor& self) {
+ return self.detach();
  })
  .define_method(
- "_access",
- *[](torch::Tensor& self, int64_t index) {
- return self[index];
+ "detach!",
+ *[](Tensor& self) {
+ return self.detach_();
  })
  .define_method(
  "_requires_grad!",
- *[](torch::Tensor& self, bool requires_grad) {
+ *[](Tensor& self, bool requires_grad) {
  return self.set_requires_grad(requires_grad);
  })
  .define_method(
- "backward",
- *[](torch::Tensor& self) {
- return self.backward();
+ "_backward",
+ *[](Tensor& self, Object gradient) {
+ return gradient.is_nil() ? self.backward() : self.backward(from_ruby<torch::Tensor>(gradient));
  })
  .define_method(
  "grad",
- *[](torch::Tensor& self) {
+ *[](Tensor& self) {
  return self.grad();
  })
  .define_method(
  "_dtype",
- *[](torch::Tensor& self) {
+ *[](Tensor& self) {
  return (int) at::typeMetaToScalarType(self.dtype());
  })
+ .define_method(
+ "_type",
+ *[](Tensor& self, int dtype) {
+ return self.toType((torch::ScalarType) dtype);
+ })
  .define_method(
  "_layout",
- *[](torch::Tensor& self) {
+ *[](Tensor& self) {
  std::stringstream s;
  s << self.layout();
  return s.str();
  })
  .define_method(
  "device",
- *[](torch::Tensor& self) {
+ *[](Tensor& self) {
  std::stringstream s;
  s << self.device();
  return s.str();
  })
  .define_method(
- "_view",
- *[](torch::Tensor& self, IntArrayRef size) {
- return self.view(size);
+ "resize_as!",
+ *[](Tensor& self, Tensor& other) {
+ return self.resize_as_(other);
  })
  .define_method(
- "add!",
- *[](torch::Tensor& self, torch::Tensor& other) {
- self.add_(other);
+ "fill!",
+ *[](Tensor& self, Scalar value) {
+ return self.fill_(value);
  })
  .define_method(
- "sub!",
- *[](torch::Tensor& self, torch::Tensor& other) {
- self.sub_(other);
+ "relu!",
+ *[](Tensor& self) {
+ return self.relu_();
+ })
+ .define_method(
+ "normal!",
+ *[](Tensor& self, double mean, double std) {
+ return self.normal_(mean, std);
+ })
+ .define_method(
+ "random!",
+ *[](Tensor& self, int64_t to) {
+ return self.random_(to);
  })
  .define_method(
- "mul!",
- *[](torch::Tensor& self, torch::Tensor& other) {
- self.mul_(other);
+ "sub!",
+ *[](Tensor& self, Tensor& other) {
+ return self.sub_(other);
  })
  .define_method(
  "div!",
- *[](torch::Tensor& self, torch::Tensor& other) {
- self.div_(other);
+ *[](Tensor& self, Tensor& other) {
+ return self.div_(other);
  })
  .define_method(
- "log_softmax",
- *[](torch::Tensor& self, int64_t dim) {
- return self.log_softmax(dim);
+ "sqrt!",
+ *[](Tensor& self) {
+ return self.sqrt_();
  })
  .define_method(
- "_data",
- *[](torch::Tensor& self) {
+ "unsqueeze!",
+ *[](Tensor& self, int64_t dim) {
+ return self.unsqueeze_(dim);
+ })
+ .define_method(
+ "copy!",
+ *[](Tensor& self, Tensor& src) {
+ return self.copy_(src);
+ })
+ .define_method(
+ "clone",
+ *[](Tensor& self) {
+ return self.clone();
+ })
+ .define_method(
+ "data",
+ *[](Tensor& self) {
+ return self.data();
+ })
+ .define_method(
+ "_flat_data",
+ *[](Tensor& self) {
  Array a;
  auto dtype = self.dtype();

  // TODO DRY if someone knows C++
- // TODO kByte (uint8), kChar (int8), kBool (bool)
- if (dtype == torch::kShort) {
- short* data = self.data_ptr<short>();
+ if (dtype == torch::kByte) {
+ uint8_t* data = self.data_ptr<uint8_t>();
+ for (int i = 0; i < self.numel(); i++) {
+ a.push(data[i]);
+ }
+ } else if (dtype == torch::kChar) {
+ int8_t* data = self.data_ptr<int8_t>();
+ for (int i = 0; i < self.numel(); i++) {
+ a.push(to_ruby<int>(data[i]));
+ }
+ } else if (dtype == torch::kShort) {
+ int16_t* data = self.data_ptr<int16_t>();
  for (int i = 0; i < self.numel(); i++) {
  a.push(data[i]);
  }
  } else if (dtype == torch::kInt) {
- int* data = self.data_ptr<int>();
+ int32_t* data = self.data_ptr<int32_t>();
  for (int i = 0; i < self.numel(); i++) {
  a.push(data[i]);
  }
  } else if (dtype == torch::kLong) {
- long long* data = self.data_ptr<long long>();
+ int64_t* data = self.data_ptr<int64_t>();
  for (int i = 0; i < self.numel(); i++) {
  a.push(data[i]);
  }
@@ -467,19 +402,24 @@ void Init_ext()
  for (int i = 0; i < self.numel(); i++) {
  a.push(data[i]);
  }
+ } else if (dtype == torch::kBool) {
+ bool* data = self.data_ptr<bool>();
+ for (int i = 0; i < self.numel(); i++) {
+ a.push(data[i] ? True : False);
+ }
  } else {
- throw "Unsupported type";
+ throw std::runtime_error("Unsupported type");
  }
  return a;
  })
  .define_method(
- "_size",
- *[](torch::Tensor& self, int i) {
- return self.size(i);
+ "_to",
+ *[](Tensor& self, torch::Device device, int dtype, bool non_blocking, bool copy) {
+ return self.to(device, (torch::ScalarType) dtype, non_blocking, copy);
  })
  .define_singleton_method(
  "_make_subclass",
- *[](torch::Tensor& rd, bool requires_grad) {
+ *[](Tensor& rd, bool requires_grad) {
  auto data = torch::autograd::as_variable_ref(rd).detach();
  data.unsafeGetTensorImpl()->set_allow_tensor_metadata_change(true);
  auto var = data.set_requires_grad(requires_grad);
@@ -499,8 +439,11 @@ void Init_ext()
  torch::Layout l;
  if (layout == "strided") {
  l = torch::kStrided;
+ } else if (layout == "sparse") {
+ l = torch::kSparse;
+ throw std::runtime_error("Sparse layout not supported yet");
  } else {
- throw "Unsupported layout";
+ throw std::runtime_error("Unsupported layout: " + layout);
  }
  return self.layout(l);
  })
@@ -513,7 +456,7 @@ void Init_ext()
  } else if (device == "cuda") {
  d = torch::kCUDA;
  } else {
- throw "Unsupported device";
+ throw std::runtime_error("Unsupported device: " + device);
  }
  return self.device(d);
  })
@@ -523,24 +466,99 @@ void Init_ext()
  return self.requires_grad(requires_grad);
  });

- Module rb_mNN = define_module_under(rb_mTorch, "NN");
-
  Module rb_mInit = define_module_under(rb_mNN, "Init")
  .define_singleton_method(
- "kaiming_uniform_",
- *[](torch::Tensor& input, double a) {
- return torch::nn::init::kaiming_uniform_(input, a);
+ "_calculate_gain",
+ *[](NonlinearityType nonlinearity, double param) {
+ return torch::nn::init::calculate_gain(nonlinearity, param);
  })
  .define_singleton_method(
- "uniform_",
- *[](torch::Tensor& input, double to, double from) {
- return torch::nn::init::uniform_(input, to, from);
+ "_uniform!",
+ *[](Tensor tensor, double low, double high) {
+ return torch::nn::init::uniform_(tensor, low, high);
+ })
+ .define_singleton_method(
+ "_normal!",
+ *[](Tensor tensor, double mean, double std) {
+ return torch::nn::init::normal_(tensor, mean, std);
+ })
+ .define_singleton_method(
+ "_constant!",
+ *[](Tensor tensor, Scalar value) {
+ return torch::nn::init::constant_(tensor, value);
+ })
+ .define_singleton_method(
+ "_ones!",
+ *[](Tensor tensor) {
+ return torch::nn::init::ones_(tensor);
+ })
+ .define_singleton_method(
+ "_zeros!",
+ *[](Tensor tensor) {
+ return torch::nn::init::zeros_(tensor);
+ })
+ .define_singleton_method(
+ "_eye!",
+ *[](Tensor tensor) {
+ return torch::nn::init::eye_(tensor);
+ })
+ .define_singleton_method(
+ "_dirac!",
+ *[](Tensor tensor) {
+ return torch::nn::init::dirac_(tensor);
+ })
+ .define_singleton_method(
+ "_xavier_uniform!",
+ *[](Tensor tensor, double gain) {
+ return torch::nn::init::xavier_uniform_(tensor, gain);
+ })
+ .define_singleton_method(
+ "_xavier_normal!",
+ *[](Tensor tensor, double gain) {
+ return torch::nn::init::xavier_normal_(tensor, gain);
+ })
+ .define_singleton_method(
+ "_kaiming_uniform!",
+ *[](Tensor tensor, double a, FanModeType mode, NonlinearityType nonlinearity) {
+ return torch::nn::init::kaiming_uniform_(tensor, a, mode, nonlinearity);
+ })
+ .define_singleton_method(
+ "_kaiming_normal!",
+ *[](Tensor tensor, double a, FanModeType mode, NonlinearityType nonlinearity) {
+ return torch::nn::init::kaiming_normal_(tensor, a, mode, nonlinearity);
+ })
+ .define_singleton_method(
+ "_orthogonal!",
+ *[](Tensor tensor, double gain) {
+ return torch::nn::init::orthogonal_(tensor, gain);
+ })
+ .define_singleton_method(
+ "_sparse!",
+ *[](Tensor tensor, double sparsity, double std) {
+ return torch::nn::init::sparse_(tensor, sparsity, std);
  });

  Class rb_cParameter = define_class_under<torch::autograd::Variable, torch::Tensor>(rb_mNN, "Parameter")
  .define_method(
  "grad",
  *[](torch::autograd::Variable& self) {
- return self.grad();
+ auto grad = self.grad();
+ return grad.defined() ? to_ruby<torch::Tensor>(grad) : Nil;
+ });
+
+ Class rb_cDevice = define_class_under<torch::Device>(rb_mTorch, "Device")
+ .define_constructor(Constructor<torch::Device, std::string>())
+ .define_method("index", &torch::Device::index)
+ .define_method("index?", &torch::Device::has_index)
+ .define_method(
+ "type",
+ *[](torch::Device& self) {
+ std::stringstream s;
+ s << self.type();
+ return s.str();
  });
+
+ Module rb_mCUDA = define_module_under(rb_mTorch, "CUDA")
+ .define_singleton_method("available?", &torch::cuda::is_available)
+ .define_singleton_method("device_count", &torch::cuda::device_count);
  }
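
The final additions expose device and CUDA introspection to Ruby. A short sketch of the resulting API, using only the methods registered above:

```ruby
require "torch"

Torch::CUDA.available?    # => false on a CPU-only LibTorch build
Torch::CUDA.device_count  # => 0 without a GPU

device = Torch::Device.new("cpu")  # the constructor takes a device string
device.type    # => "cpu"
device.index?  # => false, since no index was given
```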