torch-rb 0.1.0 → 0.1.5

This diff shows the contents of publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (94)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +40 -0
  3. data/LICENSE.txt +46 -22
  4. data/README.md +85 -19
  5. data/ext/torch/ext.cpp +274 -256
  6. data/ext/torch/extconf.rb +9 -0
  7. data/ext/torch/nn_functions.cpp +595 -0
  8. data/ext/torch/nn_functions.hpp +6 -0
  9. data/ext/torch/templates.hpp +250 -0
  10. data/ext/torch/tensor_functions.cpp +1860 -0
  11. data/ext/torch/tensor_functions.hpp +6 -0
  12. data/ext/torch/torch_functions.cpp +2875 -0
  13. data/ext/torch/torch_functions.hpp +6 -0
  14. data/lib/torch.rb +199 -84
  15. data/lib/torch/ext.bundle +0 -0
  16. data/lib/torch/inspector.rb +52 -25
  17. data/lib/torch/native/dispatcher.rb +48 -0
  18. data/lib/torch/native/function.rb +78 -0
  19. data/lib/torch/native/generator.rb +149 -0
  20. data/lib/torch/native/native_functions.yaml +6837 -0
  21. data/lib/torch/native/parser.rb +97 -0
  22. data/lib/torch/nn/alpha_dropout.rb +9 -0
  23. data/lib/torch/nn/avg_pool2d.rb +14 -0
  24. data/lib/torch/nn/avg_poolnd.rb +9 -0
  25. data/lib/torch/nn/bce_loss.rb +13 -0
  26. data/lib/torch/nn/bce_with_logits_loss.rb +15 -0
  27. data/lib/torch/nn/bilinear.rb +38 -0
  28. data/lib/torch/nn/conv2d.rb +14 -29
  29. data/lib/torch/nn/convnd.rb +41 -0
  30. data/lib/torch/nn/cosine_embedding_loss.rb +14 -0
  31. data/lib/torch/nn/cosine_similarity.rb +15 -0
  32. data/lib/torch/nn/cross_entropy_loss.rb +14 -0
  33. data/lib/torch/nn/ctc_loss.rb +15 -0
  34. data/lib/torch/nn/dropout.rb +9 -0
  35. data/lib/torch/nn/dropout2d.rb +9 -0
  36. data/lib/torch/nn/dropout3d.rb +9 -0
  37. data/lib/torch/nn/dropoutnd.rb +15 -0
  38. data/lib/torch/nn/embedding.rb +52 -0
  39. data/lib/torch/nn/embedding_bag.rb +34 -0
  40. data/lib/torch/nn/feature_alpha_dropout.rb +9 -0
  41. data/lib/torch/nn/functional.rb +194 -11
  42. data/lib/torch/nn/hinge_embedding_loss.rb +14 -0
  43. data/lib/torch/nn/identity.rb +14 -0
  44. data/lib/torch/nn/init.rb +58 -1
  45. data/lib/torch/nn/kl_div_loss.rb +13 -0
  46. data/lib/torch/nn/l1_loss.rb +13 -0
  47. data/lib/torch/nn/leaky_relu.rb +20 -0
  48. data/lib/torch/nn/linear.rb +12 -11
  49. data/lib/torch/nn/log_softmax.rb +14 -0
  50. data/lib/torch/nn/loss.rb +10 -0
  51. data/lib/torch/nn/margin_ranking_loss.rb +14 -0
  52. data/lib/torch/nn/max_pool2d.rb +9 -0
  53. data/lib/torch/nn/max_poolnd.rb +19 -0
  54. data/lib/torch/nn/module.rb +184 -19
  55. data/lib/torch/nn/mse_loss.rb +2 -2
  56. data/lib/torch/nn/multi_label_margin_loss.rb +13 -0
  57. data/lib/torch/nn/multi_label_soft_margin_loss.rb +13 -0
  58. data/lib/torch/nn/multi_margin_loss.rb +17 -0
  59. data/lib/torch/nn/nll_loss.rb +14 -0
  60. data/lib/torch/nn/pairwise_distance.rb +16 -0
  61. data/lib/torch/nn/parameter.rb +4 -0
  62. data/lib/torch/nn/poisson_nll_loss.rb +16 -0
  63. data/lib/torch/nn/prelu.rb +19 -0
  64. data/lib/torch/nn/relu.rb +8 -3
  65. data/lib/torch/nn/rnn.rb +22 -0
  66. data/lib/torch/nn/rnn_base.rb +154 -0
  67. data/lib/torch/nn/sequential.rb +1 -10
  68. data/lib/torch/nn/sigmoid.rb +9 -0
  69. data/lib/torch/nn/smooth_l1_loss.rb +13 -0
  70. data/lib/torch/nn/soft_margin_loss.rb +13 -0
  71. data/lib/torch/nn/softmax.rb +18 -0
  72. data/lib/torch/nn/softmax2d.rb +10 -0
  73. data/lib/torch/nn/softmin.rb +14 -0
  74. data/lib/torch/nn/softplus.rb +19 -0
  75. data/lib/torch/nn/triplet_margin_loss.rb +18 -0
  76. data/lib/torch/nn/weighted_loss.rb +10 -0
  77. data/lib/torch/optim/adadelta.rb +57 -0
  78. data/lib/torch/optim/adagrad.rb +71 -0
  79. data/lib/torch/optim/adam.rb +81 -0
  80. data/lib/torch/optim/adamax.rb +68 -0
  81. data/lib/torch/optim/adamw.rb +82 -0
  82. data/lib/torch/optim/asgd.rb +65 -0
  83. data/lib/torch/optim/lr_scheduler/lr_scheduler.rb +33 -0
  84. data/lib/torch/optim/lr_scheduler/step_lr.rb +17 -0
  85. data/lib/torch/optim/optimizer.rb +62 -0
  86. data/lib/torch/optim/rmsprop.rb +76 -0
  87. data/lib/torch/optim/rprop.rb +68 -0
  88. data/lib/torch/optim/sgd.rb +60 -0
  89. data/lib/torch/random.rb +10 -0
  90. data/lib/torch/tensor.rb +92 -21
  91. data/lib/torch/utils/data/data_loader.rb +15 -0
  92. data/lib/torch/utils/data/tensor_dataset.rb +8 -1
  93. data/lib/torch/version.rb +1 -1
  94. metadata +74 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 38e16e7f07d004fd9625f168694356d551c79cbc62b0131fe1403e4c0995f296
- data.tar.gz: 66bf6ae0e4dd373a7542fbfb1cfb9dbd89fc455e16166e6a76d0945b32fecf38
+ metadata.gz: 6b47306ed525e1a20d25cb8324d4658f750c18afa5704c9b7bafc215d8f568c1
+ data.tar.gz: dad6ddf955b111989b061e5af146006a32c83dc1ea1ca5005a6b6e34bc9a4892
  SHA512:
- metadata.gz: d100e3a21ac877fe93ac61e9b5e0d8a5e61126684fc037dda3e9f703b040188b1e1523aa4111dff4aaf92ada1001597c5f60674b9583b14d31afd18dbf1ff18d
- data.tar.gz: c234dee79e26d3ee25ade2aaddd75f155dea6d59d8b9c5af2c571423a7aaa8a6489f5cfce89f09f390468a951b1644a4212c19525a79816be09214f0938860a8
+ metadata.gz: 5d26e3642bf7cd921b9b570052df353d4c32b1bd955a6fbbf5f30249631fa4c0d4624f4fa91a1c06f61b3b0d6461cd117ab4df185cf013e915d2f63e52dbcf7c
+ data.tar.gz: 1728ce9b579f41f7a567e63d7256c82bb352840b67f16d88aac930a99e5abbf5a5f4061c5f9da16fb47d1664567e7956d276a8b2b44f13d2263032486afb53e8
data/CHANGELOG.md CHANGED
@@ -1,3 +1,43 @@
+ ## 0.1.5 (2019-12-06)
+
+ - Added many more functions
+ - Added tensor classes - `FloatTensor`, `LongTensor`, etc
+ - Improved modules
+
+ ## 0.1.4 (2019-12-01)
+
+ - Added distance functions
+ - Added more activations
+ - Added more linear layers
+ - Added more loss functions
+ - Added more init methods
+ - Added support for tensor assignment
+
+ ## 0.1.3 (2019-11-30)
+
+ - Changed to BSD 3-Clause license to match PyTorch
+ - Added many optimizers
+ - Added `StepLR` learning rate scheduler
+ - Added dropout
+ - Added embedding
+ - Added support for `bool` type
+ - Improved performance of `from_numo`
+
+ ## 0.1.2 (2019-11-27)
+
+ - Added SGD optimizer
+ - Added support for gradient to `backward` method
+ - Added `argmax`, `eq`, `leaky_relu`, `prelu`, and `reshape` methods
+ - Improved indexing
+ - Fixed `zero_grad`
+ - Fixed error with infinite values
+
+ ## 0.1.1 (2019-11-26)
+
+ - Added support for `uint8` and `int8` types
+ - Fixed `undefined symbol` error on Linux
+ - Fixed C++ error messages
+
  ## 0.1.0 (2019-11-26)

  - First release
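
To ground the entries above, here is a minimal Ruby sketch of the 0.1.5 tensor classes and the 0.1.2 tensor methods. The typed constructors are an assumption modeled on PyTorch's `torch.FloatTensor([...])` convention and are not verified against this release:

```ruby
require "torch"

# Hypothetical constructors for the tensor classes noted in 0.1.5;
# assumes they mirror PyTorch's FloatTensor/LongTensor behavior.
x = Torch::FloatTensor.new([1.0, 2.0, 3.0])
y = Torch::LongTensor.new([1, 2, 3])

# `argmax`, `eq`, and `reshape` were added in 0.1.2.
x.argmax          # index of the largest element
x.eq(x)           # elementwise equality
y.reshape([3, 1]) # new shape, same data
```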
data/LICENSE.txt CHANGED
@@ -1,22 +1,46 @@
- Copyright (c) 2019 Andrew Kane
-
- MIT License
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ BSD 3-Clause License
+
+ From Torch-rb:
+
+ Copyright (c) 2019- Andrew Kane
+
+ From PyTorch (for ported code):
+
+ Copyright (c) 2016- Facebook, Inc (Adam Paszke)
+ Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
+ Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
+ Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
+ Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
+ Copyright (c) 2011-2013 NYU (Clement Farabet)
+ Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)
+ Copyright (c) 2006 Idiap Research Institute (Samy Bengio)
+ Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)
+
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America
+ and IDIAP Research Institute nor the names of its contributors may be
+ used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
data/README.md CHANGED
@@ -2,14 +2,16 @@

  :fire: Deep learning for Ruby, powered by [LibTorch](https://pytorch.org)

- **Note:** This gem is currently experimental. There may be breaking changes between each release.
+ This gem is currently experimental. There may be breaking changes between each release. Please report any issues you experience.
+
+ [![Build Status](https://travis-ci.org/ankane/torch-rb.svg?branch=master)](https://travis-ci.org/ankane/torch-rb)

  ## Installation

  First, [install LibTorch](#libtorch-installation). For Homebrew, use:

  ```sh
- brew install ankane/brew/libtorch
+ brew install libtorch
  ```

  Add this line to your application’s Gemfile:
@@ -18,6 +20,8 @@ Add this line to your application’s Gemfile:
  gem 'torch-rb'
  ```

+ It can take a few minutes to compile the extension.
+
  ## Getting Started

  This library follows the [PyTorch API](https://pytorch.org/docs/stable/torch.html). There are a few changes to make it more Ruby-like:
@@ -26,9 +30,11 @@ This library follows the [PyTorch API](https://pytorch.org/docs/stable/torch.htm
  - Methods that return booleans use `?` instead of `is_` (`tensor?` instead of `is_tensor`)
  - Numo is used instead of NumPy (`x.numo` instead of `x.numpy()`)

- Many methods and options are missing at the moment. PRs welcome!
+ Some methods and options are missing at the moment. PRs welcome!

- Some examples below are from [Deep Learning with PyTorch: A 60 Minutes Blitz](https://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html).
+ ## Tutorial
+
+ Some examples below are from [Deep Learning with PyTorch: A 60 Minutes Blitz](https://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html)

  ### Tensors

@@ -143,7 +149,7 @@ Convert a Numo array to a tensor

  ```ruby
  b = Numo::NArray.cast([1, 2, 3])
- Torch.from_numpy(b)
+ Torch.from_numo(b)
  ```

  ### Autograd
@@ -171,17 +177,17 @@ out.backward
  Get gradients

  ```ruby
- x.grad
+ x.grad # tensor([[4.5, 4.5], [4.5, 4.5]])
  ```

  Stop autograd from tracking history

  ```ruby
  x.requires_grad # true
- (x ** 2).requires_grad # true
+ (x**2).requires_grad # true

  Torch.no_grad do
- (x ** 2).requires_grad # false
+ (x**2).requires_grad # false
  end
  ```

@@ -221,7 +227,7 @@ class Net < Torch::NN::Module
  end
  ```

- And run
+ Create an instance of it

  ```ruby
  net = Net.new
@@ -229,6 +235,58 @@ input = Torch.randn(1, 1, 32, 32)
  net.call(input)
  ```

+ Get trainable parameters
+
+ ```ruby
+ net.parameters
+ ```
+
+ Zero the gradient buffers and backprop with random gradients
+
+ ```ruby
+ net.zero_grad
+ out.backward(Torch.randn(1, 10))
+ ```
+
+ Define a loss function
+
+ ```ruby
+ output = net.call(input)
+ target = Torch.randn(10)
+ target = target.view(1, -1)
+ criterion = Torch::NN::MSELoss.new
+ loss = criterion.call(output, target)
+ ```
+
+ Backprop
+
+ ```ruby
+ net.zero_grad
+ p net.conv1.bias.grad
+ loss.backward
+ p net.conv1.bias.grad
+ ```
+
+ Update the weights
+
+ ```ruby
+ learning_rate = 0.01
+ net.parameters.each do |f|
+   f.data.sub!(f.grad.data * learning_rate)
+ end
+ ```
+
+ Use an optimizer
+
+ ```ruby
+ optimizer = Torch::Optim::SGD.new(net.parameters, lr: 0.01)
+ optimizer.zero_grad
+ output = net.call(input)
+ loss = criterion.call(output, target)
+ loss.backward
+ optimizer.step
+ ```
+
  ### Tensor Creation

  Here’s a list of functions to create tensors (descriptions from the [C++ docs](https://pytorch.org/cppdocs/notes/tensor_creation.html)):
@@ -242,7 +300,7 @@ Here’s a list of functions to create tensors (descriptions from the [C++ docs]
  - `empty` returns a tensor with uninitialized values

  ```ruby
- Torch.empty(3)
+ Torch.empty(3) # tensor([7.0054e-45, 0.0000e+00, 0.0000e+00])
  ```

  - `eye` returns an identity matrix
@@ -278,19 +336,19 @@ Here’s a list of functions to create tensors (descriptions from the [C++ docs]
  - `rand` returns a tensor filled with values drawn from a uniform distribution on [0, 1)

  ```ruby
- Torch.rand(3)
+ Torch.rand(3) # tensor([0.5444, 0.8799, 0.5571])
  ```

  - `randint` returns a tensor with integers randomly drawn from an interval

  ```ruby
- Torch.randint(1, 10, [3])
+ Torch.randint(1, 10, [3]) # tensor([7, 6, 4])
  ```

  - `randn` returns a tensor filled with values drawn from a unit normal distribution

  ```ruby
- Torch.randn(3)
+ Torch.randn(3) # tensor([-0.7147, 0.6614, 1.1453])
  ```

  - `randperm` returns a tensor filled with a random permutation of integers in some interval
@@ -305,12 +363,20 @@ Here’s a list of functions to create tensors (descriptions from the [C++ docs]
  Torch.zeros(3) # tensor([0, 0, 0])
  ```

+ ## Examples
+
+ Here are a few full examples:
+
+ - [Image classification with MNIST](examples/mnist) ([日本語版](https://qiita.com/kojix2/items/c19c36dc1bf73ea93409))
+ - [Collaborative filtering with MovieLens](examples/movielens)
+ - [Word embeddings](examples/nlp)
+
  ## LibTorch Installation

- [Download LibTorch](https://pytorch.org/) and run:
+ [Download LibTorch](https://pytorch.org/). For Linux, use the `cxx11 ABI` version. Then run:

  ```sh
- gem install torch-rb -- --with-torch-dir=/path/to/libtorch
+ bundle config build.torch-rb --with-torch-dir=/path/to/libtorch
  ```

@@ -318,10 +384,10 @@ gem install torch-rb -- --with-torch-dir=/path/to/libtorch
  For Mac, you can use Homebrew.

  ```sh
- brew install ankane/brew/libtorch
+ brew install libtorch
  ```

- Then install the gem (no need for `--with-torch-dir`).
+ Then install the gem (no need for `bundle config`).

  ## rbenv

@@ -349,9 +415,9 @@ To get started with development:

  ```sh
  git clone https://github.com/ankane/torch-rb.git
- cd torch
+ cd torch-rb
  bundle install
- bundle exec rake compile
+ bundle exec rake compile -- --with-torch-dir=/path/to/libtorch
  bundle exec rake test
  ```

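To make the Ruby-like conventions described in the README concrete, here is a short sketch. It is inferred from the conventions the README lists (`?` predicates, `numo`/`from_numo`); the printed values are illustrative, not captured output:

```ruby
require "torch"

x = Torch.tensor([[1, 2], [3, 4]])

# PyTorch: torch.is_tensor(x) -- boolean predicates drop `is_` and end in `?`
Torch.tensor?(x) # true

# PyTorch: x.numpy() -- torch-rb converts to a Numo array instead
n = x.numo       # Numo::NArray with shape [2, 2]

# PyTorch: torch.from_numpy(n) -- round trip back to a tensor
Torch.from_numo(n)
```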
data/ext/torch/ext.cpp CHANGED
@@ -6,95 +6,36 @@
  #include <rice/Class.hpp>
  #include <rice/Constructor.hpp>

- using namespace Rice;
+ #include "templates.hpp"

- template<>
- inline
- long long from_ruby<long long>(Object x)
- {
-   return NUM2LL(x);
- }
-
- template<>
- inline
- Object to_ruby<long long>(long long const & x)
- {
-   return LL2NUM(x);
- }
-
- template<>
- inline
- unsigned long long from_ruby<unsigned long long>(Object x)
- {
-   return NUM2ULL(x);
- }
+ // generated with:
+ // rake generate:functions
+ #include "torch_functions.hpp"
+ #include "tensor_functions.hpp"
+ #include "nn_functions.hpp"

- template<>
- inline
- Object to_ruby<unsigned long long>(unsigned long long const & x)
- {
-   return ULL2NUM(x);
- }
-
- template<>
- inline
- short from_ruby<short>(Object x)
- {
-   return NUM2SHORT(x);
- }
-
- template<>
- inline
- Object to_ruby<short>(short const & x)
- {
-   return INT2NUM(x);
- }
+ using namespace Rice;

- template<>
- inline
- unsigned short from_ruby<unsigned short>(Object x)
- {
-   return NUM2USHORT(x);
+ Object tensor_array(std::tuple<torch::Tensor, torch::Tensor> x) {
+   Array a;
+   a.push(to_ruby<torch::Tensor>(std::get<0>(x)));
+   a.push(to_ruby<torch::Tensor>(std::get<1>(x)));
+   return Object(a);
  }

- template<>
- inline
- Object to_ruby<unsigned short>(unsigned short const & x)
+ extern "C"
+ void Init_ext()
  {
-   return UINT2NUM(x);
- }
+   Module rb_mTorch = define_module("Torch");
+   add_torch_functions(rb_mTorch);

- // need to wrap torch::IntArrayRef() since
- // it doesn't own underlying data
- class IntArrayRef {
-   std::vector<int64_t> vec;
- public:
-   IntArrayRef(Object o) {
-     Array a = Array(o);
-     for (size_t i = 0; i < a.size(); i++) {
-       vec.push_back(from_ruby<int64_t>(a[i]));
-     }
-   }
-   operator torch::IntArrayRef() {
-     return torch::IntArrayRef(vec);
-   }
- };
+   Class rb_cTensor = define_class_under<torch::Tensor>(rb_mTorch, "Tensor");
+   add_tensor_functions(rb_cTensor);

- template<>
- inline
- IntArrayRef from_ruby<IntArrayRef>(Object x)
- {
-   return IntArrayRef(x);
- }
-
- // for now
- typedef float Scalar;
+   Module rb_mNN = define_module_under(rb_mTorch, "NN");
+   add_nn_functions(rb_mNN);

- extern "C"
- void Init_ext()
- {
-   Module rb_mTorch = define_module("Torch")
-     .define_singleton_method(
+   rb_mTorch.define_singleton_method(
      "grad_enabled?",
      *[]() {
        return torch::GradMode::is_enabled();
@@ -104,11 +45,6 @@ void Init_ext()
      *[](bool enabled) {
        torch::GradMode::set_enabled(enabled);
      })
-     .define_singleton_method(
-       "floating_point?",
-       *[](torch::Tensor& input) {
-         return torch::is_floating_point(input);
-       })
    .define_singleton_method(
      "manual_seed",
      *[](uint64_t seed) {
@@ -178,172 +114,117 @@ void Init_ext()
    // begin operations
    .define_singleton_method(
      "_mean",
-     *[](torch::Tensor& input) {
+     *[](Tensor& input) {
        return torch::mean(input);
      })
    .define_singleton_method(
      "_mean_dim",
-     *[](torch::Tensor& input, int64_t dim, bool keepdim) {
+     *[](Tensor& input, int64_t dim, bool keepdim) {
        return torch::mean(input, dim, keepdim);
      })
    .define_singleton_method(
      "_sum",
-     *[](torch::Tensor& input) {
+     *[](Tensor& input) {
        return torch::sum(input);
      })
    .define_singleton_method(
      "_sum_dim",
-     *[](torch::Tensor& input, int64_t dim, bool keepdim) {
+     *[](Tensor& input, int64_t dim, bool keepdim) {
        return torch::sum(input, dim, keepdim);
      })
    .define_singleton_method(
-     "_norm",
-     *[](torch::Tensor& input) {
-       return torch::norm(input);
-     })
-   .define_singleton_method(
-     "_min",
-     *[](torch::Tensor& input) {
-       return torch::min(input);
+     "_max_out",
+     *[](Tensor &max, Tensor &max_indices, const Tensor &input, int64_t dim, bool keepdim) {
+       return tensor_array(torch::_max_out(max, max_indices, input, dim, keepdim));
      })
    .define_singleton_method(
-     "_max",
-     *[](torch::Tensor& input) {
-       return torch::max(input);
+     "_topk",
+     *[](Tensor& input, int64_t k) {
+       return tensor_array(torch::topk(input, k));
      })
    .define_singleton_method(
-     "_exp",
-     *[](torch::Tensor& input) {
-       return torch::exp(input);
+     "_softmax",
+     *[](const Tensor &input, int64_t dim) {
+       return torch::softmax(input, dim);
      })
    .define_singleton_method(
-     "_log",
-     *[](torch::Tensor& input) {
-       return torch::log(input);
+     "_log_softmax",
+     *[](Tensor& input, int64_t dim) {
+       return torch::log_softmax(input, dim);
      })
    .define_singleton_method(
-     "_unsqueeze",
-     *[](torch::Tensor& input, int64_t dim) {
-       return torch::unsqueeze(input, dim);
-     })
-   .define_singleton_method(
-     "_dot",
-     *[](torch::Tensor& input, torch::Tensor& tensor) {
-       return torch::dot(input, tensor);
-     })
-   .define_singleton_method(
-     "_matmul",
-     *[](torch::Tensor& input, torch::Tensor& other) {
-       return torch::matmul(input, other);
-     })
-   .define_singleton_method(
-     "_add",
-     *[](torch::Tensor& input, torch::Tensor& other) {
-       return torch::add(input, other);
-     })
-   .define_singleton_method(
-     "_add_scalar",
-     *[](torch::Tensor& input, float other) {
-       return torch::add(input, other);
-     })
-   .define_singleton_method(
-     "_add_out",
-     *[](torch::Tensor& out, torch::Tensor& input, torch::Tensor& other) {
-       return torch::add_out(out, input, other);
-     })
-   .define_singleton_method(
-     "_sub",
-     *[](torch::Tensor& input, torch::Tensor& other) {
-       return torch::sub(input, other);
-     })
-   .define_singleton_method(
-     "_sub_scalar",
-     *[](torch::Tensor& input, float other) {
-       return torch::sub(input, other);
-     })
-   .define_singleton_method(
-     "_mul",
-     *[](torch::Tensor& input, torch::Tensor& other) {
-       return torch::mul(input, other);
-     })
-   .define_singleton_method(
-     "_mul_scalar",
-     *[](torch::Tensor& input, float other) {
-       return torch::mul(input, other);
-     })
-   .define_singleton_method(
-     "_div",
-     *[](torch::Tensor& input, torch::Tensor& other) {
-       return torch::div(input, other);
-     })
-   .define_singleton_method(
-     "_div_scalar",
-     *[](torch::Tensor& input, float other) {
-       return torch::div(input, other);
-     })
-   .define_singleton_method(
-     "_remainder",
-     *[](torch::Tensor& input, torch::Tensor& other) {
-       return torch::remainder(input, other);
-     })
-   .define_singleton_method(
-     "_remainder_scalar",
-     *[](torch::Tensor& input, float other) {
-       return torch::remainder(input, other);
+     "relu",
+     *[](Tensor& input) {
+       return torch::relu(input);
      })
    .define_singleton_method(
-     "_pow",
-     *[](torch::Tensor& input, Scalar exponent) {
-       return torch::pow(input, exponent);
+     "prelu",
+     *[](torch::Tensor& input, torch::Tensor& weight) {
+       return torch::prelu(input, weight);
      })
    .define_singleton_method(
-     "_neg",
-     *[](torch::Tensor& input) {
-       return torch::neg(input);
+     "leaky_relu",
+     *[](torch::Tensor& input, Scalar negative_slope) {
+       return torch::leaky_relu(input, negative_slope);
      })
    .define_singleton_method(
-     "relu",
-     *[](torch::Tensor& input) {
-       return torch::relu(input);
+     "conv2d",
+     *[](Tensor& input, Tensor& weight, Tensor& bias, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, int64_t groups) {
+       return torch::conv2d(input, weight, bias, stride, padding, dilation, groups);
      })
+   // linear layers
    .define_singleton_method(
-     "conv2d",
-     *[](torch::Tensor& input, torch::Tensor& weight, torch::Tensor& bias) {
-       return torch::conv2d(input, weight, bias);
+     "bilinear",
+     *[](const Tensor &input1, const Tensor &input2, const Tensor &weight, const Tensor &bias) {
+       return torch::bilinear(input1, input2, weight, bias);
      })
    .define_singleton_method(
      "linear",
-     *[](torch::Tensor& input, torch::Tensor& weight, torch::Tensor& bias) {
+     *[](Tensor& input, Tensor& weight, Tensor& bias) {
        return torch::linear(input, weight, bias);
      })
+   // pooling layers
    .define_singleton_method(
      "max_pool2d",
-     *[](torch::Tensor& input, IntArrayRef kernel_size) {
+     *[](Tensor& input, IntArrayRef kernel_size) {
        return torch::max_pool2d(input, kernel_size);
      })
    .define_singleton_method(
-     "mse_loss",
-     *[](torch::Tensor& input, torch::Tensor& target, std::string reduction) {
-       auto red = reduction == "mean" ? Reduction::Mean : Reduction::Sum;
-       return torch::mse_loss(input, target, red);
+     "avg_pool2d",
+     *[](Tensor& input, IntArrayRef kernel_size) {
+       return torch::avg_pool2d(input, kernel_size);
+     })
+   .define_singleton_method(
+     "_binary_cross_entropy_with_logits",
+     *[](const Tensor &input, const Tensor &target, OptionalTensor weight, OptionalTensor pos_weight, MyReduction reduction) {
+       return torch::binary_cross_entropy_with_logits(input, target, weight, pos_weight, reduction);
      })
+   .define_singleton_method("numel", &torch::numel)
    .define_singleton_method(
-     "nll_loss",
-     *[](torch::Tensor& input, torch::Tensor& target) {
-       return torch::nll_loss(input, target);
+     "_from_blob",
+     *[](String s, IntArrayRef size, const torch::TensorOptions &options) {
+       void *data = const_cast<char *>(s.c_str());
+       return torch::from_blob(data, size, options);
      })
    .define_singleton_method(
      "_tensor",
      *[](Object o, IntArrayRef size, const torch::TensorOptions &options) {
        Array a = Array(o);
-       std::vector<float> vec;
-       for (size_t i = 0; i < a.size(); i++) {
-         vec.push_back(from_ruby<float>(a[i]));
+       auto dtype = options.dtype();
+       torch::Tensor t;
+       if (dtype == torch::kBool) {
+         throw std::runtime_error("Cannot create bool from tensor method yet");
+       } else {
+         std::vector<float> vec;
+         for (size_t i = 0; i < a.size(); i++) {
+           vec.push_back(from_ruby<float>(a[i]));
+         }
+         t = torch::tensor(vec, options);
        }
-       return torch::tensor(vec, options).reshape(size);
+       return t.reshape(size);
      });

-   Class rb_cTensor = define_class_under<torch::Tensor>(rb_mTorch, "Tensor")
+   rb_cTensor
      .define_method("cuda?", &torch::Tensor::is_cuda)
      .define_method("distributed?", &torch::Tensor::is_distributed)
      .define_method("complex?", &torch::Tensor::is_complex)
@@ -352,108 +233,162 @@ void Init_ext()
      .define_method("sparse?", &torch::Tensor::is_sparse)
      .define_method("quantized?", &torch::Tensor::is_quantized)
      .define_method("dim", &torch::Tensor::dim)
-     .define_method("numel", &torch::Tensor::numel)
      .define_method("element_size", &torch::Tensor::element_size)
      .define_method("requires_grad", &torch::Tensor::requires_grad)
+     .define_method("view_as", &torch::Tensor::view_as)
+     .define_method(
+       "addcmul!",
+       *[](Tensor& self, Scalar value, const Tensor & tensor1, const Tensor & tensor2) {
+         return self.addcmul_(tensor1, tensor2, value);
+       })
+     .define_method(
+       "addcdiv!",
+       *[](Tensor& self, Scalar value, const Tensor & tensor1, const Tensor & tensor2) {
+         return self.addcdiv_(tensor1, tensor2, value);
+       })
      .define_method(
        "zero!",
-       *[](torch::Tensor& self) {
+       *[](Tensor& self) {
          return self.zero_();
        })
      .define_method(
-       "detach!",
-       *[](torch::Tensor& self) {
-         return self.detach_();
+       "detach",
+       *[](Tensor& self) {
+         return self.detach();
        })
      .define_method(
-       "_access",
-       *[](torch::Tensor& self, int64_t index) {
-         return self[index];
+       "detach!",
+       *[](Tensor& self) {
+         return self.detach_();
        })
      .define_method(
        "_requires_grad!",
-       *[](torch::Tensor& self, bool requires_grad) {
+       *[](Tensor& self, bool requires_grad) {
          return self.set_requires_grad(requires_grad);
        })
      .define_method(
-       "backward",
-       *[](torch::Tensor& self) {
-         return self.backward();
+       "_backward",
+       *[](Tensor& self, Object gradient) {
+         return gradient.is_nil() ? self.backward() : self.backward(from_ruby<torch::Tensor>(gradient));
        })
      .define_method(
        "grad",
-       *[](torch::Tensor& self) {
+       *[](Tensor& self) {
          return self.grad();
        })
      .define_method(
        "_dtype",
-       *[](torch::Tensor& self) {
+       *[](Tensor& self) {
          return (int) at::typeMetaToScalarType(self.dtype());
        })
+     .define_method(
+       "_type",
+       *[](Tensor& self, int dtype) {
+         return self.toType((torch::ScalarType) dtype);
+       })
      .define_method(
        "_layout",
-       *[](torch::Tensor& self) {
+       *[](Tensor& self) {
          std::stringstream s;
          s << self.layout();
          return s.str();
        })
      .define_method(
        "device",
-       *[](torch::Tensor& self) {
+       *[](Tensor& self) {
          std::stringstream s;
          s << self.device();
          return s.str();
        })
      .define_method(
-       "_view",
-       *[](torch::Tensor& self, IntArrayRef size) {
-         return self.view(size);
+       "resize_as!",
+       *[](Tensor& self, Tensor& other) {
+         return self.resize_as_(other);
        })
      .define_method(
-       "add!",
-       *[](torch::Tensor& self, torch::Tensor& other) {
-         self.add_(other);
+       "fill!",
+       *[](Tensor& self, Scalar value) {
+         return self.fill_(value);
        })
      .define_method(
-       "sub!",
-       *[](torch::Tensor& self, torch::Tensor& other) {
-         self.sub_(other);
+       "relu!",
+       *[](Tensor& self) {
+         return self.relu_();
+       })
+     .define_method(
+       "normal!",
+       *[](Tensor& self, double mean, double std) {
+         return self.normal_(mean, std);
+       })
+     .define_method(
+       "random!",
+       *[](Tensor& self, int64_t to) {
+         return self.random_(to);
        })
      .define_method(
-       "mul!",
-       *[](torch::Tensor& self, torch::Tensor& other) {
-         self.mul_(other);
+       "sub!",
+       *[](Tensor& self, Tensor& other) {
+         return self.sub_(other);
        })
      .define_method(
        "div!",
-       *[](torch::Tensor& self, torch::Tensor& other) {
-         self.div_(other);
+       *[](Tensor& self, Tensor& other) {
+         return self.div_(other);
        })
      .define_method(
-       "log_softmax",
-       *[](torch::Tensor& self, int64_t dim) {
-         return self.log_softmax(dim);
+       "sqrt!",
+       *[](Tensor& self) {
+         return self.sqrt_();
        })
      .define_method(
-       "_data",
-       *[](torch::Tensor& self) {
+       "unsqueeze!",
+       *[](Tensor& self, int64_t dim) {
+         return self.unsqueeze_(dim);
+       })
+     .define_method(
+       "copy!",
+       *[](Tensor& self, Tensor& src) {
+         return self.copy_(src);
+       })
+     .define_method(
+       "clone",
+       *[](Tensor& self) {
+         return self.clone();
+       })
+     .define_method(
+       "data",
+       *[](Tensor& self) {
+         return self.data();
+       })
+     .define_method(
+       "_flat_data",
+       *[](Tensor& self) {
         Array a;
         auto dtype = self.dtype();

         // TODO DRY if someone knows C++
-        // TODO kByte (uint8), kChar (int8), kBool (bool)
-        if (dtype == torch::kShort) {
-          short* data = self.data_ptr<short>();
+        if (dtype == torch::kByte) {
+          uint8_t* data = self.data_ptr<uint8_t>();
+          for (int i = 0; i < self.numel(); i++) {
+            a.push(data[i]);
+          }
+        } else if (dtype == torch::kChar) {
+          int8_t* data = self.data_ptr<int8_t>();
+          for (int i = 0; i < self.numel(); i++) {
+            a.push(to_ruby<int>(data[i]));
+          }
+        } else if (dtype == torch::kShort) {
+          int16_t* data = self.data_ptr<int16_t>();
          for (int i = 0; i < self.numel(); i++) {
            a.push(data[i]);
          }
        } else if (dtype == torch::kInt) {
-          int* data = self.data_ptr<int>();
+          int32_t* data = self.data_ptr<int32_t>();
          for (int i = 0; i < self.numel(); i++) {
            a.push(data[i]);
          }
        } else if (dtype == torch::kLong) {
-          long long* data = self.data_ptr<long long>();
+          int64_t* data = self.data_ptr<int64_t>();
          for (int i = 0; i < self.numel(); i++) {
            a.push(data[i]);
          }
@@ -467,19 +402,24 @@ void Init_ext()
          for (int i = 0; i < self.numel(); i++) {
            a.push(data[i]);
          }
+        } else if (dtype == torch::kBool) {
+          bool* data = self.data_ptr<bool>();
+          for (int i = 0; i < self.numel(); i++) {
+            a.push(data[i] ? True : False);
+          }
        } else {
-          throw "Unsupported type";
+          throw std::runtime_error("Unsupported type");
        }
        return a;
      })
      .define_method(
-       "_size",
-       *[](torch::Tensor& self, int i) {
-         return self.size(i);
+       "_to",
+       *[](Tensor& self, torch::Device device, int dtype, bool non_blocking, bool copy) {
+         return self.to(device, (torch::ScalarType) dtype, non_blocking, copy);
        })
      .define_singleton_method(
        "_make_subclass",
-       *[](torch::Tensor& rd, bool requires_grad) {
+       *[](Tensor& rd, bool requires_grad) {
         auto data = torch::autograd::as_variable_ref(rd).detach();
         data.unsafeGetTensorImpl()->set_allow_tensor_metadata_change(true);
         auto var = data.set_requires_grad(requires_grad);
@@ -499,8 +439,11 @@ void Init_ext()
        torch::Layout l;
        if (layout == "strided") {
          l = torch::kStrided;
+       } else if (layout == "sparse") {
+         l = torch::kSparse;
+         throw std::runtime_error("Sparse layout not supported yet");
        } else {
-         throw "Unsupported layout";
+         throw std::runtime_error("Unsupported layout: " + layout);
        }
        return self.layout(l);
      })
@@ -513,7 +456,7 @@ void Init_ext()
        } else if (device == "cuda") {
          d = torch::kCUDA;
        } else {
-         throw "Unsupported device";
+         throw std::runtime_error("Unsupported device: " + device);
        }
        return self.device(d);
      })
@@ -523,24 +466,99 @@ void Init_ext()
        return self.requires_grad(requires_grad);
      });

-   Module rb_mNN = define_module_under(rb_mTorch, "NN");
-
    Module rb_mInit = define_module_under(rb_mNN, "Init")
      .define_singleton_method(
-       "kaiming_uniform_",
-       *[](torch::Tensor& input, double a) {
-         return torch::nn::init::kaiming_uniform_(input, a);
+       "_calculate_gain",
+       *[](NonlinearityType nonlinearity, double param) {
+         return torch::nn::init::calculate_gain(nonlinearity, param);
        })
      .define_singleton_method(
-       "uniform_",
-       *[](torch::Tensor& input, double to, double from) {
-         return torch::nn::init::uniform_(input, to, from);
+       "_uniform!",
+       *[](Tensor tensor, double low, double high) {
+         return torch::nn::init::uniform_(tensor, low, high);
+       })
+     .define_singleton_method(
+       "_normal!",
+       *[](Tensor tensor, double mean, double std) {
+         return torch::nn::init::normal_(tensor, mean, std);
+       })
+     .define_singleton_method(
+       "_constant!",
+       *[](Tensor tensor, Scalar value) {
+         return torch::nn::init::constant_(tensor, value);
+       })
+     .define_singleton_method(
+       "_ones!",
+       *[](Tensor tensor) {
+         return torch::nn::init::ones_(tensor);
+       })
+     .define_singleton_method(
+       "_zeros!",
+       *[](Tensor tensor) {
+         return torch::nn::init::zeros_(tensor);
+       })
+     .define_singleton_method(
+       "_eye!",
+       *[](Tensor tensor) {
+         return torch::nn::init::eye_(tensor);
+       })
+     .define_singleton_method(
+       "_dirac!",
+       *[](Tensor tensor) {
+         return torch::nn::init::dirac_(tensor);
+       })
+     .define_singleton_method(
+       "_xavier_uniform!",
+       *[](Tensor tensor, double gain) {
+         return torch::nn::init::xavier_uniform_(tensor, gain);
+       })
+     .define_singleton_method(
+       "_xavier_normal!",
+       *[](Tensor tensor, double gain) {
+         return torch::nn::init::xavier_normal_(tensor, gain);
+       })
+     .define_singleton_method(
+       "_kaiming_uniform!",
+       *[](Tensor tensor, double a, FanModeType mode, NonlinearityType nonlinearity) {
+         return torch::nn::init::kaiming_uniform_(tensor, a, mode, nonlinearity);
+       })
+     .define_singleton_method(
+       "_kaiming_normal!",
+       *[](Tensor tensor, double a, FanModeType mode, NonlinearityType nonlinearity) {
+         return torch::nn::init::kaiming_normal_(tensor, a, mode, nonlinearity);
+       })
+     .define_singleton_method(
+       "_orthogonal!",
+       *[](Tensor tensor, double gain) {
+         return torch::nn::init::orthogonal_(tensor, gain);
+       })
+     .define_singleton_method(
+       "_sparse!",
+       *[](Tensor tensor, double sparsity, double std) {
+         return torch::nn::init::sparse_(tensor, sparsity, std);
        });

    Class rb_cParameter = define_class_under<torch::autograd::Variable, torch::Tensor>(rb_mNN, "Parameter")
      .define_method(
        "grad",
        *[](torch::autograd::Variable& self) {
-         return self.grad();
+         auto grad = self.grad();
+         return grad.defined() ? to_ruby<torch::Tensor>(grad) : Nil;
+       });
+
+   Class rb_cDevice = define_class_under<torch::Device>(rb_mTorch, "Device")
+     .define_constructor(Constructor<torch::Device, std::string>())
+     .define_method("index", &torch::Device::index)
+     .define_method("index?", &torch::Device::has_index)
+     .define_method(
+       "type",
+       *[](torch::Device& self) {
+         std::stringstream s;
+         s << self.type();
+         return s.str();
        });
+
+   Module rb_mCUDA = define_module_under(rb_mTorch, "CUDA")
+     .define_singleton_method("available?", &torch::cuda::is_available)
+     .define_singleton_method("device_count", &torch::cuda::device_count);
  }
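
From Ruby, the new `Device` and `CUDA` bindings defined at the end of this diff can be exercised roughly as follows. This is a sketch based only on the methods bound above; the return values are illustrative:

```ruby
require "torch"

# Torch::Device gains a string constructor plus index/type accessors.
device = Torch::Device.new("cpu")
device.type    # "cpu"
device.index?  # false, since no index was given

# Torch::CUDA wraps torch::cuda::is_available and device_count.
Torch::CUDA.available?    # false without a CUDA-enabled LibTorch
Torch::CUDA.device_count  # 0 on a CPU-only machine
```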