torch-rb 0.1.5 → 0.1.6

This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (73)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/README.md +1 -1
  4. data/ext/torch/ext.cpp +0 -170
  5. data/ext/torch/nn_functions.cpp +44 -24
  6. data/ext/torch/templates.cpp +55 -0
  7. data/ext/torch/templates.hpp +48 -0
  8. data/ext/torch/tensor_functions.cpp +76 -16
  9. data/ext/torch/torch_functions.cpp +165 -65
  10. data/lib/torch.rb +51 -42
  11. data/lib/torch/ext.bundle +0 -0
  12. data/lib/torch/native/dispatcher.rb +1 -1
  13. data/lib/torch/native/function.rb +36 -5
  14. data/lib/torch/native/generator.rb +26 -7
  15. data/lib/torch/native/parser.rb +51 -14
  16. data/lib/torch/nn/avg_pool1d.rb +18 -0
  17. data/lib/torch/nn/avg_pool2d.rb +7 -2
  18. data/lib/torch/nn/avg_pool3d.rb +19 -0
  19. data/lib/torch/nn/avg_poolnd.rb +1 -1
  20. data/lib/torch/nn/batch_norm.rb +75 -0
  21. data/lib/torch/nn/batch_norm1d.rb +11 -0
  22. data/lib/torch/nn/batch_norm2d.rb +11 -0
  23. data/lib/torch/nn/batch_norm3d.rb +11 -0
  24. data/lib/torch/nn/constant_pad1d.rb +10 -0
  25. data/lib/torch/nn/constant_pad2d.rb +10 -0
  26. data/lib/torch/nn/constant_pad3d.rb +10 -0
  27. data/lib/torch/nn/constant_padnd.rb +18 -0
  28. data/lib/torch/nn/conv1d.rb +22 -0
  29. data/lib/torch/nn/conv2d.rb +9 -17
  30. data/lib/torch/nn/conv3d.rb +22 -0
  31. data/lib/torch/nn/fold.rb +20 -0
  32. data/lib/torch/nn/functional.rb +320 -100
  33. data/lib/torch/nn/group_norm.rb +36 -0
  34. data/lib/torch/nn/gru.rb +49 -0
  35. data/lib/torch/nn/hardshrink.rb +18 -0
  36. data/lib/torch/nn/instance_norm.rb +20 -0
  37. data/lib/torch/nn/instance_norm1d.rb +18 -0
  38. data/lib/torch/nn/instance_norm2d.rb +11 -0
  39. data/lib/torch/nn/instance_norm3d.rb +11 -0
  40. data/lib/torch/nn/layer_norm.rb +35 -0
  41. data/lib/torch/nn/local_response_norm.rb +21 -0
  42. data/lib/torch/nn/log_sigmoid.rb +9 -0
  43. data/lib/torch/nn/lp_pool1d.rb +9 -0
  44. data/lib/torch/nn/lp_pool2d.rb +9 -0
  45. data/lib/torch/nn/lp_poolnd.rb +22 -0
  46. data/lib/torch/nn/lstm.rb +66 -0
  47. data/lib/torch/nn/max_pool1d.rb +9 -0
  48. data/lib/torch/nn/max_pool2d.rb +1 -1
  49. data/lib/torch/nn/max_pool3d.rb +9 -0
  50. data/lib/torch/nn/max_poolnd.rb +6 -6
  51. data/lib/torch/nn/max_unpool1d.rb +16 -0
  52. data/lib/torch/nn/max_unpool2d.rb +16 -0
  53. data/lib/torch/nn/max_unpool3d.rb +16 -0
  54. data/lib/torch/nn/max_unpoolnd.rb +9 -0
  55. data/lib/torch/nn/module.rb +7 -0
  56. data/lib/torch/nn/reflection_pad1d.rb +10 -0
  57. data/lib/torch/nn/reflection_pad2d.rb +10 -0
  58. data/lib/torch/nn/reflection_padnd.rb +13 -0
  59. data/lib/torch/nn/replication_pad1d.rb +10 -0
  60. data/lib/torch/nn/replication_pad2d.rb +10 -0
  61. data/lib/torch/nn/replication_pad3d.rb +10 -0
  62. data/lib/torch/nn/replication_padnd.rb +13 -0
  63. data/lib/torch/nn/rnn_base.rb +48 -4
  64. data/lib/torch/nn/softshrink.rb +18 -0
  65. data/lib/torch/nn/softsign.rb +9 -0
  66. data/lib/torch/nn/tanh.rb +9 -0
  67. data/lib/torch/nn/tanhshrink.rb +9 -0
  68. data/lib/torch/nn/unfold.rb +19 -0
  69. data/lib/torch/nn/utils.rb +25 -0
  70. data/lib/torch/nn/zero_pad2d.rb +9 -0
  71. data/lib/torch/tensor.rb +14 -25
  72. data/lib/torch/version.rb +1 -1
  73. metadata +50 -2
@@ -2,7 +2,7 @@ module Torch
   module NN
     class AvgPoolNd < Module
       def extra_inspect
-        format("kernel_size: %s", @kernel_size)
+        format("kernel_size: %s, stride: %s, padding: %s", @kernel_size, @stride, @padding)
       end
     end
   end
@@ -0,0 +1,75 @@
+module Torch
+  module NN
+    class BatchNorm < Module
+      def initialize(num_features, eps: 1e-5, momentum: 0.1, affine: true, track_running_stats: true)
+        super()
+        @num_features = num_features
+        @eps = eps
+        @momentum = momentum
+        @affine = affine
+        @track_running_stats = track_running_stats
+        if @affine
+          @weight = Parameter.new(Torch::Tensor.new(num_features))
+          @bias = Parameter.new(Torch::Tensor.new(num_features))
+        else
+          register_parameter("weight", nil)
+          register_parameter("bias", nil)
+        end
+        if track_running_stats
+          register_buffer("running_mean", Torch.zeros(num_features))
+          register_buffer("running_var", Torch.ones(num_features))
+          register_buffer("num_batches_tracked", Torch.tensor(0, dtype: :long))
+        else
+          register_parameter("running_mean", nil)
+          register_parameter("running_var", nil)
+          register_parameter("num_batches_tracked", nil)
+        end
+        reset_parameters
+      end
+
+      def reset_running_stats
+        if @track_running_stats
+          @running_mean.zero!
+          @running_var.fill!(1)
+          @num_batches_tracked.zero!
+        end
+      end
+
+      def reset_parameters
+        reset_running_stats
+        if @affine
+          Init.ones!(@weight)
+          Init.zeros!(@bias)
+        end
+      end
+
+      def forward(input)
+        _check_input_dim(input)
+
+        if @momentum.nil?
+          exponential_average_factor = 0.0
+        else
+          exponential_average_factor = @momentum
+        end
+
+        if @training and @track_running_stats
+          if @num_batches_tracked.nil?
+            @num_batches_tracked += 1
+            if @momentum.nil?
+              exponential_average_factor = 1.0 / @num_batches_tracked.to_f
+            else
+              exponential_average_factor = @momentum
+            end
+          end
+        end
+
+        F.batch_norm(
+          input, @running_mean, @running_var,
+          weight: @weight, bias: @bias,
+          training: @training || !@track_running_stats,
+          momentum: exponential_average_factor, eps: @eps
+        )
+      end
+    end
+  end
+end
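As orientation for reviewing the new module (not part of the diff): a minimal usage sketch, assuming Torch.randn and Module#call behave as elsewhere in torch-rb. The dimensional subclasses added below enforce the expected input rank via _check_input_dim.

    require "torch"

    bn = Torch::NN::BatchNorm2d.new(3)   # 3 feature channels
    input = Torch.randn(2, 3, 8, 8)      # (batch, channels, height, width) => 4D, as BatchNorm2d requires
    output = bn.call(input)              # normalized tensor, same shape as input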
@@ -0,0 +1,11 @@
+module Torch
+  module NN
+    class BatchNorm1d < BatchNorm
+      def _check_input_dim(input)
+        if input.dim != 2 && input.dim != 3
+          raise ArgumentError, "expected 2D or 3D input (got #{input.dim}D input)"
+        end
+      end
+    end
+  end
+end
@@ -0,0 +1,11 @@
+module Torch
+  module NN
+    class BatchNorm2d < BatchNorm
+      def _check_input_dim(input)
+        if input.dim != 4
+          raise ArgumentError, "expected 4D input (got #{input.dim}D input)"
+        end
+      end
+    end
+  end
+end
@@ -0,0 +1,11 @@
+module Torch
+  module NN
+    class BatchNorm3d < BatchNorm
+      def _check_input_dim(input)
+        if input.dim != 5
+          raise ArgumentError, "expected 5D input (got #{input.dim}D input)"
+        end
+      end
+    end
+  end
+end
@@ -0,0 +1,10 @@
+module Torch
+  module NN
+    class ConstantPad1d < ConstantPadNd
+      def initialize(padding, value)
+        super(value)
+        @padding = _pair(padding)
+      end
+    end
+  end
+end
@@ -0,0 +1,10 @@
+module Torch
+  module NN
+    class ConstantPad2d < ConstantPadNd
+      def initialize(padding, value)
+        super(value)
+        @padding = _quadrupal(padding)
+      end
+    end
+  end
+end
@@ -0,0 +1,10 @@
+module Torch
+  module NN
+    class ConstantPad3d < ConstantPadNd
+      def initialize(padding, value)
+        super(value)
+        @padding = _ntuple(6, padding)
+      end
+    end
+  end
+end
@@ -0,0 +1,18 @@
+module Torch
+  module NN
+    class ConstantPadNd < Module
+      def initialize(value)
+        super()
+        @value = value
+      end
+
+      def forward(input)
+        F.pad(input, @padding, mode: "constant", value: @value)
+      end
+
+      def extra_inspect
+        format("padding: %s, value: %s", @padding, @value)
+      end
+    end
+  end
+end
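For reference (not part of the diff), a sketch of how the new constant-padding modules behave, assuming Module#call works as in the rest of the library; ConstantPadNd delegates to the new F.pad shown further down.

    pad = Torch::NN::ConstantPad1d.new(2, 3.5)   # pad both ends of the last dim by 2 with value 3.5
    input = Torch.randn(1, 2, 4)
    output = pad.call(input)                     # expected shape: [1, 2, 8]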
@@ -0,0 +1,22 @@
+module Torch
+  module NN
+    class Conv1d < ConvNd
+      def initialize(in_channels, out_channels, kernel_size, stride: 1,
+        padding: 0, dilation: 1, groups: 1, bias: true, padding_mode: "zeros")
+
+        kernel_size = _single(kernel_size)
+        stride = _single(stride)
+        padding = _single(padding)
+        dilation = _single(dilation)
+        super(in_channels, out_channels, kernel_size, stride, padding, dilation, false, _single(0), groups, bias, padding_mode)
+      end
+
+      def forward(input)
+        if @padding_mode == "circular"
+          raise NotImplementedError
+        end
+        F.conv1d(input, @weight, @bias, @stride, @padding, @dilation, @groups)
+      end
+    end
+  end
+end
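A hypothetical usage sketch for the new Conv1d module (assumes Torch.randn and Module#call as used elsewhere in the gem):

    conv = Torch::NN::Conv1d.new(16, 33, 3, stride: 2)
    input = Torch.randn(20, 16, 50)    # (batch, in_channels, length)
    output = conv.call(input)          # expected shape: [20, 33, 24]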
@@ -1,35 +1,27 @@
 module Torch
   module NN
     class Conv2d < ConvNd
-      def initialize(in_channels, out_channels, kernel_size, stride: 1, padding: 0, dilation: 1, groups: 1, bias: true, padding_mode: "zeros")
-        kernel_size = pair(kernel_size)
-        stride = pair(stride)
-        padding = pair(padding)
-        dilation = pair(dilation)
-        super(in_channels, out_channels, kernel_size, stride, padding, dilation, false, pair(0), groups, bias, padding_mode)
+      def initialize(in_channels, out_channels, kernel_size, stride: 1,
+        padding: 0, dilation: 1, groups: 1, bias: true, padding_mode: "zeros")
+
+        kernel_size = _pair(kernel_size)
+        stride = _pair(stride)
+        padding = _pair(padding)
+        dilation = _pair(dilation)
+        super(in_channels, out_channels, kernel_size, stride, padding, dilation, false, _pair(0), groups, bias, padding_mode)
       end

       def forward(input)
         if @padding_mode == "circular"
           raise NotImplementedError
         end
-        F.conv2d(input, @weight, @bias, stride: @stride, padding: @padding, dilation: @dilation, groups: @groups)
+        F.conv2d(input, @weight, @bias, @stride, @padding, @dilation, @groups)
       end

       # TODO add more parameters
       def extra_inspect
         format("%s, %s, kernel_size: %s, stride: %s", @in_channels, @out_channels, @kernel_size, @stride)
       end
-
-      private
-
-      def pair(value)
-        if value.is_a?(Array)
-          value
-        else
-          [value] * 2
-        end
-      end
     end
   end
 end
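The private pair helper is dropped in favor of the shared helpers mixed in from Torch::NN::Utils (utils.rb, listed above but not shown in this section), and F.conv2d is now called with positional arguments to match the generated native bindings. Presumed behavior of those helpers, for orientation only:

    # _single(3)  # => [3]          _single([2])   # => [2]
    # _pair(3)    # => [3, 3]       _pair([2, 1])  # => [2, 1]
    # _triple(3)  # => [3, 3, 3]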
@@ -0,0 +1,22 @@
+module Torch
+  module NN
+    class Conv3d < ConvNd
+      def initialize(in_channels, out_channels, kernel_size, stride: 1,
+        padding: 0, dilation: 1, groups: 1, bias: true, padding_mode: "zeros")
+
+        kernel_size = _triple(kernel_size)
+        stride = _triple(stride)
+        padding = _triple(padding)
+        dilation = _triple(dilation)
+        super(in_channels, out_channels, kernel_size, stride, padding, dilation, false, _triple(0), groups, bias, padding_mode)
+      end
+
+      def forward(input)
+        if @padding_mode == "circular"
+          raise NotImplementedError
+        end
+        F.conv3d(input, @weight, @bias, @stride, @padding, @dilation, @groups)
+      end
+    end
+  end
+end
@@ -0,0 +1,20 @@
+module Torch
+  module NN
+    class Fold < Module
+      def initialize(output_size, kernel_size, dilation: 1, padding: 0, stride: 1)
+        super()
+        @output_size = output_size
+        @kernel_size = kernel_size
+        @dilation = dilation
+        @padding = padding
+        @stride = stride
+      end
+
+      def forward(input)
+        F.fold(input, @output_size, @kernel_size, dilation: @dilation, padding: @padding, stride: @stride)
+      end
+
+      # TODO add extra_inspect
+    end
+  end
+end
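For orientation (not part of the diff), a sketch of the new Fold module; the shape arithmetic follows the col2im semantics that F.fold wraps below.

    fold = Torch::NN::Fold.new([4, 5], [2, 2])
    input = Torch.randn(1, 3 * 2 * 2, 12)   # 12 sliding blocks, each holding 3x2x2 values
    output = fold.call(input)               # expected shape: [1, 3, 4, 5]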
@@ -2,6 +2,166 @@ module Torch
   module NN
     class Functional
       class << self
+        include Utils
+
+        # convolution layers
+
+        def conv1d(*args, **options)
+          Torch.conv1d(*args, **options)
+        end
+
+        def conv2d(*args, **options)
+          Torch.conv2d(*args, **options)
+        end
+
+        def conv3d(*args, **options)
+          Torch.conv3d(*args, **options)
+        end
+
+        def unfold(input, kernel_size, dilation: 1, padding: 0, stride: 1)
+          if input.dim == 4
+            NN.im2col(input, _pair(kernel_size), _pair(dilation), _pair(padding), _pair(stride))
+          else
+            raise Error, "Input Error: Only 4D input Tensors are supported (got #{input.dim}D)"
+          end
+        end
+
+        def fold(input, output_size, kernel_size, dilation: 1, padding: 0, stride: 1)
+          if input.dim == 3
+            NN.col2im(input, _pair(output_size), _pair(kernel_size), _pair(dilation), _pair(padding), _pair(stride))
+          else
+            raise Error, "Input Error: Only 3D input Tensors are supported (got #{input.dim}D)"
+          end
+        end
+
+        # pooling layers
+
+        def max_pool1d(*args, **options)
+          return_indices = args.pop if args.size == 7
+          if return_indices
+            Torch.max_pool1d_with_indices(*args, **options)
+          else
+            Torch.max_pool1d(*args, **options)
+          end
+        end
+
+        def max_pool2d(*args, **options)
+          return_indices = args.pop if args.size == 7
+          if return_indices
+            NN.max_pool2d_with_indices(*args, **options)
+          else
+            Torch.max_pool2d(*args, **options)
+          end
+        end
+
+        def max_pool3d(*args, **options)
+          return_indices = args.pop if args.size == 7
+          if return_indices
+            NN.max_pool3d_with_indices(*args, **options)
+          else
+            Torch.max_pool3d(*args, **options)
+          end
+        end
+
+        def max_unpool1d(input, indices, kernel_size, stride: nil, padding: 0, output_size: nil)
+          raise NotImplementedYet
+          kernel_size = _single(kernel_size)
+          if !stride.nil?
+            _stride = _single(stride)
+          else
+            _stride = kernel_size
+          end
+          padding = _single(padding)
+          output_size = _unpool_output_size(input, kernel_size, _stride, padding, output_size)
+          output_size = output_size + [1]
+          NN.max_unpool2d(input.unsqueeze(3), indices.unsqueeze(3), output_size).squeeze(3)
+        end
+
+        def max_unpool2d(*args, **options)
+          raise NotImplementedYet
+          NN.max_unpool2d(*args, **options)
+        end
+
+        def max_unpool3d(*args, **options)
+          raise NotImplementedYet
+          NN.max_unpool3d(*args, **options)
+        end
+
+        def avg_pool1d(*args, **options)
+          Torch.avg_pool1d(*args, **options)
+        end
+
+        def avg_pool2d(*args, **options)
+          NN.avg_pool2d(*args, **options)
+        end
+
+        def avg_pool3d(*args, **options)
+          NN.avg_pool3d(*args, **options)
+        end
+
+        # padding layers
+
+        def pad(input, pad, mode: "constant", value: 0)
+          raise ArgumentError, "Padding length must be divisible by 2" unless pad.size % 2 == 0
+          raise ArgumentError, "Padding length too large" unless pad.size / 2 <= input.dim
+
+          if mode == "constant"
+            return Torch.constant_pad_nd(input, pad, value)
+          else
+            raise ArgumentError, "Padding mode doesn't take in value argument" unless value == 0
+
+            if input.dim == 3
+              raise ArgumentError, "3D tensors expect 2 values for padding" unless pad.size == 2
+              case mode
+              when "reflect"
+                NN.reflection_pad1d(input, pad)
+              when "replicate"
+                NN.replication_pad1d(input, pad)
+              else
+                raise NotImplementedYet
+              end
+            elsif input.dim == 4
+              raise ArgumentError, "4D tensors expect 4 values for padding" unless pad.size == 4
+              case mode
+              when "reflect"
+                NN.reflection_pad2d(input, pad)
+              when "replicate"
+                NN.replication_pad2d(input, pad)
+              else
+                raise NotImplementedYet
+              end
+            elsif input.dim == 5
+              raise ArgumentError, "5D tensors expect 6 values for padding" unless pad.size == 6
+              case mode
+              when "replicate"
+                NN.replication_pad3d(input, pad)
+              else
+                raise NotImplementedYet
+              end
+            else
+              raise ArgumentError, "Only 3D, 4D, 5D padding with non-constant padding are supported for now"
+            end
+          end
+        end
+
+        # activation layers
+
+        def hardshrink(input, lambd = 0.5)
+          Torch.hardshrink(input, lambd)
+        end
+
+        def leaky_relu(input, negative_slope = 0.01)
+          NN.leaky_relu(input, negative_slope)
+        end
+
+        def log_sigmoid(input)
+          NN.log_sigmoid(input)
+        end
+
+        def prelu(input, weight)
+          Torch.prelu(input, weight)
+        end
+
         def relu(input, inplace: false)
           if inplace
             input.relu!
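A sketch of the new F.pad dispatch (assuming Torch::NN::Functional is reachable under that constant, as defined in this file): constant mode maps to Torch.constant_pad_nd for any rank, while reflect/replicate modes route to the rank-specific NN kernels.

    t = Torch.randn(1, 2, 3)
    Torch::NN::Functional.pad(t, [1, 1])                    # constant pad, last dim 3 -> 5
    Torch::NN::Functional.pad(t, [1, 1], mode: "reflect")   # 3D input + 2 pad values -> reflection_pad1d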
@@ -10,37 +170,151 @@ module Torch
           end
         end

-        def conv2d(input, weight, bias, stride: 1, padding: 0, dilation: 1, groups: 1)
-          # TODO pair stride and padding when needed
-          Torch.conv2d(input, weight, bias, stride, padding, dilation, groups)
+        def softplus(input, beta: 1, threshold: 20)
+          NN.softplus(input, beta, threshold)
+        end
+
+        def softshrink(*args, **options)
+          NN.softshrink(*args, **options)
         end

-        def prelu(input, weight)
-          Torch.prelu(input, weight)
+        def softsign(input)
+          input / (input.abs + 1)
         end

-        def leaky_relu(input, negative_slope = 0.01)
-          Torch.leaky_relu(input, negative_slope)
+        def tanhshrink(input)
+          input - input.tanh
         end

-        def max_pool2d(input, kernel_size)
-          kernel_size = [kernel_size, kernel_size] if kernel_size.is_a?(Integer)
-          Torch.max_pool2d(input, kernel_size)
+        # other activation layers
+
+        def softmin(input, dim: nil)
+          dim ||= softmax_dim(input.dim)
+          (-input).softmax(dim)
+        end
+
+        def softmax(input, dim: nil)
+          dim ||= softmax_dim(input.dim)
+          input.softmax(dim)
+        end
+
+        # TODO make dim keyword argument and update examples
+        def log_softmax(input, dim = nil)
+          dim ||= softmax_dim(input.dim)
+          input.log_softmax(dim)
+        end
+
+        # normalization layers
+
+        def batch_norm(input, running_mean, running_var, weight: nil, bias: nil,
+          training: false, momentum: 0.1, eps: 1e-5)
+
+          if training
+            size = input.size
+            size_prods = size[0]
+            (size.length - 2).times do |i|
+              size_prods *= size[i + 2]
+            end
+            if size_prods == 1
+              raise ArgumentError, "Expected more than 1 value per channel when training, got input size #{size.inspect}"
+            end
+          end
+
+          Torch.batch_norm(
+            input, weight, bias, running_mean, running_var,
+            training, momentum, eps, false
+          )
+        end
+
+        def group_norm(input, num_groups, weight: nil, bias: nil, eps: 1e-5)
+          Torch.group_norm(input, num_groups, weight, bias, eps, false)
+        end
+
+        def instance_norm(input, running_mean: nil, running_var: nil, weight: nil,
+          bias: nil, use_input_stats: true, momentum: 0.1, eps: 1e-5)
+
+          Torch.instance_norm(
+            input, weight, bias, running_mean, running_var,
+            use_input_stats, momentum, eps, false
+          )
         end

-        def avg_pool2d(input, kernel_size)
-          kernel_size = [kernel_size, kernel_size] if kernel_size.is_a?(Integer)
-          Torch.avg_pool2d(input, kernel_size)
+        def layer_norm(input, normalized_shape, weight: nil, bias: nil, eps: 1e-5)
+          Torch.layer_norm(input, normalized_shape, weight, bias, eps, false)
+        end
+
+        def local_response_norm(input, size, alpha: 1e-4, beta: 0.75, k: 1.0)
+          dim = input.dim
+          if dim < 3
+            raise ArgumentError, "Expected 3D or higher dimensionality input (got #{dim} dimensions)"
+          end
+          div = input.mul(input).unsqueeze(1)
+          if dim == 3
+            div = pad(div, [0, 0, size / 2, (size - 1) / 2])
+            div = avg_pool2d(div, [size, 1], stride: 1).squeeze(1)
+          else
+            sizes = input.size
+            div = div.view(sizes[0], 1, sizes[1], sizes[2], -1)
+            div = pad(div, [0, 0, 0, 0, size / 2, (size - 1) / 2])
+            div = avg_pool3d(div, [size, 1, 1], stride: 1).squeeze(1)
+            div = div.view(sizes)
+          end
+          div = div.mul(alpha).add(k).pow(beta)
+          input / div
         end

         # linear layers

+        def linear(input, weight, bias)
+          NN.linear(input, weight, bias)
+        end
+
         def bilinear(input1, input2, weight, bias)
           Torch.bilinear(input1, input2, weight, bias)
         end

-        def linear(input, weight, bias)
-          Torch.linear(input, weight, bias)
+        # dropout layers
+
+        def dropout(input, p: 0.5, training: true, inplace: false)
+          if inplace
+            Torch.dropout!(input, p, training)
+          else
+            Torch.dropout(input, p, training)
+          end
+        end
+
+        def dropout2d(input, p: 0.5, training: true, inplace: false)
+          raise ArgumentError, "dropout probability has to be between 0 and 1, but got #{p}" if p < 0 || p > 1
+
+          if inplace
+            Torch.feature_dropout!(input, p, training)
+          else
+            Torch.feature_dropout(input, p, training)
+          end
+        end
+
+        def dropout3d(input, p: 0.5, training: true, inplace: false)
+          if inplace
+            Torch.feature_dropout!(input, p, training)
+          else
+            Torch.feature_dropout(input, p, training)
+          end
+        end
+
+        def alpha_dropout(input, p: 0.5, training: true, inplace: false)
+          if inplace
+            Torch.alpha_dropout!(input, p, training)
+          else
+            Torch.alpha_dropout(input, p, training)
+          end
+        end
+
+        def feature_alpha_dropout(input, p: 0.5, training: true, inplace: false)
+          if inplace
+            Torch.feature_alpha_dropout!(input, p, training)
+          else
+            Torch.feature_alpha_dropout(input, p, training)
+          end
         end

         # sparse layers
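The dropout family now calls the regenerated native functions, with bang-named variants for in-place operation. A hypothetical call, for orientation only:

    x = Torch.randn(5, 10)
    y = Torch::NN::Functional.dropout(x, p: 0.2)               # new tensor via Torch.dropout
    Torch::NN::Functional.dropout(x, p: 0.2, inplace: true)    # mutates x via Torch.dropout!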
@@ -51,37 +325,47 @@ module Torch
 
           padding_idx ||= -1
           # weight and indices are swapped from Python interface
-          Torch._embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
+          Torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
         end

         def embedding_bag(input, weight, offsets: nil, max_norm: nil, norm_type: 2, scale_grad_by_freq: false, mode: "mean", sparse: false, per_sample_weights: nil)
-          # need to handle nils
-          raise NotImplementedYet
-
           # TODO handle max_norm and norm_type
           raise NotImplementedYet unless max_norm.nil? && norm_type == 2.0

-          Torch._embedding_bag(input, weight, offsets, scale_grad_by_freq, mode, sparse, per_sample_weights)
+          mode_enum =
+            case mode
+            when "sum"
+              0
+            when "mean"
+              1
+            when "max"
+              2
+            else
+              raise ArgumentError, "Unknown mode: #{mode}"
+            end
+
+          # weight and input swapped
+          Torch.embedding_bag(weight, input, offsets, scale_grad_by_freq, mode_enum, sparse, per_sample_weights)
         end

         # distance functions

         def cosine_similarity(x1, x2, dim: 1, eps: 1e-8)
-          Torch._cosine_similarity(x1, x2, dim, eps)
+          Torch.cosine_similarity(x1, x2, dim, eps)
         end

         def pairwise_distance(x1, x2, p: 2.0, eps: 1e-6, keepdim: false)
-          Torch._pairwise_distance(x1, x2, p, eps, keepdim)
+          Torch.pairwise_distance(x1, x2, p, eps, keepdim)
         end

         # loss functions

         def binary_cross_entropy(input, target, weight: nil, reduction: "mean")
-          NN._binary_cross_entropy(input, target, weight, reduction)
+          NN.binary_cross_entropy(input, target, weight, reduction)
         end

         def binary_cross_entropy_with_logits(input, target, weight: nil, reduction: "mean", pos_weight: nil)
-          Torch._binary_cross_entropy_with_logits(input, target, weight, pos_weight, reduction)
+          Torch.binary_cross_entropy_with_logits(input, target, weight, pos_weight, reduction)
         end

         def cosine_embedding_loss(input1, input2, target, margin: 0, reduction: "mean")
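embedding_bag is no longer a stub: the mode string is mapped to the enum expected by the native Torch.embedding_bag ("sum" => 0, "mean" => 1, "max" => 2) and the weight/input order is swapped relative to the Python interface. A hedged usage sketch (assumes offsets must be an integer tensor, as the native signature requires):

    weight = Torch.randn(10, 3)
    input = Torch.tensor([1, 2, 4, 5, 4, 3, 2, 9])
    offsets = Torch.tensor([0, 4])
    Torch::NN::Functional.embedding_bag(input, weight, offsets: offsets, mode: "sum")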
@@ -94,19 +378,19 @@ module Torch
 
         def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank: 0, reduction: "mean", zero_infinity: false)
           # call to_a on input_lengths and target_lengths for C++
-          Torch._ctc_loss_intlist(log_probs, targets, input_lengths.to_a, target_lengths.to_a, blank, reduction, zero_infinity)
+          Torch.ctc_loss(log_probs, targets, input_lengths.to_a, target_lengths.to_a, blank, reduction, zero_infinity)
         end

         def hinge_embedding_loss(input, target, margin: 1.0, reduction: "mean")
-          Torch._hinge_embedding_loss(input, target, margin, reduction)
+          Torch.hinge_embedding_loss(input, target, margin, reduction)
         end

         def kl_div(input, target, reduction: "mean")
-          Torch._kl_div(input, target, reduction)
+          Torch.kl_div(input, target, reduction)
         end

         def l1_loss(input, target, reduction: "mean")
-          NN._l1_loss(input, target, reduction)
+          NN.l1_loss(input, target, reduction)
         end

         def margin_ranking_loss(input1, input2, target, margin: 0, reduction: "mean")
@@ -114,11 +398,11 @@ module Torch
         end

         def mse_loss(input, target, reduction: "mean")
-          NN._mse_loss(input, target, reduction)
+          NN.mse_loss(input, target, reduction)
         end

         def multilabel_margin_loss(input, target, reduction: "mean")
-          NN._multilabel_margin_loss(input, target, reduction)
+          NN.multilabel_margin_loss(input, target, reduction)
         end

         def multilabel_soft_margin_loss(input, target, weight: nil)
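Across the loss functions, the hand-written underscore-prefixed wrappers (presumably the ones dropped from ext.cpp, +0 -170 above) are replaced by the public generated bindings such as NN.mse_loss. A minimal sketch, assuming requires_grad: and Tensor#backward work as in the README examples:

    input = Torch.randn(3, 5, requires_grad: true)
    target = Torch.randn(3, 5)
    loss = Torch::NN::Functional.mse_loss(input, target)
    loss.backward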
@@ -126,91 +410,27 @@ module Torch
         end

         def multi_margin_loss(input, target, p: 1, margin: 1.0, weight: nil, reduction: "mean")
-          NN._multi_margin_loss(input, target, p, margin, weight, reduction)
+          NN.multi_margin_loss(input, target, p, margin, weight, reduction)
         end

         def nll_loss(input, target, weight: nil, ignore_index: -100, reduction: "mean")
-          NN._nll_loss(input, target, weight, reduction, ignore_index)
+          NN.nll_loss(input, target, weight, reduction, ignore_index)
         end

         def poisson_nll_loss(input, target, log_input: true, full: false, eps: 1e-8, reduction: "mean")
-          Torch._poisson_nll_loss(input, target, log_input, full, eps, reduction)
+          Torch.poisson_nll_loss(input, target, log_input, full, eps, reduction)
         end

         def soft_margin_loss(input, target, reduction: "mean")
-          NN._soft_margin_loss(input, target, reduction)
+          NN.soft_margin_loss(input, target, reduction)
         end

         def smooth_l1_loss(input, target, reduction: "mean")
-          NN._smooth_l1_loss(input, target, reduction)
+          NN.smooth_l1_loss(input, target, reduction)
         end

         def triplet_margin_loss(anchor, positive, negative, margin: 1.0, p: 2, eps: 1e-06, swap: false, reduction: "mean")
-          Torch._triplet_margin_loss(anchor, positive, negative, margin, p, eps, swap, reduction)
-        end
-
-        # end loss
-
-        def softmax(input, dim: nil)
-          dim ||= softmax_dim(input.dim)
-          input.softmax(dim: dim)
-        end
-
-        def softmin(input, dim: nil)
-          dim ||= softmax_dim(input.dim)
-          (-input).softmax(dim: dim)
-        end
-
-        def softplus(input, beta: 1, threshold: 20)
-          NN._softplus(input, beta, threshold)
-        end
-
-        # TODO make dim keyword argument and update examples
-        def log_softmax(input, dim = nil)
-          dim ||= softmax_dim(input.dim)
-          input.log_softmax(dim)
-        end
-
-        def dropout(input, p: 0.5, training: true, inplace: false)
-          if inplace
-            Torch._dropout_(input, p, training)
-          else
-            Torch._dropout(input, p, training)
-          end
-        end
-
-        def dropout2d(input, p: 0.5, training: true, inplace: false)
-          raise ArgumentError, "dropout probability has to be between 0 and 1, but got #{p}" if p < 0 || p > 1
-
-          if inplace
-            Torch._feature_dropout_(input, p, training)
-          else
-            Torch._feature_dropout(input, p, training)
-          end
-        end
-
-        def dropout3d(input, p: 0.5, training: true, inplace: false)
-          if inplace
-            Torch._feature_dropout_(input, p, training)
-          else
-            Torch._feature_dropout(input, p, training)
-          end
-        end
-
-        def alpha_dropout(input, p: 0.5, training: true, inplace: false)
-          if inplace
-            Torch._alpha_dropout_(input, p, training)
-          else
-            Torch._alpha_dropout(input, p, training)
-          end
-        end
-
-        def feature_alpha_dropout(input, p: 0.5, training: true, inplace: false)
-          if inplace
-            Torch._feature_alpha_dropout_(input, p, training)
-          else
-            Torch._feature_alpha_dropout(input, p, training)
-          end
+          Torch.triplet_margin_loss(anchor, positive, negative, margin, p, eps, swap, reduction)
         end

         private