torch-rb 0.1.3 → 0.1.8

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Files changed (115)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -0
  3. data/README.md +5 -2
  4. data/ext/torch/ext.cpp +130 -555
  5. data/ext/torch/extconf.rb +9 -0
  6. data/ext/torch/templates.cpp +55 -0
  7. data/ext/torch/templates.hpp +244 -0
  8. data/lib/torch.rb +209 -171
  9. data/lib/torch/inspector.rb +23 -19
  10. data/lib/torch/native/dispatcher.rb +48 -0
  11. data/lib/torch/native/function.rb +110 -0
  12. data/lib/torch/native/generator.rb +168 -0
  13. data/lib/torch/native/native_functions.yaml +6491 -0
  14. data/lib/torch/native/parser.rb +134 -0
  15. data/lib/torch/nn/avg_pool1d.rb +18 -0
  16. data/lib/torch/nn/avg_pool2d.rb +19 -0
  17. data/lib/torch/nn/avg_pool3d.rb +19 -0
  18. data/lib/torch/nn/avg_poolnd.rb +9 -0
  19. data/lib/torch/nn/batch_norm.rb +75 -0
  20. data/lib/torch/nn/batch_norm1d.rb +11 -0
  21. data/lib/torch/nn/batch_norm2d.rb +11 -0
  22. data/lib/torch/nn/batch_norm3d.rb +11 -0
  23. data/lib/torch/nn/bce_loss.rb +13 -0
  24. data/lib/torch/nn/bce_with_logits_loss.rb +15 -0
  25. data/lib/torch/nn/bilinear.rb +38 -0
  26. data/lib/torch/nn/constant_pad1d.rb +10 -0
  27. data/lib/torch/nn/constant_pad2d.rb +10 -0
  28. data/lib/torch/nn/constant_pad3d.rb +10 -0
  29. data/lib/torch/nn/constant_padnd.rb +18 -0
  30. data/lib/torch/nn/conv1d.rb +22 -0
  31. data/lib/torch/nn/conv2d.rb +10 -20
  32. data/lib/torch/nn/conv3d.rb +22 -0
  33. data/lib/torch/nn/convnd.rb +3 -3
  34. data/lib/torch/nn/cosine_embedding_loss.rb +14 -0
  35. data/lib/torch/nn/cosine_similarity.rb +15 -0
  36. data/lib/torch/nn/cross_entropy_loss.rb +14 -0
  37. data/lib/torch/nn/ctc_loss.rb +15 -0
  38. data/lib/torch/nn/dropoutnd.rb +2 -2
  39. data/lib/torch/nn/embedding_bag.rb +34 -0
  40. data/lib/torch/nn/fold.rb +20 -0
  41. data/lib/torch/nn/functional.rb +379 -32
  42. data/lib/torch/nn/group_norm.rb +36 -0
  43. data/lib/torch/nn/gru.rb +49 -0
  44. data/lib/torch/nn/hardshrink.rb +18 -0
  45. data/lib/torch/nn/hinge_embedding_loss.rb +14 -0
  46. data/lib/torch/nn/identity.rb +14 -0
  47. data/lib/torch/nn/init.rb +58 -1
  48. data/lib/torch/nn/instance_norm.rb +20 -0
  49. data/lib/torch/nn/instance_norm1d.rb +18 -0
  50. data/lib/torch/nn/instance_norm2d.rb +11 -0
  51. data/lib/torch/nn/instance_norm3d.rb +11 -0
  52. data/lib/torch/nn/kl_div_loss.rb +13 -0
  53. data/lib/torch/nn/l1_loss.rb +13 -0
  54. data/lib/torch/nn/layer_norm.rb +35 -0
  55. data/lib/torch/nn/leaky_relu.rb +20 -0
  56. data/lib/torch/nn/linear.rb +12 -11
  57. data/lib/torch/nn/local_response_norm.rb +21 -0
  58. data/lib/torch/nn/log_sigmoid.rb +9 -0
  59. data/lib/torch/nn/log_softmax.rb +14 -0
  60. data/lib/torch/nn/loss.rb +10 -0
  61. data/lib/torch/nn/lp_pool1d.rb +9 -0
  62. data/lib/torch/nn/lp_pool2d.rb +9 -0
  63. data/lib/torch/nn/lp_poolnd.rb +22 -0
  64. data/lib/torch/nn/lstm.rb +66 -0
  65. data/lib/torch/nn/margin_ranking_loss.rb +14 -0
  66. data/lib/torch/nn/max_pool1d.rb +9 -0
  67. data/lib/torch/nn/max_pool2d.rb +9 -0
  68. data/lib/torch/nn/max_pool3d.rb +9 -0
  69. data/lib/torch/nn/max_poolnd.rb +19 -0
  70. data/lib/torch/nn/max_unpool1d.rb +16 -0
  71. data/lib/torch/nn/max_unpool2d.rb +16 -0
  72. data/lib/torch/nn/max_unpool3d.rb +16 -0
  73. data/lib/torch/nn/max_unpoolnd.rb +9 -0
  74. data/lib/torch/nn/module.rb +186 -35
  75. data/lib/torch/nn/mse_loss.rb +2 -2
  76. data/lib/torch/nn/multi_label_margin_loss.rb +13 -0
  77. data/lib/torch/nn/multi_label_soft_margin_loss.rb +13 -0
  78. data/lib/torch/nn/multi_margin_loss.rb +17 -0
  79. data/lib/torch/nn/nll_loss.rb +14 -0
  80. data/lib/torch/nn/pairwise_distance.rb +16 -0
  81. data/lib/torch/nn/parameter.rb +2 -2
  82. data/lib/torch/nn/poisson_nll_loss.rb +16 -0
  83. data/lib/torch/nn/prelu.rb +19 -0
  84. data/lib/torch/nn/reflection_pad1d.rb +10 -0
  85. data/lib/torch/nn/reflection_pad2d.rb +10 -0
  86. data/lib/torch/nn/reflection_padnd.rb +13 -0
  87. data/lib/torch/nn/relu.rb +8 -3
  88. data/lib/torch/nn/replication_pad1d.rb +10 -0
  89. data/lib/torch/nn/replication_pad2d.rb +10 -0
  90. data/lib/torch/nn/replication_pad3d.rb +10 -0
  91. data/lib/torch/nn/replication_padnd.rb +13 -0
  92. data/lib/torch/nn/rnn.rb +22 -0
  93. data/lib/torch/nn/rnn_base.rb +198 -0
  94. data/lib/torch/nn/sequential.rb +1 -10
  95. data/lib/torch/nn/sigmoid.rb +9 -0
  96. data/lib/torch/nn/smooth_l1_loss.rb +13 -0
  97. data/lib/torch/nn/soft_margin_loss.rb +13 -0
  98. data/lib/torch/nn/softmax.rb +18 -0
  99. data/lib/torch/nn/softmax2d.rb +10 -0
  100. data/lib/torch/nn/softmin.rb +14 -0
  101. data/lib/torch/nn/softplus.rb +19 -0
  102. data/lib/torch/nn/softshrink.rb +18 -0
  103. data/lib/torch/nn/softsign.rb +9 -0
  104. data/lib/torch/nn/tanh.rb +9 -0
  105. data/lib/torch/nn/tanhshrink.rb +9 -0
  106. data/lib/torch/nn/triplet_margin_loss.rb +18 -0
  107. data/lib/torch/nn/unfold.rb +19 -0
  108. data/lib/torch/nn/utils.rb +25 -0
  109. data/lib/torch/nn/weighted_loss.rb +10 -0
  110. data/lib/torch/nn/zero_pad2d.rb +9 -0
  111. data/lib/torch/random.rb +10 -0
  112. data/lib/torch/tensor.rb +51 -44
  113. data/lib/torch/version.rb +1 -1
  114. metadata +98 -6
  115. data/lib/torch/ext.bundle +0 -0

data/lib/torch/nn/conv2d.rb
@@ -1,36 +1,26 @@
 module Torch
   module NN
     class Conv2d < ConvNd
-      attr_reader :bias, :weight
+      def initialize(in_channels, out_channels, kernel_size, stride: 1,
+        padding: 0, dilation: 1, groups: 1, bias: true, padding_mode: "zeros")
 
-      def initialize(in_channels, out_channels, kernel_size, stride: 1, padding: 0, dilation: 1, groups: 1, bias: true, padding_mode: "zeros")
-        kernel_size = pair(kernel_size)
-        stride = pair(stride)
-        padding = pair(padding)
-        dilation = pair(dilation)
-        super(in_channels, out_channels, kernel_size, stride, padding, dilation, false, pair(0), groups, bias, padding_mode)
+        kernel_size = _pair(kernel_size)
+        stride = _pair(stride)
+        padding = _pair(padding)
+        dilation = _pair(dilation)
+        super(in_channels, out_channels, kernel_size, stride, padding, dilation, false, _pair(0), groups, bias, padding_mode)
       end
 
       def forward(input)
         if @padding_mode == "circular"
           raise NotImplementedError
         end
-        F.conv2d(input, @weight, @bias, stride: @stride, padding: @padding, dilation: @dilation, groups: @groups)
+        F.conv2d(input, @weight, @bias, @stride, @padding, @dilation, @groups)
       end
 
       # TODO add more parameters
-      def inspect
-        "Conv2d(#{@in_channels}, #{@out_channels}, kernel_size: #{@kernel_size.inspect}, stride: #{@stride.inspect})"
-      end
-
-      private
-
-      def pair(value)
-        if value.is_a?(Array)
-          value
-        else
-          [value] * 2
-        end
+      def extra_inspect
+        format("%s, %s, kernel_size: %s, stride: %s", @in_channels, @out_channels, @kernel_size, @stride)
       end
     end
   end
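
Usage note (not from the diff): a minimal sketch of the reworked Conv2d layer. It assumes the torch-rb Module API follows PyTorch conventions, i.e. Torch.randn creates a tensor and #call dispatches to #forward; the sizes and variable names are illustrative only.

  require "torch"

  # NCHW input: batch of 2, 3 channels, 8x8 spatial size
  input = Torch.randn(2, 3, 8, 8)

  # kernel_size, stride, padding and dilation accept an integer or a pair (normalized via _pair)
  conv = Torch::NN::Conv2d.new(3, 16, 3, stride: 1, padding: 1)

  output = conv.call(input) # forward delegates to F.conv2d
  # with stride: 1 and padding: 1 the spatial size is preserved: [2, 16, 8, 8]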

data/lib/torch/nn/conv3d.rb
@@ -0,0 +1,22 @@
+module Torch
+  module NN
+    class Conv3d < ConvNd
+      def initialize(in_channels, out_channels, kernel_size, stride: 1,
+        padding: 0, dilation: 1, groups: 1, bias: true, padding_mode: "zeros")
+
+        kernel_size = _triple(kernel_size)
+        stride = _triple(stride)
+        padding = _triple(padding)
+        dilation = _triple(dilation)
+        super(in_channels, out_channels, kernel_size, stride, padding, dilation, false, _triple(0), groups, bias, padding_mode)
+      end
+
+      def forward(input)
+        if @padding_mode == "circular"
+          raise NotImplementedError
+        end
+        F.conv3d(input, @weight, @bias, @stride, @padding, @dilation, @groups)
+      end
+    end
+  end
+end

data/lib/torch/nn/convnd.rb
@@ -29,11 +29,11 @@ module Torch
       end
 
       def reset_parameters
-        Init.kaiming_uniform!(@weight, Math.sqrt(5))
+        Init.kaiming_uniform!(@weight, a: Math.sqrt(5))
         if @bias
-          fan_in, _ = Init.calculate_fan_in_and_fan_out(@weight)
+          fan_in, _ = Init._calculate_fan_in_and_fan_out(@weight)
           bound = 1 / Math.sqrt(fan_in)
-          Init.uniform!(@bias, -bound, bound)
+          Init.uniform!(@bias, a: -bound, b: bound)
         end
       end
     end
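
Usage note (not from the diff): the Init helpers now take keyword arguments. A sketch, allocating an uninitialized tensor with Torch::Tensor.new the same way the new EmbeddingBag module does:

  w = Torch::Tensor.new(3, 5)
  Torch::NN::Init.kaiming_uniform!(w, a: Math.sqrt(5))

  fan_in, _fan_out = Torch::NN::Init._calculate_fan_in_and_fan_out(w)
  bound = 1 / Math.sqrt(fan_in)

  b = Torch::Tensor.new(3)
  Torch::NN::Init.uniform!(b, a: -bound, b: bound)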

data/lib/torch/nn/cosine_embedding_loss.rb
@@ -0,0 +1,14 @@
+module Torch
+  module NN
+    class CosineEmbeddingLoss < Loss
+      def initialize(margin: 0, reduction: "mean")
+        super(reduction)
+        @margin = margin
+      end
+
+      def forward(input1, input2, target)
+        F.cosine_embedding_loss(input1, input2, target, margin: @margin, reduction: @reduction)
+      end
+    end
+  end
+end

data/lib/torch/nn/cosine_similarity.rb
@@ -0,0 +1,15 @@
+module Torch
+  module NN
+    class CosineSimilarity < Module
+      def initialize(dim: 1, eps: 1e-8)
+        super()
+        @dim = dim
+        @eps = eps
+      end
+
+      def forward(x1, x2)
+        F.cosine_similarity(x1, x2, dim: @dim, eps: @eps)
+      end
+    end
+  end
+end

data/lib/torch/nn/cross_entropy_loss.rb
@@ -0,0 +1,14 @@
+module Torch
+  module NN
+    class CrossEntropyLoss < WeightedLoss
+      def initialize(weight: nil, ignore_index: -100, reduction: "mean")
+        super(weight, reduction)
+        @ignore_index = ignore_index
+      end
+
+      def forward(input, target)
+        F.cross_entropy(input, target, weight: @weight, ignore_index: @ignore_index, reduction: @reduction)
+      end
+    end
+  end
+end
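
Usage note (not from the diff): the new loss classes follow the PyTorch pattern of building a criterion object and applying it to predictions and targets. A sketch, assuming Torch.randn, Torch.tensor and Module#call behave as in the rest of the library:

  predictions = Torch.randn(3, 5)             # unnormalized scores, 3 samples x 5 classes
  targets = Torch.tensor([1, 0, 4])           # class indices
  criterion = Torch::NN::CrossEntropyLoss.new
  loss = criterion.call(predictions, targets) # forward -> F.cross_entropy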

data/lib/torch/nn/ctc_loss.rb
@@ -0,0 +1,15 @@
+module Torch
+  module NN
+    class CTCLoss < Loss
+      def initialize(blank: 0, reduction: "mean", zero_infinity: false)
+        super(reduction)
+        @blank = blank
+        @zero_infinity = zero_infinity
+      end
+
+      def forward(log_probs, targets, input_lengths, target_lengths)
+        F.ctc_loss(log_probs, targets, input_lengths, target_lengths, blank: @blank, reduction: @reduction, zero_infinity: @zero_infinity)
+      end
+    end
+  end
+end

data/lib/torch/nn/dropoutnd.rb
@@ -7,8 +7,8 @@ module Torch
         @inplace = inplace
       end
 
-      def inspect
-        "#{self.class.name.split("::").last}(p: #{@p.inspect}, inplace: #{@inplace.inspect})"
+      def extra_inspect
+        format("p: %s, inplace: %s", @p, @inplace)
       end
     end
   end

data/lib/torch/nn/embedding_bag.rb
@@ -0,0 +1,34 @@
+# ported from https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/sparse.py
+module Torch
+  module NN
+    class EmbeddingBag < Module
+      def initialize(num_embeddings, embedding_dim, max_norm: nil, norm_type: 2.0,
+        scale_grad_by_freq: false, mode: "mean", sparse: false, _weight: nil)
+
+        super()
+        @num_embeddings = num_embeddings
+        @embedding_dim = embedding_dim
+        @max_norm = max_norm
+        @norm_type = norm_type
+        @scale_grad_by_freq = scale_grad_by_freq
+        if _weight.nil?
+          @weight = Parameter.new(Tensor.new(num_embeddings, embedding_dim))
+          reset_parameters
+        else
+          raise ArgumentError, "Shape of weight does not match num_embeddings and embedding_dim" unless _weight.shape == [num_embeddings, embedding_dim]
+          @weight = Parameter.new(_weight)
+        end
+        @mode = mode
+        @sparse = sparse
+      end
+
+      def reset_parameters
+        Init.normal!(@weight)
+      end
+
+      def forward(input, offsets: nil, per_sample_weights: nil)
+        F.embedding_bag(input, @weight, offsets: offsets, max_norm: @max_norm, norm_type: @norm_type, scale_grad_by_freq: @scale_grad_by_freq, mode: @mode, sparse: @sparse, per_sample_weights: per_sample_weights)
+      end
+    end
+  end
+end
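
Usage note (not from the diff): EmbeddingBag pools the embeddings of each bag (mode: "mean" by default). As in PyTorch, input holds the flattened indices and offsets marks where each bag starts; forward is called directly here because offsets is a keyword argument. Torch.tensor is assumed.

  bag = Torch::NN::EmbeddingBag.new(10, 4)        # 10 embeddings of dimension 4
  input = Torch.tensor([1, 2, 4, 5, 4, 3, 2, 9])  # flattened indices for all bags
  offsets = Torch.tensor([0, 4])                  # two bags: indices 0..3 and 4..7
  pooled = bag.forward(input, offsets: offsets)   # expected shape: [2, 4]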

data/lib/torch/nn/fold.rb
@@ -0,0 +1,20 @@
+module Torch
+  module NN
+    class Fold < Module
+      def initialize(output_size, kernel_size, dilation: 1, padding: 0, stride: 1)
+        super()
+        @output_size = output_size
+        @kernel_size = kernel_size
+        @dilation = dilation
+        @padding = padding
+        @stride = stride
+      end
+
+      def forward(input)
+        F.fold(input, @output_size, @kernel_size, dilation: @dilation, padding: @padding, stride: @stride)
+      end
+
+      # TODO add extra_inspect
+    end
+  end
+end
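
Usage note (not from the diff): F.unfold extracts sliding local blocks from a 4D tensor (im2col) and Fold puts them back (col2im), with overlapping values summed. A sketch, assuming Torch.randn:

  F = Torch::NN::Functional

  x = Torch.randn(1, 3, 4, 4)
  blocks = F.unfold(x, 2)                 # only 4D input is supported
  fold = Torch::NN::Fold.new([4, 4], 2)
  restored = fold.forward(blocks)         # same shape as x; overlaps are summed, not averaged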

data/lib/torch/nn/functional.rb
@@ -2,51 +2,284 @@ module Torch
   module NN
     class Functional
       class << self
-        def relu(input)
-          Torch.relu(input)
+        include Utils
+
+        # convolution layers
+
+        def conv1d(*args, **options)
+          Torch.conv1d(*args, **options)
         end
 
-        def conv2d(input, weight, bias, stride: 1, padding: 0, dilation: 1, groups: 1)
-          # TODO pair stride and padding when needed
-          Torch.conv2d(input, weight, bias, stride, padding, dilation, groups)
+        def conv2d(*args, **options)
+          Torch.conv2d(*args, **options)
         end
 
-        def max_pool2d(input, kernel_size)
-          kernel_size = [kernel_size, kernel_size] if kernel_size.is_a?(Integer)
-          Torch.max_pool2d(input, kernel_size)
+        def conv3d(*args, **options)
+          Torch.conv3d(*args, **options)
         end
 
-        def avg_pool2d(input, kernel_size)
-          kernel_size = [kernel_size, kernel_size] if kernel_size.is_a?(Integer)
-          Torch.avg_pool2d(input, kernel_size)
+        def unfold(input, kernel_size, dilation: 1, padding: 0, stride: 1)
+          if input.dim == 4
+            NN.im2col(input, _pair(kernel_size), _pair(dilation), _pair(padding), _pair(stride))
+          else
+            raise Error, "Input Error: Only 4D input Tensors are supported (got #{input.dim}D)"
+          end
         end
 
-        def linear(input, weight, bias)
-          Torch.linear(input, weight, bias)
+        def fold(input, output_size, kernel_size, dilation: 1, padding: 0, stride: 1)
+          if input.dim == 3
+            NN.col2im(input, _pair(output_size), _pair(kernel_size), _pair(dilation), _pair(padding), _pair(stride))
+          else
+            raise Error, "Input Error: Only 3D input Tensors are supported (got #{input.dim}D)"
+          end
         end
 
-        def mse_loss(input, target, reduction: "mean")
-          Torch.mse_loss(input, target, reduction)
+        # pooling layers
+
+        def max_pool1d(*args, **options)
+          return_indices = args.pop if args.size == 7
+          if return_indices
+            Torch.max_pool1d_with_indices(*args, **options)
+          else
+            Torch.max_pool1d(*args, **options)
+          end
+        end
+
+        def max_pool2d(*args, **options)
+          return_indices = args.pop if args.size == 7
+          if return_indices
+            NN.max_pool2d_with_indices(*args, **options)
+          else
+            Torch.max_pool2d(*args, **options)
+          end
+        end
+
+        def max_pool3d(*args, **options)
+          return_indices = args.pop if args.size == 7
+          if return_indices
+            NN.max_pool3d_with_indices(*args, **options)
+          else
+            Torch.max_pool3d(*args, **options)
+          end
+        end
+
+        def max_unpool1d(input, indices, kernel_size, stride: nil, padding: 0, output_size: nil)
+          raise NotImplementedYet
+          kernel_size = _single(kernel_size)
+          if !stride.nil?
+            _stride = _single(stride)
+          else
+            _stride = kernel_size
+          end
+          padding = _single(padding)
+          output_size = _unpool_output_size(input, kernel_size, _stride, padding, output_size)
+          output_size = output_size + [1]
+          NN.max_unpool2d(input.unsqueeze(3), indices.unsqueeze(3), output_size).squeeze(3)
+        end
+
+        def max_unpool2d(*args, **options)
+          raise NotImplementedYet
+          NN.max_unpool2d(*args, **options)
+        end
+
+        def max_unpool3d(*args, **options)
+          raise NotImplementedYet
+          NN.max_unpool3d(*args, **options)
         end
 
-        def cross_entropy(input, target)
-          nll_loss(log_softmax(input, 1), target)
+        def avg_pool1d(*args, **options)
+          Torch.avg_pool1d(*args, **options)
         end
 
-        def nll_loss(input, target, reduction: "mean")
-          # TODO fix for non-1d
-          Torch.nll_loss(input, target, reduction)
+        def avg_pool2d(*args, **options)
+          NN.avg_pool2d(*args, **options)
+        end
+
+        def avg_pool3d(*args, **options)
+          NN.avg_pool3d(*args, **options)
+        end
+
+        # padding layers
+
+        def pad(input, pad, mode: "constant", value: 0)
+          raise ArgumentError, "Padding length must be divisible by 2" unless pad.size % 2 == 0
+          raise ArgumentError, "Padding length too large" unless pad.size / 2 <= input.dim
+
+          if mode == "constant"
+            return Torch.constant_pad_nd(input, pad, value)
+          else
+            raise ArgumentError, "Padding mode doesn't take in value argument" unless value == 0
+
+            if input.dim == 3
+              raise ArgumentError, "3D tensors expect 2 values for padding" unless pad.size == 2
+              case mode
+              when "reflect"
+                NN.reflection_pad1d(input, pad)
+              when "replicate"
+                NN.replication_pad1d(input, pad)
+              else
+                raise NotImplementedYet
+              end
+            elsif input.dim == 4
+              raise ArgumentError, "4D tensors expect 4 values for padding" unless pad.size == 4
+              case mode
+              when "reflect"
+                NN.reflection_pad2d(input, pad)
+              when "replicate"
+                NN.replication_pad2d(input, pad)
+              else
+                raise NotImplementedYet
+              end
+            elsif input.dim == 5
+              raise ArgumentError, "5D tensors expect 6 values for padding" unless pad.size == 6
+              case mode
+              when "replicate"
+                NN.replication_pad3d(input, pad)
+              else
+                raise NotImplementedYet
+              end
+            else
+              raise ArgumentError, "Only 3D, 4D, 5D padding with non-constant padding are supported for now"
+            end
+          end
         end
 
-        def log_softmax(input, dim)
+        # activation layers
+
+        def hardshrink(input, lambd = 0.5)
+          Torch.hardshrink(input, lambd)
+        end
+
+        def leaky_relu(input, negative_slope = 0.01)
+          NN.leaky_relu(input, negative_slope)
+        end
+
+        def log_sigmoid(input)
+          NN.log_sigmoid(input)
+        end
+
+        def prelu(input, weight)
+          Torch.prelu(input, weight)
+        end
+
+        def relu(input, inplace: false)
+          if inplace
+            input.relu!
+          else
+            input.relu
+          end
+        end
+
+        def softplus(input, beta: 1, threshold: 20)
+          NN.softplus(input, beta, threshold)
+        end
+
+        def softshrink(*args, **options)
+          NN.softshrink(*args, **options)
+        end
+
+        def softsign(input)
+          input / (input.abs + 1)
+        end
+
+        def tanhshrink(input)
+          input - input.tanh
+        end
+
+        # other activation layers
+
+        def softmin(input, dim: nil)
+          dim ||= softmax_dim(input.dim)
+          (-input).softmax(dim)
+        end
+
+        def softmax(input, dim: nil)
+          dim ||= softmax_dim(input.dim)
+          input.softmax(dim)
+        end
+
+        # TODO make dim keyword argument and update examples
+        def log_softmax(input, dim = nil)
+          dim ||= softmax_dim(input.dim)
           input.log_softmax(dim)
         end
 
+        # normalization layers
+
+        def batch_norm(input, running_mean, running_var, weight: nil, bias: nil,
+          training: false, momentum: 0.1, eps: 1e-5)
+
+          if training
+            size = input.size
+            size_prods = size[0]
+            (size.length - 2).times do |i|
+              size_prods *= size[i + 2]
+            end
+            if size_prods == 1
+              raise ArgumentError, "Expected more than 1 value per channel when training, got input size #{size.inspect}"
+            end
+          end
+
+          Torch.batch_norm(
+            input, weight, bias, running_mean, running_var,
+            training, momentum, eps, false
+          )
+        end
+
+        def group_norm(input, num_groups, weight: nil, bias: nil, eps: 1e-5)
+          Torch.group_norm(input, num_groups, weight, bias, eps, false)
+        end
+
+        def instance_norm(input, running_mean: nil, running_var: nil, weight: nil,
+          bias: nil, use_input_stats: true, momentum: 0.1, eps: 1e-5)
+
+          Torch.instance_norm(
+            input, weight, bias, running_mean, running_var,
+            use_input_stats, momentum, eps, false
+          )
+        end
+
+        def layer_norm(input, normalized_shape, weight: nil, bias: nil, eps: 1e-5)
+          Torch.layer_norm(input, normalized_shape, weight, bias, eps, false)
+        end
+
+        def local_response_norm(input, size, alpha: 1e-4, beta: 0.75, k: 1.0)
+          dim = input.dim
+          if dim < 3
+            raise ArgumentError, "Expected 3D or higher dimensionality input (got #{dim} dimensions)"
+          end
+          div = input.mul(input).unsqueeze(1)
+          if dim == 3
+            div = pad(div, [0, 0, size / 2, (size - 1) / 2])
+            div = avg_pool2d(div, [size, 1], stride: 1).squeeze(1)
+          else
+            sizes = input.size
+            div = div.view(sizes[0], 1, sizes[1], sizes[2], -1)
+            div = pad(div, [0, 0, 0, 0, size / 2, (size - 1) / 2])
+            div = avg_pool3d(div, [size, 1, 1], stride: 1).squeeze(1)
+            div = div.view(sizes)
+          end
+          div = div.mul(alpha).add(k).pow(beta)
+          input / div
+        end
+
+        # linear layers
+
+        def linear(input, weight, bias)
+          NN.linear(input, weight, bias)
+        end
+
+        def bilinear(input1, input2, weight, bias)
+          Torch.bilinear(input1, input2, weight, bias)
+        end
+
+        # dropout layers
+
         def dropout(input, p: 0.5, training: true, inplace: false)
           if inplace
-            Torch._dropout!(input, p, training)
+            Torch.dropout!(input, p, training)
           else
-            Torch._dropout(input, p, training)
+            Torch.dropout(input, p, training)
           end
         end
 
@@ -54,42 +287,156 @@ module Torch
           raise ArgumentError, "dropout probability has to be between 0 and 1, but got #{p}" if p < 0 || p > 1
 
           if inplace
-            Torch._feature_dropout!(input, p, training)
+            Torch.feature_dropout!(input, p, training)
           else
-            Torch._feature_dropout(input, p, training)
+            Torch.feature_dropout(input, p, training)
           end
         end
 
         def dropout3d(input, p: 0.5, training: true, inplace: false)
           if inplace
-            Torch._feature_dropout!(input, p, training)
+            Torch.feature_dropout!(input, p, training)
          else
-            Torch._feature_dropout(input, p, training)
+            Torch.feature_dropout(input, p, training)
          end
        end
 
        def alpha_dropout(input, p: 0.5, training: true, inplace: false)
          if inplace
-            Torch._alpha_dropout!(input, p, training)
+            Torch.alpha_dropout!(input, p, training)
          else
-            Torch._alpha_dropout(input, p, training)
+            Torch.alpha_dropout(input, p, training)
          end
        end
 
        def feature_alpha_dropout(input, p: 0.5, training: true, inplace: false)
          if inplace
-            Torch._feature_alpha_dropout!(input, p, training)
+            Torch.feature_alpha_dropout!(input, p, training)
          else
-            Torch._feature_alpha_dropout(input, p, training)
+            Torch.feature_alpha_dropout(input, p, training)
          end
        end
 
+        # sparse layers
+
        def embedding(input, weight, padding_idx: nil, max_norm: nil, norm_type: 2.0, scale_grad_by_freq: false, sparse: false)
          # TODO handle max_norm and norm_type
          raise NotImplementedYet unless max_norm.nil? && norm_type == 2.0
 
          padding_idx ||= -1
-          Torch._embedding(input, weight, padding_idx, scale_grad_by_freq, sparse)
+          # weight and indices are swapped from Python interface
+          Torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
+        end
+
+        def embedding_bag(input, weight, offsets: nil, max_norm: nil, norm_type: 2, scale_grad_by_freq: false, mode: "mean", sparse: false, per_sample_weights: nil)
+          # TODO handle max_norm and norm_type
+          raise NotImplementedYet unless max_norm.nil? && norm_type == 2.0
+
+          mode_enum =
+            case mode
+            when "sum"
+              0
+            when "mean"
+              1
+            when "max"
+              2
+            else
+              raise ArgumentError, "Unknown mode: #{mode}"
+            end
+
+          # weight and input swapped
+          Torch.embedding_bag(weight, input, offsets, scale_grad_by_freq, mode_enum, sparse, per_sample_weights)
+        end
+
+        # distance functions
+
+        def cosine_similarity(x1, x2, dim: 1, eps: 1e-8)
+          Torch.cosine_similarity(x1, x2, dim, eps)
+        end
+
+        def pairwise_distance(x1, x2, p: 2.0, eps: 1e-6, keepdim: false)
+          Torch.pairwise_distance(x1, x2, p, eps, keepdim)
+        end
+
+        # loss functions
+
+        def binary_cross_entropy(input, target, weight: nil, reduction: "mean")
+          NN.binary_cross_entropy(input, target, weight, reduction)
+        end
+
+        def binary_cross_entropy_with_logits(input, target, weight: nil, reduction: "mean", pos_weight: nil)
+          Torch.binary_cross_entropy_with_logits(input, target, weight, pos_weight, reduction)
+        end
+
+        def cosine_embedding_loss(input1, input2, target, margin: 0, reduction: "mean")
+          raise NotImplementedYet
+        end
+
+        def cross_entropy(input, target, weight: nil, ignore_index: -100, reduction: "mean")
+          nll_loss(log_softmax(input, 1), target, weight: weight, ignore_index: ignore_index, reduction: reduction)
+        end
+
+        def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank: 0, reduction: "mean", zero_infinity: false)
+          # call to_a on input_lengths and target_lengths for C++
+          Torch.ctc_loss(log_probs, targets, input_lengths.to_a, target_lengths.to_a, blank, reduction, zero_infinity)
+        end
+
+        def hinge_embedding_loss(input, target, margin: 1.0, reduction: "mean")
+          Torch.hinge_embedding_loss(input, target, margin, reduction)
+        end
+
+        def kl_div(input, target, reduction: "mean")
+          Torch.kl_div(input, target, reduction)
+        end
+
+        def l1_loss(input, target, reduction: "mean")
+          NN.l1_loss(input, target, reduction)
+        end
+
+        def margin_ranking_loss(input1, input2, target, margin: 0, reduction: "mean")
+          raise NotImplementedYet
+        end
+
+        def mse_loss(input, target, reduction: "mean")
+          NN.mse_loss(input, target, reduction)
+        end
+
+        def multilabel_margin_loss(input, target, reduction: "mean")
+          NN.multilabel_margin_loss(input, target, reduction)
+        end
+
+        def multilabel_soft_margin_loss(input, target, weight: nil)
+          raise NotImplementedYet
+        end
+
+        def multi_margin_loss(input, target, p: 1, margin: 1.0, weight: nil, reduction: "mean")
+          NN.multi_margin_loss(input, target, p, margin, weight, reduction)
+        end
+
+        def nll_loss(input, target, weight: nil, ignore_index: -100, reduction: "mean")
+          NN.nll_loss(input, target, weight, reduction, ignore_index)
+        end
+
+        def poisson_nll_loss(input, target, log_input: true, full: false, eps: 1e-8, reduction: "mean")
+          Torch.poisson_nll_loss(input, target, log_input, full, eps, reduction)
+        end
+
+        def soft_margin_loss(input, target, reduction: "mean")
+          NN.soft_margin_loss(input, target, reduction)
+        end
+
+        def smooth_l1_loss(input, target, reduction: "mean")
+          NN.smooth_l1_loss(input, target, reduction)
+        end
+
+        def triplet_margin_loss(anchor, positive, negative, margin: 1.0, p: 2, eps: 1e-06, swap: false, reduction: "mean")
+          Torch.triplet_margin_loss(anchor, positive, negative, margin, p, eps, swap, reduction)
+        end
+
+        private
+
+        def softmax_dim(ndim)
+          ndim == 0 || ndim == 1 || ndim == 3 ? 0 : 1
        end
      end
    end
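
Usage note (not from the diff): the functional interface can also be used directly, without wrapping layers in modules. A sketch of a few of the functions added above, assuming Torch.randn and Torch.tensor:

  F = Torch::NN::Functional

  x = Torch.randn(1, 3, 8, 8)
  padded = F.pad(x, [1, 1, 1, 1], mode: "reflect")  # 4D input takes 4 padding values
  probs = F.softmax(Torch.randn(2, 5), dim: 1)      # rows sum to 1

  input = Torch.randn(3, 5)
  target = Torch.tensor([1, 0, 4])
  loss = F.cross_entropy(input, target)             # log_softmax + nll_loss
  F.l1_loss(Torch.randn(4), Torch.randn(4), reduction: "sum")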