torch-rb 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

data/lib/torch/nn/multihead_attention.rb ADDED
@@ -0,0 +1,123 @@
+ module Torch
+   module NN
+     class MultiheadAttention < Module
+       def initialize(
+         embed_dim, num_heads,
+         dropout: 0.0, bias: true, add_bias_kv: false, add_zero_attn: false,
+         kdim: nil, vdim: nil, batch_first: false, device: nil, dtype: nil
+       )
+
+         super()
+
+         @embed_dim = embed_dim
+         @kdim = kdim || @embed_dim
+         @vdim = vdim || @embed_dim
+
+         @qkv_same_embed_dim = @kdim == @embed_dim && @vdim == @embed_dim
+
+         @num_heads = num_heads
+         @dropout = dropout
+         @batch_first = batch_first
+
+         @head_dim = @embed_dim.div @num_heads
+
+         raise ArgumentError, "embed_dim must be divisible by num_heads" unless @head_dim * @num_heads == @embed_dim
+
+         if @qkv_same_embed_dim
+           @in_proj_weight = Parameter.new(Torch.empty([3 * @embed_dim, @embed_dim]))
+           %w(q k v).each { |x| register_parameter("#{x}_proj_weight", nil) }
+         else
+           @q_proj_weight = Parameter.new(Torch.empty([@embed_dim, @embed_dim]))
+           @k_proj_weight = Parameter.new(Torch.empty([@embed_dim, @kdim]))
+           @v_proj_weight = Parameter.new(Torch.empty([@embed_dim, @vdim]))
+
+           register_parameter('in_proj_weight', nil)
+         end
+
+         if bias
+           @in_proj_bias = Parameter.new(Torch.empty(3 * @embed_dim))
+         else
+           register_parameter('in_proj_bias', nil)
+         end
+
+         @out_proj = Linear.new(@embed_dim, @embed_dim, bias: bias)
+
+         if add_bias_kv
+           @bias_k = Parameter.new(Torch.empty([1, 1, @embed_dim]))
+           @bias_v = Parameter.new(Torch.empty([1, 1, @embed_dim]))
+         else
+           @bias_k = @bias_v = nil
+         end
+
+         @add_zero_attn = add_zero_attn
+
+         reset_parameters
+       end
+
+       def batch_first?
+         !!@batch_first
+       end
+
+       def reset_parameters
+         if @qkv_same_embed_dim
+           Init.xavier_uniform!(@in_proj_weight)
+         else
+           Init.xavier_uniform!(@q_proj_weight)
+           Init.xavier_uniform!(@k_proj_weight)
+           Init.xavier_uniform!(@v_proj_weight)
+         end
+
+         if @in_proj_bias
+           Init.constant!(@in_proj_bias, 0.0)
+           Init.constant!(@out_proj.bias, 0.0)
+         end
+
+         Init.xavier_uniform!(@bias_k) if @bias_k
+         Init.xavier_uniform!(@bias_v) if @bias_v
+       end
+
+       def forward(
+         query, key, value,
+         key_padding_mask: nil, need_weights: true, attn_mask: nil
+       )
+
+         if batch_first?
+           query, key, value = [query, key, value].map { |t| t.transpose(1, 0) }
+         end
+
+         attn_output, attn_output_weights =
+           if @qkv_same_embed_dim
+             F.multi_head_attention_forward(
+               query, key, value,
+               @embed_dim, @num_heads,
+               @in_proj_weight, @in_proj_bias,
+               @bias_k, @bias_v, @add_zero_attn,
+               @dropout, @out_proj.weight, @out_proj.bias,
+               training: @training,
+               key_padding_mask: key_padding_mask,
+               need_weights: need_weights,
+               attn_mask: attn_mask
+             )
+           else
+             F.multi_head_attention_forward(
+               query, key, value,
+               @embed_dim, @num_heads,
+               @in_proj_weight, @in_proj_bias,
+               @bias_k, @bias_v, @add_zero_attn,
+               @dropout, @out_proj.weight, @out_proj.bias,
+               training: @training,
+               key_padding_mask: key_padding_mask,
+               need_weights: need_weights,
+               attn_mask: attn_mask,
+               use_separate_proj_weight: true,
+               q_proj_weight: @q_proj_weight, k_proj_weight: @k_proj_weight, v_proj_weight: @v_proj_weight
+             )
+           end
+
+         attn_output = attn_output.transpose(1, 0) if batch_first?
+
+         [attn_output, attn_output_weights]
+       end
+     end
+   end
+ end
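
For context, a minimal usage sketch of the new layer (not part of the diff; sizes are arbitrary, and the default sequence-first layout applies since batch_first defaults to false):

    require "torch"

    attn = Torch::NN::MultiheadAttention.new(16, 4)   # embed_dim 16, 4 heads
    query = Torch.randn([5, 2, 16])                   # (target_len, batch, embed_dim)
    key   = Torch.randn([7, 2, 16])                   # (source_len, batch, embed_dim)
    value = Torch.randn([7, 2, 16])

    output, weights = attn.call(query, key, value)
    output.shape    # => [5, 2, 16]
    weights.shape   # => [2, 5, 7], attention weights averaged over heads
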
data/lib/torch/nn/parameter.rb CHANGED
@@ -9,6 +9,12 @@ module Torch
        def inspect
          "Parameter containing:\n#{super}"
        end
+
+       def dup
+         Torch.no_grad do
+           Parameter.new(clone, requires_grad: requires_grad)
+         end
+       end
      end
    end
  end
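
A quick illustration of the new Parameter#dup (a sketch, not part of the diff): the copy is made under Torch.no_grad, and requires_grad is passed through explicitly because an operation run in no-grad mode would otherwise produce a tensor with requires_grad false:

    w = Torch::NN::Parameter.new(Torch.randn([3, 3]))
    w2 = w.dup

    w2.equal?(w)       # => false, a separate tensor with copied data
    w2.requires_grad   # => true, carried over from the original
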
data/lib/torch/nn/transformer.rb ADDED
@@ -0,0 +1,92 @@
+ require_relative 'transformer_encoder_layer'
+ require_relative 'transformer_encoder'
+ require_relative 'transformer_decoder_layer'
+ require_relative 'transformer_decoder'
+
+ module Torch
+   module NN
+     class Transformer < Module
+       def initialize(
+         d_model: 512, nhead: 8,
+         num_encoder_layers: 6, num_decoder_layers: 6,
+         dim_feedforward: 2048, dropout: 0.1, activation: :relu,
+         custom_encoder: nil, custom_decoder: nil,
+         layer_norm_eps: 1e-5, batch_first: false
+       )
+
+         super()
+
+         @encoder =
+           if custom_encoder
+             custom_encoder
+           else
+             encoder_layer = TransformerEncoderLayer.new(
+               d_model, nhead,
+               dim_feedforward: dim_feedforward, dropout: dropout, activation: activation,
+               layer_norm_eps: layer_norm_eps, batch_first: batch_first
+             )
+             encoder_norm = LayerNorm.new(d_model, eps: layer_norm_eps)
+             TransformerEncoder.new(encoder_layer, num_encoder_layers, norm: encoder_norm)
+           end
+
+         @decoder =
+           if custom_decoder
+             custom_decoder
+           else
+             decoder_layer = TransformerDecoderLayer.new(
+               d_model, nhead,
+               dim_feedforward: dim_feedforward, dropout: dropout, activation: activation,
+               layer_norm_eps: layer_norm_eps, batch_first: batch_first
+             )
+             decoder_norm = LayerNorm.new(d_model, eps: layer_norm_eps)
+             TransformerDecoder.new(decoder_layer, num_decoder_layers, norm: decoder_norm)
+           end
+
+         reset_parameters
+
+         @d_model = d_model
+         @nhead = nhead
+         @batch_first = batch_first
+       end
+
+       attr_reader :d_model, :nhead, :encoder, :decoder
+
+       def batch_first?
+         !!@batch_first
+       end
+
+       def reset_parameters
+         parameters.each { |p| Init.xavier_uniform!(p) if p.dim > 1 }
+       end
+
+       def forward(
+         src, tgt,
+         src_mask: nil, tgt_mask: nil, memory_mask: nil,
+         src_key_padding_mask: nil, tgt_key_padding_mask: nil, memory_key_padding_mask: nil
+       )
+
+         if (!batch_first? && src.size(1) != tgt.size(1)) ||
+             (batch_first? && src.size(0) != tgt.size(0))
+
+           raise ArgumentError, "The batch number of src and tgt must be equal"
+         end
+
+         if src.size(2) != d_model || tgt.size(2) != d_model
+           raise ArgumentError, "The feature number of src and tgt must be equal to d_model"
+         end
+
+         memory = @encoder.(src, mask: src_mask, src_key_padding_mask: src_key_padding_mask)
+         @decoder.(
+           tgt, memory,
+           tgt_mask: tgt_mask, memory_mask: memory_mask,
+           tgt_key_padding_mask: tgt_key_padding_mask, memory_key_padding_mask: memory_key_padding_mask
+         )
+       end
+
+       def generate_square_subsequent_mask(sz)
+         mask = Torch.triu(Torch.ones([sz, sz])).eq(1).transpose(0, 1)
+         mask.float.masked_fill!(mask.eq(0), -Float::INFINITY).masked_fill!(mask.eq(1), 0.0)
+       end
+     end
+   end
+ end
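
A minimal sketch of driving the new Transformer end to end (illustrative only; sizes are arbitrary and the default sequence-first layout is assumed):

    model = Torch::NN::Transformer.new(d_model: 32, nhead: 4, num_encoder_layers: 2, num_decoder_layers: 2)

    src = Torch.randn([10, 2, 32])   # (source_len, batch, d_model)
    tgt = Torch.randn([20, 2, 32])   # (target_len, batch, d_model)

    # causal mask: each target position may only attend to earlier positions
    tgt_mask = model.generate_square_subsequent_mask(20)

    out = model.call(src, tgt, tgt_mask: tgt_mask)
    out.shape   # => [20, 2, 32]
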
data/lib/torch/nn/transformer_decoder.rb ADDED
@@ -0,0 +1,25 @@
+ module Torch
+   module NN
+     class TransformerDecoder < Module
+       def initialize(decoder_layer, num_layers, norm: nil)
+         super()
+
+         @layers = _clones(decoder_layer, num_layers)
+         @num_layers = num_layers
+         @norm = norm
+       end
+
+       def forward(tgt, memory, tgt_mask: nil, memory_mask: nil, tgt_key_padding_mask: nil, memory_key_padding_mask: nil)
+         output = tgt
+
+         @layers.each do |mod|
+           output = mod.call(output, memory, tgt_mask: tgt_mask, memory_mask: memory_mask, tgt_key_padding_mask: tgt_key_padding_mask, memory_key_padding_mask: memory_key_padding_mask)
+         end
+
+         output = @norm.call(output) if @norm
+
+         output
+       end
+     end
+   end
+ end
data/lib/torch/nn/transformer_decoder_layer.rb ADDED
@@ -0,0 +1,43 @@
+ module Torch
+   module NN
+     class TransformerDecoderLayer < Module
+       def initialize(
+         d_model, n_head,
+         dim_feedforward: 2048, dropout: 0.1, activation: :relu,
+         layer_norm_eps: 1e-5, batch_first: false
+       )
+
+         super()
+
+         @self_attn = MultiheadAttention.new(d_model, n_head, dropout: dropout, batch_first: batch_first)
+         @multihead_attn = MultiheadAttention.new(d_model, n_head, dropout: dropout, batch_first: batch_first)
+
+         @linear1 = Linear.new(d_model, dim_feedforward)
+         @dropout = Dropout.new(p: dropout)
+         @linear2 = Linear.new(dim_feedforward, d_model)
+
+         @norm1 = LayerNorm.new(d_model, eps: layer_norm_eps)
+         @norm2 = LayerNorm.new(d_model, eps: layer_norm_eps)
+         @norm3 = LayerNorm.new(d_model, eps: layer_norm_eps)
+
+         @dropout1 = Dropout.new(p: dropout)
+         @dropout2 = Dropout.new(p: dropout)
+         @dropout3 = Dropout.new(p: dropout)
+
+         @activation = _activation_fn(activation)
+       end
+
+       def forward(tgt, memory, tgt_mask: nil, memory_mask: nil, tgt_key_padding_mask: nil, memory_key_padding_mask: nil)
+         tgt2 = @self_attn.(tgt, tgt, tgt, attn_mask: tgt_mask, key_padding_mask: tgt_key_padding_mask).first
+         tgt += @dropout1.(tgt2)
+         tgt = @norm1.(tgt)
+         tgt2 = @multihead_attn.(tgt, memory, memory, attn_mask: memory_mask, key_padding_mask: memory_key_padding_mask).first
+         tgt += @dropout2.(tgt2)
+         tgt = @norm2.(tgt)
+         tgt2 = @linear2.(@dropout.(@activation.(@linear1.(tgt))))
+         tgt += @dropout3.(tgt2)
+         @norm3.(tgt)
+       end
+     end
+   end
+ end
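
The decoder layer and the TransformerDecoder container above can also be used on their own, for example to build a stack to pass as custom_decoder: to Transformer.new. A sketch with arbitrary sizes:

    layer = Torch::NN::TransformerDecoderLayer.new(32, 4, dim_feedforward: 64)
    decoder = Torch::NN::TransformerDecoder.new(layer, 2, norm: Torch::NN::LayerNorm.new(32))

    tgt    = Torch.randn([20, 2, 32])   # (target_len, batch, d_model)
    memory = Torch.randn([10, 2, 32])   # encoder output, (source_len, batch, d_model)

    out = decoder.call(tgt, memory)
    out.shape   # => [20, 2, 32]
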
data/lib/torch/nn/transformer_encoder.rb ADDED
@@ -0,0 +1,25 @@
+ module Torch
+   module NN
+     class TransformerEncoder < Module
+       def initialize(encoder_layer, num_layers, norm: nil)
+         super()
+
+         @layers = _clones(encoder_layer, num_layers)
+         @num_layers = num_layers
+         @norm = norm
+       end
+
+       def forward(src, mask: nil, src_key_padding_mask: nil)
+         output = src
+
+         @layers.each do |mod|
+           output = mod.call(output, src_mask: mask, src_key_padding_mask: src_key_padding_mask)
+         end
+
+         output = @norm.call(output) if @norm
+
+         output
+       end
+     end
+   end
+ end
data/lib/torch/nn/transformer_encoder_layer.rb ADDED
@@ -0,0 +1,36 @@
+ module Torch
+   module NN
+     class TransformerEncoderLayer < Module
+       def initialize(
+         d_model, n_head,
+         dim_feedforward: 2048, dropout: 0.1, activation: :relu,
+         layer_norm_eps: 1e-5, batch_first: false
+       )
+
+         super()
+
+         @self_attn = MultiheadAttention.new(d_model, n_head, dropout: dropout, batch_first: batch_first)
+         @linear1 = Linear.new(d_model, dim_feedforward)
+         @dropout = Dropout.new(p: dropout)
+         @linear2 = Linear.new(dim_feedforward, d_model)
+
+         @norm1 = LayerNorm.new(d_model, eps: layer_norm_eps)
+         @norm2 = LayerNorm.new(d_model, eps: layer_norm_eps)
+
+         @dropout1 = Dropout.new(p: dropout)
+         @dropout2 = Dropout.new(p: dropout)
+
+         @activation = _activation_fn(activation)
+       end
+
+       def forward(src, src_mask: nil, src_key_padding_mask: nil)
+         src2 = @self_attn.(src, src, src, attn_mask: src_mask, key_padding_mask: src_key_padding_mask).first
+         src += @dropout1.(src2)
+         src = @norm1.(src)
+         src2 = @linear2.(@dropout.(@activation.(@linear1.(src))))
+         src += @dropout2.(src2)
+         @norm2.(src)
+       end
+     end
+   end
+ end
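
Likewise, the encoder pieces stack directly; a small sketch (arbitrary sizes, :gelu chosen only to show the alternative activation):

    layer = Torch::NN::TransformerEncoderLayer.new(32, 4, dim_feedforward: 64, activation: :gelu)
    encoder = Torch::NN::TransformerEncoder.new(layer, 2)

    src = Torch.randn([10, 2, 32])   # (source_len, batch, d_model)
    out = encoder.call(src)
    out.shape   # => [10, 2, 32]
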
data/lib/torch/nn/utils.rb CHANGED
@@ -20,6 +20,18 @@ module Torch
        def _ntuple(n, value)
          value.is_a?(Array) ? value : [value] * n
        end
+
+       def _clones(mod, n)
+         ModuleList.new(n.times.map { mod.deep_dup })
+       end
+
+       def _activation_fn(activation)
+         case activation.to_sym
+         when :relu then F.method(:relu)
+         when :gelu then F.method(:gelu)
+         else raise ArgumentError, "Activation should be relu/gelu, not `#{activation}`"
+         end
+       end
      end
    end
  end
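
The two new helpers are internal to Torch::NN::Utils rather than public API; roughly, their effect on the transformer classes above is the following (illustrative sketch):

    # _activation_fn accepts only :relu or :gelu; anything else raises at construction time
    Torch::NN::TransformerEncoderLayer.new(8, 2, activation: :tanh)
    # => ArgumentError: Activation should be relu/gelu, not `tanh`

    # _clones deep-copies the layer, so stacked layers do not share parameters
    layer = Torch::NN::TransformerEncoderLayer.new(8, 2)
    stack = Torch::NN::TransformerEncoder.new(layer, 3)
    stack.parameters.length == 3 * layer.parameters.length   # => true
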
data/lib/torch/tensor.rb CHANGED
@@ -19,6 +19,8 @@ module Torch
      alias_method :&, :logical_and
      alias_method :|, :logical_or
      alias_method :^, :logical_xor
+     alias_method :<<, :__lshift__
+     alias_method :>>, :__rshift__

      def self.new(*args)
        FloatTensor.new(*args)
@@ -183,5 +185,23 @@ module Torch
      def stft(*args)
        Torch.stft(*args)
      end
+
+     def dup
+       Torch.no_grad do
+         clone
+       end
+     end
+
+     # not a method in native_functions.yaml
+     # attribute in Python rather than method
+     def imag
+       Torch.imag(self)
+     end
+
+     # not a method in native_functions.yaml
+     # attribute in Python rather than method
+     def real
+       Torch.real(self)
+     end
    end
  end
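
Brief usage notes for the Tensor additions (a sketch; the complex-number example assumes the underlying libtorch build exposes Torch.complex and complex dtypes):

    x = Torch.tensor([1, 2, 4])
    x << 2   # => tensor([ 4,  8, 16]), alias for x.__lshift__(2)
    x >> 1   # => tensor([0, 1, 2]),    alias for x.__rshift__(1)

    c = Torch.complex(Torch.tensor([1.0, 3.0]), Torch.tensor([2.0, -4.0]))
    c.real   # => tensor([1., 3.])
    c.imag   # => tensor([ 2., -4.])

    y = Torch.ones(2, 2, requires_grad: true)
    y.dup.requires_grad   # => false, because dup clones inside Torch.no_grad
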
@@ -25,6 +25,8 @@ module Torch
          end

          def each
+           return to_enum(:each) unless block_given?
+
            # try to keep the random number generator in sync with Python
            # this makes it easy to compare results
            base_seed = Torch.empty([], dtype: :int64).random!.item
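
This hunk appears to be DataLoader#each (the RNG-sync comment and base_seed match lib/torch/utils/data/data_loader.rb); returning an Enumerator when no block is given lets the loader compose with the usual Enumerable idioms. A sketch:

    x = Torch.randn([6, 3])
    y = Torch.tensor([0, 1, 0, 1, 0, 1])
    dataset = Torch::Utils::Data::TensorDataset.new(x, y)
    loader = Torch::Utils::Data::DataLoader.new(dataset, batch_size: 2)

    first_x, first_y = loader.each.first   # take one batch without a block
    loader.each.with_index do |(xb, yb), i|
      puts "batch #{i}: #{xb.shape.inspect}"
    end
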
data/lib/torch/version.rb CHANGED
@@ -1,3 +1,3 @@
  module Torch
-   VERSION = "0.8.0"
+   VERSION = "0.9.0"
  end
data/lib/torch.rb CHANGED
@@ -39,6 +39,7 @@ require "torch/nn/utils"

  # nn containers
  require "torch/nn/module"
+ require "torch/nn/module_list"
  require "torch/nn/sequential"

  # nn convolution layers
@@ -143,6 +144,10 @@ require "torch/nn/softmin"
  require "torch/nn/embedding"
  require "torch/nn/embedding_bag"

+ # attention is all you need
+ require "torch/nn/multihead_attention"
+ require "torch/nn/transformer"
+
  # nn distance functions
  require "torch/nn/cosine_similarity"
  require "torch/nn/pairwise_distance"
@@ -174,6 +179,7 @@ require "torch/nn/upsample"

  # nn other
  require "torch/nn/functional"
+ require "torch/nn/functional_attention"
  require "torch/nn/init"

  # utils
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: torch-rb
  version: !ruby/object:Gem::Version
-   version: 0.8.0
+   version: 0.9.0
  platform: ruby
  authors:
  - Andrew Kane
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2021-06-15 00:00:00.000000000 Z
+ date: 2021-10-23 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: rice
@@ -37,16 +37,23 @@ files:
  - codegen/function.rb
  - codegen/generate_functions.rb
  - codegen/native_functions.yaml
+ - ext/torch/backends.cpp
  - ext/torch/cuda.cpp
  - ext/torch/device.cpp
  - ext/torch/ext.cpp
  - ext/torch/extconf.rb
+ - ext/torch/fft.cpp
+ - ext/torch/fft_functions.h
  - ext/torch/ivalue.cpp
+ - ext/torch/linalg.cpp
+ - ext/torch/linalg_functions.h
  - ext/torch/nn.cpp
  - ext/torch/nn_functions.h
  - ext/torch/random.cpp
  - ext/torch/ruby_arg_parser.cpp
  - ext/torch/ruby_arg_parser.h
+ - ext/torch/special.cpp
+ - ext/torch/special_functions.h
  - ext/torch/templates.h
  - ext/torch/tensor.cpp
  - ext/torch/tensor_functions.h
@@ -99,6 +106,7 @@ files:
  - lib/torch/nn/feature_alpha_dropout.rb
  - lib/torch/nn/fold.rb
  - lib/torch/nn/functional.rb
+ - lib/torch/nn/functional_attention.rb
  - lib/torch/nn/group_norm.rb
  - lib/torch/nn/gru.rb
  - lib/torch/nn/hardshrink.rb
@@ -132,10 +140,12 @@ files:
  - lib/torch/nn/max_unpool3d.rb
  - lib/torch/nn/max_unpoolnd.rb
  - lib/torch/nn/module.rb
+ - lib/torch/nn/module_list.rb
  - lib/torch/nn/mse_loss.rb
  - lib/torch/nn/multi_label_margin_loss.rb
  - lib/torch/nn/multi_label_soft_margin_loss.rb
  - lib/torch/nn/multi_margin_loss.rb
+ - lib/torch/nn/multihead_attention.rb
  - lib/torch/nn/nll_loss.rb
  - lib/torch/nn/pairwise_distance.rb
  - lib/torch/nn/parameter.rb
@@ -163,6 +173,11 @@ files:
  - lib/torch/nn/softsign.rb
  - lib/torch/nn/tanh.rb
  - lib/torch/nn/tanhshrink.rb
+ - lib/torch/nn/transformer.rb
+ - lib/torch/nn/transformer_decoder.rb
+ - lib/torch/nn/transformer_decoder_layer.rb
+ - lib/torch/nn/transformer_encoder.rb
+ - lib/torch/nn/transformer_encoder_layer.rb
  - lib/torch/nn/triplet_margin_loss.rb
  - lib/torch/nn/unfold.rb
  - lib/torch/nn/upsample.rb
@@ -212,7 +227,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
      - !ruby/object:Gem::Version
        version: '0'
  requirements: []
- rubygems_version: 3.2.3
+ rubygems_version: 3.2.22
  signing_key:
  specification_version: 4
  summary: Deep learning for Ruby, powered by LibTorch