RubyGems - ruby-dnn - Versions diffs - 0.9.4 → 0.10.0 - Mend

ruby-dnn 0.9.4 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

checksums.yaml +4 -4
data/README.md +39 -3
data/Rakefile +6 -0
data/examples/cifar100_example.rb +71 -0
data/examples/cifar10_example.rb +2 -1
data/examples/iris_example.rb +2 -1
data/examples/mnist_conv2d_example.rb +2 -1
data/examples/mnist_example.rb +2 -3
data/examples/mnist_lstm_example.rb +2 -1
data/ext/cifar_loader/cifar_loader.c +77 -0
data/ext/cifar_loader/extconf.rb +3 -0
data/lib/dnn.rb +1 -0
data/lib/dnn/{lib/cifar10.rb → cifar10.rb} +9 -11
data/lib/dnn/cifar100.rb +49 -0
data/lib/dnn/core/activations.rb +28 -24
data/lib/dnn/core/cnn_layers.rb +216 -94
data/lib/dnn/core/dataset.rb +21 -5
data/lib/dnn/core/initializers.rb +3 -3
data/lib/dnn/core/layers.rb +81 -150
data/lib/dnn/core/losses.rb +88 -49
data/lib/dnn/core/model.rb +97 -74
data/lib/dnn/core/normalizations.rb +72 -0
data/lib/dnn/core/optimizers.rb +171 -78
data/lib/dnn/core/regularizers.rb +92 -22
data/lib/dnn/core/rnn_layers.rb +146 -121
data/lib/dnn/core/utils.rb +4 -3
data/lib/dnn/{lib/downloader.rb → downloader.rb} +5 -1
data/lib/dnn/{lib/image.rb → image.rb} +1 -1
data/lib/dnn/{lib/iris.rb → iris.rb} +1 -1
data/lib/dnn/{lib/mnist.rb → mnist.rb} +4 -3
data/lib/dnn/version.rb +1 -1
data/ruby-dnn.gemspec +1 -1
metadata +13 -12
data/API-Reference.ja.md +0 -978
data/LIB-API-Reference.ja.md +0 -97
data/ext/cifar10_loader/cifar10_loader.c +0 -44
data/ext/cifar10_loader/extconf.rb +0 -3

data/lib/dnn/core/cnn_layers.rb CHANGED

@@ -1,7 +1,7 @@
 module DNN
   module Layers
     # This module is used for convolution.
-    module Conv2DModule
+    module Conv2D_Utils
       private
       # img[bsize, out_h, out_w, ch] to col[bsize * out_h * out_w, fil_h * fil_w * ch]
@@ -34,7 +34,7 @@ module DNN
         img
       end
-      def padding(img, pad)
+      def zero_padding(img, pad)
         bsize, img_h, img_w, ch = img.shape
         img2 = Xumo::SFloat.zeros(bsize, img_h + pad[0], img_w + pad[1], ch)
         i_begin = pad[0] / 2
@@ -45,7 +45,7 @@ module DNN
         img2
       end
-      def back_padding(img, pad)
+      def zero_padding_bwd(img, pad)
         i_begin = pad[0] / 2
         i_end = img.shape[1] - (pad[0] / 2.0).round
         j_begin = pad[1] / 2
@@ -53,22 +53,34 @@ module DNN
         img[true, i_begin...i_end, j_begin...j_end, true]
       end
-      def out_size(prev_h, prev_w, fil_h, fil_w, strides)
-        out_h = (prev_h - fil_h) / strides[0] + 1
-        out_w = (prev_w - fil_w) / strides[1] + 1
+      def calc_conv2d_out_size(prev_h, prev_w, fil_h, fil_w, pad_h, pad_w, strides)
+        out_h = (prev_h + pad_h - fil_h) / strides[0] + 1
+        out_w = (prev_w + pad_w - fil_w) / strides[1] + 1
         [out_h, out_w]
       end
-      def padding_size(prev_h, prev_w, out_h, out_w, strides)
+      def calc_deconv2d_out_size(prev_h, prev_w, fil_h, fil_w, pad_h, pad_w, strides)
+        out_h = (prev_h - 1) * strides[0] + fil_h - pad_h
+        out_w = (prev_w - 1) * strides[1] + fil_w - pad_w
+        [out_h, out_w]
+      end
+      def calc_padding_size(prev_h, prev_w, out_h, out_w, strides)
         pad_h = (prev_h.to_f / strides[0]).ceil - out_h
         pad_w = (prev_w.to_f / strides[1]).ceil - out_w
         [pad_h, pad_w]
       end
+      def calc_padding_size(prev_h, prev_w, out_h, out_w, strides)
+        pad_h = ((prev_h.to_f / strides[0]).ceil - out_h) * strides[0]
+        pad_w = ((prev_w.to_f / strides[1]).ceil - out_w) * strides[1]
+        [pad_h, pad_w]
+      end
     end
     class Conv2D < Connection
-      include Conv2DModule
+      include Conv2D_Utils
       # @return [Integer] number of filters.
       attr_reader :num_filters
@@ -76,75 +88,81 @@ module DNN
       attr_reader :filter_size
       # @return [Array] Return stride length. stride length is of the form [height, width].
       attr_reader :strides
+      # @return [Array | Bool] Return padding size or whether to padding.
+      attr_reader :padding
-      def self.load_hash(hash)
+      def self.from_hash(hash)
         Conv2D.new(hash[:num_filters], hash[:filter_size],
-                   weight_initializer: Utils.load_hash(hash[:weight_initializer]),
-                   bias_initializer: Utils.load_hash(hash[:bias_initializer]),
+                   weight_initializer: Utils.from_hash(hash[:weight_initializer]),
+                   bias_initializer: Utils.from_hash(hash[:bias_initializer]),
+                   weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
+                   bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
+                   use_bias: hash[:use_bias],
                    strides: hash[:strides],
-                   padding: hash[:padding],
-                   l1_lambda: hash[:l1_lambda],
-                   l2_lambda: hash[:l2_lambda],
-                   use_bias: hash[:use_bias])
+                   padding: hash[:padding])
       end
-      # @param [Integer] num_filters number of filters.
-      # @param [Array or Integer] filter_size filter size. filter size is of the form [height, width].
-      # @param [Array or Integer] strides stride length. stride length is of the form [height, width].
-      # @param [Bool] padding Whether to padding.
+      # @param [Integer] num_filters Number of filters.
+      # @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
+      # @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
+      # @param [Array | Bool] padding Padding size or whether to padding. Padding size is of the form [height, width].
       def initialize(num_filters, filter_size,
                      weight_initializer: Initializers::RandomNormal.new,
                      bias_initializer: Initializers::Zeros.new,
+                     weight_regularizer: nil,
+                     bias_regularizer: nil,
+                     use_bias: true,
                      strides: 1,
-                     padding: false,
-                     l1_lambda: 0,
-                     l2_lambda: 0,
-                     use_bias: true)
+                     padding: false)
         super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
-              l1_lambda: l1_lambda, l2_lambda: l2_lambda, use_bias: use_bias)
+              weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
         @num_filters = num_filters
         @filter_size = filter_size.is_a?(Integer) ? [filter_size, filter_size] : filter_size
         @strides = strides.is_a?(Integer) ? [strides, strides] : strides
-        @padding = padding
+        @padding = padding.is_a?(Integer) ? [padding, padding] : padding
       end
       def build(input_shape)
         super
-        prev_h, prev_w = input_shape[0..1]
-        @out_size = out_size(prev_h, prev_w, *@filter_size, @strides)
-        if @padding
-          @pad_size = padding_size(prev_h, prev_w, *@out_size, @strides)
-          @out_size = [@out_size[0] + @pad_size[0], @out_size[1] + @pad_size[1]]
+        prev_h, prev_w, num_prev_filter = *input_shape
+        @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * num_prev_filter, @num_filters)
+        @bias.data = Xumo::SFloat.new(@num_filters) if @bias
+        init_weight_and_bias
+        if @padding == true
+          out_h, out_w = calc_conv2d_out_size(prev_h, prev_w, *@filter_size, 0, 0, @strides)
+          @pad_size = calc_padding_size(prev_h, prev_w, out_h, out_w, @strides)
+        elsif @padding.is_a?(Array)
+          @pad_size = @padding
+        else
+          @pad_size = [0, 0]
         end
+        @out_size = calc_conv2d_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
       end
       def forward(x)
-        x = padding(x, @pad_size) if @padding
+        x = zero_padding(x, @pad_size) if @padding
         @x_shape = x.shape
         @col = im2col(x, *@out_size, *@filter_size, @strides)
-        out = @col.dot(@weight.data)
-        out += @bias.data if @bias
-        out.reshape(x.shape[0], *@out_size, out.shape[3])
+        y = @col.dot(@weight.data)
+        y += @bias.data if @bias
+        y.reshape(x.shape[0], *@out_size, y.shape[3])
       end
-      def backward(dout)
-        dout = dout.reshape(dout.shape[0..2].reduce(:*), dout.shape[3])
-        @weight.grad = @col.transpose.dot(dout)
-        @bias.grad = dout.sum(0) if @bias
-        dcol = dout.dot(@weight.data.transpose)
+      def backward(dy)
+        dy = dy.reshape(dy.shape[0..2].reduce(:*), dy.shape[3])
+        if @trainable
+          @weight.grad += @col.transpose.dot(dy)
+          @bias.grad += dy.sum(0) if @bias
+        end
+        dcol = dy.dot(@weight.data.transpose)
         dx = col2im(dcol, @x_shape, *@out_size, *@filter_size, @strides)
-        @padding ? back_padding(dx, @pad_size) : dx
+        @padding ? zero_padding_bwd(dx, @pad_size) : dx
       end
       def output_shape
         [*@out_size, @num_filters]
       end
-      # @return [Bool] whether to padding.
-      def padding?
-        @padding
-      end
       # @return [Numo::SFloat] Convert weight to filter and return.
       def filters
         num_prev_filter = @input_shape[2]
@@ -163,35 +181,140 @@ module DNN
                strides: @strides,
                padding: @padding})
       end
-      private
-      def init_params
+    end
+    class Conv2D_Transpose < Connection
+      include Conv2D_Utils
+      # @return [Integer] number of filters.
+      attr_reader :num_filters
+      # @return [Array] Return filter size. filter size is of the form [height, width].
+      attr_reader :filter_size
+      # @return [Array] Return stride length. stride length is of the form [height, width].
+      attr_reader :strides
+      # @return [Array] Return padding size.
+      attr_reader :padding
+      def self.from_hash(hash)
+        Conv2D_Transpose.new(hash[:num_filters], hash[:filter_size],
+                   weight_initializer: Utils.from_hash(hash[:weight_initializer]),
+                   bias_initializer: Utils.from_hash(hash[:bias_initializer]),
+                   weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
+                   bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
+                   use_bias: hash[:use_bias],
+                   strides: hash[:strides],
+                   padding: hash[:padding])
+      end
+      # @param [Integer] num_filters Number of filters.
+      # @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
+      # @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
+      # @param [Array] padding Padding size. Padding size is of the form [height, width].
+      def initialize(num_filters, filter_size,
+                     weight_initializer: Initializers::RandomNormal.new,
+                     bias_initializer: Initializers::Zeros.new,
+                     weight_regularizer: nil,
+                     bias_regularizer: nil,
+                     use_bias: true,
+                     strides: 1,
+                     padding: false)
+        super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
+              weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
+        @num_filters = num_filters
+        @filter_size = filter_size.is_a?(Integer) ? [filter_size, filter_size] : filter_size
+        @strides = strides.is_a?(Integer) ? [strides, strides] : strides
+        @padding = padding.is_a?(Integer) ? [padding, padding] : padding
+      end
+      def build(input_shape)
+        super
+        prev_h, prev_w, num_prev_filter = *input_shape
+        @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * @num_filters, num_prev_filter)
+        @weight_initializer.init_param(self, @weight)
+        @weight_regularizer.param = @weight if @weight_regularizer
+        if @bias
+          @bias.data = Xumo::SFloat.new(@num_filters)
+          @bias_initializer.init_param(self, @bias)
+          @bias_regularizer.param = @bias if @bias_regularizer
+        end
+        if @padding == true
+          out_h, out_w = calc_deconv2d_out_size(prev_h, prev_w, *@filter_size, 0, 0, @strides)
+          @pad_size = calc_padding_size(out_h, out_w, prev_h, prev_w, @strides)
+        elsif @padding.is_a?(Array)
+          @pad_size = @padding
+        else
+          @pad_size = [0, 0]
+        end
+        @out_size = calc_deconv2d_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
+      end
+      def forward(x)
+        bsize = x.shape[0]
+        x = x.reshape(x.shape[0..2].reduce(:*), x.shape[3])
+        @x = x
+        col = x.dot(@weight.data.transpose)
+        img_shape = [bsize, @out_size[0] + @pad_size[0], @out_size[1] + @pad_size[1], @num_filters]
+        y = col2im(col, img_shape, *input_shape[0..1], *@filter_size, @strides)
+        y += @bias.data if @bias
+        @padding ? zero_padding_bwd(y, @pad_size) : y
+      end
+      def backward(dy)
+        dy = zero_padding(dy, @pad_size) if @padding
+        col = im2col(dy, *input_shape[0..1], *@filter_size, @strides)
+        if @trainable
+          @weight.grad += col.transpose.dot(@x)
+          @bias.grad += col.reshape(col.shape[0] * @filter_size.reduce(:*), @num_filters).sum(0) if @bias
+        end
+        dx = col.dot(@weight.data)
+        dx.reshape(dy.shape[0], *input_shape)
+      end
+      def output_shape
+        [*@out_size, @num_filters]
+      end
+      # @return [Numo::SFloat] Convert weight to filter and return.
+      def filters
         num_prev_filter = @input_shape[2]
-        @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * num_prev_filter, @num_filters)
-        @bias.data = Xumo::SFloat.new(@num_filters) if @bias
-        super()
+        @weight.data.reshape(*@filter_size, @num_filters, num_prev_filter)
+      end
+      # @param [Numo::SFloat] filters Convert weight to filters and set.
+      def filters=(filters)
+        num_prev_filter = @input_shape[2]
+        @weight.data = filters.reshape(@filter_size.reduce(:*) * @num_filters, num_prev_filter)
+      end
+      def to_hash
+        super({num_filters: @num_filters,
+               filter_size: @filter_size,
+               strides: @strides,
+               padding: @padding})
       end
     end
     # Super class of all pooling2D class.
     class Pool2D < Layer
-      include Conv2DModule
+      include Conv2D_Utils
-      # @return [Array] Return pooling size. pooling size is of the form [height, width].
+      # @return [Array] Return pooling size. Pooling size is of the form [height, width].
       attr_reader :pool_size
-      # @return [Array] Return stride length. stride length is of the form [height, width].
+      # @return [Array] Return stride length. Stride length is of the form [height, width].
       attr_reader :strides
+      # @return [Array | Bool] Return padding size or whether to padding.
+      attr_reader :padding
-      def self.load_hash(pool2d_class, hash)
+      def self.from_hash(pool2d_class, hash)
         pool2d_class.new(hash[:pool_size], strides: hash[:strides], padding: hash[:padding])
       end
-      # @param [Array or Integer] pool_size pooling size. pooling size is of the form [height, width].
-      # @param [Array or Integer or NilClass] strides stride length. stride length is of the form [height, width].
+      # @param [Array | Integer] pool_size Pooling size. Pooling size is of the form [height, width].
+      # @param [Array | Integer | NilClass] strides stride length. Stride length is of the form [height, width].
       #   If you set nil, treat pool_size as strides.
-      # @param [Bool] padding Whether to padding.
+      # @param [Array | Bool] padding Padding size or whether to padding. Padding size is of the form [height, width].
       def initialize(pool_size, strides: nil, padding: false)
         super()
         @pool_size = pool_size.is_a?(Integer) ? [pool_size, pool_size] : pool_size
@@ -200,29 +323,28 @@ module DNN
         else
           @pool_size.clone
         end
-        @padding = padding
+        @padding = padding.is_a?(Integer) ? [padding, padding] : padding
       end
       def build(input_shape)
         super
         prev_h, prev_w = input_shape[0..1]
         @num_channel = input_shape[2]
-        @out_size = out_size(prev_h, prev_w, *@pool_size, @strides)
-        if @padding
-          @pad_size = padding_size(prev_h, prev_w, *@out_size, @strides)
-          @out_size = [@out_size[0] + @pad_size[0], @out_size[1] + @pad_size[1]]
+        if @padding == true
+          out_h, out_w = calc_conv2d_out_size(prev_h, prev_w, *@pool_size, 0, 0, @strides)
+          @pad_size = calc_padding_size(prev_h, prev_w, out_h, out_w, @strides)
+        elsif @padding.is_a?(Array)
+          @pad_size = @padding
+        else
+          @pad_size = [0, 0]
         end
+        @out_size = calc_conv2d_out_size(prev_h, prev_w, *@pool_size, *@pad_size, @strides)
       end
       def output_shape
         [*@out_size, @num_channel]
       end
-      # @return [Bool] whether to padding.
-      def padding?
-        @padding
-      end
       def to_hash
         super({pool_size: @pool_size,
                strides: @strides,
@@ -232,12 +354,12 @@ module DNN
     class MaxPool2D < Pool2D
-      def self.load_hash(hash)
-        Pool2D.load_hash(self, hash)
+      def self.from_hash(hash)
+        Pool2D.from_hash(self, hash)
       end
       def forward(x)
-        x = padding(x, @pad_size) if @padding
+        x = zero_padding(x, @pad_size) if @padding
         @x_shape = x.shape
         col = im2col(x, *@out_size, *@pool_size, @strides)
         col = col.reshape(x.shape[0] * @out_size.reduce(:*), @pool_size.reduce(:*), x.shape[3]).transpose(0, 2, 1)
@@ -246,23 +368,23 @@ module DNN
         col.max(1).reshape(x.shape[0], *@out_size, x.shape[3])
       end
-      def backward(dout)
-        dmax = Xumo::SFloat.zeros(dout.size * @pool_size.reduce(:*))
-        dmax[@max_index] = dout.flatten
-        dcol = dmax.reshape(dout.shape[0..2].reduce(:*), @pool_size.reduce(:*) * dout.shape[3])
+      def backward(dy)
+        dmax = Xumo::SFloat.zeros(dy.size * @pool_size.reduce(:*))
+        dmax[@max_index] = dy.flatten
+        dcol = dmax.reshape(dy.shape[0..2].reduce(:*), @pool_size.reduce(:*) * dy.shape[3])
         dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides)
-        @padding ? back_padding(dx, @pad_size) : dx
+        @padding ? zero_padding_bwd(dx, @pad_size) : dx
       end
     end
     class AvgPool2D < Pool2D
-      def self.load_hash(hash)
-        Pool2D.load_hash(self, hash)
+      def self.from_hash(hash)
+        Pool2D.from_hash(self, hash)
       end
       def forward(x)
-        x = padding(x, @pad_size) if @padding
+        x = zero_padding(x, @pad_size) if @padding
         @x_shape = x.shape
         col = im2col(x, *@out_size, *@pool_size, @strides)
         col = col.reshape(x.shape[0] * @out_size.reduce(:*), @pool_size.reduce(:*), x.shape[3]).transpose(0, 2, 1)
@@ -270,21 +392,23 @@ module DNN
         col.mean(1).reshape(x.shape[0], *@out_size, x.shape[3])
       end
-      def backward(dout)
+      def backward(dy)
         row_length = @pool_size.reduce(:*)
-        dout /= row_length
-        davg = Xumo::SFloat.zeros(dout.size, row_length)
+        dy /= row_length
+        davg = Xumo::SFloat.zeros(dy.size, row_length)
         row_length.times do |i|
-          davg[true, i] = dout.flatten
+          davg[true, i] = dy.flatten
         end
-        dcol = davg.reshape(dout.shape[0..2].reduce(:*), dout.shape[3] * @pool_size.reduce(:*))
+        dcol = davg.reshape(dy.shape[0..2].reduce(:*), dy.shape[3] * @pool_size.reduce(:*))
         dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides)
-        @padding ? back_padding(dx, @pad_size) : dx
+        @padding ? zero_padding_bwd(dx, @pad_size) : dx
       end
     end
     class UnPool2D < Layer
+      include Conv2D_Utils
       # @return [Array] Return unpooling size. unpooling size is of the form [height, width].
       attr_reader :unpool_size
@@ -294,7 +418,7 @@ module DNN
         @unpool_size = unpool_size.is_a?(Integer) ? [unpool_size, unpool_size] : unpool_size
       end
-      def self.load_hash(hash)
+      def self.from_hash(hash)
         UnPool2D.new(hash[:unpool_size])
       end
@@ -308,8 +432,6 @@ module DNN
         @num_channel = input_shape[2]
       end
-      include Conv2DModule
       def forward(x)
         @x_shape = x.shape
         unpool_h, unpool_w = @unpool_size
@@ -322,12 +444,12 @@ module DNN
         x2.reshape(x.shape[0], *@out_size, x.shape[3])
       end
-      def backward(dout)
+      def backward(dy)
         in_size = input_shape[0..1]
-        col = im2col(dout, *input_shape[0..1], *@unpool_size, @unpool_size)
-        col = col.reshape(dout.shape[0] * in_size.reduce(:*), @unpool_size.reduce(:*), dout.shape[3]).transpose(0, 2, 1)
-                 .reshape(dout.shape[0] * in_size.reduce(:*) * dout.shape[3], @unpool_size.reduce(:*))
-        col.sum(1).reshape(dout.shape[0], *in_size, dout.shape[3])
+        col = im2col(dy, *input_shape[0..1], *@unpool_size, @unpool_size)
+        col = col.reshape(dy.shape[0] * in_size.reduce(:*), @unpool_size.reduce(:*), dy.shape[3]).transpose(0, 2, 1)
+                 .reshape(dy.shape[0] * in_size.reduce(:*) * dy.shape[3], @unpool_size.reduce(:*))
+        col.sum(1).reshape(dy.shape[0], *in_size, dy.shape[3])
       end
       def output_shape