ruby-dnn 0.9.4 → 0.10.0

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
@@ -1,18 +1,34 @@
+ # This class manages input datas and output datas together.
  class DNN::Dataset
- def initialize(x_datas, y_datas)
+ # @param [Numo::SFloat] x_datas input datas.
+ # @param [Numo::SFloat] y_datas output datas.
+ # @param [Bool] random Set true to return batches randomly. Setting false returns batches in order of index.
+ def initialize(x_datas, y_datas, random = true)
  @x_datas = x_datas
  @y_datas = y_datas
+ @random = random
  @num_datas = x_datas.shape[0]
- @indexes = @num_datas.times.to_a.shuffle
+ reset_indexs
  end

- def get_batch(batch_size)
+ # Return the next batch.
+ # If the number of remaining data < batch size, if random = true, shuffle the data again and return a batch.
+ # If random = false, all remaining data will be returned regardless of the batch size.
+ def next_batch(batch_size)
  if @indexes.length < batch_size
- @indexes = @num_datas.times.to_a.shuffle
+ batch_indexes = @indexes unless @random
+ reset_indexs
+ batch_indexes = @indexes.shift(batch_size) if @random
+ else
+ batch_indexes = @indexes.shift(batch_size)
  end
- batch_indexes = @indexes.shift(batch_size)
  x_batch = @x_datas[batch_indexes, false]
  y_batch = @y_datas[batch_indexes, false]
  [x_batch, y_batch]
  end
+
+ private def reset_indexs
+ @indexes = @num_datas.times.to_a
+ @indexes.shuffle! if @random
+ end
  end
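The Dataset hunk above renames get_batch to next_batch, adds the random flag, and moves index handling into a private reset_indexs helper. A minimal usage sketch (the sample data and shapes here are illustrative, not taken from the gem):

    # Numo::SFloat is the array type named in the doc comments above.
    x = Numo::SFloat.new(100, 10).rand
    y = Numo::SFloat.new(100, 1).rand

    shuffled = DNN::Dataset.new(x, y, true)     # random = true reshuffles when indexes run low
    x_batch, y_batch = shuffled.next_batch(32)  # 0.9.4 called this get_batch(32)

    ordered = DNN::Dataset.new(x, y, false)     # random = false walks the data in index order
    x_batch, y_batch = ordered.next_batch(32)   # a short final batch returns all remaining data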
@@ -28,7 +28,7 @@ module DNN
  class Const < Initializer
  attr_reader :const

- def self.load_hash(hash)
+ def self.from_hash(hash)
  self.new(hash[:const])
  end

@@ -51,7 +51,7 @@ module DNN
  attr_reader :mean
  attr_reader :std

- def self.load_hash(hash)
+ def self.from_hash(hash)
  self.new(hash[:mean], hash[:std], hash[:seed])
  end

@@ -76,7 +76,7 @@ module DNN
  attr_reader :min
  attr_reader :max

- def self.load_hash(hash)
+ def self.from_hash(hash)
  self.new(hash[:min], hash[:max], hash[:seed])
  end

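The three initializer hunks above are part of a blanket rename of the deserialization hook from load_hash to from_hash. A round-trip sketch, assuming DNN::Utils.from_hash dispatches on the :class key the way Utils.load_hash did in 0.9.4:

    hash = DNN::Initializers::RandomNormal.new.to_hash   # serialized by the existing to_hash
    init = DNN::Utils.from_hash(hash)                    # 0.9.4 code called Utils.load_hash(hash)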
@@ -3,6 +3,9 @@ module DNN

  # Super class of all optimizer classes.
  class Layer
+ # @return [Bool] learning_phase Return the true if learning.
+ attr_accessor :learning_phase
+ # @return [Array] Return the shape of the input data.
  attr_reader :input_shape

  def initialize
@@ -10,12 +13,15 @@ module DNN
  end

  # Build the layer.
+ # @param [Array] input_shape Setting the shape of the input data.
  def build(input_shape)
  @input_shape = input_shape
+ @learning_phase = true
  @built = true
  end

  # Does the layer have already been built?
+ # @return [Bool] If layer have already been built then return true.
  def built?
  @built
  end
@@ -26,10 +32,13 @@ module DNN
  end

  # Backward propagation.
- def backward(dout)
- raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'update'")
+ def backward(dy)
+ raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'backward'")
  end

+ # Please reimplement this method as needed.
+ # The default implementation return input_shape.
+ # @return [Array] Return the shape of the output data.
  def output_shape
  @input_shape
  end
@@ -55,31 +64,11 @@ module DNN
  @params = {}
  @trainable = true
  end
-
- def build(input_shape)
- @input_shape = input_shape
- unless @built
- @built = true
- init_params
- end
- end
-
- # Update the parameters.
- def update(optimizer)
- optimizer.update(@params) if @trainable
- end
-
- private
-
- # Initialize of the parameters.
- def init_params
- raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'init_params'")
- end
  end


  class InputLayer < Layer
- def self.load_hash(hash)
+ def self.from_hash(hash)
  self.new(hash[:input_shape])
  end

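In the layer hunks above, learning_phase becomes an attribute set by Layer#build, backward now takes dy instead of dout, and HasParamLayer loses its build/update/init_params template methods. A sketch of a custom layer against the reworked base class (the DNN::Layers namespace and the pass-through behaviour are assumptions for illustration):

    class PassThrough < DNN::Layers::Layer
      def forward(x)
        x
      end

      def backward(dy)   # the argument was named dout in 0.9.4
        dy
      end

      # output_shape defaults to input_shape, so no override is needed here.
    end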
@@ -97,8 +86,8 @@ module DNN
  x
  end

- def backward(dout)
- dout
+ def backward(dy)
+ dy
  end

  def to_hash
@@ -109,44 +98,42 @@ module DNN

  # It is a superclass of all connection layers.
  class Connection < HasParamLayer
- # @return [DNN::Initializers] weight initializer.
+ # @return [DNN::Initializers::Initializer] Weight initializer.
  attr_reader :weight_initializer
- # @return [DNN::Initializers] bias initializer.
+ # @return [DNN::Initializers::Initializer] Bias initializer.
  attr_reader :bias_initializer
- # @return [Float] L1 regularization.
- attr_reader :l1_lambda
- # @return [Float] L2 regularization.
- attr_reader :l2_lambda
-
- # @param [DNN::Initializers] weight_initializer weight initializer.
- # @param [DNN::Initializers] bias_initializer bias initializer.
- # @param [Float] l1_lambda L1 regularization
- # @param [Float] l2_lambda L2 regularization
+ # @return [DNN::Regularizers::Regularizer] Weight regularization.
+ attr_reader :weight_regularizer
+ # @return [DNN::Regularizers::Regularizer] Bias regularization.
+ attr_reader :bias_regularizer
+
+ # @param [DNN::Initializers::Initializer] weight_initializer Weight initializer.
+ # @param [DNN::Initializers::Initializer] bias_initializer Bias initializer.
+ # @param [DNN::Regularizers::Regularizer] weight_regularizer Weight regularization.
+ # @param [DNN::Regularizers::Regularizer] bias_regularizer Bias regularization.
  # @param [Bool] use_bias whether to use bias.
  def initialize(weight_initializer: Initializers::RandomNormal.new,
  bias_initializer: Initializers::Zeros.new,
- l1_lambda: 0,
- l2_lambda: 0,
+ weight_regularizer: nil,
+ bias_regularizer: nil,
  use_bias: true)
  super()
  @weight_initializer = weight_initializer
  @bias_initializer = bias_initializer
- @l1_lambda = l1_lambda
- @l2_lambda = l2_lambda
- @params[:weight] = @weight = Param.new
- # For compatibility on or before with v0.9.3, setting use_bias to nil use bias.
- # Therefore, setting use_bias to nil is deprecated.
- if use_bias || use_bias == nil
- @params[:bias] = @bias = Param.new
+ @weight_regularizer = weight_regularizer
+ @bias_regularizer = bias_regularizer
+ @params[:weight] = @weight = Param.new(nil, 0)
+ if use_bias
+ @params[:bias] = @bias = Param.new(nil, 0)
  else
- @params[:bias] = @bias = nil
+ @bias = nil
  end
  end

  def regularizers
  regularizers = []
- regularizers << Lasso.new(@l1_lambda, @weight) if @l1_lambda > 0
- regularizers << Ridge.new(@l2_lambda, @weight) if @l2_lambda > 0
+ regularizers << @weight_regularizer if @weight_regularizer
+ regularizers << @bias_regularizer if @bias_regularizer
  regularizers
  end

@@ -158,15 +145,18 @@ module DNN
  def to_hash(merge_hash)
  super({weight_initializer: @weight_initializer.to_hash,
  bias_initializer: @bias_initializer.to_hash,
- l1_lambda: @l1_lambda,
- l2_lambda: @l2_lambda}.merge(merge_hash))
+ weight_regularizer: @weight_regularizer&.to_hash,
+ bias_regularizer: @bias_regularizer&.to_hash,
+ use_bias: use_bias}.merge(merge_hash))
  end

- private
-
- def init_params
+ private def init_weight_and_bias
  @weight_initializer.init_param(self, @weight)
- @bias_initializer.init_param(self, @bias) if @bias
+ @weight_regularizer.param = @weight if @weight_regularizer
+ if @bias
+ @bias_initializer.init_param(self, @bias)
+ @bias_regularizer.param = @bias if @bias_regularizer
+ end
  end
  end

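Connection drops the l1_lambda/l2_lambda keywords in favour of regularizer objects, which the layer now wires to its params in init_weight_and_bias. A migration sketch (the Lasso class is the one referenced in the removed 0.9.4 code; its 0.10.0 constructor is assumed here to take only the coefficient, since the layer now assigns regularizer.param itself):

    # 0.9.4:  DNN::Layers::Dense.new(64, l1_lambda: 0.01)
    # 0.10.0:
    dense = DNN::Layers::Dense.new(64,
                                   weight_regularizer: DNN::Regularizers::Lasso.new(0.01),
                                   use_bias: true)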
@@ -176,12 +166,12 @@ module DNN
  # @return [Integer] number of nodes.
  attr_reader :num_nodes

- def self.load_hash(hash)
+ def self.from_hash(hash)
  self.new(hash[:num_nodes],
- weight_initializer: Utils.load_hash(hash[:weight_initializer]),
- bias_initializer: Utils.load_hash(hash[:bias_initializer]),
- l1_lambda: hash[:l1_lambda],
- l2_lambda: hash[:l2_lambda],
+ weight_initializer: Utils.from_hash(hash[:weight_initializer]),
+ bias_initializer: Utils.from_hash(hash[:bias_initializer]),
+ weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
+ bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
  use_bias: hash[:use_bias])
  end

@@ -189,25 +179,35 @@ module DNN
  def initialize(num_nodes,
  weight_initializer: Initializers::RandomNormal.new,
  bias_initializer: Initializers::Zeros.new,
- l1_lambda: 0,
- l2_lambda: 0,
+ weight_regularizer: nil,
+ bias_regularizer: nil,
  use_bias: true)
  super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
- l1_lambda: l1_lambda, l2_lambda: l2_lambda, use_bias: use_bias)
+ weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
  @num_nodes = num_nodes
  end
-
+
+ def build(input_shape)
+ super
+ num_prev_nodes = input_shape[0]
+ @weight.data = Xumo::SFloat.new(num_prev_nodes, @num_nodes)
+ @bias.data = Xumo::SFloat.new(@num_nodes) if @bias
+ init_weight_and_bias
+ end
+
  def forward(x)
  @x = x
- out = x.dot(@weight.data)
- out += @bias.data if @bias
- out
+ y = x.dot(@weight.data)
+ y += @bias.data if @bias
+ y
  end

- def backward(dout)
- @weight.grad = @x.transpose.dot(dout)
- @bias.grad = dout.sum(0) if @bias
- dout.dot(@weight.data.transpose)
+ def backward(dy)
+ if @trainable
+ @weight.grad += @x.transpose.dot(dy)
+ @bias.grad += dy.sum(0) if @bias
+ end
+ dy.dot(@weight.data.transpose)
  end

  def output_shape
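Dense now allocates its weight and bias in build rather than in the removed init_params, and accumulates gradients with += only when @trainable. A minimal shape check, assuming the DNN::Layers namespace (Xumo is the array backend used throughout the code above):

    dense = DNN::Layers::Dense.new(3)
    dense.build([2])                   # allocates a 2x3 weight and a bias of length 3
    x = Xumo::SFloat.new(4, 2).rand    # batch of 4 samples with 2 features
    y = dense.forward(x)               # => Xumo::SFloat of shape [4, 3]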
@@ -217,17 +217,6 @@ module DNN
  def to_hash
  super({num_nodes: @num_nodes})
  end
-
- private
-
- # TODO
- # Change writing super() other than the first.
- def init_params
- num_prev_nodes = @input_shape[0]
- @weight.data = Xumo::SFloat.new(num_prev_nodes, @num_nodes)
- @bias.data = Xumo::SFloat.new(@num_nodes) if @bias
- super()
- end
  end


@@ -236,8 +225,8 @@ module DNN
  x.reshape(x.shape[0], *output_shape)
  end

- def backward(dout)
- dout.reshape(dout.shape[0], *@input_shape)
+ def backward(dy)
+ dy.reshape(dy.shape[0], *@input_shape)
  end

  def output_shape
@@ -247,7 +236,7 @@ module DNN


  class Reshape < Layer
- def self.load_hash(hash)
+ def self.from_hash(hash)
  self.new(hash[:output_shape])
  end

@@ -260,8 +249,8 @@ module DNN
  x.reshape(x.shape[0], *@output_shape)
  end

- def backward(dout)
- dout.reshape(dout.shape[0], *@input_shape)
+ def backward(dy)
+ dy.reshape(dy.shape[0], *@input_shape)
  end

  def output_shape
@@ -276,11 +265,11 @@ module DNN

  class Dropout < Layer
  # @return [Float] dropout ratio.
- attr_reader :dropout_ratio
+ attr_accessor :dropout_ratio
  # @return [Float] Use 'weight scaling inference rule'.
  attr_reader :use_scale

- def self.load_hash(hash)
+ def self.from_hash(hash)
  self.new(hash[:dropout_ratio], seed: hash[:seed], use_scale: hash[:use_scale])
  end

@@ -292,7 +281,7 @@ module DNN
  @mask = nil
  end

- def forward(x, learning_phase)
+ def forward(x)
  if learning_phase
  Xumo::SFloat.srand(@seed)
  @mask = Xumo::SFloat.ones(*x.shape).rand < @dropout_ratio
@@ -303,9 +292,9 @@ module DNN
  x
  end

- def backward(dout)
- dout[@mask] = 0
- dout
+ def backward(dy)
+ dy[@mask] = 0
+ dy
  end

  def to_hash
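Dropout#forward no longer receives learning_phase as an argument; the flag is now the learning_phase attribute that Layer#build initializes to true. A sketch of toggling it for inference (namespace and shapes assumed):

    dropout = DNN::Layers::Dropout.new(0.5)
    dropout.build([10])                 # learning_phase is set to true here
    x = Xumo::SFloat.new(4, 10).rand
    train_out = dropout.forward(x)      # applies the random mask
    dropout.learning_phase = false
    infer_out = dropout.forward(x)      # inference path, no masking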
@@ -313,64 +302,6 @@ module DNN
  end
  end

-
- class BatchNormalization < HasParamLayer
- # @return [Float] Exponential moving average of mean and variance.
- attr_reader :momentum
-
- def self.load_hash(hash)
- self.new(momentum: hash[:momentum])
- end
-
- # @param [Float] momentum Exponential moving average of mean and variance.
- def initialize(momentum: 0.9)
- super()
- @momentum = momentum
- end
-
- def forward(x, learning_phase)
- if learning_phase
- mean = x.mean(0)
- @xc = x - mean
- var = (@xc**2).mean(0)
- @std = NMath.sqrt(var + 1e-7)
- xn = @xc / @std
- @xn = xn
- @running_mean.data = @momentum * @running_mean.data + (1 - @momentum) * mean
- @running_var.data = @momentum * @running_var.data + (1 - @momentum) * var
- else
- xc = x - @running_mean.data
- xn = xc / NMath.sqrt(@running_var.data + 1e-7)
- end
- @gamma.data * xn + @beta.data
- end
-
- def backward(dout)
- batch_size = dout.shape[0]
- @beta.grad = dout.sum(0)
- @gamma.grad = (@xn * dout).sum(0)
- dxn = @gamma.data * dout
- dxc = dxn / @std
- dstd = -((dxn * @xc) / (@std**2)).sum(0)
- dvar = 0.5 * dstd / @std
- dxc += (2.0 / batch_size) * @xc * dvar
- dmean = dxc.sum(0)
- dxc - dmean / batch_size
- end
-
- def to_hash
- super({momentum: @momentum})
- end
-
- private
-
- def init_params
- @params[:gamma] = @gamma = Param.new(Xumo::SFloat.ones(*output_shape))
- @params[:beta] = @beta = Param.new(Xumo::SFloat.zeros(*output_shape))
- @params[:running_mean] = @running_mean = Param.new(Xumo::SFloat.zeros(*output_shape))
- @params[:running_var] = @running_var = Param.new(Xumo::SFloat.zeros(*output_shape))
- end
- end
  end

  end
@@ -2,8 +2,8 @@ module DNN
  module Losses

  class Loss
- def forward(out, y, layers)
- loss_value = loss(out, y)
+ def forward(x, y, layers)
+ loss_value = forward_loss(x, y)
  regularizers = layers.select { |layer| layer.is_a?(Connection) }
  .map { |layer| layer.regularizers }.flatten

@@ -13,122 +13,161 @@ module DNN
  loss_value
  end

- def backward(y)
- raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'backward'")
- end
-
- def regularizes_backward(layers)
+ def backward(y, layers)
  layers.select { |layer| layer.is_a?(Connection) }.each do |layer|
  layer.regularizers.each do |regularizer|
  regularizer.backward
  end
  end
+ backward_loss(y)
  end

- def to_hash
- {class: self.class.name}
+ def to_hash(merge_hash = nil)
+ hash = {class: self.class.name}
+ hash.merge!(merge_hash) if merge_hash
+ hash
  end

  private

- def loss(out, y)
- raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'loss'")
+ def forward_loss(x, y)
+ raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'forward_loss'")
+ end
+
+ def backward_loss(y)
+ raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'backward_loss'")
  end
  end

  class MeanSquaredError < Loss
- def loss(out, y)
- @out = out
+ private
+
+ def forward_loss(x, y)
+ @x = x
  batch_size = y.shape[0]
- 0.5 * ((out - y)**2).sum / batch_size
+ 0.5 * ((x - y)**2).sum / batch_size
  end

- def backward(y)
- @out - y
+ def backward_loss(y)
+ @x - y
  end
  end


  class MeanAbsoluteError < Loss
- def loss(out, y)
- @out = out
+ private
+
+ def forward_loss(x, y)
+ @x = x
  batch_size = y.shape[0]
- (out - y).abs.sum / batch_size
+ (x - y).abs.sum / batch_size
  end

- def backward(y)
- dout = @out - y
- dout[dout >= 0] = 1
- dout[dout < 0] = -1
- dout
+ def backward_loss(y)
+ dy = @x - y
+ dy[dy >= 0] = 1
+ dy[dy < 0] = -1
+ dy
  end
  end


  class HuberLoss < Loss
- def forward(out, y, layers)
- @loss_value = super(out, y, layers)
+ def forward(x, y, layers)
+ @loss_value = super(x, y, layers)
  end

- def loss(out, y)
- @out = out
+ private
+
+ def forward_loss(x, y)
+ @x = x
  loss_value = loss_l1(y)
  loss_value > 1 ? loss_value : loss_l2(y)
  end

- def backward(y)
- dout = @out - y
+ def backward_loss(y)
+ dy = @x - y
  if @loss_value > 1
- dout[dout >= 0] = 1
- dout[dout < 0] = -1
+ dy[dy >= 0] = 1
+ dy[dy < 0] = -1
  end
- dout
+ dy
  end

- private
-
  def loss_l1(y)
  batch_size = y.shape[0]
- (@out - y).abs.sum / batch_size
+ (@x - y).abs.sum / batch_size
  end

  def loss_l2(y)
  batch_size = y.shape[0]
- 0.5 * ((@out - y)**2).sum / batch_size
+ 0.5 * ((@x - y)**2).sum / batch_size
  end
  end


  class SoftmaxCrossEntropy < Loss
+ # @return [Float] Return the eps value.
+ attr_accessor :eps
+
+ def self.from_hash(hash)
+ SoftmaxCrossEntropy.new(eps: hash[:eps])
+ end
+
  def self.softmax(x)
  NMath.exp(x) / NMath.exp(x).sum(1).reshape(x.shape[0], 1)
  end

- def loss(x, y)
- @out = SoftmaxCrossEntropy.softmax(x)
+ # @param [Float] eps Value to avoid nan.
+ def initialize(eps: 1e-7)
+ @eps = eps
+ end
+
+ def to_hash
+ super(eps: @eps)
+ end
+
+ private
+
+ def forward_loss(x, y)
+ @x = SoftmaxCrossEntropy.softmax(x)
  batch_size = y.shape[0]
- -(y * NMath.log(@out + 1e-7)).sum / batch_size
+ -(y * NMath.log(@x + @eps)).sum / batch_size
  end

- def backward(y)
- @out - y
+ def backward_loss(y)
+ @x - y
  end
  end


  class SigmoidCrossEntropy < Loss
- def initialize
- @sigmoid = Sigmoid.new
+ # @return [Float] Return the eps value.
+ attr_accessor :eps
+
+ def self.from_hash(hash)
+ SigmoidCrossEntropy.new(eps: hash[:eps])
  end

- def loss(x, y)
- @out = @sigmoid.forward(x)
+ # @param [Float] eps Value to avoid nan.
+ def initialize(eps: 1e-7)
+ @eps = eps
+ end
+
+ def to_hash
+ super(eps: @eps)
+ end
+
+ private
+
+ def forward_loss(x, y)
+ @x = Sigmoid.new.forward(x)
  batch_size = y.shape[0]
- -(y * NMath.log(@out + 1e-7) + (1 - y) * NMath.log(1 - @out + 1e-7))
+ -(y * NMath.log(@x) + (1 - y) * NMath.log(1 - @x))
  end

- def backward(y)
- @out - y
+ def backward_loss(y)
+ @x - y
  end
  end
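The Loss hunks split the public API (forward, backward) from private forward_loss/backward_loss hooks that concrete losses implement. A custom loss written against the new template, mirroring the MeanSquaredError implementation above (the class name is hypothetical):

    class MyMeanSquaredError < DNN::Losses::Loss
      private

      def forward_loss(x, y)
        @x = x
        0.5 * ((x - y)**2).sum / y.shape[0]
      end

      def backward_loss(y)
        @x - y
      end
    end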