ruby-dnn 0.9.4 → 0.10.0
This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/README.md +39 -3
- data/Rakefile +6 -0
- data/examples/cifar100_example.rb +71 -0
- data/examples/cifar10_example.rb +2 -1
- data/examples/iris_example.rb +2 -1
- data/examples/mnist_conv2d_example.rb +2 -1
- data/examples/mnist_example.rb +2 -3
- data/examples/mnist_lstm_example.rb +2 -1
- data/ext/cifar_loader/cifar_loader.c +77 -0
- data/ext/cifar_loader/extconf.rb +3 -0
- data/lib/dnn.rb +1 -0
- data/lib/dnn/{lib/cifar10.rb → cifar10.rb} +9 -11
- data/lib/dnn/cifar100.rb +49 -0
- data/lib/dnn/core/activations.rb +28 -24
- data/lib/dnn/core/cnn_layers.rb +216 -94
- data/lib/dnn/core/dataset.rb +21 -5
- data/lib/dnn/core/initializers.rb +3 -3
- data/lib/dnn/core/layers.rb +81 -150
- data/lib/dnn/core/losses.rb +88 -49
- data/lib/dnn/core/model.rb +97 -74
- data/lib/dnn/core/normalizations.rb +72 -0
- data/lib/dnn/core/optimizers.rb +171 -78
- data/lib/dnn/core/regularizers.rb +92 -22
- data/lib/dnn/core/rnn_layers.rb +146 -121
- data/lib/dnn/core/utils.rb +4 -3
- data/lib/dnn/{lib/downloader.rb → downloader.rb} +5 -1
- data/lib/dnn/{lib/image.rb → image.rb} +1 -1
- data/lib/dnn/{lib/iris.rb → iris.rb} +1 -1
- data/lib/dnn/{lib/mnist.rb → mnist.rb} +4 -3
- data/lib/dnn/version.rb +1 -1
- data/ruby-dnn.gemspec +1 -1
- metadata +13 -12
- data/API-Reference.ja.md +0 -978
- data/LIB-API-Reference.ja.md +0 -97
- data/ext/cifar10_loader/cifar10_loader.c +0 -44
- data/ext/cifar10_loader/extconf.rb +0 -3
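The 0.10.0 tree also flattens the old lib/dnn/lib/ directory: the dataset and image helpers now live directly under lib/dnn/ (see the renames above), so require paths lose the extra "lib" segment. A minimal, hypothetical usage sketch; only the new file locations are taken from the listing above, and the loader API (MNIST.load_train / load_test) is assumed unchanged:

    require "dnn"
    require "dnn/mnist"   # 0.9.4 used require "dnn/lib/mnist"

    # Assumed loader API: returns [images, labels] pairs.
    x_train, y_train = DNN::MNIST.load_train
    x_test, y_test = DNN::MNIST.load_test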
data/lib/dnn/core/regularizers.rb
CHANGED
@@ -1,36 +1,106 @@
 module DNN
+  module Regularizers
 
-        @l1_lambda = l1_lambda
-        @param = param
-      end
+    class Regularizer
+      attr_accessor :param
 
+      def forward(x)
+        raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'forward'")
+      end
+
+      def backward
+        raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'backward'")
+      end
 
+      def to_hash(merge_hash)
+        hash = {class: self.class.name}
+        hash.merge!(merge_hash)
+        hash
+      end
     end
 
+    class L1 < Regularizer
+      attr_accessor :l1_lambda
+
+      def self.from_hash(hash)
+        L1.new(hash[:l1_lambda])
+      end
+
+      def initialize(l1_lambda = 0.01)
+        @l1_lambda = l1_lambda
+      end
 
+      def forward(x)
+        x + @l1_lambda * @param.data.abs.sum
+      end
+
+      def backward
+        dparam = Xumo::SFloat.ones(*@param.data.shape)
+        dparam[@param.data < 0] = -1
+        @param.grad += @l1_lambda * dparam
+      end
+
+      def to_hash
+        super(l1_lambda: @l1_lambda)
+      end
     end
 
+
+    class L2 < Regularizer
+      attr_accessor :l2_lambda
+
+      def self.from_hash(hash)
+        L2.new(hash[:l2_lambda])
+      end
+
+      def initialize(l2_lambda = 0.01)
+        @l2_lambda = l2_lambda
+      end
+
+      def forward(x)
+        x + 0.5 * @l2_lambda * (@param.data**2).sum
+      end
+
+      def backward
+        @param.grad += @l2_lambda * @param.data
+      end
+
+      def to_hash
+        super(l2_lambda: @l2_lambda)
+      end
     end
 
+    class L1L2 < Regularizer
+      attr_accessor :l1_lambda
+      attr_accessor :l2_lambda
+
+      def self.from_hash(hash)
+        L1L2.new(hash[:l1_lambda], hash[:l2_lambda])
+      end
+
+      def initialize(l1_lambda = 0.01, l2_lambda = 0.01)
+        @l1_lambda = l1_lambda
+        @l2_lambda = l2_lambda
+      end
+
+      def forward(x)
+        l1 = @l1_lambda * @param.data.abs.sum
+        l2 = 0.5 * @l2_lambda * (@param.data**2).sum
+        x + l1 + l2
+      end
+
+      def backward
+        dparam = Xumo::SFloat.ones(*@param.data.shape)
+        dparam[@param.data < 0] = -1
+        @param.grad += @l1_lambda * dparam
+        @param.grad += @l2_lambda * @param.data
+      end
+
+      def to_hash
+        super(l1_lambda: l1_lambda, l2_lambda: l2_lambda)
+      end
+
     end
 
+  end
 end
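The rewrite above replaces the old constructor-injected regularizers with standalone L1 / L2 / L1L2 objects: each one holds its target parameter through the new param accessor, adds its penalty to the loss in forward, and accumulates the penalty gradient into param.grad in backward. A minimal sketch of that contract, assuming the default Numo (CPU) backend where Xumo maps to Numo, and using a stand-in struct instead of the gem's DNN::Param (anything exposing data and grad works here):

    require "dnn"
    require "numo/narray"

    # Stand-in for DNN::Param; not part of the gem.
    FakeParam = Struct.new(:data, :grad)

    weight = FakeParam.new(Numo::SFloat[[0.5, -1.0], [2.0, -0.25]],
                           Numo::SFloat.zeros(2, 2))

    reg = DNN::Regularizers::L1L2.new(0.01, 0.01)
    reg.param = weight

    loss = reg.forward(0.0)  # 0.0 + L1 penalty + L2 penalty
    reg.backward             # adds d(penalty)/d(weight) into weight.grad
    p loss
    p weight.grad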
data/lib/dnn/core/rnn_layers.rb
CHANGED
@@ -11,25 +11,36 @@ module DNN
       attr_reader :stateful
       # @return [Bool] Set the false, only the last of each cell of RNN is left.
       attr_reader :return_sequences
+      # @return [DNN::Initializers::Initializer] Recurrent weight initializer.
+      attr_reader :recurrent_weight_initializer
+      # @return [DNN::Regularizers::Regularizer] Recurrent weight regularization.
+      attr_reader :recurrent_weight_regularizer
 
       def initialize(num_nodes,
                      stateful: false,
                      return_sequences: true,
                      weight_initializer: RandomNormal.new,
+                     recurrent_weight_initializer: RandomNormal.new,
                      bias_initializer: Zeros.new,
+                     weight_regularizer: nil,
+                     recurrent_weight_regularizer: nil,
+                     bias_regularizer: nil,
                      use_bias: true)
         super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
+              weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
         @num_nodes = num_nodes
         @stateful = stateful
         @return_sequences = return_sequences
         @layers = []
+        @hidden = @params[:hidden] = Param.new
+        @params[:recurrent_weight] = @recurrent_weight = Param.new(nil, 0)
+        @recurrent_weight_initializer = recurrent_weight_initializer
+        @recurrent_weight_regularizer = recurrent_weight_regularizer
+      end
+
+      def build(input_shape)
+        super
+        @time_length = @input_shape[0]
       end
 
       def forward(xs)
@@ -38,6 +49,7 @@ module DNN
         h = (@stateful && @hidden.data) ? @hidden.data : Xumo::SFloat.zeros(xs.shape[0], @num_nodes)
         xs.shape[1].times do |t|
           x = xs[true, t, false]
+          @layers[t].trainable = @trainable
           h = @layers[t].forward(x, h)
           hs[true, t, false] = h
         end
@@ -46,9 +58,6 @@ module DNN
       end
 
       def backward(dh2s)
-        @weight.grad = Xumo::SFloat.zeros(*@weight.data.shape)
-        @weight2.grad = Xumo::SFloat.zeros(*@weight2.data.shape)
-        @bias.grad = Xumo::SFloat.zeros(*@bias.data.shape) if @bias
         unless @return_sequences
           dh = dh2s
           dh2s = Xumo::SFloat.zeros(dh.shape[0], @time_length, dh.shape[1])
@@ -72,7 +81,9 @@ module DNN
         hash = {
           num_nodes: @num_nodes,
           stateful: @stateful,
-          return_sequences: @return_sequences
+          return_sequences: @return_sequences,
+          recurrent_weight_initializer: @recurrent_weight_initializer.to_hash,
+          recurrent_weight_regularizer: @recurrent_weight_regularizer&.to_hash,
         }
         hash.merge!(merge_hash) if merge_hash
         super(hash)
@@ -85,48 +96,48 @@ module DNN
 
       def regularizers
         regularizers = []
-        if @l2_lambda > 0
-          regularizers << Ridge.new(@l2_lambda, @weight)
-          regularizers << Ridge.new(@l2_lambda, @weight2)
-        end
+        regularizers << @weight_regularizer if @weight_regularizer
+        regularizers << @recurrent_weight_regularizer if @recurrent_weight_regularizer
+        regularizers << @bias_regularizer if @bias_regularizer
         regularizers
       end
 
-      private
-
+      private def init_weight_and_bias
+        super
+        @recurrent_weight_initializer.init_param(self, @recurrent_weight)
+        @recurrent_weight_regularizer.param = @recurrent_weight if @recurrent_weight_regularizer
       end
     end
 
 
     class SimpleRNN_Dense
+      attr_accessor :trainable
+
+      def initialize(weight, recurrent_weight, bias, activation)
         @weight = weight
+        @recurrent_weight = recurrent_weight
         @bias = bias
         @activation = activation.clone
+        @trainable = true
       end
 
       def forward(x, h)
         @x = x
         @h = h
+        h2 = x.dot(@weight.data) + h.dot(@recurrent_weight.data)
         h2 += @bias.data if @bias
         @activation.forward(h2)
       end
 
       def backward(dh2)
         dh2 = @activation.backward(dh2)
+        if @trainable
+          @weight.grad += @x.transpose.dot(dh2)
+          @recurrent_weight.grad += @h.transpose.dot(dh2)
+          @bias.grad += dh2.sum(0) if @bias
+        end
         dx = dh2.dot(@weight.data.transpose)
+        dh = dh2.dot(@recurrent_weight.data.transpose)
         [dx, dh]
       end
     end
@@ -137,15 +148,17 @@ module DNN
 
       attr_reader :activation
 
+      def self.from_hash(hash)
         simple_rnn = self.new(hash[:num_nodes],
                               stateful: hash[:stateful],
                               return_sequences: hash[:return_sequences],
+                              activation: Utils.from_hash(hash[:activation]),
+                              weight_initializer: Utils.from_hash(hash[:weight_initializer]),
+                              recurrent_weight_initializer: Utils.from_hash(hash[:recurrent_weight_initializer]),
+                              bias_initializer: Utils.from_hash(hash[:bias_initializer]),
+                              weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
+                              recurrent_weight_regularizer: Utils.from_hash(hash[:recurrent_weight_regularizer]),
+                              bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
                               use_bias: hash[:use_bias])
         simple_rnn
       end
@@ -155,53 +168,56 @@ module DNN
                      return_sequences: true,
                      activation: Tanh.new,
                      weight_initializer: RandomNormal.new,
+                     recurrent_weight_initializer: RandomNormal.new,
                      bias_initializer: Zeros.new,
+                     weight_regularizer: nil,
+                     recurrent_weight_regularizer: nil,
+                     bias_regularizer: nil,
                      use_bias: true)
         super(num_nodes,
               stateful: stateful,
               return_sequences: return_sequences,
               weight_initializer: weight_initializer,
+              recurrent_weight_initializer: recurrent_weight_initializer,
               bias_initializer: bias_initializer,
+              weight_regularizer: weight_regularizer,
+              recurrent_weight_regularizer: recurrent_weight_regularizer,
+              bias_regularizer: bias_regularizer,
               use_bias: use_bias)
         @activation = activation
       end
 
-      private
-
-      def init_params
-        super()
-        num_prev_nodes = @input_shape[1]
+      def build(input_shape)
+        super
+        num_prev_nodes = input_shape[1]
         @weight.data = Xumo::SFloat.new(num_prev_nodes, @num_nodes)
+        @recurrent_weight.data = Xumo::SFloat.new(@num_nodes, @num_nodes)
         @bias.data = Xumo::SFloat.new(@num_nodes) if @bias
-        @weight_initializer.init_param(self, @weight2)
-        @bias_initializer.init_param(self, @bias) if @bias
+        init_weight_and_bias
         @time_length.times do |t|
+          @layers << SimpleRNN_Dense.new(@weight, @recurrent_weight, @bias, @activation)
         end
       end
+
+      def to_hash
+        super({activation: @activation.to_hash})
+      end
     end
 
 
     class LSTM_Dense
+      attr_accessor :trainable
+
+      def initialize(weight, recurrent_weight, bias)
         @weight = weight
+        @recurrent_weight = recurrent_weight
         @bias = bias
         @tanh = Tanh.new
         @g_tanh = Tanh.new
         @forget_sigmoid = Sigmoid.new
         @in_sigmoid = Sigmoid.new
         @out_sigmoid = Sigmoid.new
+        @trainable = true
       end
 
       def forward(x, h, c)
@@ -209,7 +225,7 @@ module DNN
         @h = h
         @c = c
         num_nodes = h.shape[1]
+        a = x.dot(@weight.data) + h.dot(@recurrent_weight.data)
         a += @bias.data if @bias
 
         @forget = @forget_sigmoid.forward(a[true, 0...num_nodes])
@@ -234,11 +250,13 @@ module DNN
 
         da = Xumo::SFloat.hstack([dforget, dg, din, dout])
 
+        if @trainable
+          @weight.grad += @x.transpose.dot(da)
+          @recurrent_weight.grad += @h.transpose.dot(da)
+          @bias.grad += da.sum(0) if @bias
+        end
         dx = da.dot(@weight.data.transpose)
+        dh = da.dot(@recurrent_weight.data.transpose)
         dc = dc2_tmp * @forget
         [dx, dh, dc]
       end
@@ -246,14 +264,16 @@ module DNN
 
 
     class LSTM < RNN
+      def self.from_hash(hash)
         lstm = self.new(hash[:num_nodes],
                         stateful: hash[:stateful],
                         return_sequences: hash[:return_sequences],
+                        weight_initializer: Utils.from_hash(hash[:weight_initializer]),
+                        recurrent_weight_initializer: Utils.from_hash(hash[:recurrent_weight_initializer]),
+                        bias_initializer: Utils.from_hash(hash[:bias_initializer]),
+                        weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
+                        recurrent_weight_regularizer: Utils.from_hash(hash[:recurrent_weight_regularizer]),
+                        bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
                         use_bias: hash[:use_bias])
         lstm
       end
@@ -262,12 +282,26 @@ module DNN
                      stateful: false,
                      return_sequences: true,
                      weight_initializer: RandomNormal.new,
+                     recurrent_weight_initializer: RandomNormal.new,
                      bias_initializer: Zeros.new,
+                     weight_regularizer: nil,
+                     recurrent_weight_regularizer: nil,
+                     bias_regularizer: nil,
                      use_bias: true)
         super
+        @cell = @params[:cell] = Param.new
+      end
+
+      def build(input_shape)
+        super
+        num_prev_nodes = input_shape[1]
+        @weight.data = Xumo::SFloat.new(num_prev_nodes, @num_nodes * 4)
+        @recurrent_weight.data = Xumo::SFloat.new(@num_nodes, @num_nodes * 4)
+        @bias.data = Xumo::SFloat.new(@num_nodes * 4) if @bias
+        init_weight_and_bias
+        @time_length.times do |t|
+          @layers << LSTM_Dense.new(@weight, @recurrent_weight, @bias)
+        end
       end
 
       def forward(xs)
@@ -283,6 +317,7 @@ module DNN
         c ||= Xumo::SFloat.zeros(xs.shape[0], @num_nodes)
         xs.shape[1].times do |t|
           x = xs[true, t, false]
+          @layers[t].trainable = @trainable
           h, c = @layers[t].forward(x, h, c)
           hs[true, t, false] = h
         end
@@ -292,9 +327,6 @@ module DNN
       end
 
       def backward(dh2s)
-        @weight.grad = Xumo::SFloat.zeros(*@weight.data.shape)
-        @weight2.grad = Xumo::SFloat.zeros(*@weight2.data.shape)
-        @bias.grad = Xumo::SFloat.zeros(*@bias.data.shape) if @bias
         unless @return_sequences
           dh = dh2s
           dh2s = Xumo::SFloat.zeros(dh.shape[0], @time_length, dh.shape[1])
@@ -315,33 +347,20 @@ module DNN
         super()
         @cell.data = @cell.data.fill(0) if @cell.data
       end
-
-      private
-
-      def init_params
-        super()
-        num_prev_nodes = @input_shape[1]
-        @weight.data = Xumo::SFloat.new(num_prev_nodes, @num_nodes * 4)
-        @weight2.data = Xumo::SFloat.new(@num_nodes, @num_nodes * 4)
-        @bias.data = Xumo::SFloat.new(@num_nodes * 4) if @bias
-        @weight_initializer.init_param(self, @weight)
-        @weight_initializer.init_param(self, @weight2)
-        @bias_initializer.init_param(self, @bias) if @bias
-        @time_length.times do |t|
-          @layers << LSTM_Dense.new(@weight, @weight2, @bias)
-        end
-      end
     end
 
 
     class GRU_Dense
+      attr_accessor :trainable
+
+      def initialize(weight, recurrent_weight, bias)
         @weight = weight
+        @recurrent_weight = recurrent_weight
         @bias = bias
         @update_sigmoid = Sigmoid.new
         @reset_sigmoid = Sigmoid.new
         @tanh = Tanh.new
+        @trainable = true
       end
 
       def forward(x, h)
@@ -349,60 +368,68 @@ module DNN
         @h = h
         num_nodes = h.shape[1]
         @weight_a = @weight.data[true, 0...(num_nodes * 2)]
+        @weight2_a = @recurrent_weight.data[true, 0...(num_nodes * 2)]
         a = x.dot(@weight_a) + h.dot(@weight2_a)
         a += @bias.data[0...(num_nodes * 2)] if @bias
         @update = @update_sigmoid.forward(a[true, 0...num_nodes])
         @reset = @reset_sigmoid.forward(a[true, num_nodes..-1])
 
         @weight_h = @weight.data[true, (num_nodes * 2)..-1]
+        @weight2_h = @recurrent_weight.data[true, (num_nodes * 2)..-1]
         @tanh_h = if @bias
                     bias_h = @bias.data[(num_nodes * 2)..-1]
                     @tanh.forward(x.dot(@weight_h) + (h * @reset).dot(@weight2_h) + bias_h)
                   else
                     @tanh.forward(x.dot(@weight_h) + (h * @reset).dot(@weight2_h))
                   end
+        h2 = (1 - @update) * @tanh_h + @update * h
         h2
       end
 
       def backward(dh2)
-        dtanh_h = @tanh.backward(dh2 * @update)
+        dtanh_h = @tanh.backward(dh2 * (1 - @update))
+        dh = dh2 * @update
 
+        if @trainable
+          dweight_h = @x.transpose.dot(dtanh_h)
+          dweight2_h = (@h * @reset).transpose.dot(dtanh_h)
+          dbias_h = dtanh_h.sum(0) if @bias
+        end
         dx = dtanh_h.dot(@weight_h.transpose)
-        dweight2_h = (@h * @reset).transpose.dot(dtanh_h)
         dh += dtanh_h.dot(@weight2_h.transpose) * @reset
-        dbias_h = dtanh_h.sum(0) if @bias
 
         dreset = @reset_sigmoid.backward(dtanh_h.dot(@weight2_h.transpose) * @h)
+        dupdate = @update_sigmoid.backward(dh2 * @h - dh2 * @tanh_h)
         da = Xumo::SFloat.hstack([dupdate, dreset])
+        if @trainable
+          dweight_a = @x.transpose.dot(da)
+          dweight2_a = @h.transpose.dot(da)
+          dbias_a = da.sum(0) if @bias
+        end
         dx += da.dot(@weight_a.transpose)
-        dweight2_a = @h.transpose.dot(da)
         dh += da.dot(@weight2_a.transpose)
-        dbias_a = da.sum(0) if @bias
 
+        if @trainable
+          @weight.grad += Xumo::SFloat.hstack([dweight_a, dweight_h])
+          @recurrent_weight.grad += Xumo::SFloat.hstack([dweight2_a, dweight2_h])
+          @bias.grad += Xumo::SFloat.hstack([dbias_a, dbias_h]) if @bias
+        end
         [dx, dh]
       end
     end
 
 
     class GRU < RNN
+      def self.from_hash(hash)
         gru = self.new(hash[:num_nodes],
                        stateful: hash[:stateful],
                        return_sequences: hash[:return_sequences],
+                       weight_initializer: Utils.from_hash(hash[:weight_initializer]),
+                       recurrent_weight_initializer: Utils.from_hash(hash[:recurrent_weight_initializer]),
+                       bias_initializer: Utils.from_hash(hash[:bias_initializer]),
+                       weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
+                       recurrent_weight_regularizer: Utils.from_hash(hash[:recurrent_weight_regularizer]),
+                       bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
                        use_bias: hash[:use_bias])
         gru
       end
@@ -411,26 +438,24 @@ module DNN
                      stateful: false,
                      return_sequences: true,
                      weight_initializer: RandomNormal.new,
+                     recurrent_weight_initializer: RandomNormal.new,
                      bias_initializer: Zeros.new,
+                     weight_regularizer: nil,
+                     recurrent_weight_regularizer: nil,
+                     bias_regularizer: nil,
                      use_bias: true)
         super
       end
 
-      private
-
+      def build(input_shape)
+        super
         num_prev_nodes = @input_shape[1]
         @weight.data = Xumo::SFloat.new(num_prev_nodes, @num_nodes * 3)
+        @recurrent_weight.data = Xumo::SFloat.new(@num_nodes, @num_nodes * 3)
         @bias.data = Xumo::SFloat.new(@num_nodes * 3) if @bias
-        @weight_initializer.init_param(self, @weight2)
-        @bias_initializer.init_param(self, @bias) if @bias
+        init_weight_and_bias
        @time_length.times do |t|
+          @layers << GRU_Dense.new(@weight, @recurrent_weight, @bias)
        end
      end
    end
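Across the hunks above, every RNN layer follows the same pattern: @weight2 becomes @recurrent_weight, the recurrent weight gets its own initializer and regularizer keyword arguments, the per-step dense cells honor a trainable flag, parameter allocation moves from the private init_params into build(input_shape), and serialization goes through from_hash / to_hash. A minimal construction sketch based only on the initialize signatures shown in this diff; the module paths (DNN::Layers, DNN::Initializers, DNN::Regularizers) assume the gem's usual namespace layout, and the surrounding model and training code is omitted:

    require "dnn"

    lstm = DNN::Layers::LSTM.new(
      64,
      stateful: false,
      return_sequences: true,
      weight_initializer: DNN::Initializers::RandomNormal.new,
      recurrent_weight_initializer: DNN::Initializers::RandomNormal.new,
      weight_regularizer: DNN::Regularizers::L2.new(0.01),
      recurrent_weight_regularizer: DNN::Regularizers::L2.new(0.01),
      bias_regularizer: DNN::Regularizers::L2.new(0.01)
    )

    # Once the layer has been built inside a model, `lstm.regularizers` returns the
    # three regularizer objects above so the model can add their penalties to the loss.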
|