ruby-dnn 0.15.3 → 0.16.0
This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/Rakefile +1 -9
- data/examples/api-examples/early_stopping_example.rb +1 -1
- data/examples/api-examples/initializer_example.rb +1 -1
- data/examples/api-examples/regularizer_example.rb +1 -1
- data/examples/api-examples/save_example.rb +1 -1
- data/examples/dcgan/dcgan.rb +3 -3
- data/examples/iris_example.rb +41 -17
- data/examples/mnist_define_by_run.rb +1 -1
- data/examples/pix2pix/dcgan.rb +157 -0
- data/examples/pix2pix/imgen.rb +27 -0
- data/examples/pix2pix/train.rb +52 -0
- data/lib/dnn.rb +2 -0
- data/lib/dnn/core/layers/activations.rb +37 -19
- data/lib/dnn/core/layers/basic_layers.rb +110 -25
- data/lib/dnn/core/layers/cnn_layers.rb +19 -21
- data/lib/dnn/core/layers/embedding.rb +3 -3
- data/lib/dnn/core/layers/math_layers.rb +169 -0
- data/lib/dnn/core/layers/merge_layers.rb +29 -24
- data/lib/dnn/core/layers/normalizations.rb +4 -2
- data/lib/dnn/core/layers/rnn_layers.rb +44 -36
- data/lib/dnn/core/link.rb +7 -2
- data/lib/dnn/core/losses.rb +54 -30
- data/lib/dnn/core/models.rb +47 -47
- data/lib/dnn/core/monkey_patch.rb +75 -0
- data/lib/dnn/core/optimizers.rb +10 -6
- data/lib/dnn/core/param.rb +17 -0
- data/lib/dnn/core/regularizers.rb +35 -33
- data/lib/dnn/core/tensor.rb +40 -0
- data/lib/dnn/core/utils.rb +1 -1
- data/lib/dnn/datasets/cifar10.rb +10 -9
- data/lib/dnn/datasets/cifar100.rb +10 -9
- data/lib/dnn/datasets/downloader.rb +1 -5
- data/lib/dnn/datasets/fashion-mnist.rb +4 -12
- data/lib/dnn/datasets/iris.rb +9 -9
- data/lib/dnn/datasets/mnist.rb +4 -12
- data/lib/dnn/datasets/stl-10.rb +6 -8
- data/lib/dnn/version.rb +1 -1
- data/ruby-dnn.gemspec +1 -1
- metadata +7 -5
- data/ext/cifar_loader/cifar_loader.c +0 -77
- data/ext/cifar_loader/extconf.rb +0 -3
data/lib/dnn/core/layers/merge_layers.rb
CHANGED
@@ -1,41 +1,46 @@
 module DNN
   module Layers

-
-      def
-        new(*args).call(x1, x2)
-      end
-
-      def call(input_tensor1, input_tensor2)
+    module MergeLayerNode
+      def forward(input_tensor1, input_tensor2)
         x1 = input_tensor1.data
         x2 = input_tensor2.data
-        prev_link1 = input_tensor1.link
-        prev_link2 = input_tensor2.link
-
-        y = forward(x1, x2)
+        prev_link1 = (input_tensor1.is_a?(Tensor) ? input_tensor1.link : input_tensor1)
+        prev_link2 = (input_tensor2.is_a?(Tensor) ? input_tensor2.link : input_tensor2)
+        y = forward_node(x1, x2)
         link = TwoInputLink.new(prev_link1, prev_link2, self)
         Tensor.new(y, link)
       end
-    end

-
-
-        x1 + x2
+      def backward(dy)
+        backward_node(dy)
       end

-      def
-
+      def forward_node(x1, x2)
+        raise NotImplementedError, "Class '#{self.class.name}' has implement method 'forward_node'"
+      end
+
+      def backward_node(dy)
+        raise NotImplementedError, "Class '#{self.class.name}' has implement method 'backward_node'"
       end
     end

-    class
-
-
-
+    class MergeLayer < Layers::Layer
+      include MergeLayerNode
+
+      def self.call(x1, x2, *args)
+        new(*args).call(x1, x2)
       end

-      def
-
+      def call(input_tensor1, input_tensor2)
+        input_tensor1 = Tensor.new(input_tensor1) if !input_tensor1.is_a?(Tensor) && !input_tensor1.is_a?(Param)
+        input_tensor2 = Tensor.new(input_tensor2) if !input_tensor2.is_a?(Tensor) && !input_tensor2.is_a?(Param)
+        if input_tensor1.data.is_a?(Numo::NArray)
+          build(input_tensor1.data.shape[1..-1]) unless built?
+        else
+          build([1]) unless built?
+        end
+        forward(input_tensor1, input_tensor2)
       end
     end

@@ -47,13 +52,13 @@ module DNN
         @axis = axis
       end

-      def
+      def forward_node(x1, x2)
        @x1_dim = x1.shape[@axis]
        @x2_dim = x2.shape[@axis]
        x1.concatenate(x2, axis: @axis)
       end

-      def
+      def backward_node(dy)
        dy.split([@x1_dim, @x1_dim + @x2_dim], axis: @axis)
       end

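Read as a whole, the new MergeLayerNode module factors the Tensor/link bookkeeping out of MergeLayer: a two-input layer now only supplies forward_node and backward_node over raw Numo arrays, as Concatenate does above. Below is a minimal, hypothetical sketch of a custom merge layer written against this API; the Subtract class name and the usage comment are illustrative and not part of the release.

  # Hypothetical example (not shipped with ruby-dnn): an element-wise
  # subtraction merge layer using the new forward_node/backward_node API.
  class Subtract < DNN::Layers::MergeLayer
    def forward_node(x1, x2)
      x1 - x2          # plain Numo arrays in, array out
    end

    def backward_node(dy)
      [dy, -dy]        # one gradient per input, consumed by TwoInputLink
    end
  end

  # Subtract.(a, b) would wrap a and b in Tensors if needed, build the layer,
  # and return a Tensor whose link records both inputs for backpropagation.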
data/lib/dnn/core/layers/normalizations.rb
CHANGED
@@ -2,6 +2,8 @@ module DNN
   module Layers

     class BatchNormalization < TrainableLayer
+      include LayerNode
+
       attr_reader :gamma
       attr_reader :beta
       attr_reader :running_mean
@@ -32,7 +34,7 @@ module DNN
        @running_var.data = Xumo::SFloat.zeros(*output_shape)
      end

-      def
+      def forward_node(x)
        if DNN.learning_phase
          mean = x.mean(axis: @axis, keepdims: true)
          @xc = x - mean
@@ -49,7 +51,7 @@ module DNN
        @gamma.data * xn + @beta.data
      end

-      def
+      def backward_node(dy)
        batch_size = dy.shape[@axis]
        if @trainable
          @beta.grad = dy.sum(axis: @axis, keepdims: true)
data/lib/dnn/core/layers/rnn_layers.rb
CHANGED
@@ -3,6 +3,8 @@ module DNN

     # Super class of all RNN classes.
     class RNN < Connection
+      include LayerNode
+
       attr_reader :num_nodes
       attr_reader :recurrent_weight
       attr_reader :hidden
@@ -50,7 +52,7 @@ module DNN
        raise NotImplementedError, "Class '#{self.class.name}' has implement method 'create_hidden_layer'"
      end

-      def
+      def forward_node(xs)
        create_hidden_layer
        @xs_shape = xs.shape
        hs = Xumo::SFloat.zeros(xs.shape[0], @time_length, @num_nodes)
@@ -58,14 +60,14 @@ module DNN
        xs.shape[1].times do |t|
          x = xs[true, t, false]
          @hidden_layers[t].trainable = @trainable
-          h = @hidden_layers[t].
+          h = @hidden_layers[t].forward_node(x, h)
          hs[true, t, false] = h
        end
        @hidden.data = h
        @return_sequences ? hs : h
      end

-      def
+      def backward_node(dh2s)
        unless @return_sequences
          dh = dh2s
          dh2s = Xumo::SFloat.zeros(dh.shape[0], @time_length, dh.shape[1])
@@ -75,7 +77,7 @@ module DNN
        dh = 0
        (dh2s.shape[1] - 1).downto(0) do |t|
          dh2 = dh2s[true, t, false]
-          dx, dh = @hidden_layers[t].
+          dx, dh = @hidden_layers[t].backward_node(dh2 + dh)
          dxs[true, t, false] = dx
        end
        dxs
@@ -134,7 +136,9 @@ module DNN
      end
    end

-    class SimpleRNNDense
+    class SimpleRNNDense < Layer
+      include LayerNode
+
      attr_accessor :trainable

      def initialize(weight, recurrent_weight, bias, activation)
@@ -145,16 +149,16 @@ module DNN
        @trainable = true
      end

-      def
+      def forward_node(x, h)
        @x = x
        @h = h
        h2 = x.dot(@weight.data) + h.dot(@recurrent_weight.data)
        h2 += @bias.data if @bias
-        @activation.
+        @activation.forward_node(h2)
      end

-      def
-        dh2 = @activation.
+      def backward_node(dh2)
+        dh2 = @activation.backward_node(dh2)
        if @trainable
          @weight.grad += @x.transpose.dot(dh2)
          @recurrent_weight.grad += @h.transpose.dot(dh2)
@@ -226,7 +230,9 @@ module DNN
      end
    end

-    class LSTMDense
+    class LSTMDense < Layer
+      include LayerNode
+
      attr_accessor :trainable

      def initialize(weight, recurrent_weight, bias)
@@ -241,7 +247,7 @@ module DNN
        @trainable = true
      end

-      def
+      def forward_node(x, h, c)
        @x = x
        @h = h
        @c = c
@@ -249,25 +255,25 @@ module DNN
        a = x.dot(@weight.data) + h.dot(@recurrent_weight.data)
        a += @bias.data if @bias

-        @forget = @forget_sigmoid.
-        @g = @g_tanh.
-        @in = @in_sigmoid.
-        @out = @out_sigmoid.
+        @forget = @forget_sigmoid.forward_node(a[true, 0...num_nodes])
+        @g = @g_tanh.forward_node(a[true, num_nodes...(num_nodes * 2)])
+        @in = @in_sigmoid.forward_node(a[true, (num_nodes * 2)...(num_nodes * 3)])
+        @out = @out_sigmoid.forward_node(a[true, (num_nodes * 3)..-1])

        c2 = @forget * c + @g * @in
-        @tanh_c2 = @tanh.
+        @tanh_c2 = @tanh.forward_node(c2)
        h2 = @out * @tanh_c2
        [h2, c2]
      end

-      def
+      def backward_node(dh2, dc2)
        dh2_tmp = @tanh_c2 * dh2
-        dc2_tmp = @tanh.
+        dc2_tmp = @tanh.backward_node(@out * dh2) + dc2

-        dout = @out_sigmoid.
-        din = @in_sigmoid.
-        dg = @g_tanh.
-        dforget = @forget_sigmoid.
+        dout = @out_sigmoid.backward_node(dh2_tmp)
+        din = @in_sigmoid.backward_node(dc2_tmp * @g)
+        dg = @g_tanh.backward_node(dc2_tmp * @in)
+        dforget = @forget_sigmoid.backward_node(dc2_tmp * @c)

        da = Xumo::SFloat.hstack([dforget, dg, din, dout])

@@ -313,7 +319,7 @@ module DNN
        @hidden_layers = Array.new(@time_length) { LSTMDense.new(@weight, @recurrent_weight, @bias) }
      end

-      def
+      def forward_node(xs)
        create_hidden_layer
        @xs_shape = xs.shape
        hs = Xumo::SFloat.zeros(xs.shape[0], @time_length, @num_nodes)
@@ -328,7 +334,7 @@ module DNN
        xs.shape[1].times do |t|
          x = xs[true, t, false]
          @hidden_layers[t].trainable = @trainable
-          h, c = @hidden_layers[t].
+          h, c = @hidden_layers[t].forward_node(x, h, c)
          hs[true, t, false] = h
        end
        @hidden.data = h
@@ -336,7 +342,7 @@ module DNN
        @return_sequences ? hs : h
      end

-      def
+      def backward_node(dh2s)
        unless @return_sequences
          dh = dh2s
          dh2s = Xumo::SFloat.zeros(dh.shape[0], @time_length, dh.shape[1])
@@ -347,7 +353,7 @@ module DNN
        dc = 0
        (dh2s.shape[1] - 1).downto(0) do |t|
          dh2 = dh2s[true, t, false]
-          dx, dh, dc = @hidden_layers[t].
+          dx, dh, dc = @hidden_layers[t].backward_node(dh2 + dh, dc)
          dxs[true, t, false] = dx
        end
        dxs
@@ -363,7 +369,9 @@ module DNN
      end
    end

-    class GRUDense
+    class GRUDense < Layer
+      include LayerNode
+
      attr_accessor :trainable

      def initialize(weight, recurrent_weight, bias)
@@ -376,7 +384,7 @@ module DNN
        @trainable = true
      end

-      def
+      def forward_node(x, h)
        @x = x
        @h = h
        num_nodes = h.shape[1]
@@ -384,23 +392,23 @@ module DNN
        @weight2_a = @recurrent_weight.data[true, 0...(num_nodes * 2)]
        a = x.dot(@weight_a) + h.dot(@weight2_a)
        a += @bias.data[0...(num_nodes * 2)] if @bias
-        @update = @update_sigmoid.
-        @reset = @reset_sigmoid.
+        @update = @update_sigmoid.forward_node(a[true, 0...num_nodes])
+        @reset = @reset_sigmoid.forward_node(a[true, num_nodes..-1])

        @weight_h = @weight.data[true, (num_nodes * 2)..-1]
        @weight2_h = @recurrent_weight.data[true, (num_nodes * 2)..-1]
        @tanh_h = if @bias
          bias_h = @bias.data[(num_nodes * 2)..-1]
-          @tanh.
+          @tanh.forward_node(x.dot(@weight_h) + (h * @reset).dot(@weight2_h) + bias_h)
        else
-          @tanh.
+          @tanh.forward_node(x.dot(@weight_h) + (h * @reset).dot(@weight2_h))
        end
        h2 = (1 - @update) * @tanh_h + @update * h
        h2
      end

-      def
-        dtanh_h = @tanh.
+      def backward_node(dh2)
+        dtanh_h = @tanh.backward_node(dh2 * (1 - @update))
        dh = dh2 * @update

        if @trainable
@@ -411,8 +419,8 @@ module DNN
        dx = dtanh_h.dot(@weight_h.transpose)
        dh += dtanh_h.dot(@weight2_h.transpose) * @reset

-        dreset = @reset_sigmoid.
-        dupdate = @update_sigmoid.
+        dreset = @reset_sigmoid.backward_node(dtanh_h.dot(@weight2_h.transpose) * @h)
+        dupdate = @update_sigmoid.backward_node(dh2 * @h - dh2 * @tanh_h)
        da = Xumo::SFloat.hstack([dupdate, dreset])
        if @trainable
          dweight_a = @x.transpose.dot(da)
data/lib/dnn/core/link.rb
CHANGED
@@ -26,9 +26,14 @@ module DNN
    end

    def backward(dy)
-
+      dys = @layer.backward(dy)
+      if dys.is_a?(Array)
+        dy1, dy2 = *dys
+      else
+        dy1 = dys
+      end
      @prev1&.backward(dy1)
-      @prev2&.backward(dy2)
+      @prev2&.backward(dy2) if dy2
    end
  end
end
data/lib/dnn/core/losses.rb
CHANGED
@@ -2,6 +2,10 @@ module DNN
  module Losses

    class Loss
+      def self.call(y, t, *args)
+        new(*args).(y, t)
+      end
+
      def self.from_hash(hash)
        return nil unless hash
        loss_class = DNN.const_get(hash[:class])
@@ -11,31 +15,30 @@ module DNN
        loss
      end

+      def call(y, t)
+        forward(y, t)
+      end
+
      def loss(y, t, layers = nil)
        unless y.shape == t.shape
          raise DNN_ShapeError, "The shape of y does not match the t shape. y shape is #{y.shape}, but t shape is #{t.shape}."
        end
-
-
-
+        loss = call(y, t)
+        loss = regularizers_forward(loss, layers) if layers
+        loss
      end

      def forward(y, t)
        raise NotImplementedError, "Class '#{self.class.name}' has implement method 'forward'"
      end

-      def
-        raise NotImplementedError, "Class '#{self.class.name}' has implement method 'backward'"
-      end
-
-      def regularizers_forward(layers)
-        loss_value = 0
+      def regularizers_forward(loss, layers)
        regularizers = layers.select { |layer| layer.respond_to?(:regularizers) }
                             .map(&:regularizers).flatten
        regularizers.each do |regularizer|
-
+          loss = regularizer.forward(loss)
        end
-
+        loss
      end

      def regularizers_backward(layers)
@@ -64,22 +67,30 @@ module DNN
    end

    class MeanSquaredError < Loss
-
+      include Layers::MergeLayerNode
+
+      def forward_node(y, t)
+        @y = y
+        @t = t
        0.5 * ((y - t)**2).mean(0).sum
      end

-      def
-        y - t
+      def backward_node(dy)
+        @y - @t
      end
    end

    class MeanAbsoluteError < Loss
-
+      include Layers::MergeLayerNode
+
+      def forward_node(y, t)
+        @y = y
+        @t = t
        (y - t).abs.mean(0).sum
      end

-      def
-        dy = y - t
+      def backward_node(d)
+        dy = @y - @t
        dy[dy >= 0] = 1
        dy[dy < 0] = -1
        dy
@@ -87,26 +98,33 @@ module DNN
      end
    end

    class Hinge < Loss
-
+      include Layers::MergeLayerNode
+
+      def forward_node(y, t)
+        @t = t
        @a = 1 - y * t
        Xumo::SFloat.maximum(0, @a).mean(0).sum
      end

-      def
+      def backward_node(d)
        a = Xumo::SFloat.ones(*@a.shape)
        a[@a <= 0] = 0
-        a *
+        a * -@t
      end
    end

    class HuberLoss < Loss
-
+      include Layers::MergeLayerNode
+
+      def forward_node(y, t)
+        @y = y
+        @t = t
        loss_l1_value = loss_l1(y, t)
        @loss_value = loss_l1_value > 1 ? loss_l1_value : loss_l2(y, t)
      end

-      def
-        dy = y - t
+      def backward_node(d)
+        dy = @y - @t
        if @loss_value > 1
          dy[dy >= 0] = 1
          dy[dy < 0] = -1
@@ -126,6 +144,8 @@ module DNN
    end

    class SoftmaxCrossEntropy < Loss
+      include Layers::MergeLayerNode
+
      attr_accessor :eps

      class << self
@@ -141,13 +161,14 @@ module DNN
        @eps = eps
      end

-      def
+      def forward_node(y, t)
+        @t = t
        @x = SoftmaxCrossEntropy.softmax(y)
        -(t * Xumo::NMath.log(@x + @eps)).mean(0).sum
      end

-      def
-        @x - t
+      def backward_node(d)
+        @x - @t
      end

      def to_hash
@@ -160,11 +181,13 @@ module DNN
      end
    end

    class SigmoidCrossEntropy < Loss
+      include Layers::MergeLayerNode
+
      attr_accessor :eps

      class << self
        def sigmoid(y)
-          Layers::Sigmoid.new.
+          Layers::Sigmoid.new.forward_node(y)
        end

        alias activation sigmoid
@@ -175,13 +198,14 @@ module DNN
        @eps = eps
      end

-      def
+      def forward_node(y, t)
+        @t = t
        @x = SigmoidCrossEntropy.sigmoid(y)
        -(t * Xumo::NMath.log(@x + @eps) + (1 - t) * Xumo::NMath.log(1 - @x + @eps)).mean(0).sum
      end

-      def
-        @x - t
+      def backward_node(d)
+        @x - @t
      end

      def to_hash
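The losses.rb hunks above show the same pattern as the layer changes: each loss now includes Layers::MergeLayerNode, caches its inputs in forward_node, and computes the gradient from those cached values in backward_node, while Loss gains a class-level call helper that builds an instance and delegates to #call/#forward. A small, hypothetical usage sketch of that reworked API (not shipped with the gem; values chosen only for illustration):

  # Illustrative only: exercising MeanSquaredError's new node methods directly.
  y = Numo::SFloat[[0.2, 0.8], [0.6, 0.4]]   # predictions
  t = Numo::SFloat[[0.0, 1.0], [1.0, 0.0]]   # targets

  mse = DNN::Losses::MeanSquaredError.new

  # forward_node works on plain arrays and caches @y and @t, so the backward
  # pass no longer needs y and t passed back in:
  value = mse.forward_node(y, t)   # 0.5 * ((y - t)**2).mean(0).sum
  grad  = mse.backward_node(1.0)   # y - t, read from the cached @y and @t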