red-chainer 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -2
  3. data/.travis.yml +8 -3
  4. data/.yardopts +1 -0
  5. data/Gemfile +6 -1
  6. data/README.md +34 -3
  7. data/examples/cifar/train_cifar.rb +13 -2
  8. data/examples/iris/iris.rb +9 -5
  9. data/examples/mnist/mnist.rb +16 -4
  10. data/lib/chainer.rb +17 -1
  11. data/lib/chainer/backend.rb +27 -0
  12. data/lib/chainer/cuda.rb +37 -15
  13. data/lib/chainer/dataset/convert.rb +20 -16
  14. data/lib/chainer/datasets/cifar.rb +8 -6
  15. data/lib/chainer/datasets/mnist.rb +14 -55
  16. data/lib/chainer/device.rb +88 -0
  17. data/lib/chainer/function.rb +103 -41
  18. data/lib/chainer/function_node.rb +454 -0
  19. data/lib/chainer/functions/activation/leaky_relu.rb +38 -13
  20. data/lib/chainer/functions/activation/log_softmax.rb +46 -9
  21. data/lib/chainer/functions/activation/relu.rb +8 -8
  22. data/lib/chainer/functions/activation/relu_grad2.rb +34 -0
  23. data/lib/chainer/functions/activation/sigmoid.rb +13 -11
  24. data/lib/chainer/functions/activation/sigmoid_grad.rb +25 -0
  25. data/lib/chainer/functions/activation/tanh.rb +48 -11
  26. data/lib/chainer/functions/array/broadcast_to.rb +56 -0
  27. data/lib/chainer/functions/array/cast.rb +41 -0
  28. data/lib/chainer/functions/array/reshape.rb +28 -0
  29. data/lib/chainer/functions/array/rollaxis.rb +57 -0
  30. data/lib/chainer/functions/array/select_item.rb +72 -0
  31. data/lib/chainer/functions/array/squeeze.rb +78 -0
  32. data/lib/chainer/functions/array/transpose.rb +44 -0
  33. data/lib/chainer/functions/connection/convolution_2d.rb +43 -26
  34. data/lib/chainer/functions/connection/convolution_2d_grad_w.rb +48 -0
  35. data/lib/chainer/functions/connection/deconvolution_2d.rb +159 -0
  36. data/lib/chainer/functions/connection/linear.rb +29 -22
  37. data/lib/chainer/functions/evaluation/accuracy.rb +5 -5
  38. data/lib/chainer/functions/loss/mean_squared_error.rb +21 -12
  39. data/lib/chainer/functions/loss/softmax_cross_entropy.rb +98 -71
  40. data/lib/chainer/functions/math/basic_math.rb +36 -30
  41. data/lib/chainer/functions/math/exp.rb +28 -0
  42. data/lib/chainer/functions/math/identity.rb +4 -3
  43. data/lib/chainer/functions/math/sum.rb +52 -0
  44. data/lib/chainer/functions/noise/dropout.rb +20 -4
  45. data/lib/chainer/functions/normalization/batch_normalization.rb +257 -104
  46. data/lib/chainer/functions/pooling/average_pooling_2d.rb +29 -6
  47. data/lib/chainer/functions/pooling/max_pooling_2d.rb +67 -12
  48. data/lib/chainer/functions/pooling/pooling_2d.rb +6 -4
  49. data/lib/chainer/gradient_check.rb +157 -73
  50. data/lib/chainer/gradient_method.rb +3 -2
  51. data/lib/chainer/initializers/init.rb +5 -5
  52. data/lib/chainer/initializers/normal.rb +4 -2
  53. data/lib/chainer/initializers/uniform.rb +15 -0
  54. data/lib/chainer/iterators/serial_iterator.rb +5 -3
  55. data/lib/chainer/link.rb +4 -2
  56. data/lib/chainer/links/connection/convolution_2d.rb +2 -2
  57. data/lib/chainer/links/model/classifier.rb +24 -5
  58. data/lib/chainer/links/normalization/batch_normalization.rb +7 -10
  59. data/lib/chainer/optimizer.rb +42 -11
  60. data/lib/chainer/optimizers/adam.rb +3 -2
  61. data/lib/chainer/optimizers/momentum_sgd.rb +1 -1
  62. data/lib/chainer/parameter.rb +7 -6
  63. data/lib/chainer/serializer.rb +4 -4
  64. data/lib/chainer/serializers/marshal.rb +10 -8
  65. data/lib/chainer/testing/array.rb +1 -1
  66. data/lib/chainer/training/extensions/evaluator.rb +2 -3
  67. data/lib/chainer/training/extensions/exponential_shift.rb +1 -1
  68. data/lib/chainer/training/extensions/progress_bar.rb +1 -0
  69. data/lib/chainer/training/trainer.rb +4 -9
  70. data/lib/chainer/training/triggers/interval.rb +7 -2
  71. data/lib/chainer/utils/array.rb +80 -1
  72. data/lib/chainer/utils/conv.rb +10 -2
  73. data/lib/chainer/utils/initializer.rb +2 -2
  74. data/lib/chainer/variable.rb +159 -69
  75. data/lib/chainer/variable_node.rb +64 -10
  76. data/lib/chainer/version.rb +1 -1
  77. data/red-chainer.gemspec +4 -3
  78. data/templates/default/layout/html/layout.erb +40 -0
  79. data/templates/default/onefile/html/layout.erb +33 -0
  80. metadata +44 -11
  81. data/lib/chainer/dataset/download.rb +0 -56
data/lib/chainer/functions/activation/leaky_relu.rb
@@ -2,7 +2,7 @@ module Chainer
   module Functions
     module Activation
       # Leaky rectifier unit.
-      class LeakyReLU < Function
+      class LeakyReLU < FunctionNode
         # Leaky Rectified Linear Unit function.
         #
         # This function is expressed as
@@ -13,7 +13,7 @@ module Chainer
         #
         # where $a$ is a configurable slope value.
         #
-        # @param [Chainer::Variable or Numo::NArray] x Input variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
+        # @param [Chainer::Variable or Numo::NArray or Cumo::NArray] x Input variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
         # @param [float] slope Slope value $a$.
         # @return [Chainer::Variable] Output variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
         # @example
@@ -31,32 +31,57 @@ module Chainer
         # [-0.4, 1]]
         #
         def self.leaky_relu(x, slope: 0.2)
-          self.new(slope: slope).(x)
+          self.new(slope: slope).apply([x])[0]
         end

         def initialize(slope:0.2)
           @slope = slope
         end

-        def forward_cpu(x)
-          y = x[0].dup()
-          y[x[0] < 0] *= @slope
+        def forward(inputs)
+          x, = inputs
+          y = x.dup
+          y[x < 0] *= @slope
           if @slope >= 0
-            retain_inputs([])
             retain_outputs([0])
+          else
+            retain_inputs([0])
           end
           [y]
         end

-        def backward_cpu(x, gy)
-          gx = gy[0].dup()
+        def backward(indexes, grad_outputs)
+          if @slope >= 0
+            x = nil
+            y = get_retained_outputs.first.data
+          else
+            x = get_retained_inputs.first.data
+            y = nil
+          end
+          LeakyReLUGrad.new(x, y, @slope).apply(grad_outputs)
+        end
+      end
+
+      class LeakyReLUGrad < FunctionNode
+        def initialize(x, y, slope)
+          @x = x
+          @y = y
+          @slope = slope
+        end
+
+        def forward(inputs)
+          gy, = inputs
+          gy = gy.dup
           if @slope >= 0
-            y = @output_data
-            gx[y[0] < 0] *= @slope
+            gy[@y < 0] *= @slope
           else
-            gx[x[0] < 0] *= @slope
+            gy[@x < 0] *= @slope
           end
-          [gx]
+          [gy]
+        end
+
+        def backward(indexes, grad_outputs)
+          LeakyReLUGrad.new(@x, @y, @slope).apply(grad_outputs)
         end
       end
     end
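
The hunk above moves LeakyReLU from the old Function API (forward_cpu/backward_cpu) to the FunctionNode API and splits the gradient into its own LeakyReLUGrad node, so the gradient itself can be differentiated again. A minimal usage sketch of the new call path, not part of the diff, assuming red-chainer 0.4.0 and Numo are loaded and using illustrative values:

  require 'chainer'

  x = Chainer::Variable.new(Numo::SFloat[[-1.0, 2.0], [3.0, -4.0]])
  y = Chainer::Functions::Activation::LeakyReLU.leaky_relu(x, slope: 0.2)

  # Seed the output gradient and backpropagate; the gradient now flows
  # through a LeakyReLUGrad node rather than backward_cpu.
  y.grad = Numo::SFloat.ones(*y.data.shape)
  y.backward
  p x.grad    # entries where x < 0 are scaled by 0.2, the rest by 1.0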
data/lib/chainer/functions/activation/log_softmax.rb
@@ -2,11 +2,12 @@ module Chainer
   module Functions
     module Activation
       def self.logsumexp(x)
+        xm = Chainer.get_array_module(x)
         m = x.max(axis: 1, keepdims: true)
         y = x - m
-        y = Numo::NMath.exp(y)
+        y = xm::NMath.exp(y)
         s = y.sum(axis: 1, keepdims: true)
-        s = Numo::NMath.log(s)
+        s = xm::NMath.log(s)
         m + s
       end

@@ -16,7 +17,7 @@ module Chainer
       end

       # Log-softmax activation function.
-      class LogSoftmax < Function
+      class LogSoftmax < FunctionNode
         # Channel-wise log-softmax function.
         #
         # This function computes its logarithm of softmax along the second axis.
@@ -36,7 +37,7 @@ module Chainer
         # because +softmax(x)+ may returns +0+.
         # +log_softmax+ method is more stable.
         #
-        # @param [Chainer::Variable or Numo::NArray] x Input variable. A $n$-dimensional ($n \\geq 2$) float array.
+        # @param [Chainer::Variable or Numo::NArray or Cumo::NArray] x Input variable. A $n$-dimensional ($n \\geq 2$) float array.
         # @return [Chainer::Variable] Output variable. A $n$-dimensional ($n \\geq 2$) float array, which is the same shape with x.
         #
         # @see Chainer::Functions::Softmax
@@ -56,23 +57,59 @@ module Chainer
         # => true
         #
         def self.log_softmax(x)
-          self.new.(x)
+          self.new.apply([x]).first
         end

         def forward(xs)
           y = Chainer::Functions::Activation._log_softmax(xs[0])
           @x_shape = xs[0].shape
           @x_dtype = xs[0].class
-          retain_inputs([])
           retain_outputs([0])
           [y]
         end

-        def backward(x, gy)
-          y = @output_data[0]
-          gx = gy[0] - Numo::NMath.exp(y) * gy[0].sum(axis: 1, keepdims: true)
+        def backward(indexes, gy)
+          y = get_retained_outputs.first
+          LogSoftmaxGrad.new(@x_shape, @x_dtype).apply([y, gy[0]])
+        end
+      end
+
+      class LogSoftmaxGrad < FunctionNode
+        def initialize(x_shape, x_dtype)
+          @x_shape = x_shape
+          @x_dtype = x_dtype
+        end
+
+        def forward(inputs)
+          retain_inputs([0, 1])
+          y, gy = inputs
+
+          xm = Chainer.get_array_module(y)
+          gx = gy - xm::NMath.exp(y) * gy.sum(axis: 1, keepdims: true)
           [gx]
         end
+
+        def backward(indexes, ggx)
+          y, gy = get_retained_inputs
+          ret = []
+          exp_y = Chainer::Functions::Math::Exp.exp(y)
+
+          if indexes.include?(0)
+            gy_sum = Chainer::Functions::Math::Sum.sum(gy, axis: 1, keepdims: true)
+            gy_sum = Chainer::Functions::Array::BroadcastTo.broadcast_to(gy_sum, gy.shape)
+
+            g0 = -ggx.first * exp_y * gy_sum
+            ret << g0
+          end
+          if indexes.include?(1)
+            a = Chainer::Functions::Math::Sum.sum(ggx.first * exp_y, axis: 1, keepdims: true)
+            a = Chainer::Functions::Array::BroadcastTo.broadcast_to(a, gy.shape)
+            g1 = ggx.first - a
+            ret << g1
+          end
+
+          ret
+        end
       end
     end
   end
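
The rewritten backward delegates to LogSoftmaxGrad, which computes gx = gy - exp(y) * sum(gy, axis: 1) from the retained output y and is itself a FunctionNode, so its backward can assemble second-order gradients out of Exp, Sum and BroadcastTo. A quick numerical sanity check of the forward pass, not part of the package, assuming Numo is available:

  x = Numo::SFloat[[1.0, 2.0, 3.0], [0.0, 0.0, 0.0]]
  y = Chainer::Functions::Activation::LogSoftmax.log_softmax(x)

  # exp(log_softmax(x)) is softmax(x), so every row should sum to roughly 1.0.
  p Numo::NMath.exp(y.data).sum(axis: 1)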
data/lib/chainer/functions/activation/relu.rb
@@ -2,14 +2,14 @@ module Chainer
   module Functions
     module Activation
      # Rectified Linear Unit.
-      class Relu < Function
+      class Relu < FunctionNode
         # Rectified Linear Unit function.
         #
         # $$
         # f(x)=\\max(0, x).
         # $$
         #
-        # @param [Chainer::Variable or Numo::NArray] x Input variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
+        # @param [Chainer::Variable or Numo::NArray or Cumo::NArray] x Input variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
         # @return [Chainer::Variable] Output variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
         # @example
         # > x = Numo::SFloat[[-1, 0], [2, -3], [-2, 1]]
@@ -23,18 +23,18 @@ module Chainer
         # => [3, 2]
         #
         def self.relu(x)
-          self.new.(x)
+          y, = self.new.apply([x])
+          y
         end

-        def forward_cpu(x)
-          retain_inputs([])
+        def forward(x)
           retain_outputs([0])
           [Utils::Array.force_array(x[0].class.maximum(x[0], 0))]
         end

-        def backward_cpu(x, gy)
-          y = @output_data[0]
-          [Utils::Array.force_array(gy[0] * (y > 0))]
+        def backward(indexes, gy)
+          y = get_retained_outputs.first
+          ReLUGrad2.new(y).apply([gy[0]])
         end
       end
     end
data/lib/chainer/functions/activation/relu_grad2.rb
@@ -0,0 +1,34 @@
+module Chainer
+  module Functions
+    module Activation
+      # Computes the gradient of the ReLU function.
+      #
+      # This function takes 2 variables b and c, and
+      # computes f(b, c) = sign(b) * c with backpropagation
+      # where operations are done in an elementwise manner
+      # and sign(x) = 1 when x > 0 and 0 otherwise.
+      # As the gradient of f with respect to b is 0,
+      # we do not backpropagate errors toward b for computational efficiency.
+      class ReLUGrad2 < FunctionNode
+        def initialize(b)
+          @b = b.data
+        end
+
+        def forward(inputs)
+          y = inputs[0] * (@b > 0)
+          [Utils::Array.force_array(y, y.class)]
+        end
+
+        def backward(indexes, gy)
+          [gy[0] * heaviside(@b)]
+        end
+
+        private
+
+        def heaviside(x)
+          (x > 0).cast_to(x.class)
+        end
+      end
+    end
+  end
+end
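
ReLUGrad2 keeps only the retained ReLU output b and multiplies the incoming gradient by the Heaviside step of b, which is all the first-order rule needs. A hedged sketch of the same elementwise computation on raw Numo arrays, with illustrative values only:

  b  = Numo::SFloat[-1.0, 0.5, 2.0]      # retained ReLU output
  gy = Numo::SFloat[10.0, 10.0, 10.0]    # incoming gradient

  # (b > 0) is a Numo::Bit mask; the product zeroes the gradient wherever the
  # forward output was non-positive, exactly as ReLUGrad2#forward does.
  p gy * (b > 0)                         # zero where b <= 0, 10.0 elsewhere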
data/lib/chainer/functions/activation/sigmoid.rb
@@ -2,14 +2,14 @@ module Chainer
   module Functions
     module Activation
       # Logistic sigmoid function.
-      class Sigmoid < Function
+      class Sigmoid < FunctionNode
         # Element-wise sigmoid logistic function.
         #
         # $$
         # f(x)=(1 + \\exp(-x))^ { -1 }.
         # $$
         #
-        # @param [Chainer::Variable or Numo::NArray] x Input variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
+        # @param [Chainer::Variable or Numo::NArray or Cumo::NArray] x Input variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
         # @return [Chainer::Variable] Output variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
         # @example It maps the input values into the range of $`[0, 1]`$.
         # > x = Numo::SFloat.new(3).seq(-2, 2)
@@ -21,21 +21,23 @@ module Chainer
         # [0.119203, 0.5, 0.880797]
         #
         def self.sigmoid(x)
-          self.new.(x)
+          self.new.apply([x]).first
         end

-        def forward_cpu(x)
+        def forward(inputs)
+          x, = inputs
           half = 0.5
-          y = Utils::Array.force_array((Numo::NMath.tanh(x[0] * half) * half)+ half)
-          retain_inputs([])
+          xm = Chainer.get_array_module(x)
+          y = Utils::Array.force_array((xm::NMath.tanh(x * half) * half)+ half)
           retain_outputs([0])
-          return [y]
+          [y]
         end

-        def backward_cpu(x, gy)
-          one = 1
-          y = @output_data[0]
-          [Utils::Array.force_array((gy[0] * y) * (one - y))]
+        def backward(indexes, grad_outputs)
+          x = nil
+          y = get_retained_outputs.first
+          gy, = grad_outputs
+          Chainer::Functions::Activation::SigmoidGrad.new([x]).apply([y, gy])
         end
       end
     end
data/lib/chainer/functions/activation/sigmoid_grad.rb
@@ -0,0 +1,25 @@
+module Chainer
+  module Functions
+    module Activation
+      # Logistic sigmoid gradient function.
+      class SigmoidGrad < FunctionNode
+        def initialize(inputs)
+          @x, = inputs
+        end
+
+        def forward(inputs)
+          retain_inputs([0, 1])
+          y, gy = inputs
+          one = 1
+          [Utils::Array.force_array(gy * y * (one - y))]
+        end
+
+        def backward(indexes, grad_outputs)
+          y, gy = get_retained_inputs
+          g, = grad_outputs
+          [g * gy * (1 - 2 * y), g * y * (1 - y)]
+        end
+      end
+    end
+  end
+end
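
Sigmoid now retains only its output y and hands y and the incoming gradient to SigmoidGrad, whose forward computes gy * y * (1 - y) and whose backward supplies the second-order terms g * gy * (1 - 2y) and g * y * (1 - y). A short sketch of the first-order identity on plain arrays, not part of the package, assuming Numo:

  x  = Numo::SFloat.new(5).seq(-2, 1)
  y  = (Numo::NMath.tanh(x * 0.5) * 0.5) + 0.5   # same formulation Sigmoid#forward uses
  gy = Numo::SFloat.ones(5)

  # d sigmoid(x) / dx = y * (1 - y), which is what SigmoidGrad#forward returns.
  p gy * y * (1 - y)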
data/lib/chainer/functions/activation/tanh.rb
@@ -2,14 +2,14 @@ module Chainer
   module Functions
     module Activation
       # Hyperbolic tangent function.
-      class Tanh < Function
+      class Tanh < FunctionNode
         # Elementwise hyperbolic tangent function.
         #
         # $$
         # f(x)=\\tanh(x).
         # $$
         #
-        # @param [Chainer::Variable or Numo::NArray] x Input variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
+        # @param [Chainer::Variable or Numo::NArray or Cumo::NArray] x Input variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
         # @return [Chainer::Variable] Output variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
         # @example
         # > x = Numo::SFloat.new(3).seq(-1, 2)
@@ -21,20 +21,57 @@ module Chainer
         # [-0.761594, 0.761594, 0.995055]
         #
         def self.tanh(x)
-          self.new.(x)
+          self.new.apply([x]).first
         end

-        def forward_cpu(x)
-          y = Utils::Array.force_array(Numo::NMath.tanh(x[0]))
-          retain_inputs([])
+        def forward(x)
+          xm = Chainer.get_array_module(x[0])
+          y = Utils::Array.force_array(xm::NMath.tanh(x[0]))
           retain_outputs([0])
-          return [y]
+          @use_cudnn = false
+          [y]
         end

-        def backward_cpu(x, gy)
-          y = @output_data[0]
-          one = y.class.cast(1)
-          [Utils::Array.force_array(gy[0] * (one - y * y))]
+        def backward(indexes, grad_outputs)
+          if @use_cudnn
+            x = get_retained_inputs.first.data
+          else
+            x = nil
+          end
+
+          y = get_retained_outputs.first
+          gy = grad_outputs.first
+          TanhGrad.new(x).apply([y, gy])
+        end
+      end
+
+      class TanhGrad < FunctionNode
+        def initialize(x)
+          super()
+
+          # The original input `x` is only required for cuDNN.
+          # If it is nil, this class does not use cuDNN.
+          # Note that x must be c-contiguous and it is checked
+          # in Tanh.forward_gpu.
+          @x = x
+        end
+
+        def forward(inputs)
+          retain_inputs([0, 1])
+          y, gy = inputs
+
+          one = y.class.new.fill(1)
+          [Utils::Array.force_array(gy * (one - y * y))]
+        end
+
+        def backward(indexes, grad_outputs)
+          y, gy = get_retained_inputs
+          g = grad_outputs[0]
+
+          y_mul_g = y * g
+          grad_y = -2 * gy * y_mul_g
+          ggy = g - y * y_mul_g
+          [grad_y, ggy]
         end
       end
     end
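
TanhGrad implements the familiar rule gy * (1 - y**2) from the retained output y, and its own backward returns [-2 * gy * (y * g), g - y * (y * g)] so that tanh supports double backpropagation. A worked check of the first-order rule, illustrative only, assuming Numo:

  x  = Numo::SFloat.new(3).seq(-1, 1)
  y  = Numo::NMath.tanh(x)
  gy = Numo::SFloat.ones(3)

  # tanh'(x) = 1 - tanh(x)**2, matching TanhGrad#forward.
  p gy * (1 - y * y)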
data/lib/chainer/functions/array/broadcast_to.rb
@@ -0,0 +1,56 @@
+module Chainer
+  module Functions
+    module Array
+      # Function that broadcasts an array to a new shape.
+      class BroadcastTo < FunctionNode
+        def initialize(shape)
+          @shape = shape
+        end
+
+        def self.broadcast_to(x, shape)
+          return Chainer::Variable.as_variable(x) if x.shape == shape
+          self.new(shape).apply([x]).first
+        end
+
+        def forward(inputs)
+          x = inputs.first
+          [Chainer::Utils::Array.broadcast_to(x, @shape)]
+        end
+
+        def backward(indexes, grad_outputs)
+          gx = grad_outputs.first
+          shape = @inputs.first.shape
+          ndim = shape.size
+          lead = gx.ndim - ndim
+          lead_axis = lead.times.to_a
+          axis = shape.each_with_object([]).with_index do |(sx, res), i|
+            next unless sx == 1
+            res << i + lead
+          end
+          gx = Chainer::Functions::Math::Sum.sum(gx, axis: lead_axis + axis, keepdims: true)
+          return [Chainer::Functions::Array::Squeeze.squeeze(gx, axis: lead_axis)] if lead > 0
+          [gx]
+        end
+
+        private
+
+        def backward_one(shape, dtype, g)
+          return dtype.zeros(shape) unless g
+
+          ndim = shape.size
+          if g.ndim != ndim
+            g = g.sum(axis: 0...(g.ndim - ndim))
+          end
+
+          axis = shape.each_with_index.select{|sx, i| sx == 1 }.map{|sx, i| i }
+          if axis.size > 0
+            g.sum(keepdims: true, axis: axis)
+          else
+            g
+          end
+        end
+      end
+    end
+  end
+end
+
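
BroadcastTo#backward reverses the broadcast: it sums the gradient over the leading axes that were added and over every original axis of extent 1, then squeezes the leading axes away. A small sketch of that reduction on plain Numo arrays, with hypothetical shapes chosen only to illustrate the axis bookkeeping:

  x  = Numo::SFloat.new(3, 1).seq          # original shape [3, 1]
  gx = Numo::SFloat.ones(2, 3, 4)          # gradient w.r.t. the broadcast shape [2, 3, 4]

  # lead = 1, so lead_axis = [0]; the size-1 axis of x maps to axis 2 after the
  # lead offset. Sum over both with keepdims, then drop the lead axis.
  g = gx.sum(axis: [0, 2], keepdims: true) # shape [1, 3, 1]
  p g.reshape(3, 1)                        # gradient restored to the original [3, 1]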