red-chainer 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -2
  3. data/.travis.yml +8 -3
  4. data/.yardopts +1 -0
  5. data/Gemfile +6 -1
  6. data/README.md +34 -3
  7. data/examples/cifar/train_cifar.rb +13 -2
  8. data/examples/iris/iris.rb +9 -5
  9. data/examples/mnist/mnist.rb +16 -4
  10. data/lib/chainer.rb +17 -1
  11. data/lib/chainer/backend.rb +27 -0
  12. data/lib/chainer/cuda.rb +37 -15
  13. data/lib/chainer/dataset/convert.rb +20 -16
  14. data/lib/chainer/datasets/cifar.rb +8 -6
  15. data/lib/chainer/datasets/mnist.rb +14 -55
  16. data/lib/chainer/device.rb +88 -0
  17. data/lib/chainer/function.rb +103 -41
  18. data/lib/chainer/function_node.rb +454 -0
  19. data/lib/chainer/functions/activation/leaky_relu.rb +38 -13
  20. data/lib/chainer/functions/activation/log_softmax.rb +46 -9
  21. data/lib/chainer/functions/activation/relu.rb +8 -8
  22. data/lib/chainer/functions/activation/relu_grad2.rb +34 -0
  23. data/lib/chainer/functions/activation/sigmoid.rb +13 -11
  24. data/lib/chainer/functions/activation/sigmoid_grad.rb +25 -0
  25. data/lib/chainer/functions/activation/tanh.rb +48 -11
  26. data/lib/chainer/functions/array/broadcast_to.rb +56 -0
  27. data/lib/chainer/functions/array/cast.rb +41 -0
  28. data/lib/chainer/functions/array/reshape.rb +28 -0
  29. data/lib/chainer/functions/array/rollaxis.rb +57 -0
  30. data/lib/chainer/functions/array/select_item.rb +72 -0
  31. data/lib/chainer/functions/array/squeeze.rb +78 -0
  32. data/lib/chainer/functions/array/transpose.rb +44 -0
  33. data/lib/chainer/functions/connection/convolution_2d.rb +43 -26
  34. data/lib/chainer/functions/connection/convolution_2d_grad_w.rb +48 -0
  35. data/lib/chainer/functions/connection/deconvolution_2d.rb +159 -0
  36. data/lib/chainer/functions/connection/linear.rb +29 -22
  37. data/lib/chainer/functions/evaluation/accuracy.rb +5 -5
  38. data/lib/chainer/functions/loss/mean_squared_error.rb +21 -12
  39. data/lib/chainer/functions/loss/softmax_cross_entropy.rb +98 -71
  40. data/lib/chainer/functions/math/basic_math.rb +36 -30
  41. data/lib/chainer/functions/math/exp.rb +28 -0
  42. data/lib/chainer/functions/math/identity.rb +4 -3
  43. data/lib/chainer/functions/math/sum.rb +52 -0
  44. data/lib/chainer/functions/noise/dropout.rb +20 -4
  45. data/lib/chainer/functions/normalization/batch_normalization.rb +257 -104
  46. data/lib/chainer/functions/pooling/average_pooling_2d.rb +29 -6
  47. data/lib/chainer/functions/pooling/max_pooling_2d.rb +67 -12
  48. data/lib/chainer/functions/pooling/pooling_2d.rb +6 -4
  49. data/lib/chainer/gradient_check.rb +157 -73
  50. data/lib/chainer/gradient_method.rb +3 -2
  51. data/lib/chainer/initializers/init.rb +5 -5
  52. data/lib/chainer/initializers/normal.rb +4 -2
  53. data/lib/chainer/initializers/uniform.rb +15 -0
  54. data/lib/chainer/iterators/serial_iterator.rb +5 -3
  55. data/lib/chainer/link.rb +4 -2
  56. data/lib/chainer/links/connection/convolution_2d.rb +2 -2
  57. data/lib/chainer/links/model/classifier.rb +24 -5
  58. data/lib/chainer/links/normalization/batch_normalization.rb +7 -10
  59. data/lib/chainer/optimizer.rb +42 -11
  60. data/lib/chainer/optimizers/adam.rb +3 -2
  61. data/lib/chainer/optimizers/momentum_sgd.rb +1 -1
  62. data/lib/chainer/parameter.rb +7 -6
  63. data/lib/chainer/serializer.rb +4 -4
  64. data/lib/chainer/serializers/marshal.rb +10 -8
  65. data/lib/chainer/testing/array.rb +1 -1
  66. data/lib/chainer/training/extensions/evaluator.rb +2 -3
  67. data/lib/chainer/training/extensions/exponential_shift.rb +1 -1
  68. data/lib/chainer/training/extensions/progress_bar.rb +1 -0
  69. data/lib/chainer/training/trainer.rb +4 -9
  70. data/lib/chainer/training/triggers/interval.rb +7 -2
  71. data/lib/chainer/utils/array.rb +80 -1
  72. data/lib/chainer/utils/conv.rb +10 -2
  73. data/lib/chainer/utils/initializer.rb +2 -2
  74. data/lib/chainer/variable.rb +159 -69
  75. data/lib/chainer/variable_node.rb +64 -10
  76. data/lib/chainer/version.rb +1 -1
  77. data/red-chainer.gemspec +4 -3
  78. data/templates/default/layout/html/layout.erb +40 -0
  79. data/templates/default/onefile/html/layout.erb +33 -0
  80. metadata +44 -11
  81. data/lib/chainer/dataset/download.rb +0 -56
data/lib/chainer/functions/array/cast.rb
@@ -0,0 +1,41 @@
+ module Chainer
+   module Functions
+     module Array
+       class Cast < FunctionNode
+         # Cast an input variable to a given type.
+         #
+         # @param [Chainer::Variable or Numo::NArray] x Input variable to be cast.
+         # @param [Numo::NArray class] type Data class to cast to.
+         # @return [Chainer::Variable] Variable holding the cast array.
+         #
+         # @example
+         #   > x = Numo::UInt8.new(3, 5).seq
+         #   > x.class
+         #   # => Numo::UInt8
+         #   > y = Chainer::Functions::Array::Cast.cast(x, Numo::DFloat)
+         #   > y.dtype
+         #   # => Numo::DFloat
+         def self.cast(x, type)
+           if (Chainer.array?(x) && x.class == type) || (x.is_a?(Chainer::Variable) && x.dtype == type)
+             return Chainer::Variable.as_variable(x)
+           end
+           self.new(type).apply([x]).first
+         end
+
+         def initialize(type)
+           @type = type
+         end
+
+         def forward(x)
+           @in_type = x.first.class
+           [x.first.cast_to(@type)]
+         end
+
+         def backward(indexes, g)
+           [Cast.cast(g.first, @in_type)]
+         end
+       end
+     end
+   end
+ end
+
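A quick usage sketch of the new node (assuming red-chainer 0.4.0 with Numo installed; values are illustrative):

    require 'chainer'

    # Cast a double-precision variable to single precision inside a graph.
    x = Chainer::Variable.new(Numo::DFloat.new(2, 3).seq)
    y = Chainer::Functions::Array::Cast.cast(x, Numo::SFloat)
    y.data.class  # => Numo::SFloat
    # Casting to the dtype the input already has short-circuits in
    # self.cast and returns the input wrapped as a Variable.
    z = Chainer::Functions::Array::Cast.cast(y, Numo::SFloat)

Note that backward casts the gradient back to the input's original class, so mixed-precision graphs keep consistent dtypes.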
data/lib/chainer/functions/array/reshape.rb
@@ -0,0 +1,28 @@
+ module Chainer
+   module Functions
+     module Array
+       # Reshapes an input array without copying it.
+       class Reshape < FunctionNode
+         def initialize(shape)
+           @shape = shape
+         end
+
+         def self.reshape(x, shape)
+           return Chainer::Variable.as_variable(x) if x.shape == shape
+           self.new(shape).apply([x]).first
+         end
+
+         def forward(inputs)
+           x = inputs.first
+           new_shape = @shape.map { |s| s == -1 ? nil : s }
+           [x.reshape(*new_shape)]
+         end
+
+         def backward(indexes, grad_outputs)
+           gx = grad_outputs.first
+           [Reshape.reshape(gx, @inputs.first.shape)]
+         end
+       end
+     end
+   end
+ end
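A usage sketch (illustrative values): a -1 entry marks the dimension to infer, which #forward maps to nil for Numo's reshape:

    require 'chainer'

    x = Chainer::Variable.new(Numo::SFloat.new(2, 3, 4).seq)
    # One dimension may be -1; Numo infers it from the total size.
    y = Chainer::Functions::Array::Reshape.reshape(x, [6, -1])
    y.shape  # => [6, 4]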
data/lib/chainer/functions/array/rollaxis.rb
@@ -0,0 +1,57 @@
+ module Chainer
+   module Functions
+     module Array
+       # Rolls an axis of an array.
+       class Rollaxis < FunctionNode
+         # Roll the axis backwards to the given position.
+         #
+         # @param [Chainer::Variable] x Input variable.
+         # @param [Integer] axis The axis to roll backwards.
+         # @param [Integer] start The place to which the axis is moved.
+         # @return [Chainer::Variable] Variable whose axis is rolled.
+         def self.rollaxis(x, axis, start: 0)
+           Rollaxis.new(axis, start).apply([x]).first
+         end
+
+         def initialize(axis, start)
+           unless axis.is_a?(Integer)
+             raise ArgumentError, 'axis must be an Integer'
+           end
+
+           unless start.is_a?(Integer)
+             raise ArgumentError, 'start must be an Integer'
+           end
+
+           @axis = axis
+           @start = start
+         end
+
+         def forward(inputs)
+           retain_inputs([])
+           @in_ndim = inputs.first.ndim
+
+           [Chainer::Utils::Array.rollaxis(inputs.first, @axis, start: @start)]
+         end
+
+         def backward(indexes, gy)
+           axis = @axis
+           if axis < 0
+             axis += @in_ndim
+           end
+           start = @start
+           if start < 0
+             start += @in_ndim
+           end
+
+           if axis > start
+             axis += 1
+           else
+             start -= 1
+           end
+
+           Rollaxis.new(start, axis).apply(gy)
+         end
+       end
+     end
+   end
+ end
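A usage sketch (illustrative): rolling axis 2 of a rank-3 array to the front, mirroring NumPy's rollaxis:

    require 'chainer'

    x = Chainer::Variable.new(Numo::SFloat.new(2, 3, 4).seq)
    # Move axis 2 before position 0: [2, 3, 4] becomes [4, 2, 3].
    y = Chainer::Functions::Array::Rollaxis.rollaxis(x, 2, start: 0)
    y.shape  # => [4, 2, 3]

The backward pass inverts the roll by swapping the (normalized) start and axis before re-applying Rollaxis.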
data/lib/chainer/functions/array/select_item.rb
@@ -0,0 +1,72 @@
+ module Chainer
+   module Functions
+     module Array
+       # Select elements stored in given indices.
+       class SelectItem < FunctionNode
+         # Select elements stored in given indices.
+         # This function returns $t.choose(x.T)$, i.e.
+         # $y[i] == x[i, t[i]]$ for all $i$.
+         #
+         # @param [Chainer::Variable] x Variable storing arrays.
+         # @param [Chainer::Variable] t Variable storing index numbers.
+         # @return [Chainer::Variable] Variable that holds the $t$-th element of $x$.
+         def self.select_item(x, t)
+           SelectItem.new.apply([x, t]).first
+         end
+
+         def forward(inputs)
+           retain_inputs([1])
+           x, t = inputs
+           @in_shape = x.shape
+           @in_dtype = x.class
+
+           # TODO: x[six.moves.range(t.size), t]
+           new_x = x.class.zeros(t.size)
+           t.size.times do |i|
+             new_x[i] = x[i, t[i]]
+           end
+           x = new_x
+
+           [x]
+         end
+
+         def backward(indexes, gy)
+           t = get_retained_inputs.first
+           ret = []
+           if indexes.include?(0)
+             ggx = Assign.new(@in_shape, @in_dtype, t).apply(gy).first
+             ret << ggx
+           end
+           if indexes.include?(1)
+             ret << nil
+           end
+           ret
+         end
+       end
+
+       class Assign < FunctionNode
+         def initialize(shape, dtype, t)
+           @shape = shape
+           @dtype = dtype
+           @t = t.data
+         end
+
+         def forward(inputs)
+           gx = @dtype.zeros(*@shape)
+
+           # TODO: gx[six.moves.range(self.t.size), self.t] = inputs[0]
+           # binding.pry
+           @t.size.times do |i|
+             gx[i, @t[i]] = inputs[0][i]
+           end
+
+           [gx]
+         end
+
+         def backward(indexes, gy)
+           SelectItem.new.apply([gy[0], @t])
+         end
+       end
+     end
+   end
+ end
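A usage sketch (illustrative; this is how softmax_cross_entropy-style losses pick the score of the labelled class per row):

    require 'chainer'

    x = Chainer::Variable.new(Numo::SFloat[[0.1, 0.8, 0.1],
                                           [0.7, 0.2, 0.1]])
    t = Numo::Int32[1, 0]  # one class index per row
    # y[i] = x[i, t[i]]
    y = Chainer::Functions::Array::SelectItem.select_item(x, t)
    y.data.to_a  # => approximately [0.8, 0.7]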
data/lib/chainer/functions/array/squeeze.rb
@@ -0,0 +1,78 @@
+ module Chainer
+   module Functions
+     module Array
+       class Squeeze < FunctionNode
+         # Removes dimensions of size one from the shape of a Numo::NArray.
+         # @param [Chainer::Variable or Numo::NArray] x Input data.
+         # @param [nil or Integer or Array<Integer>] axis A subset of the single-dimensional entries in the shape to remove.
+         #   If `nil` is supplied, all of them are removed. The dimension index starts at zero.
+         #   If an axis with dimension greater than one is selected, an error is raised.
+         # @return [Chainer::Variable] Variable whose dimensions of size 1 are removed.
+         def self.squeeze(x, axis: nil)
+           self.new(axis: axis).apply([x]).first
+         end
+
+         def initialize(axis: nil)
+           if axis.nil?
+             @axis = nil
+           elsif axis.kind_of?(Integer)
+             @axis = [axis]
+           elsif axis.kind_of?(::Array) && axis.all? { |i| i.kind_of?(Integer) }
+             @axis = axis
+           else
+             raise TypeError, 'axis must be nil, an Integer or an Array of Integers'
+           end
+         end
+
+         def forward(inputs)
+           x = inputs.first
+           shape = x.shape
+
+           # hand-rolled counterpart of numpy.squeeze
+           if @axis.nil?
+             new_shape = shape.reject { |dim| dim == 1 }
+           else
+             new_shape = shape.dup
+             @axis.each do |a|
+               raise StandardError, "cannot select an axis to squeeze out which has size not equal to one" unless shape[a] == 1
+               new_shape[a] = nil
+             end
+             new_shape.compact!
+           end
+           ret = new_shape.size.zero? ? x.class.new.fill(x[0]) : x.reshape(*new_shape)
+
+           [ret]
+         end
+
+         def backward(indexes, grad_outputs)
+           if @axis.nil?
+             axis = argone(@inputs[0].shape)
+           else
+             axis = @axis
+             ndim = @inputs[0].shape.size
+             axis = axis.map { |x| x < 0 ? x + ndim : x }
+             axis.sort!
+           end
+           gx = grad_outputs.first
+
+           shape = gx.shape
+           axis.each do |x|
+             shape.insert(x, 1)
+           end
+           [gx.reshape(*shape)]
+         end
+
+         private
+
+         def argone(iterable)
+           result = []
+           Array(iterable).each_with_index do |x, i|
+             raise StandardError, "elements in iterable must be Integer" unless x.kind_of?(Integer)
+             result << i if x == 1
+           end
+           result
+         end
+       end
+     end
+   end
+ end
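A usage sketch (illustrative):

    require 'chainer'

    x = Chainer::Variable.new(Numo::SFloat.new(1, 3, 1, 2).seq)
    # Drop every size-1 axis ...
    Chainer::Functions::Array::Squeeze.squeeze(x).shape           # => [3, 2]
    # ... or only the requested one; other axes are left untouched.
    Chainer::Functions::Array::Squeeze.squeeze(x, axis: 0).shape  # => [3, 1, 2]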
data/lib/chainer/functions/array/transpose.rb
@@ -0,0 +1,44 @@
+ module Chainer
+   module Functions
+     module Array
+       # Permute the dimensions of an array.
+       class Transpose < FunctionNode
+         # Permute the dimensions of an input variable without copying it.
+         #
+         # @param [Chainer::Variable] x Input variable.
+         # @param [::Array<Integer>] axes By default, reverse the dimensions;
+         #   otherwise permute the axes according to the values given.
+         # @return [Chainer::Variable] Variable whose axes are permuted.
+         def self.transpose(x, axes: nil)
+           Transpose.new(axes: axes).apply([x]).first
+         end
+
+         def initialize(axes: nil)
+           @axes = axes
+         end
+
+         def label
+           'Transpose'
+         end
+
+         def forward(inputs)
+           x = inputs.first
+           [x.transpose(*@axes)]
+         end
+
+         def backward(indexes, grad_outputs)
+           inv_axes = @axes
+           if inv_axes
+             axes_len = inv_axes.size
+
+             axes = inv_axes.map { |ax| ax % axes_len }
+             inv_axes = Numo::NArray[*axes].sort_index.to_a
+           end
+
+           Transpose.new(axes: inv_axes).apply(grad_outputs)
+         end
+       end
+     end
+   end
+ end
+
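A usage sketch (illustrative): with axes: nil the dimensions are reversed; an explicit permutation is also accepted:

    require 'chainer'

    x = Chainer::Variable.new(Numo::SFloat.new(2, 3, 4).seq)
    Chainer::Functions::Array::Transpose.transpose(x).shape                   # => [4, 3, 2]
    Chainer::Functions::Array::Transpose.transpose(x, axes: [1, 0, 2]).shape  # => [3, 2, 4]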
data/lib/chainer/functions/connection/convolution_2d.rb
@@ -1,7 +1,8 @@
  module Chainer
    module Functions
      module Connection
-       class Convolution2DFunction < Chainer::Function
+       class Convolution2DFunction < Chainer::FunctionNode
+         attr_reader :sy, :sx, :ph, :pw, :cover_all
          # Two-dimensional convolution function.
          # This is an implementation of two-dimensional convolution in ConvNets.
          # It takes three variables: the input image `x`, the filter weight `w`, and the bias vector `b`.
@@ -33,9 +34,9 @@ module Chainer
          # w_O &= (w_I + 2w_P - w_K + s_X - 1) / s_X + 1.
          # If the bias vector is given, then it is added to all spatial locations of the output of convolution.
          #
-         # @param [Chainer::Variable or Numo::NArray] x Input variable of shape :math:`(n, c_I, h_I, w_I)`.
-         # @param [Chainer::Variable or Numo::NArray] w Weight variable of shape :math:`(c_O, c_I, h_K, w_K)`.
-         # @param [Chainer::Variable or Numo::NArray] b Bias variable of length :math:`c_O`
+         # @param [Chainer::Variable or Numo::NArray or Cumo::NArray] x Input variable of shape :math:`(n, c_I, h_I, w_I)`.
+         # @param [Chainer::Variable or Numo::NArray or Cumo::NArray] w Weight variable of shape :math:`(c_O, c_I, h_K, w_K)`.
+         # @param [Chainer::Variable or Numo::NArray or Cumo::NArray] b Bias variable of length :math:`c_O`
          # @param [Int or 2-D Array] stride Stride of filter applications. `stride=s` and `stride=(s, s)` are equivalent.
          # @param [Int or 2-D Array] pad Spatial padding width for input arrays.
          # @param [Boolean] cover_all If `true`, all spatial locations are convoluted into some output pixels.
@@ -43,48 +44,64 @@ module Chainer
          def self.convolution_2d(x, w, b: nil, stride: 1, pad: 0, cover_all: false)
            func = self.new(stride: stride, pad: pad, cover_all: cover_all)
            if b.nil?
-             func.(x, w)
+             args = [x, w]
            else
-             func.(x, w, b)
+             args = [x, w, b]
            end
+
+           func.apply(args).first
          end

          def initialize(stride: 1, pad: 0, cover_all: false)
-           @sy, @sx = stride.is_a?(Array) ? stride : [stride, stride]
-           @ph, @pw = pad.is_a?(Array) ? pad : [pad, pad]
+           @sy, @sx = stride.is_a?(::Array) ? stride : [stride, stride]
+           @ph, @pw = pad.is_a?(::Array) ? pad : [pad, pad]
            @cover_all = cover_all
          end

-         def forward_cpu(inputs)
+         def forward(inputs)
+           retain_inputs([0, 1])
            x = inputs[0]
            w = inputs[1]
            b = inputs.size == 3 ? inputs[2] : nil

-           kh, kw = w.shape[2], w.shape[3]
+           unless inputs.all? { |i| i.is_a?(Numo::NArray) }
+             if b.nil?
+               raise TypeError, "Numo::NArray and Cumo::NArray must not be used together -- w: #{w.class}, x: #{x.class}"
+             else
+               raise TypeError, "Numo::NArray and Cumo::NArray must not be used together -- w: #{w.class}, x: #{x.class}, b: #{b.class}"
+             end
+           end

-           @col = Chainer::Utils::Conv.im2col_cpu(x, kh, kw, @sy, @sx, @ph, @pw, cover_all: @cover_all)
-           y = Chainer::Utils::Math.tensordot(@col, w, [[1, 2, 3], [1, 2, 3]])
+           kh, kw = w.shape[2..-1]
+           col = Chainer::Utils::Conv.im2col(x, kh, kw, @sy, @sx, @ph, @pw, cover_all: @cover_all)
+           y = Chainer::Utils::Math.tensordot(col, w, [[1, 2, 3], [1, 2, 3]]).cast_to(x.class)
            y += b if b
-
+
            [y.transpose(0, 3, 1, 2)]
          end

-         def backward_cpu(inputs, grad_outputs)
-           x, w, b = inputs[0], inputs[1], inputs[2]
-           gy = grad_outputs[0]
-           height, width = x.shape[2..-1]
+         def backward(indexes, grad_outputs)
+           x, w = get_retained_inputs
+           gy = grad_outputs.first

-           gw = Chainer::Utils::Math.tensordot(gy, @col, [[0, 2, 3], [0, 4, 5]])
-           gcol = Chainer::Utils::Math.tensordot(w, gy, [0, 1])
-           gcol = gcol.transpose(3, 0, 1, 2)
-           gx = Chainer::Utils::Conv.col2im_cpu(gcol, @sy, @sx, @ph, @pw, height, width)
+           ret = []
+           if indexes.include?(0)
+             xh, xw = x.shape[2..-1]
+             gx = Deconvolution2DFunction.deconvolution_2d(gy, w, stride: [@sy, @sx], pad: [@ph, @pw], outsize: [xh, xw])
+             ret << gx
+           end

-           if b.nil?
-             [gx, gw]
-           else
-             gb = gy.sum(axis: [0, 2, 3])
-             [gx, gw, gb]
+           if indexes.include?(1)
+             gw = Chainer::Functions::Connection::Convolution2DGradW.new(self).apply([x, gy]).first
+             ret << gw
+           end
+
+           if indexes.include?(2)
+             gb = Chainer::Functions::Math::Sum.sum(gy, axis: [0, 2, 3])
+             ret << gb
            end
+
+           ret
          end
        end
      end
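The public entry point is unchanged by this refactor; only the internals moved from the Function call protocol to FunctionNode#apply. A minimal sketch (illustrative shapes, assuming Numo):

    require 'chainer'

    x = Chainer::Variable.new(Numo::SFloat.new(1, 3, 7, 7).rand)
    w = Numo::SFloat.new(2, 3, 3, 3).rand  # (c_O, c_I, h_K, w_K)
    y = Chainer::Functions::Connection::Convolution2DFunction.convolution_2d(x, w)
    # With stride 1 and no padding: h_O = (7 + 0 - 3) / 1 + 1 = 5
    y.shape  # => [1, 2, 5, 5]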
data/lib/chainer/functions/connection/convolution_2d_grad_w.rb
@@ -0,0 +1,48 @@
+ module Chainer
+   module Functions
+     module Connection
+       class Convolution2DGradW < Chainer::FunctionNode
+         def initialize(conv2d)
+           w_node = conv2d.inputs[1]
+
+           @kh, @kw = w_node.shape[2..-1]
+           @sy = conv2d.sy
+           @sx = conv2d.sx
+           @ph = conv2d.ph
+           @pw = conv2d.pw
+           @cover_all = conv2d.cover_all
+           @w_dtype = w_node.dtype
+         end
+
+         def forward(inputs)
+           retain_inputs([0, 1])
+           x, gy = inputs
+           col = Chainer::Utils::Conv.im2col(x, @kh, @kw, @sy, @sx, @ph, @pw, cover_all: @cover_all)
+
+           gw = Chainer::Utils::Math.tensordot(gy, col, [[0, 2, 3], [0, 4, 5]]).cast_to(@w_dtype)
+           [gw]
+         end
+
+         def backward(indexes, grad_outputs)
+           x, gy = get_retained_inputs
+           ggw = grad_outputs.first
+
+           ret = []
+           if indexes.include?(0)
+             xh, xw = x.shape[2..-1]
+             gx = Deconvolution2DFunction.deconvolution_2d(gy, ggw, stride: [@sy, @sx], pad: [@ph, @pw], outsize: [xh, xw])
+             ret << gx
+           end
+
+           if indexes.include?(1)
+             ggy = Chainer::Functions::Connection::Convolution2DFunction.convolution_2d(x, ggw, stride: [@sy, @sx], pad: [@ph, @pw], cover_all: @cover_all)
+             ret << ggy
+           end
+
+           ret
+         end
+       end
+     end
+   end
+ end
+
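Convolution2DGradW is not meant to be called directly: Convolution2DFunction#backward instantiates it so that the weight gradient is itself the output of a FunctionNode. Since this node has its own backward (built from deconvolution_2d and convolution_2d), gradients can flow through the gradient computation itself, enabling double backpropagation through convolutions.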