red-chainer 0.3.2 → 0.4.0

Files changed (81)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -2
  3. data/.travis.yml +8 -3
  4. data/.yardopts +1 -0
  5. data/Gemfile +6 -1
  6. data/README.md +34 -3
  7. data/examples/cifar/train_cifar.rb +13 -2
  8. data/examples/iris/iris.rb +9 -5
  9. data/examples/mnist/mnist.rb +16 -4
  10. data/lib/chainer.rb +17 -1
  11. data/lib/chainer/backend.rb +27 -0
  12. data/lib/chainer/cuda.rb +37 -15
  13. data/lib/chainer/dataset/convert.rb +20 -16
  14. data/lib/chainer/datasets/cifar.rb +8 -6
  15. data/lib/chainer/datasets/mnist.rb +14 -55
  16. data/lib/chainer/device.rb +88 -0
  17. data/lib/chainer/function.rb +103 -41
  18. data/lib/chainer/function_node.rb +454 -0
  19. data/lib/chainer/functions/activation/leaky_relu.rb +38 -13
  20. data/lib/chainer/functions/activation/log_softmax.rb +46 -9
  21. data/lib/chainer/functions/activation/relu.rb +8 -8
  22. data/lib/chainer/functions/activation/relu_grad2.rb +34 -0
  23. data/lib/chainer/functions/activation/sigmoid.rb +13 -11
  24. data/lib/chainer/functions/activation/sigmoid_grad.rb +25 -0
  25. data/lib/chainer/functions/activation/tanh.rb +48 -11
  26. data/lib/chainer/functions/array/broadcast_to.rb +56 -0
  27. data/lib/chainer/functions/array/cast.rb +41 -0
  28. data/lib/chainer/functions/array/reshape.rb +28 -0
  29. data/lib/chainer/functions/array/rollaxis.rb +57 -0
  30. data/lib/chainer/functions/array/select_item.rb +72 -0
  31. data/lib/chainer/functions/array/squeeze.rb +78 -0
  32. data/lib/chainer/functions/array/transpose.rb +44 -0
  33. data/lib/chainer/functions/connection/convolution_2d.rb +43 -26
  34. data/lib/chainer/functions/connection/convolution_2d_grad_w.rb +48 -0
  35. data/lib/chainer/functions/connection/deconvolution_2d.rb +159 -0
  36. data/lib/chainer/functions/connection/linear.rb +29 -22
  37. data/lib/chainer/functions/evaluation/accuracy.rb +5 -5
  38. data/lib/chainer/functions/loss/mean_squared_error.rb +21 -12
  39. data/lib/chainer/functions/loss/softmax_cross_entropy.rb +98 -71
  40. data/lib/chainer/functions/math/basic_math.rb +36 -30
  41. data/lib/chainer/functions/math/exp.rb +28 -0
  42. data/lib/chainer/functions/math/identity.rb +4 -3
  43. data/lib/chainer/functions/math/sum.rb +52 -0
  44. data/lib/chainer/functions/noise/dropout.rb +20 -4
  45. data/lib/chainer/functions/normalization/batch_normalization.rb +257 -104
  46. data/lib/chainer/functions/pooling/average_pooling_2d.rb +29 -6
  47. data/lib/chainer/functions/pooling/max_pooling_2d.rb +67 -12
  48. data/lib/chainer/functions/pooling/pooling_2d.rb +6 -4
  49. data/lib/chainer/gradient_check.rb +157 -73
  50. data/lib/chainer/gradient_method.rb +3 -2
  51. data/lib/chainer/initializers/init.rb +5 -5
  52. data/lib/chainer/initializers/normal.rb +4 -2
  53. data/lib/chainer/initializers/uniform.rb +15 -0
  54. data/lib/chainer/iterators/serial_iterator.rb +5 -3
  55. data/lib/chainer/link.rb +4 -2
  56. data/lib/chainer/links/connection/convolution_2d.rb +2 -2
  57. data/lib/chainer/links/model/classifier.rb +24 -5
  58. data/lib/chainer/links/normalization/batch_normalization.rb +7 -10
  59. data/lib/chainer/optimizer.rb +42 -11
  60. data/lib/chainer/optimizers/adam.rb +3 -2
  61. data/lib/chainer/optimizers/momentum_sgd.rb +1 -1
  62. data/lib/chainer/parameter.rb +7 -6
  63. data/lib/chainer/serializer.rb +4 -4
  64. data/lib/chainer/serializers/marshal.rb +10 -8
  65. data/lib/chainer/testing/array.rb +1 -1
  66. data/lib/chainer/training/extensions/evaluator.rb +2 -3
  67. data/lib/chainer/training/extensions/exponential_shift.rb +1 -1
  68. data/lib/chainer/training/extensions/progress_bar.rb +1 -0
  69. data/lib/chainer/training/trainer.rb +4 -9
  70. data/lib/chainer/training/triggers/interval.rb +7 -2
  71. data/lib/chainer/utils/array.rb +80 -1
  72. data/lib/chainer/utils/conv.rb +10 -2
  73. data/lib/chainer/utils/initializer.rb +2 -2
  74. data/lib/chainer/variable.rb +159 -69
  75. data/lib/chainer/variable_node.rb +64 -10
  76. data/lib/chainer/version.rb +1 -1
  77. data/red-chainer.gemspec +4 -3
  78. data/templates/default/layout/html/layout.erb +40 -0
  79. data/templates/default/onefile/html/layout.erb +33 -0
  80. metadata +44 -11
  81. data/lib/chainer/dataset/download.rb +0 -56
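
The headline change in this release is the new data/lib/chainer/function_node.rb (together with backend.rb and device.rb): the activation, array, and math functions in the hunks below are ported from Chainer::Function to Chainer::FunctionNode, which is invoked through apply([...]) and whose backward receives and returns Chainer::Variable objects, so gradient computations themselves stay on the graph. A minimal sketch of that pattern, using a hypothetical Square function that is not part of the gem (the calls it relies on — apply, retain_inputs, get_retained_inputs, force_array — all appear in the diffs below):

    # Sketch only: the FunctionNode pattern followed by the hunks below.
    # `Square` is a made-up example class, not part of red-chainer.
    class Square < Chainer::FunctionNode
      def forward(inputs)
        x, = inputs                   # raw Numo (or Cumo) arrays
        retain_inputs([0])            # keep x for the backward pass
        [Chainer::Utils::Array.force_array(x * x)]
      end

      def backward(indexes, grad_outputs)
        x = get_retained_inputs.first # a Chainer::Variable, so this stays differentiable
        gy, = grad_outputs
        [x * gy * 2]
      end

      def self.square(x)
        new.apply([x]).first          # apply([...]) replaces the old Function#call
      end
    end

    y = Square.square(Chainer::Variable.new(Numo::SFloat[1, 2, 3]))

The user-facing helpers (F.relu, F.sigmoid, and so on) keep their old signatures; only the internal calling convention changes.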
data/lib/chainer/functions/activation/leaky_relu.rb
@@ -2,7 +2,7 @@ module Chainer
   module Functions
     module Activation
       # Leaky rectifier unit.
-      class LeakyReLU < Function
+      class LeakyReLU < FunctionNode
         # Leaky Rectified Linear Unit function.
         #
         # This function is expressed as
@@ -13,7 +13,7 @@ module Chainer
         #
         # where $a$ is a configurable slope value.
         #
-        # @param [Chainer::Variable or Numo::NArray] x Input variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
+        # @param [Chainer::Variable or Numo::NArray or Cumo::NArray] x Input variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
         # @param [float] slope Slope value $a$.
         # @return [Chainer::Variable] Output variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
         # @example
@@ -31,32 +31,57 @@ module Chainer
         # [-0.4, 1]]
         #
         def self.leaky_relu(x, slope: 0.2)
-          self.new(slope: slope).(x)
+          self.new(slope: slope).apply([x])[0]
         end

         def initialize(slope:0.2)
           @slope = slope
         end

-        def forward_cpu(x)
-          y = x[0].dup()
-          y[x[0] < 0] *= @slope
+        def forward(inputs)
+          x, = inputs
+          y = x.dup
+          y[x < 0] *= @slope
           if @slope >= 0
-            retain_inputs([])
             retain_outputs([0])
+          else
+            retain_inputs([0])
           end
           [y]
         end

-        def backward_cpu(x, gy)
-          gx = gy[0].dup()
+        def backward(indexes, grad_outputs)
+          if @slope >= 0
+            x = nil
+            y = get_retained_outputs.first.data
+          else
+            x = get_retained_inputs.first.data
+            y = nil
+          end
+          LeakyReLUGrad.new(x, y, @slope).apply(grad_outputs)
+        end
+      end
+
+      class LeakyReLUGrad < FunctionNode
+        def initialize(x, y, slope)
+          @x = x
+          @y = y
+          @slope = slope
+        end
+
+        def forward(inputs)
+          gy, = inputs
+          gy = gy.dup
           if @slope >= 0
-            y = @output_data
-            gx[y[0] < 0] *= @slope
+            gy[@y < 0] *= @slope
           else
-            gx[x[0] < 0] *= @slope
+            gy[@x < 0] *= @slope
           end
-          [gx]
+          [gy]
+        end
+
+        def backward(indexes, grad_outputs)
+          LeakyReLUGrad.new(@x, @y, @slope).apply(grad_outputs)
         end
       end
     end
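
What LeakyReLUGrad computes on the slope >= 0 branch can be reproduced with plain Numo (illustrative values only): the incoming gradient passes through unchanged where the output was non-negative and is scaled by the slope where it was negative.

    slope = 0.2
    y  = Numo::SFloat[[-0.2, 0.0], [2.0, -0.6]]   # a leaky_relu output
    gy = Numo::SFloat.ones(2, 2)                  # incoming gradient
    gx = gy.dup
    gx[y < 0] *= slope
    # gx => [[0.2, 1.0], [1.0, 0.2]]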
data/lib/chainer/functions/activation/log_softmax.rb
@@ -2,11 +2,12 @@ module Chainer
   module Functions
     module Activation
       def self.logsumexp(x)
+        xm = Chainer.get_array_module(x)
         m = x.max(axis: 1, keepdims: true)
         y = x - m
-        y = Numo::NMath.exp(y)
+        y = xm::NMath.exp(y)
         s = y.sum(axis: 1, keepdims: true)
-        s = Numo::NMath.log(s)
+        s = xm::NMath.log(s)
         m + s
       end

@@ -16,7 +17,7 @@ module Chainer
      end

      # Log-softmax activation function.
-      class LogSoftmax < Function
+      class LogSoftmax < FunctionNode
        # Channel-wise log-softmax function.
        #
        # This function computes its logarithm of softmax along the second axis.
@@ -36,7 +37,7 @@ module Chainer
        # because +softmax(x)+ may returns +0+.
        # +log_softmax+ method is more stable.
        #
-        # @param [Chainer::Variable or Numo::NArray] x Input variable. A $n$-dimensional ($n \\geq 2$) float array.
+        # @param [Chainer::Variable or Numo::NArray or Cumo::NArray] x Input variable. A $n$-dimensional ($n \\geq 2$) float array.
        # @return [Chainer::Variable] Output variable. A $n$-dimensional ($n \\geq 2$) float array, which is the same shape with x.
        #
        # @see Chainer::Functions::Softmax
@@ -56,23 +57,59 @@ module Chainer
        # => true
        #
        def self.log_softmax(x)
-          self.new.(x)
+          self.new.apply([x]).first
        end

        def forward(xs)
          y = Chainer::Functions::Activation._log_softmax(xs[0])
          @x_shape = xs[0].shape
          @x_dtype = xs[0].class
-          retain_inputs([])
          retain_outputs([0])
          [y]
        end

-        def backward(x, gy)
-          y = @output_data[0]
-          gx = gy[0] - Numo::NMath.exp(y) * gy[0].sum(axis: 1, keepdims: true)
+        def backward(indexes, gy)
+          y = get_retained_outputs.first
+          LogSoftmaxGrad.new(@x_shape, @x_dtype).apply([y, gy[0]])
+        end
+      end
+
+      class LogSoftmaxGrad < FunctionNode
+        def initialize(x_shape, x_dtype)
+          @x_shape = x_shape
+          @x_dtype = x_dtype
+        end
+
+        def forward(inputs)
+          retain_inputs([0, 1])
+          y, gy = inputs
+
+          xm = Chainer.get_array_module(y)
+          gx = gy - xm::NMath.exp(y) * gy.sum(axis: 1, keepdims: true)
          [gx]
        end
+
+        def backward(indexes, ggx)
+          y, gy = get_retained_inputs
+          ret = []
+          exp_y = Chainer::Functions::Math::Exp.exp(y)
+
+          if indexes.include?(0)
+            gy_sum = Chainer::Functions::Math::Sum.sum(gy, axis: 1, keepdims: true)
+            gy_sum = Chainer::Functions::Array::BroadcastTo.broadcast_to(gy_sum, gy.shape)
+
+            g0 = -ggx.first * exp_y * gy_sum
+            ret << g0
+          end
+          if indexes.include?(1)
+            a = Chainer::Functions::Math::Sum.sum(ggx.first * exp_y, axis: 1, keepdims: true)
+            a = Chainer::Functions::Array::BroadcastTo.broadcast_to(a, gy.shape)
+            g1 = ggx.first - a
+            ret << g1
+          end
+
+          ret
+        end
      end
    end
  end
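
The logsumexp helper above subtracts the row-wise maximum before exponentiating, the standard guard against overflow; log_softmax is then simply x minus that quantity. A standalone Numo illustration (sketch only, not gem code):

    x = Numo::SFloat[[1000.0, 1001.0, 1002.0]]
    m = x.max(axis: 1, keepdims: true)
    s = Numo::NMath.log(Numo::NMath.exp(x - m).sum(axis: 1, keepdims: true))
    log_softmax = x - (m + s)
    # => roughly [[-2.408, -1.408, -0.408]]; a naive exp(1000) would overflow to Infinity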
data/lib/chainer/functions/activation/relu.rb
@@ -2,14 +2,14 @@ module Chainer
   module Functions
     module Activation
       # Rectified Linear Unit.
-      class Relu < Function
+      class Relu < FunctionNode
         # Rectified Linear Unit function.
         #
         # $$
         # f(x)=\\max(0, x).
         # $$
         #
-        # @param [Chainer::Variable or Numo::NArray] x Input variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
+        # @param [Chainer::Variable or Numo::NArray or Cumo::NArray] x Input variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
         # @return [Chainer::Variable] Output variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
         # @example
         # > x = Numo::SFloat[[-1, 0], [2, -3], [-2, 1]]
@@ -23,18 +23,18 @@ module Chainer
         # => [3, 2]
         #
         def self.relu(x)
-          self.new.(x)
+          y, = self.new.apply([x])
+          y
         end

-        def forward_cpu(x)
-          retain_inputs([])
+        def forward(x)
           retain_outputs([0])
           [Utils::Array.force_array(x[0].class.maximum(x[0], 0))]
         end

-        def backward_cpu(x, gy)
-          y = @output_data[0]
-          [Utils::Array.force_array(gy[0] * (y > 0))]
+        def backward(indexes, gy)
+          y = get_retained_outputs.first
+          ReLUGrad2.new(y).apply([gy[0]])
         end
       end
     end
data/lib/chainer/functions/activation/relu_grad2.rb (new file)
@@ -0,0 +1,34 @@
+module Chainer
+  module Functions
+    module Activation
+      # Computes the gradient of the ReLU function.
+      #
+      # This function takes 2 variables b and c, and
+      # computes f(b, c) = sign(b) * c with backpropagation
+      # where operations are done in elementwise manner
+      # and sign(x) = 1 when x > 0 and 0 otherwise.
+      # As the gradient of f with respect to b is 0,
+      # we do not backpropagate errors toward b for computational efficiency.
+      class ReLUGrad2 < FunctionNode
+        def initialize(b)
+          @b = b.data
+        end
+
+        def forward(inputs)
+          y = inputs[0] * (@b > 0)
+          [Utils::Array.force_array(y, y.class)]
+        end
+
+        def backward(indexes, gy)
+          [gy[0] * heaviside(@b)]
+        end
+
+        private
+
+        def heaviside(x)
+          (x > 0).cast_to(x.class)
+        end
+      end
+    end
+  end
+end
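
In plain Numo terms, ReLUGrad2 masks the incoming gradient with the sign of the forward output b (illustrative values only):

    b  = Numo::SFloat[-1.0, 0.0, 2.0]   # ReLU forward output
    gy = Numo::SFloat[0.5, 0.5, 0.5]    # incoming gradient
    gx = gy * (b > 0)
    # gx => [0.0, 0.0, 0.5]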
data/lib/chainer/functions/activation/sigmoid.rb
@@ -2,14 +2,14 @@ module Chainer
   module Functions
     module Activation
       # Logistic sigmoid function.
-      class Sigmoid < Function
+      class Sigmoid < FunctionNode
         # Element-wise sigmoid logistic function.
         #
         # $$
         # f(x)=(1 + \\exp(-x))^ { -1 }.
         # $$
         #
-        # @param [Chainer::Variable or Numo::NArray] x Input variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
+        # @param [Chainer::Variable or Numo::NArray or Cumo::NArray] x Input variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
         # @return [Chainer::Variable] Output variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
         # @example It maps the input values into the range of $`[0, 1]`$.
         # > x = Numo::SFloat.new(3).seq(-2, 2)
@@ -21,21 +21,23 @@ module Chainer
         # [0.119203, 0.5, 0.880797]
         #
         def self.sigmoid(x)
-          self.new.(x)
+          self.new.apply([x]).first
         end

-        def forward_cpu(x)
+        def forward(inputs)
+          x, = inputs
           half = 0.5
-          y = Utils::Array.force_array((Numo::NMath.tanh(x[0] * half) * half)+ half)
-          retain_inputs([])
+          xm = Chainer.get_array_module(x)
+          y = Utils::Array.force_array((xm::NMath.tanh(x * half) * half)+ half)
           retain_outputs([0])
-          return [y]
+          [y]
         end

-        def backward_cpu(x, gy)
-          one = 1
-          y = @output_data[0]
-          [Utils::Array.force_array((gy[0] * y) * (one - y))]
+        def backward(indexes, grad_outputs)
+          x = nil
+          y = get_retained_outputs.first
+          gy, = grad_outputs
+          Chainer::Functions::Activation::SigmoidGrad.new([x]).apply([y, gy])
         end
       end
     end
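
The forward pass above uses the identity sigmoid(x) = 0.5 * tanh(0.5 * x) + 0.5 rather than evaluating 1 / (1 + exp(-x)) directly, which behaves better for large-magnitude inputs. A quick Numo check (sketch only):

    x = Numo::SFloat.new(5).seq(-2, 1)                  # [-2, -1, 0, 1, 2]
    via_tanh = Numo::NMath.tanh(x * 0.5) * 0.5 + 0.5
    direct   = (Numo::NMath.exp(-x) + 1)**-1
    (via_tanh - direct).abs.max                         # ~1e-7, equal up to float error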
data/lib/chainer/functions/activation/sigmoid_grad.rb (new file)
@@ -0,0 +1,25 @@
+module Chainer
+  module Functions
+    module Activation
+      # Logistic sigmoid gradient function.
+      class SigmoidGrad < FunctionNode
+        def initialize(inputs)
+          @x, = inputs
+        end
+
+        def forward(inputs)
+          retain_inputs([0, 1])
+          y, gy = inputs
+          one = 1
+          [Utils::Array.force_array(gy * y * (one - y))]
+        end
+
+        def backward(indexes, grad_outputs)
+          y, gy = get_retained_inputs
+          g, = grad_outputs
+          [g * gy * (1 - 2 * y), g * y * (1 - y)]
+        end
+      end
+    end
+  end
+end
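
The pair returned by SigmoidGrad#backward follows directly from differentiating its forward formula f(y, gy) = gy * y * (1 - y): with incoming gradient g, the derivative with respect to y is g * gy * (1 - 2 * y) and the derivative with respect to gy is g * y * (1 - y), which is exactly the array above.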
data/lib/chainer/functions/activation/tanh.rb
@@ -2,14 +2,14 @@ module Chainer
   module Functions
     module Activation
       # Hyperbolic tangent function.
-      class Tanh < Function
+      class Tanh < FunctionNode
         # Elementwise hyperbolic tangent function.
         #
         # $$
         # f(x)=\\tanh(x).
         # $$
         #
-        # @param [Chainer::Variable or Numo::NArray] x Input variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
+        # @param [Chainer::Variable or Numo::NArray or Cumo::NArray] x Input variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
         # @return [Chainer::Variable] Output variable. A $(s_1, s_2, ..., s_N)$-shaped float array.
         # @example
         # > x = Numo::SFloat.new(3).seq(-1, 2)
@@ -21,20 +21,57 @@ module Chainer
         # [-0.761594, 0.761594, 0.995055]
         #
         def self.tanh(x)
-          self.new.(x)
+          self.new.apply([x]).first
         end

-        def forward_cpu(x)
-          y = Utils::Array.force_array(Numo::NMath.tanh(x[0]))
-          retain_inputs([])
+        def forward(x)
+          xm = Chainer.get_array_module(x[0])
+          y = Utils::Array.force_array(xm::NMath.tanh(x[0]))
           retain_outputs([0])
-          return [y]
+          @use_cudnn = false
+          [y]
         end

-        def backward_cpu(x, gy)
-          y = @output_data[0]
-          one = y.class.cast(1)
-          [Utils::Array.force_array(gy[0] * (one - y * y))]
+        def backward(indexes, grad_outputs)
+          if @use_cudnn
+            x = get_retained_inputs.first.data
+          else
+            x = nil
+          end
+
+          y = get_retained_outputs.first
+          gy = grad_outputs.first
+          TanhGrad.new(x).apply([y, gy])
+        end
+      end
+
+      class TanhGrad < FunctionNode
+        def initialize(x)
+          super()
+
+          # The original input `x` is only required for cuDNN.
+          # If it is None, this class does not use cuDNN.
+          # Note that x must be c-contiguous and it is checked
+          # in Tanh.forward_gpu.
+          @x = x
+        end
+
+        def forward(inputs)
+          retain_inputs([0, 1])
+          y, gy = inputs
+
+          one = y.class.new.fill(1)
+          [Utils::Array.force_array(gy * (one - y * y))]
+        end
+
+        def backward(indexes, grad_outputs)
+          y, gy = get_retained_inputs
+          g = grad_outputs[0]
+
+          y_mul_g = y * g
+          grad_y = -2 * gy * y_mul_g
+          ggy = g - y * y_mul_g
+          [grad_y, ggy]
         end
       end
     end
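
TanhGrad#backward is the analogous second-order rule for f(y, gy) = gy * (1 - y * y): the derivative with respect to y is -2 * gy * y and with respect to gy is 1 - y * y, so with incoming gradient g the returned pair is [-2 * gy * (y * g), g - y * (y * g)], which is what the y_mul_g lines compute.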
data/lib/chainer/functions/array/broadcast_to.rb (new file)
@@ -0,0 +1,56 @@
+module Chainer
+  module Functions
+    module Array
+      # Function that broadcasts an array to a new shape.
+      class BroadcastTo < FunctionNode
+        def initialize(shape)
+          @shape = shape
+        end
+
+        def self.broadcast_to(x, shape)
+          return Chainer::Variable.as_variable(x) if x.shape == shape
+          self.new(shape).apply([x]).first
+        end
+
+        def forward(inputs)
+          x = inputs.first
+          [Chainer::Utils::Array.broadcast_to(x, @shape)]
+        end
+
+        def backward(indexes, grad_outputs)
+          gx = grad_outputs.first
+          shape = @inputs.first.shape
+          ndim = shape.size
+          lead = gx.ndim - ndim
+          lead_axis = lead.times.to_a
+          axis = shape.each_with_object([]).with_index do |(sx, res), i|
+            next unless sx == 1
+            res << i + lead
+          end
+          gx = Chainer::Functions::Math::Sum.sum(gx, axis: lead_axis + axis, keepdims: true)
+          return [Chainer::Functions::Array::Squeeze.squeeze(gx, axis: lead_axis)] if lead > 0
+          [gx]
+        end
+
+        private
+
+        def backward_one(shape, dtype, g)
+          return dtype.zeros(shape) unless g
+
+          ndim = shape.size
+          if g.ndim != ndim
+            g = g.sum(axis: 0...(g.ndim - ndim))
+          end
+
+          axis = shape.each_with_index.select{|sx, i| sx == 1 }.map{|sx, i| i }
+          if axis.size > 0
+            g.sum(keepdims: true, axis: axis)
+          else
+            g
+          end
+        end
+      end
+    end
+  end
+end
+
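
The backward pass of BroadcastTo reduces the upstream gradient back to the input shape by summing over every broadcast axis. A minimal sketch (assuming the target shape is passed as a Ruby Array, as the shape comparison in broadcast_to suggests):

    x = Chainer::Variable.new(Numo::SFloat[1, 2, 3])                    # shape [3]
    y = Chainer::Functions::Array::BroadcastTo.broadcast_to(x, [2, 3])  # shape [2, 3]

    # Conceptually, the gradient that flows back to x is the upstream
    # gradient summed over the broadcast axis:
    gy = Numo::SFloat.ones(2, 3)
    gx = gy.sum(axis: 0)   # => [2.0, 2.0, 2.0], back to shape [3]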