red-chainer 0.2.1 → 0.3.0

Files changed (52)
  1. checksums.yaml +4 -4
  2. data/README.md +2 -2
  3. data/examples/cifar/models/vgg.rb +84 -0
  4. data/examples/cifar/train_cifar.rb +70 -0
  5. data/examples/iris.rb +103 -0
  6. data/lib/chainer.rb +17 -0
  7. data/lib/chainer/configuration.rb +2 -1
  8. data/lib/chainer/cuda.rb +18 -0
  9. data/lib/chainer/dataset/convert.rb +30 -9
  10. data/lib/chainer/datasets/cifar.rb +56 -0
  11. data/lib/chainer/datasets/mnist.rb +3 -3
  12. data/lib/chainer/datasets/tuple_dataset.rb +3 -1
  13. data/lib/chainer/function.rb +1 -0
  14. data/lib/chainer/functions/activation/leaky_relu.rb +4 -4
  15. data/lib/chainer/functions/activation/log_softmax.rb +4 -4
  16. data/lib/chainer/functions/activation/relu.rb +3 -4
  17. data/lib/chainer/functions/activation/sigmoid.rb +4 -4
  18. data/lib/chainer/functions/activation/tanh.rb +5 -5
  19. data/lib/chainer/functions/connection/convolution_2d.rb +92 -0
  20. data/lib/chainer/functions/connection/linear.rb +1 -1
  21. data/lib/chainer/functions/loss/mean_squared_error.rb +34 -0
  22. data/lib/chainer/functions/loss/softmax_cross_entropy.rb +67 -40
  23. data/lib/chainer/functions/math/identity.rb +26 -0
  24. data/lib/chainer/functions/noise/dropout.rb +45 -0
  25. data/lib/chainer/functions/normalization/batch_normalization.rb +136 -0
  26. data/lib/chainer/functions/pooling/max_pooling_2d.rb +57 -0
  27. data/lib/chainer/functions/pooling/pooling_2d.rb +20 -0
  28. data/lib/chainer/gradient_check.rb +240 -0
  29. data/lib/chainer/initializer.rb +2 -0
  30. data/lib/chainer/initializers/constant.rb +1 -1
  31. data/lib/chainer/initializers/init.rb +5 -1
  32. data/lib/chainer/initializers/normal.rb +1 -1
  33. data/lib/chainer/iterators/serial_iterator.rb +1 -1
  34. data/lib/chainer/link.rb +11 -0
  35. data/lib/chainer/links/connection/convolution_2d.rb +98 -0
  36. data/lib/chainer/links/normalization/batch_normalization.rb +106 -0
  37. data/lib/chainer/optimizer.rb +40 -1
  38. data/lib/chainer/optimizers/momentum_sgd.rb +49 -0
  39. data/lib/chainer/parameter.rb +1 -1
  40. data/lib/chainer/serializers/marshal.rb +7 -3
  41. data/lib/chainer/testing/array.rb +32 -0
  42. data/lib/chainer/training/extensions/exponential_shift.rb +78 -0
  43. data/lib/chainer/training/extensions/snapshot.rb +1 -1
  44. data/lib/chainer/training/standard_updater.rb +4 -0
  45. data/lib/chainer/training/trainer.rb +1 -1
  46. data/lib/chainer/utils/array.rb +13 -2
  47. data/lib/chainer/utils/conv.rb +59 -0
  48. data/lib/chainer/utils/math.rb +72 -0
  49. data/lib/chainer/utils/variable.rb +7 -3
  50. data/lib/chainer/version.rb +1 -1
  51. data/red-chainer.gemspec +1 -0
  52. metadata +37 -3
data/lib/chainer/initializer.rb CHANGED
@@ -1,5 +1,7 @@
 module Chainer
   class Initializer
+    attr_accessor :dtype
+
     def initialize(dtype: nil)
       @dtype = dtype
     end
data/lib/chainer/initializers/constant.rb CHANGED
@@ -8,7 +8,7 @@ module Chainer
 
       def call(array)
        if @dtype
-          raise ArgumentError unless array.dtype == @dtype
+          raise ArgumentError unless array.class == @dtype
        end
        array.store(@fill_value)
        array
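For context (not part of the diff): in Numo the array class itself plays the role of the element dtype, which is why the check now compares `array.class`. A tiny illustration in plain Ruby:

  require 'numo/narray'

  a = Numo::SFloat.new(3).seq
  a.class                  # => Numo::SFloat
  a.class == Numo::SFloat  # => true, so the updated check accepts a matching array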
data/lib/chainer/initializers/init.rb CHANGED
@@ -1,7 +1,11 @@
 module Chainer
   module Initializers
     def self.generate_array(initializer, shape)
-      array = Numo::DFloat.new(shape).rand
+      klass = Numo::SFloat
+      if initializer.respond_to?(:dtype) && initializer.dtype
+        klass = initializer.dtype
+      end
+      array = klass.new(shape).rand
      initializer.(array)
    end
 
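A minimal sketch of the new behaviour: `generate_array` now defaults to single precision and only honours an initializer's `dtype` when one is set. The `FakeInit` double below is hypothetical, used only to satisfy the interface the method expects (`#dtype` and `#call`):

  require 'numo/narray'
  require 'chainer'

  # Hypothetical initializer double: a dtype reader plus a #call that fills and returns the array.
  FakeInit = Struct.new(:dtype) do
    def call(array)
      array.store(0)
      array
    end
  end

  Chainer::Initializers.generate_array(FakeInit.new(Numo::DFloat), [2, 3]).class # => Numo::DFloat
  Chainer::Initializers.generate_array(FakeInit.new(nil), [2, 3]).class          # => Numo::SFloat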
data/lib/chainer/initializers/normal.rb CHANGED
@@ -8,7 +8,7 @@ module Chainer
 
       def call(array)
        args = { loc: 0.0, scale: @scale, size: array.shape}
-        Numo::DFloat.new(array.shape).rand_norm(0.0, @scale)
+        array.class.new(array.shape).rand_norm(0.0, @scale)
      end
    end
 
data/lib/chainer/iterators/serial_iterator.rb CHANGED
@@ -18,8 +18,8 @@ module Chainer
       @previous_epoch_detail = epoch_detail
 
       i = @current_position
-      i_end = i + @batch_size
       n = @dataset.size
+      i_end = [i + @batch_size, n].min
 
       batch = @order[i...i_end].to_a.map { |index| @dataset[index] }
 
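The change above clamps the end index of the final batch to the dataset size. A minimal sketch of the effect, with hypothetical sizes:

  # With 100 examples and a batch size of 32, the final batch starts at i = 96:
  i, batch_size, n = 96, 32, 100
  i_end = [i + batch_size, n].min  # => 100 (instead of 128, which would run past the end)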
data/lib/chainer/link.rb CHANGED
@@ -39,6 +39,17 @@ module Chainer
       end
     end
 
+    # Registers an attribute of a given name as a persistent value.
+    # This is a convenient method to register an existing attribute as a persistent value.
+    # If `name` has already been registered as a parameter,
+    # this method removes it from the list of parameter names and re-registers it as a persistent value.
+    #
+    # @param [string] name Name of the attribute to be registered.
+    def register_persistent(name)
+      @persistent << name
+      @params.delete(name)
+    end
+
     def params(include_uninit: true)
       @params.map do |name|
         data = self.instance_variable_get(name).data
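A usage sketch of the new `register_persistent` API (the `RunningCount` link is hypothetical; the new BatchNormalization link further down uses the same pattern for `avg_mean` and `avg_var`):

  require 'chainer'

  # Hypothetical Link subclass keeping a counter that is serialized
  # but never treated as an optimizable parameter.
  class RunningCount < Chainer::Link
    def initialize
      super()
      @count = 0
      register_persistent('count')
    end
  end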
data/lib/chainer/links/connection/convolution_2d.rb ADDED
@@ -0,0 +1,98 @@
+module Chainer
+  module Links
+    module Connection
+      class Convolution2D < ::Chainer::Link
+        # Two-dimensional convolutional layer.
+        #
+        # This link wraps the :func:`chainer.functions.convolution_2d` function
+        # and holds the filter weight and bias vector as parameters.
+        #
+        # @param [integer or nil] in_channels Number of channels of input arrays.
+        #   If `nil`, parameter initialization will be deferred until the first forward pass, at which time the size will be determined.
+        # @param [integer] out_channels Number of channels of output arrays.
+        # @param [integer or 2-d int array] ksize Size of filters (a.k.a. kernels).
+        # @param [integer or 2-d int array] stride Stride of filter applications.
+        # @param [integer or 2-d int array] pad Spatial padding width for input arrays.
+        # @param [boolean] nobias If `true`, then this link does not use the bias term.
+        # @param [Numo::NArray] initial_w Initial weight value. If `nil`, the default initializer is used.
+        # @param [Numo::NArray] initial_bias Initial bias value. If `nil`, the bias is set to 0.
+        #
+        # Example
+        #   There are several ways to make a Convolution2D link.
+        #   Let an input array `x` be:
+        #   > x = Numo::DFloat.new(1, 3, 10, 10).seq
+        #
+        #   1. Give the first three arguments explicitly:
+        #   > l = Chainer::Links::Connection::Convolution2D.new(3, 7, 5)
+        #   > y = l.(x)
+        #   > y.shape
+        #   [1, 7, 6, 6]
+        #
+        #   2. Omit `in_channels` or fill it with `nil`:
+        #      The two cases below are the same.
+        #
+        #   > l = Chainer::Links::Connection::Convolution2D.new(7, 5)
+        #   > y = l.(x)
+        #   > y.shape
+        #   [1, 7, 6, 6]
+        #
+        #   > l = Chainer::Links::Connection::Convolution2D.new(nil, 7, 5)
+        #   > y = l.(x)
+        #   > y.shape
+        #   [1, 7, 6, 6]
+        #
+        #   When you omit the first argument, you need to specify the other subsequent arguments from `stride` on as keyword arguments.
+        #
+        #   > l = Chainer::Links::Connection::Convolution2D.new(7, 5, stride: 1, pad: 0)
+        #   > y = l.(x)
+        #   > y.shape
+        #   [1, 7, 6, 6]
+        def initialize(in_channels, out_channels, ksize=nil, stride: 1, pad: 0, nobias: false, initial_w: nil, initial_bias: nil)
+          super()
+          if ksize.nil?
+            out_channels, ksize, in_channels = in_channels, out_channels, nil
+          end
+
+          @ksize = ksize
+          @stride = stride.is_a?(Array) ? stride : [stride, stride]
+          @pad = pad.is_a?(Array) ? pad : [pad, pad]
+          @out_channels = out_channels
+
+          init_scope do
+            w_initializer = Chainer::Initializers.get_initializer(initial_w)
+            @w = Chainer::Parameter.new(initializer: w_initializer)
+            if in_channels
+              initialize_params(in_channels)
+            end
+
+            if nobias
+              @b = nil
+            else
+              initial_bias = 0 if initial_bias.nil?
+              bias_initializer = Chainer::Initializers.get_initializer(initial_bias)
+              @b = Chainer::Parameter.new(initializer: bias_initializer, shape: out_channels)
+            end
+          end
+        end
+
+        # Applies the convolution layer.
+        # @param [Chainer::Variable] x Input image.
+        # @return [Chainer::Variable] Output of the convolution.
+        def call(x)
+          initialize_params(x.shape[1]) if @w.data.nil?
+          Chainer::Functions::Connection::Convolution2DFunction.convolution_2d(x, @w, b: @b, stride: @stride, pad: @pad)
+        end
+
+        private
+
+        def initialize_params(in_channels)
+          kh, kw = @ksize.is_a?(Array) ? @ksize : [@ksize, @ksize]
+          w_shape = [@out_channels, in_channels, kh, kw]
+          @w.init(w_shape)
+        end
+      end
+    end
+  end
+end
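As a side note (not part of the diff), the [1, 7, 6, 6] output shapes in the doc-comment examples follow from the usual convolution output-size arithmetic:

  # out = (in + 2 * pad - ksize) / stride + 1
  in_size, ksize, stride, pad = 10, 5, 1, 0
  out_size = (in_size + 2 * pad - ksize) / stride + 1  # => 6, matching the [1, 7, 6, 6] shapes above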
data/lib/chainer/links/normalization/batch_normalization.rb ADDED
@@ -0,0 +1,106 @@
+module Chainer
+  module Links
+    module Normalization
+      class BatchNormalization < Chainer::Link
+        # Batch normalization layer on outputs of linear or convolution functions.
+        #
+        # It runs in three modes: training mode, fine-tuning mode, and testing mode.
+        # In training mode, it normalizes the input by *batch statistics*. It also
+        # maintains approximated population statistics by moving averages, which can
+        # be used for instant evaluation in testing mode.
+        #
+        # In fine-tuning mode, it accumulates the input to compute *population
+        # statistics*. In order to correctly compute the population statistics, a
+        # user must use this mode to feed mini-batches running through the whole training dataset.
+        #
+        # In testing mode, it uses pre-computed population statistics to normalize the input variable.
+        # The population statistics are approximated if they were computed in training mode,
+        # or accurate if they were correctly computed in fine-tuning mode.
+        #
+        # @param [integer or int array] size Size (or shape) of channel dimensions.
+        # @param [float] decay Decay rate of the moving average. It is used during training.
+        # @param [float] eps Epsilon value for numerical stability.
+        # @param [Numo::NArray.dtype] dtype Type to use in computing.
+        # @param [boolean] use_gamma If `true`, use the scaling parameter. Otherwise, use a unit (1), which has no effect.
+        # @param [boolean] use_beta If `true`, use the shifting parameter. Otherwise, use a unit (0), which has no effect.
+        def initialize(size, decay: 0.9, eps: 2e-5, dtype: Numo::DFloat, use_gamma: true, use_beta: true, initial_gamma: nil, initial_beta: nil)
+          super()
+          @avg_mean = dtype.zeros(size)
+          register_persistent('avg_mean')
+          @avg_var = dtype.zeros(size)
+          register_persistent('avg_var')
+          @n = 0
+          register_persistent('n')
+          @decay = decay
+          @eps = eps
+
+          init_scope do
+            if use_gamma
+              initial_gamma = 1 if initial_gamma.nil?
+              initial_gamma = Chainer::Initializers.get_initializer(initial_gamma)
+              initial_gamma.dtype = dtype
+              @gamma = Chainer::Parameter.new(initializer: initial_gamma, shape: size)
+            end
+            if use_beta
+              initial_beta = 0 if initial_beta.nil?
+              initial_beta = Chainer::Initializers.get_initializer(initial_beta)
+              initial_beta.dtype = dtype
+              @beta = Chainer::Parameter.new(initializer: initial_beta, shape: size)
+            end
+          end
+        end
+
+        # Invokes the forward propagation of BatchNormalization.
+        # In training mode, BatchNormalization computes moving averages of the
+        # mean and variance for evaluation during training, and normalizes the input using batch statistics.
+        # @param [Chainer::Variable] x Input variable.
+        # @param [boolean] finetune If it is in training mode and `finetune` is `true`,
+        #   BatchNormalization runs in fine-tuning mode;
+        #   it accumulates the input array to compute population statistics for normalization,
+        #   and normalizes the input using batch statistics.
+        def call(x, finetune: false)
+          if self.instance_variable_defined?(:@gamma)
+            gamma = @gamma
+          else
+            gamma = Chainer::Variable.new(x.data.class.ones(@avg_mean.shape))
+          end
+
+          if self.instance_variable_defined?(:@beta)
+            beta = @beta
+          else
+            beta = Chainer::Variable.new(x.data.class.zeros(*@avg_mean.shape))
+          end
+
+          if Chainer.configuration.train
+            if finetune
+              @n += 1
+              decay = 1.0 - 1.0 / @n
+            else
+              decay = @decay
+            end
+
+            func = Chainer::Functions::Normalization::BatchNormalizationFunction.new(eps: @eps, mean: @avg_mean, var: @avg_var, decay: decay)
+            ret = func.(x, gamma, beta)
+
+            @avg_mean[false] = func.running_mean
+            @avg_var[false] = func.running_var
+          else
+            mean = Chainer::Variable.new(@avg_mean)
+            var = Chainer::Variable.new(@avg_var)
+            ret = Chainer::Functions::Normalization::BatchNormalizationFunction.fixed_batch_normalization(x, gamma, beta, mean, var, eps: @eps)
+          end
+
+          ret
+        end
+
+        # Resets the population count for collecting population statistics.
+        # This method can be skipped if it is the first time to use the fine-tuning mode.
+        # Otherwise, this method should be called before starting the fine-tuning mode again.
+        def start_finetuning
+          @n = 0
+        end
+      end
+    end
+  end
+end
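A minimal usage sketch of the new link, assuming the constructor shown above and a writable `Chainer.configuration.train` flag (input values are illustrative):

  require 'chainer'

  bn = Chainer::Links::Normalization::BatchNormalization.new(3)
  x  = Chainer::Variable.new(Numo::DFloat.new(10, 3).rand)

  y_train = bn.(x)                     # training mode: batch statistics, moving averages updated

  Chainer.configuration.train = false  # assumed switch (see configuration.rb); testing mode below
  y_test = bn.(x)                      # uses the accumulated avg_mean / avg_var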
data/lib/chainer/optimizer.rb CHANGED
@@ -81,7 +81,7 @@ module Chainer
       # try to initialize the state to retrieve state entries
       @state = {}
       self_copy = self.dup
-      arr = Numo::DFloat.new(1)
+      arr = Numo::SFloat.new(1)
       self_copy.init_state(Chainer::Variable.new(arr, grad: arr))
       @state.keys.each do |key|
        @state[key] = serializer.(key.to_s, nil)
@@ -104,4 +104,43 @@ module Chainer
      @state.select! { |_, v| v.kind_of?(Numo::NArray) }
    end
  end
+
+  class HyperparameterProxy
+    def initialize(obj, attr_name)
+      obj.class.class_eval do
+        obj.class.send(:define_method, attr_name) do
+          self.instance_variable_get(:@hyperparam).instance_variable_get("@#{attr_name}")
+        end
+
+        obj.class.send(:define_method, "#{attr_name}=") do |val|
+          self.instance_variable_get(:@hyperparam).instance_variable_set("@#{attr_name}", val)
+        end
+      end
+    end
+  end
+
+  # Optimizer/UpdateRule hook function for weight decay regularization.
+  #
+  # This hook function adds a scaled parameter to the corresponding gradient.
+  # It can be used as regularization.
+  #
+  # @param [Float] rate Coefficient for the weight decay
+  class WeightDecay
+    def self.name
+      "WeightDecay"
+    end
+
+    def self.call_for_each_param
+      true
+    end
+
+    def initialize(rate)
+      @rate = rate
+    end
+
+    def call(rule, param)
+      return if param.data.nil? || param.grad.nil?
+      param.grad += @rate * param.data
+    end
+  end
 end
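For illustration, the effect of the WeightDecay hook on a single parameter can be sketched with plain Numo arrays (values are hypothetical, not from the diff):

  require 'numo/narray'

  rate = 0.0005
  data = Numo::SFloat[1.0, -2.0, 3.0]   # parameter values
  grad = Numo::SFloat[0.1,  0.2, -0.3]  # gradient from the loss
  grad += rate * data                   # the update WeightDecay#call applies per parameter
  # grad => [0.1005, 0.199, -0.2985]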
data/lib/chainer/optimizers/momentum_sgd.rb ADDED
@@ -0,0 +1,49 @@
+module Chainer
+  module Optimizers
+    # Update rule for the classical momentum SGD
+    class MomentumSGDRule < UpdateRule
+      def initialize(parent_hyperparam: nil, lr: nil, momentum: nil)
+        hyperparam = Hyperparameter.new
+        hyperparam.instance_variable_set('@lr', 0.01)
+        hyperparam.instance_variable_set('@momentum', 0.9)
+
+        super(parent_hyperparam: parent_hyperparam || hyperparam)
+
+        @hyperparam.instance_variable_set('@lr', lr) if lr
+        @hyperparam.instance_variable_set('@momentum', momentum) if momentum
+      end
+
+      def init_state(param)
+        @state[:v] = param.data.new_zeros
+      end
+
+      def update_core_cpu(param)
+        grad = param.grad
+        return if grad.nil?
+
+        v = @state[:v]
+        v *= @hyperparam.momentum
+        v -= @hyperparam.lr * grad
+        param.data += v
+      end
+    end
+
+    # Momentum SGD optimizer
+    class MomentumSGD < GradientMethod
+      attr_accessor :lr, :momentum
+      # @param [Float] lr Learning rate
+      # @param [Float] momentum Exponential decay rate of the first order moment
+      def initialize(lr: nil, momentum: nil)
+        super()
+        @hyperparam.instance_variable_set('@lr', lr || 0.01)
+        @hyperparam.instance_variable_set('@momentum', momentum || 0.9)
+        Chainer::HyperparameterProxy.new(self, "lr")
+        Chainer::HyperparameterProxy.new(self, "momentum")
+      end
+
+      def create_update_rule
+        MomentumSGDRule.new(parent_hyperparam: @hyperparam)
+      end
+    end
+  end
+end
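A minimal sketch of the update rule's arithmetic with plain Numo arrays and hypothetical values (in practice the optimizer is driven through a Trainer/StandardUpdater):

  require 'numo/narray'

  lr, momentum = 0.01, 0.9
  v    = Numo::SFloat[0.0, 0.0]   # velocity, as created by init_state
  grad = Numo::SFloat[1.0, -2.0]  # gradient of the loss w.r.t. the parameter
  data = Numo::SFloat[0.5,  0.5]  # current parameter values

  v = v * momentum - lr * grad    # same arithmetic as update_core_cpu
  data += v
  # v    => [-0.01, 0.02]
  # data => [0.49, 0.52]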
data/lib/chainer/parameter.rb CHANGED
@@ -15,7 +15,7 @@ module Chainer
       else
        super(name: name)
        @initializer = initializer
-        dtype = initializer.respond_to?(:dtype) ? initializer.dtype : 'DFloat'
+        dtype = initializer.respond_to?(:dtype) ? initializer.dtype : 'SFloat'
        @grad_initializer = Chainer::Initializers.nan()
      end
    else
data/lib/chainer/serializers/marshal.rb CHANGED
@@ -3,10 +3,14 @@ module Chainer
     class MarshalSerializer < Chainer::Serializer
       attr_accessor :target, :path
 
-      def self.save_file(filename, obj)
+      # @param [string] file_path Target file path
+      # @param [Object] obj Object to be serialized
+      def self.save_file(file_path, obj)
        s = self.new
        s.save(obj)
-        Marshal.dump(s.target, filename)
+        File.open(file_path, 'wb') do |f|
+          Marshal.dump(s.target, f)
+        end
      end
 
      def initialize(target: nil, path: "")
@@ -24,7 +28,7 @@ module Chainer
         arr = Numo::Bit[1]
       elsif value.is_a?(FalseClass)
         arr = Numo::Bit[0]
-      elsif value.instance_of?(String)
+      elsif value.instance_of?(String) || value.nil?
         arr = value
       else
         arr = Numo::NArray.cast(value)
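A usage sketch of the revised `save_file`, which now writes through `File.open` in binary mode instead of handing the path to `Marshal.dump` (the model class and file name are hypothetical):

  require 'chainer'

  model = MyModel.new  # hypothetical Chainer::Link subclass
  Chainer::Serializers::MarshalSerializer.save_file('my_model.marshal', model)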
data/lib/chainer/testing/array.rb ADDED
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+module Chainer
+  module Testing
+    def assert_allclose(expect, actual, atol: 1e-5, rtol: 1e-4)
+      # Asserts that no corresponding elements of expect and actual differ too much.
+      #
+      # This function can handle both CPU and GPU arrays simultaneously.
+      #
+      # Args:
+      #   expect: Left-hand-side array.
+      #   actual: Right-hand-side array.
+      #   atol (float): Absolute tolerance.
+      #   rtol (float): Relative tolerance.
+      #
+      expect = Utils::Array.force_array(expect)
+      actual = Utils::Array.force_array(actual)
+
+      # If the expected value is a 0-dim array, extend its dimensions to match the actual.
+      if (expect.shape != actual.shape) and (expect.ndim == 0)
+        expect = actual.class.new(actual.shape).fill(expect.to_f)
+      end
+
+      actual.each_with_index{|actual_val, *i|
+        if (expect[*i].to_f - actual_val.to_f).abs > atol + rtol * expect[*i].abs
+          raise "assert_allclose Error\n expect: #{expect.inspect}\n actual : #{actual.inspect}\n (#{i})=> #{(expect - actual).abs.max()} > #{atol + rtol * expect[*i].abs}"
+        end
+      }
+    end
+    module_function :assert_allclose
+  end
+end
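A minimal usage sketch of the new test helper, assuming the gem's main require loads it (values are illustrative):

  require 'chainer'

  expect = Numo::SFloat[1.0, 2.0, 3.0]
  actual = Numo::SFloat[1.000001, 2.0, 3.0]
  Chainer::Testing.assert_allclose(expect, actual)  # passes: each difference is within atol + rtol * |expect|

  # Chainer::Testing.assert_allclose(expect, Numo::SFloat[1.1, 2.0, 3.0])  # would raise with a diagnostic message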