red-chainer 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +4 -4
  2. data/README.md +2 -2
  3. data/examples/cifar/models/vgg.rb +84 -0
  4. data/examples/cifar/train_cifar.rb +70 -0
  5. data/examples/iris.rb +103 -0
  6. data/lib/chainer.rb +17 -0
  7. data/lib/chainer/configuration.rb +2 -1
  8. data/lib/chainer/cuda.rb +18 -0
  9. data/lib/chainer/dataset/convert.rb +30 -9
  10. data/lib/chainer/datasets/cifar.rb +56 -0
  11. data/lib/chainer/datasets/mnist.rb +3 -3
  12. data/lib/chainer/datasets/tuple_dataset.rb +3 -1
  13. data/lib/chainer/function.rb +1 -0
  14. data/lib/chainer/functions/activation/leaky_relu.rb +4 -4
  15. data/lib/chainer/functions/activation/log_softmax.rb +4 -4
  16. data/lib/chainer/functions/activation/relu.rb +3 -4
  17. data/lib/chainer/functions/activation/sigmoid.rb +4 -4
  18. data/lib/chainer/functions/activation/tanh.rb +5 -5
  19. data/lib/chainer/functions/connection/convolution_2d.rb +92 -0
  20. data/lib/chainer/functions/connection/linear.rb +1 -1
  21. data/lib/chainer/functions/loss/mean_squared_error.rb +34 -0
  22. data/lib/chainer/functions/loss/softmax_cross_entropy.rb +67 -40
  23. data/lib/chainer/functions/math/identity.rb +26 -0
  24. data/lib/chainer/functions/noise/dropout.rb +45 -0
  25. data/lib/chainer/functions/normalization/batch_normalization.rb +136 -0
  26. data/lib/chainer/functions/pooling/max_pooling_2d.rb +57 -0
  27. data/lib/chainer/functions/pooling/pooling_2d.rb +20 -0
  28. data/lib/chainer/gradient_check.rb +240 -0
  29. data/lib/chainer/initializer.rb +2 -0
  30. data/lib/chainer/initializers/constant.rb +1 -1
  31. data/lib/chainer/initializers/init.rb +5 -1
  32. data/lib/chainer/initializers/normal.rb +1 -1
  33. data/lib/chainer/iterators/serial_iterator.rb +1 -1
  34. data/lib/chainer/link.rb +11 -0
  35. data/lib/chainer/links/connection/convolution_2d.rb +98 -0
  36. data/lib/chainer/links/normalization/batch_normalization.rb +106 -0
  37. data/lib/chainer/optimizer.rb +40 -1
  38. data/lib/chainer/optimizers/momentum_sgd.rb +49 -0
  39. data/lib/chainer/parameter.rb +1 -1
  40. data/lib/chainer/serializers/marshal.rb +7 -3
  41. data/lib/chainer/testing/array.rb +32 -0
  42. data/lib/chainer/training/extensions/exponential_shift.rb +78 -0
  43. data/lib/chainer/training/extensions/snapshot.rb +1 -1
  44. data/lib/chainer/training/standard_updater.rb +4 -0
  45. data/lib/chainer/training/trainer.rb +1 -1
  46. data/lib/chainer/utils/array.rb +13 -2
  47. data/lib/chainer/utils/conv.rb +59 -0
  48. data/lib/chainer/utils/math.rb +72 -0
  49. data/lib/chainer/utils/variable.rb +7 -3
  50. data/lib/chainer/version.rb +1 -1
  51. data/red-chainer.gemspec +1 -0
  52. metadata +37 -3
data/lib/chainer/initializer.rb CHANGED
@@ -1,5 +1,7 @@
 module Chainer
   class Initializer
+    attr_accessor :dtype
+
     def initialize(dtype: nil)
       @dtype = dtype
     end
data/lib/chainer/initializers/constant.rb CHANGED
@@ -8,7 +8,7 @@ module Chainer
 
     def call(array)
       if @dtype
-        raise ArgumentError unless array.dtype == @dtype
+        raise ArgumentError unless array.class == @dtype
       end
       array.store(@fill_value)
       array
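The dtype check changes because, with Numo, an array's element type is its class (`Numo::SFloat`, `Numo::DFloat`, and so on) rather than a separate `dtype` attribute as in NumPy. A minimal sketch of the comparison the new code performs, assuming only `numo/narray`:

```ruby
require 'numo/narray'

a = Numo::SFloat.new(3).fill(0.5)
a.class                  # => Numo::SFloat -- the element type is the array's class
a.class == Numo::SFloat  # => true, which is what the initializer now asserts
```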
data/lib/chainer/initializers/init.rb CHANGED
@@ -1,7 +1,11 @@
 module Chainer
   module Initializers
     def self.generate_array(initializer, shape)
-      array = Numo::DFloat.new(shape).rand
+      klass = Numo::SFloat
+      if initializer.respond_to?(:dtype) && initializer.dtype
+        klass = initializer.dtype
+      end
+      array = klass.new(shape).rand
       initializer.(array)
     end
 
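`generate_array` now defaults to single precision (`Numo::SFloat`) and honors an initializer's `dtype` when one is set. A sketch of the dispatch, using a hypothetical stand-in initializer (`Stub` is not part of red-chainer):

```ruby
require 'numo/narray'

# Hypothetical stand-in for an initializer that may carry a dtype.
Stub = Struct.new(:dtype) do
  def call(array)
    array.store(1.0)
    array
  end
end

def generate_array(initializer, shape)
  klass = Numo::SFloat  # new default: single precision
  klass = initializer.dtype if initializer.respond_to?(:dtype) && initializer.dtype
  initializer.(klass.new(shape).rand)
end

generate_array(Stub.new(nil), [2, 2]).class          # => Numo::SFloat
generate_array(Stub.new(Numo::DFloat), [2, 2]).class # => Numo::DFloat
```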
data/lib/chainer/initializers/normal.rb CHANGED
@@ -8,7 +8,7 @@ module Chainer
 
     def call(array)
       args = { loc: 0.0, scale: @scale, size: array.shape}
-      Numo::DFloat.new(array.shape).rand_norm(0.0, @scale)
+      array.class.new(array.shape).rand_norm(0.0, @scale)
     end
   end
 
data/lib/chainer/iterators/serial_iterator.rb CHANGED
@@ -18,8 +18,8 @@ module Chainer
       @previous_epoch_detail = epoch_detail
 
       i = @current_position
-      i_end = i + @batch_size
       n = @dataset.size
+      i_end = [i + @batch_size, n].min
 
       batch = @order[i...i_end].to_a.map { |index| @dataset[index] }
 
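The change clamps the end index of a mini-batch to the dataset size, so the final batch of an epoch no longer slices past the end of `@order`. A worked example with the numbers spelled out:

```ruby
batch_size = 32
n = 100  # dataset size
i = 96   # current position when the last batch starts

i + batch_size           # => 128, past the end of the dataset (old behavior)
[i + batch_size, n].min  # => 100, so order[96...100] yields the 4 remaining examples
```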
data/lib/chainer/link.rb CHANGED
@@ -39,6 +39,17 @@ module Chainer
       end
     end
 
+    # Registers an attribute of a given name as a persistent value.
+    # This is a convenient method to register an existing attribute as a persistent value.
+    # If `name` has already been registered as a parameter, this method removes it
+    # from the list of parameter names and re-registers it as a persistent value.
+    #
+    # @param [string] name Name of the attribute to be registered.
+    def register_persistent(name)
+      @persistent << name
+      @params.delete(name)
+    end
+
     def params(include_uninit: true)
       @params.map do |name|
         data = self.instance_variable_get(name).data
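A minimal sketch of how a link subclass would use `register_persistent`; it mirrors the `BatchNormalization` link added below, where running statistics are serialized with the link but excluded from gradient updates (`RunningStats` here is a hypothetical example):

```ruby
class RunningStats < Chainer::Link
  def initialize
    super()
    # Saved and restored by serializers, but not a learnable parameter.
    @avg_mean = Numo::SFloat.zeros(10)
    register_persistent('avg_mean')
  end
end
```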
data/lib/chainer/links/connection/convolution_2d.rb ADDED
@@ -0,0 +1,98 @@
+module Chainer
+  module Links
+    module Connection
+      class Convolution2D < ::Chainer::Link
+        # Two-dimensional convolutional layer.
+        #
+        # This link wraps the :func:`chainer.functions.convolution_2d` function
+        # and holds the filter weight and bias vector as parameters.
+        #
+        # @param [integer or nil] in_channels Number of channels of input arrays.
+        #   If `nil`, parameter initialization will be deferred until the first forward pass, at which time the size will be determined.
+        # @param [integer] out_channels Number of channels of output arrays.
+        # @param [integer or 2-d int array] ksize Size of filters (a.k.a. kernels).
+        # @param [integer or 2-d int array] stride Stride of filter applications.
+        # @param [integer or 2-d int array] pad Spatial padding width for input arrays.
+        # @param [boolean] nobias If `true`, then this link does not use the bias term.
+        # @param [Numo::NArray] initial_w Initial weight value. If `nil`, the default initializer is used.
+        # @param [Numo::NArray] initial_bias Initial bias value. If `nil`, the bias is set to 0.
+        #
+        # Example
+        #   There are several ways to make a Convolution2D link.
+        #   Let an input vector `x` be:
+        #   > x = Numo::DFloat.new(1, 3, 10, 10).seq
+        #
+        #   1. Give the first three arguments explicitly:
+        #   > l = Chainer::Links::Connection::Convolution2D.new(3, 7, 5)
+        #   > y = l.(x)
+        #   > y.shape
+        #   [1, 7, 6, 6]
+        #
+        #   2. Omit `in_channels` or fill it with `nil`:
+        #   The below two cases are the same.
+        #
+        #   > l = Chainer::Links::Connection::Convolution2D.new(7, 5)
+        #   > y = l.(x)
+        #   > y.shape
+        #   [1, 7, 6, 6]
+        #
+        #   > l = Chainer::Links::Connection::Convolution2D.new(nil, 7, 5)
+        #   > y = l.(x)
+        #   > y.shape
+        #   [1, 7, 6, 6]
+        #
+        #   When you omit the first argument, you need to specify the other subsequent arguments from `stride` as keyword arguments.
+        #
+        #   > l = Chainer::Links::Connection::Convolution2D.new(7, 5, stride: 1, pad: 0)
+        #   > y = l.(x)
+        #   > y.shape
+        #   [1, 7, 6, 6]
+        def initialize(in_channels, out_channels, ksize=nil, stride: 1, pad: 0, nobias: false, initial_w: nil, initial_bias: nil)
+          super()
+          if ksize.nil?
+            out_channels, ksize, in_channels = in_channels, out_channels, nil
+          end
+
+          @ksize = ksize
+          @stride = stride.is_a?(Array) ? stride : [stride, stride]
+          @pad = pad.is_a?(Array) ? pad : [pad, pad]
+          @out_channels = out_channels
+
+          init_scope do
+            w_initializer = Chainer::Initializers.get_initializer(initial_w)
+            @w = Chainer::Parameter.new(initializer: w_initializer)
+            if in_channels
+              initialize_params(in_channels)
+            end
+
+            if nobias
+              @b = nil
+            else
+              initial_bias = 0 if initial_bias.nil?
+              bias_initializer = Chainer::Initializers.get_initializer(initial_bias)
+              @b = Chainer::Parameter.new(initializer: bias_initializer, shape: out_channels)
+            end
+          end
+        end
+
+        # Applies the convolution layer.
+        # @param [Chainer::Variable] x Input image.
+        # @return [Chainer::Variable] Output of the convolution.
+        def call(x)
+          initialize_params(x.shape[1]) if @w.data.nil?
+          Chainer::Functions::Connection::Convolution2DFunction.convolution_2d(x, @w, b: @b, stride: @stride, pad: @pad)
+        end
+
+        private
+
+        def initialize_params(in_channels)
+          kh, kw = @ksize.is_a?(Array) ? @ksize : [@ksize, @ksize]
+          w_shape = [@out_channels, in_channels, kh, kw]
+          @w.init(w_shape)
+        end
+      end
+    end
+  end
+end
+
+
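The `[1, 7, 6, 6]` output shapes in the doc examples follow from the standard convolution output-size formula, out = (in + 2·pad − ksize) / stride + 1, applied to each spatial dimension:

```ruby
in_size, ksize, stride, pad = 10, 5, 1, 0
out_size = (in_size + 2 * pad - ksize) / stride + 1  # => 6

# With batch size 1 and out_channels 7, the output shape is
# [1, 7, out_size, out_size] => [1, 7, 6, 6], matching the examples above.
```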
data/lib/chainer/links/normalization/batch_normalization.rb ADDED
@@ -0,0 +1,106 @@
+module Chainer
+  module Links
+    module Normalization
+      class BatchNormalization < Chainer::Link
+        # Batch normalization layer on outputs of linear or convolution functions.
+        #
+        # It runs in three modes: training mode, fine-tuning mode, and testing mode.
+        # In training mode, it normalizes the input by *batch statistics*. It also
+        # maintains approximated population statistics by moving averages, which can
+        # be used for instant evaluation in testing mode.
+        #
+        # In fine-tuning mode, it accumulates the input to compute *population
+        # statistics*. In order to correctly compute the population statistics, a
+        # user must use this mode to feed mini-batches running through the whole training dataset.
+        #
+        # In testing mode, it uses pre-computed population statistics to normalize the input variable.
+        # The population statistics are approximated if computed by training mode,
+        # or accurate if correctly computed by fine-tuning mode.
+        #
+        # @param [integer or int array] size Size (or shape) of channel dimensions.
+        # @param [float] decay Decay rate of moving average. It is used during training.
+        # @param [float] eps Epsilon value for numerical stability.
+        # @param [Numo::NArray.dtype] dtype Type to use in computing.
+        # @param [boolean] use_gamma If `true`, use scaling parameter. Otherwise, use unit(1) which has no effect.
+        # @param [boolean] use_beta If `true`, use shifting parameter. Otherwise, use unit(0) which has no effect.
+        def initialize(size, decay: 0.9, eps: 2e-5, dtype: Numo::DFloat, use_gamma: true, use_beta: true, initial_gamma: nil, initial_beta: nil)
+          super()
+          @avg_mean = dtype.zeros(size)
+          register_persistent('avg_mean')
+          @avg_var = dtype.zeros(size)
+          register_persistent('avg_var')
+          @n = 0
+          register_persistent('n')
+          @decay = decay
+          @eps = eps
+
+          init_scope do
+            if use_gamma
+              initial_gamma = 1 if initial_gamma.nil?
+              initial_gamma = Chainer::Initializers.get_initializer(initial_gamma)
+              initial_gamma.dtype = dtype
+              @gamma = Chainer::Parameter.new(initializer: initial_gamma, shape: size)
+            end
+            if use_beta
+              initial_beta = 0 if initial_beta.nil?
+              initial_beta = Chainer::Initializers.get_initializer(initial_beta)
+              initial_beta.dtype = dtype
+              @beta = Chainer::Parameter.new(initializer: initial_beta, shape: size)
+            end
+          end
+        end
+
+        # Invokes the forward propagation of BatchNormalization.
+        # In training mode, BatchNormalization computes moving averages of
+        # mean and variance for evaluation during training, and normalizes the input using batch statistics.
+        # @param [Chainer::Variable] x Input variable.
+        # @param [boolean] finetune If it is in the training mode and `finetune` is `true`,
+        #   BatchNormalization runs in fine-tuning mode;
+        #   it accumulates the input array to compute population statistics for normalization,
+        #   and normalizes the input using batch statistics.
+        def call(x, finetune: false)
+          if self.instance_variable_defined?(:@gamma)
+            gamma = @gamma
+          else
+            gamma = Chainer::Variable.new(x.data.class.ones(@avg_mean.shape))
+          end
+
+          if self.instance_variable_defined?(:@beta)
+            beta = @beta
+          else
+            beta = Chainer::Variable.new(x.data.class.zeros(*@avg_mean.shape))
+          end
+
+          if Chainer.configuration.train
+            if finetune
+              @n += 1
+              decay = 1.0 - 1.0 / @n
+            else
+              decay = @decay
+            end
+
+            func = Chainer::Functions::Normalization::BatchNormalizationFunction.new(eps: @eps, mean: @avg_mean, var: @avg_var, decay: decay)
+            ret = func.(x, gamma, beta)
+
+            @avg_mean[false] = func.running_mean
+            @avg_var[false] = func.running_var
+          else
+            mean = Chainer::Variable(@avg_mean)
+            var = Chainer::Variable(@avg_var)
+            ret = Chainer::Functions::Normalization::BatchNormalizationFunction.fixed_batch_normalization(x, gamma, beta, mean, var, eps: @eps)
+          end
+
+          ret
+        end
+
+        # Resets the population count for collecting population statistics.
+        # This method can be skipped if it is the first time to use the fine-tuning mode.
+        # Otherwise, this method should be called before starting the fine-tuning mode again.
+        def start_finetuning
+          @n = 0
+        end
+      end
+    end
+  end
+end
+
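Putting the three modes together, a hedged usage sketch (it assumes `Chainer.configuration.train` is a writable flag, per `configuration.rb` in this release, and that `x` is a `Chainer::Variable` of matching shape):

```ruby
bn = Chainer::Links::Normalization::BatchNormalization.new(3)

# Training mode: normalize by batch statistics, update @avg_mean/@avg_var.
y = bn.(x)

# Fine-tuning mode: accumulate exact population statistics.
bn.start_finetuning
y = bn.(x, finetune: true)

# Testing mode: normalize by the stored population statistics.
Chainer.configuration.train = false
y = bn.(x)
```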
data/lib/chainer/optimizer.rb CHANGED
@@ -81,7 +81,7 @@ module Chainer
       # try to initialize the state to retrieve state entries
       @state = {}
       self_copy = self.dup
-      arr = Numo::DFloat.new(1)
+      arr = Numo::SFloat.new(1)
       self_copy.init_state(Chainer::Variable.new(arr, grad: arr))
       @state.keys.each do |key|
         @state[key] = serializer.(key.to_s, nil)
@@ -104,4 +104,43 @@ module Chainer
       @state.select! { |_, v| v.kind_of?(Numo::NArray) }
     end
   end
+
+  class HyperparameterProxy
+    def initialize(obj, attr_name)
+      obj.class.class_eval do
+        obj.class.send(:define_method, attr_name) do
+          self.instance_variable_get(:@hyperparam).instance_variable_get("@#{attr_name}")
+        end
+
+        obj.class.send(:define_method, "#{attr_name}=") do |val|
+          self.instance_variable_get(:@hyperparam).instance_variable_set("@#{attr_name}", val)
+        end
+      end
+    end
+  end
+
+  # Optimizer/UpdateRule hook function for weight decay regularization.
+  #
+  # This hook function adds a scaled parameter to the corresponding gradient.
+  # It can be used as a regularization.
+  #
+  # @param [Float] rate Coefficient for the weight decay
+  class WeightDecay
+    def self.name
+      "WeightDecay"
+    end
+
+    def self.call_for_each_param
+      true
+    end
+
+    def initialize(rate)
+      @rate = rate
+    end
+
+    def call(rule, param)
+      return if param.data.nil? || param.grad.nil?
+      param.grad += @rate * param.data
+    end
+  end
 end
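`WeightDecay#call` implements plain L2 regularization, g ← g + λθ: each parameter's gradient is shifted toward shrinking the parameter itself. (`HyperparameterProxy`, added in the same hunk, defines reader/writer methods on an optimizer that delegate to its `@hyperparam`; `MomentumSGD` below uses it.) The hook's effect, traced with bare Numo arrays:

```ruby
require 'numo/narray'

rate = 0.0005                   # weight-decay coefficient (lambda)
data = Numo::SFloat[1.0, -2.0]  # parameter values (theta)
grad = Numo::SFloat[0.1, 0.3]   # gradient of the loss

grad += rate * data             # what WeightDecay#call does per parameter
# grad => [0.1005, 0.299]
```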
data/lib/chainer/optimizers/momentum_sgd.rb ADDED
@@ -0,0 +1,49 @@
+module Chainer
+  module Optimizers
+    # Update rule for the classical momentum SGD
+    class MomentumSGDRule < UpdateRule
+      def initialize(parent_hyperparam: nil, lr: nil, mementum: nil)
+        hyperparam = Hyperparameter.new
+        hyperparam.instance_variable_set('@lr', 0.01)
+        hyperparam.instance_variable_set('@momentum', 0.9)
+
+        super(parent_hyperparam: parent_hyperparam || hyperparam)
+
+        @hyperparam.instance_variable_set('@lr', lr) if lr
+        @hyperparam.instance_variable_set('@mementum', mementum) if mementum
+      end
+
+      def init_state(param)
+        @state[:v] = param.data.new_zeros
+      end
+
+      def update_core_cpu(param)
+        grad = param.grad
+        return if grad.nil?
+
+        v = @state[:v]
+        v *= @hyperparam.momentum
+        v -= @hyperparam.lr * grad
+        param.data += v
+      end
+    end
+
+    # Momentum SGD optimizer
+    class MomentumSGD < GradientMethod
+      attr_accessor :lr, :momentum
+      # @param [Float] lr Learning rate
+      # @param [Float] momentum Exponential decay rate of the first order moment
+      def initialize(lr: nil, momentum: nil)
+        super()
+        @hyperparam.instance_variable_set('@lr', lr || 0.01)
+        @hyperparam.instance_variable_set('@momentum', momentum || 0.9)
+        Chainer::HyperparameterProxy.new(self, "lr")
+        Chainer::HyperparameterProxy.new(self, "momentum")
+      end
+
+      def create_update_rule
+        MomentumSGDRule.new(parent_hyperparam: @hyperparam)
+      end
+    end
+  end
+end
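`MomentumSGDRule#update_core_cpu` applies the classical momentum update v ← μv − lr·g, θ ← θ + v. One step traced with bare Numo arrays:

```ruby
require 'numo/narray'

lr, momentum = 0.01, 0.9
theta = Numo::SFloat[0.5, -0.5]  # parameter data
v     = Numo::SFloat.zeros(2)    # velocity, the :v entry of @state
grad  = Numo::SFloat[0.2, -0.1]

v = momentum * v - lr * grad     # v => [-0.002, 0.001]
theta += v                       # theta => [0.498, -0.499]
```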
data/lib/chainer/parameter.rb CHANGED
@@ -15,7 +15,7 @@ module Chainer
       else
         super(name: name)
         @initializer = initializer
-        dtype = initializer.respond_to?(:dtype) ? initializer.dtype : 'DFloat'
+        dtype = initializer.respond_to?(:dtype) ? initializer.dtype : 'SFloat'
         @grad_initializer = Chainer::Initializers.nan()
       end
     else
data/lib/chainer/serializers/marshal.rb CHANGED
@@ -3,10 +3,14 @@ module Chainer
   class MarshalSerializer < Chainer::Serializer
     attr_accessor :target, :path
 
-    def self.save_file(filename, obj)
+    # @param [string] file_path Target file path
+    # @param [Object] obj Object to be serialized
+    def self.save_file(file_path, obj)
       s = self.new
       s.save(obj)
-      Marshal.dump(s.target, filename)
+      File.open(file_path, 'wb') do |f|
+        Marshal.dump(s.target, f)
+      end
     end
 
     def initialize(target: nil, path: "")
@@ -24,7 +28,7 @@ module Chainer
       arr = Numo::Bit[1]
     elsif value.is_a?(FalseClass)
       arr = Numo::Bit[0]
-    elsif value.instance_of?(String)
+    elsif value.instance_of?(String) || value.nil?
       arr = value
     else
       arr = Numo::NArray.cast(value)
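The `save_file` fix matters because `Marshal.dump`'s second argument must be an IO object, not a file name; the old call raised a `TypeError`, so the method now opens the file in binary mode and dumps into the handle. A usage sketch, with the read side mirroring the write (only `save_file` itself is defined by this diff; `model` stands in for any serializable target):

```ruby
Chainer::Serializers::MarshalSerializer.save_file('model.marshal', model)

# Reading back mirrors the binary-mode write.
target = File.open('model.marshal', 'rb') { |f| Marshal.load(f) }
```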
data/lib/chainer/testing/array.rb ADDED
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+module Chainer
+  module Testing
+    def assert_allclose(expect, actual, atol: 1e-5, rtol: 1e-4)
+      # Asserts if some corresponding element of x and y differs too much.
+      #
+      # This function can handle both CPU and GPU arrays simultaneously.
+      #
+      # Args:
+      #   expect: Left-hand-side array.
+      #   actual: Right-hand-side array.
+      #   atol (float): Absolute tolerance.
+      #   rtol (float): Relative tolerance.
+      #
+      expect = Utils::Array.force_array(expect)
+      actual = Utils::Array.force_array(actual)
+
+      # If the expected value is a 0-dim array, extend the dimension to the actual.
+      if (expect.shape != actual.shape) and (expect.ndim == 0)
+        expect = actual.class.new(actual.shape).fill(expect.to_f)
+      end
+
+      actual.each_with_index{|actual_val, *i|
+        if (expect[*i].to_f - actual_val.to_f).abs > atol + rtol * expect[*i].abs
+          raise "assert_allclose Error\n expect: #{expect.inspect}\n actual : #{actual.inspect}\n (#{i})=> #{(expect - actual).abs.max()} > #{atol + rtol * expect[*i].abs}"
+        end
+      }
+    end
+    module_function :assert_allclose
+  end
+end
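The elementwise acceptance test is |expect − actual| ≤ atol + rtol·|expect|. Since `module_function` is used, the helper can be called directly on the module:

```ruby
require 'numo/narray'

expect = Numo::SFloat[1.0, 2.0]
actual = Numo::SFloat[1.00002, 2.00002]

# Passes: each |e - a| (about 2e-5) is under atol + rtol * |e| (at least 1.1e-4).
Chainer::Testing.assert_allclose(expect, actual)

# Raises: |1.0 - 1.1| = 0.1 exceeds the combined tolerance.
Chainer::Testing.assert_allclose(Numo::SFloat[1.0], Numo::SFloat[1.1])
```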