red-chainer 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/examples/cifar/models/vgg.rb +84 -0
- data/examples/cifar/train_cifar.rb +70 -0
- data/examples/iris.rb +103 -0
- data/lib/chainer.rb +17 -0
- data/lib/chainer/configuration.rb +2 -1
- data/lib/chainer/cuda.rb +18 -0
- data/lib/chainer/dataset/convert.rb +30 -9
- data/lib/chainer/datasets/cifar.rb +56 -0
- data/lib/chainer/datasets/mnist.rb +3 -3
- data/lib/chainer/datasets/tuple_dataset.rb +3 -1
- data/lib/chainer/function.rb +1 -0
- data/lib/chainer/functions/activation/leaky_relu.rb +4 -4
- data/lib/chainer/functions/activation/log_softmax.rb +4 -4
- data/lib/chainer/functions/activation/relu.rb +3 -4
- data/lib/chainer/functions/activation/sigmoid.rb +4 -4
- data/lib/chainer/functions/activation/tanh.rb +5 -5
- data/lib/chainer/functions/connection/convolution_2d.rb +92 -0
- data/lib/chainer/functions/connection/linear.rb +1 -1
- data/lib/chainer/functions/loss/mean_squared_error.rb +34 -0
- data/lib/chainer/functions/loss/softmax_cross_entropy.rb +67 -40
- data/lib/chainer/functions/math/identity.rb +26 -0
- data/lib/chainer/functions/noise/dropout.rb +45 -0
- data/lib/chainer/functions/normalization/batch_normalization.rb +136 -0
- data/lib/chainer/functions/pooling/max_pooling_2d.rb +57 -0
- data/lib/chainer/functions/pooling/pooling_2d.rb +20 -0
- data/lib/chainer/gradient_check.rb +240 -0
- data/lib/chainer/initializer.rb +2 -0
- data/lib/chainer/initializers/constant.rb +1 -1
- data/lib/chainer/initializers/init.rb +5 -1
- data/lib/chainer/initializers/normal.rb +1 -1
- data/lib/chainer/iterators/serial_iterator.rb +1 -1
- data/lib/chainer/link.rb +11 -0
- data/lib/chainer/links/connection/convolution_2d.rb +98 -0
- data/lib/chainer/links/normalization/batch_normalization.rb +106 -0
- data/lib/chainer/optimizer.rb +40 -1
- data/lib/chainer/optimizers/momentum_sgd.rb +49 -0
- data/lib/chainer/parameter.rb +1 -1
- data/lib/chainer/serializers/marshal.rb +7 -3
- data/lib/chainer/testing/array.rb +32 -0
- data/lib/chainer/training/extensions/exponential_shift.rb +78 -0
- data/lib/chainer/training/extensions/snapshot.rb +1 -1
- data/lib/chainer/training/standard_updater.rb +4 -0
- data/lib/chainer/training/trainer.rb +1 -1
- data/lib/chainer/utils/array.rb +13 -2
- data/lib/chainer/utils/conv.rb +59 -0
- data/lib/chainer/utils/math.rb +72 -0
- data/lib/chainer/utils/variable.rb +7 -3
- data/lib/chainer/version.rb +1 -1
- data/red-chainer.gemspec +1 -0
- metadata +37 -3
data/lib/chainer/initializer.rb
CHANGED
@@ -1,7 +1,11 @@
 module Chainer
   module Initializers
     def self.generate_array(initializer, shape)
-      array = Numo::SFloat.new(shape).rand
+      klass = Numo::SFloat
+      if initializer.respond_to?(:dtype) && initializer.dtype
+        klass = initializer.dtype
+      end
+      array = klass.new(shape).rand
       initializer.(array)
     end
 
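The change above makes `Chainer::Initializers.generate_array` honor an initializer's `dtype` instead of always allocating `Numo::SFloat`. A minimal sketch of that behavior with a hypothetical initializer (`ConstInit` below is illustrative, not part of the gem); all `generate_array` needs is an object that responds to `call` and, optionally, `dtype`:

```ruby
require 'chainer'

# Hypothetical initializer, for illustration only: fills the array with a
# constant and can request a storage class via #dtype.
class ConstInit
  attr_reader :dtype

  def initialize(value, dtype: nil)
    @value = value
    @dtype = dtype
  end

  def call(array)
    array.fill(@value)
  end
end

# New in 0.3.0: the requested dtype is respected.
a = Chainer::Initializers.generate_array(ConstInit.new(1.0, dtype: Numo::DFloat), [2, 3])
p a.class  # => Numo::DFloat

# Without a dtype, the old default (Numo::SFloat) still applies.
b = Chainer::Initializers.generate_array(ConstInit.new(1.0), [2, 3])
p b.class  # => Numo::SFloat
```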
data/lib/chainer/link.rb
CHANGED
@@ -39,6 +39,17 @@ module Chainer
       end
     end
 
+    # Registers an attribute of a given name as a persistent value.
+    # This is a convenient method to register an existing attribute as a persistent value.
+    # If `name` has already been registered as a parameter,
+    # this method removes it from the list of parameter names and re-registers it as a persistent value.
+    #
+    # @param [string] name Name of the attribute to be registered.
+    def register_persistent(name)
+      @persistent << name
+      @params.delete(name)
+    end
+
     def params(include_uninit: true)
       @params.map do |name|
         data = self.instance_variable_get(name).data
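`register_persistent` marks an existing attribute as state to be saved and restored by serializers rather than treated as a learnable parameter; the new `BatchNormalization` link further down uses it for `avg_mean`, `avg_var`, and `n`. A rough sketch of the idea in a custom link (the `RunningCount` class and its attribute are illustrative only):

```ruby
require 'chainer'

# Illustrative link: keeps a counter that should survive serialization
# but is not a trainable parameter.
class RunningCount < Chainer::Link
  def initialize
    super()
    @count = 0
    register_persistent('count')  # persisted alongside the parameters
  end

  def bump
    @count += 1
  end
end
```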
data/lib/chainer/links/connection/convolution_2d.rb
ADDED
@@ -0,0 +1,98 @@
+module Chainer
+  module Links
+    module Connection
+      class Convolution2D < ::Chainer::Link
+        # Two-dimensional convolutional layer.
+        #
+        # This link wraps the `Chainer::Functions::Connection::Convolution2DFunction.convolution_2d` function
+        # and holds the filter weight and bias vector as parameters.
+        #
+        # @param [integer or nil] in_channels Number of channels of input arrays.
+        #   If `nil`, parameter initialization will be deferred until the first forward data pass, at which time the size will be determined.
+        # @param [integer] out_channels Number of channels of output arrays.
+        # @param [integer or 2-d int array] ksize Size of filters (a.k.a. kernels).
+        # @param [integer or 2-d int array] stride Stride of filter applications.
+        # @param [integer or 2-d int array] pad Spatial padding width for input arrays.
+        # @param [boolean] nobias If `true`, then this link does not use the bias term.
+        # @param [Numo::NArray] initial_w Initial weight value. If `nil`, the default initializer is used.
+        # @param [Numo::NArray] initial_bias Initial bias value. If `nil`, the bias is set to 0.
+        #
+        # Example
+        # There are several ways to make a Convolution2D link.
+        # Let an input array `x` be:
+        # > x = Numo::DFloat.new(1, 3, 10, 10).seq
+        #
+        # 1. Give the first three arguments explicitly:
+        # > l = Chainer::Links::Connection::Convolution2D.new(3, 7, 5)
+        # > y = l.(x)
+        # > y.shape
+        # [1, 7, 6, 6]
+        #
+        # 2. Omit `in_channels` or fill it with `nil`:
+        # The below two cases are the same.
+        #
+        # > l = Chainer::Links::Connection::Convolution2D.new(7, 5)
+        # > y = l.(x)
+        # > y.shape
+        # [1, 7, 6, 6]
+        #
+        # > l = Chainer::Links::Connection::Convolution2D.new(nil, 7, 5)
+        # > y = l.(x)
+        # > y.shape
+        # [1, 7, 6, 6]
+        #
+        # When you omit the first argument, you need to specify the other subsequent arguments from `stride` as keyword arguments.
+        #
+        # > l = Chainer::Links::Connection::Convolution2D.new(7, 5, stride: 1, pad: 0)
+        # > y = l.(x)
+        # > y.shape
+        # [1, 7, 6, 6]
+        def initialize(in_channels, out_channels, ksize=nil, stride: 1, pad: 0, nobias: false, initial_w: nil, initial_bias: nil)
+          super()
+          if ksize.nil?
+            out_channels, ksize, in_channels = in_channels, out_channels, nil
+          end
+
+          @ksize = ksize
+          @stride = stride.is_a?(Array) ? stride : [stride, stride]
+          @pad = pad.is_a?(Array) ? pad : [pad, pad]
+          @out_channels = out_channels
+
+          init_scope do
+            w_initializer = Chainer::Initializers.get_initializer(initial_w)
+            @w = Chainer::Parameter.new(initializer: w_initializer)
+            if in_channels
+              initialize_params(in_channels)
+            end
+
+            if nobias
+              @b = nil
+            else
+              initial_bias = 0 if initial_bias.nil?
+              bias_initializer = Chainer::Initializers.get_initializer(initial_bias)
+              @b = Chainer::Parameter.new(initializer: bias_initializer, shape: out_channels)
+            end
+          end
+        end
+
+        # Applies the convolution layer.
+        # @param [Chainer::Variable] x Input image.
+        # @return [Chainer::Variable] Output of the convolution.
+        def call(x)
+          initialize_params(x.shape[1]) if @w.data.nil?
+          Chainer::Functions::Connection::Convolution2DFunction.convolution_2d(x, @w, b: @b, stride: @stride, pad: @pad)
+        end
+
+        private
+
+        def initialize_params(in_channels)
+          kh, kw = @ksize.is_a?(Array) ? @ksize : [@ksize, @ksize]
+          w_shape = [@out_channels, in_channels, kh, kw]
+          @w.init(w_shape)
+        end
+      end
+    end
+  end
+end
+
+
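The `[1, 7, 6, 6]` shapes in the docstring above follow the usual convolution arithmetic: the output spatial size is `(in + 2 * pad - ksize) / stride + 1`, so a 10x10 input with a 5x5 kernel, stride 1, and pad 0 gives `(10 - 5) / 1 + 1 = 6`. A quick check that mirrors the docstring example:

```ruby
require 'chainer'

x = Numo::DFloat.new(1, 3, 10, 10).seq
l = Chainer::Links::Connection::Convolution2D.new(3, 7, 5)
y = l.(x)

p y.shape                   # => [1, 7, 6, 6]
p (10 + 2 * 0 - 5) / 1 + 1  # => 6, the expected output height/width
```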
data/lib/chainer/links/normalization/batch_normalization.rb
ADDED
@@ -0,0 +1,106 @@
+module Chainer
+  module Links
+    module Normalization
+      class BatchNormalization < Chainer::Link
+        # Batch normalization layer on outputs of linear or convolution functions.
+        #
+        # It runs in three modes: training mode, fine-tuning mode, and testing mode.
+        # In training mode, it normalizes the input by *batch statistics*. It also
+        # maintains approximated population statistics by moving averages, which can
+        # be used for instant evaluation in testing mode.
+        #
+        # In fine-tuning mode, it accumulates the input to compute *population
+        # statistics*. In order to correctly compute the population statistics, a
+        # user must use this mode to feed mini-batches running through the whole training dataset.
+        #
+        # In testing mode, it uses pre-computed population statistics to normalize the input variable.
+        # The population statistics is approximated if it is computed in training mode,
+        # or accurate if it is correctly computed in fine-tuning mode.
+        #
+        # @param [integer or int array] size Size (or shape) of channel dimensions.
+        # @param [float] decay Decay rate of moving average. It is used on training.
+        # @param [float] eps Epsilon value for numerical stability.
+        # @param [Numo::NArray.dtype] dtype Type to use in computing.
+        # @param [boolean] use_gamma If `true`, use scaling parameter. Otherwise, use unit(1) which makes no effect.
+        # @param [boolean] use_beta If `true`, use shifting parameter. Otherwise, use unit(0) which makes no effect.
+        def initialize(size, decay: 0.9, eps: 2e-5, dtype: Numo::DFloat, use_gamma: true, use_beta: true, initial_gamma: nil, initial_beta: nil)
+          super()
+          @avg_mean = dtype.zeros(size)
+          register_persistent('avg_mean')
+          @avg_var = dtype.zeros(size)
+          register_persistent('avg_var')
+          @n = 0
+          register_persistent('n')
+          @decay = decay
+          @eps = eps
+
+          init_scope do
+            if use_gamma
+              initial_gamma = 1 if initial_gamma.nil?
+              initial_gamma = Chainer::Initializers.get_initializer(initial_gamma)
+              initial_gamma.dtype = dtype
+              @gamma = Chainer::Parameter.new(initializer: initial_gamma, shape: size)
+            end
+            if use_beta
+              initial_beta = 0 if initial_beta.nil?
+              initial_beta = Chainer::Initializers.get_initializer(initial_beta)
+              initial_beta.dtype = dtype
+              @beta = Chainer::Parameter.new(initializer: initial_beta, shape: size)
+            end
+          end
+        end
+
+        # Invokes the forward propagation of BatchNormalization.
+        # In training mode, the BatchNormalization computes moving averages of
+        # mean and variance for evaluation during training, and normalizes the input using batch statistics.
+        # @param [Chainer::Variable] x Input variable.
+        # @param [boolean] finetune If it is in the training mode and `finetune` is `true`,
+        #   BatchNormalization runs in fine-tuning mode;
+        #   it accumulates the input array to compute population statistics for normalization,
+        #   and normalizes the input using batch statistics.
+        def call(x, finetune: false)
+          if self.instance_variable_defined?(:@gamma)
+            gamma = @gamma
+          else
+            gamma = Chainer::Variable.new(x.data.class.ones(@avg_mean.shape))
+          end
+
+          if self.instance_variable_defined?(:@beta)
+            beta = @beta
+          else
+            beta = Chainer::Variable.new(x.data.class.zeros(*@avg_mean.shape))
+          end
+
+          if Chainer.configuration.train
+            if finetune
+              @n += 1
+              decay = 1.0 - 1.0 / @n
+            else
+              decay = @decay
+            end
+
+            func = Chainer::Functions::Normalization::BatchNormalizationFunction.new(eps: @eps, mean: @avg_mean, var: @avg_var, decay: decay)
+            ret = func.(x, gamma, beta)
+
+            @avg_mean[false] = func.running_mean
+            @avg_var[false] = func.running_var
+          else
+            mean = Chainer::Variable(@avg_mean)
+            var = Chainer::Variable(@avg_var)
+            ret = Chainer::Functions::Normalization::BatchNormalizationFunction.fixed_batch_normalization(x, gamma, beta, mean, var, eps: @eps)
+          end
+
+          ret
+        end
+
+        # Resets the population count for collecting population statistics.
+        # This method can be skipped if it is the first time to use the fine-tuning mode.
+        # Otherwise, this method should be called before starting the fine-tuning mode again.
+        def start_finetuning
+          @n = 0
+        end
+      end
+    end
+  end
+end
+
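A minimal sketch of how the three modes map onto calls. The layer size and input shape are illustrative, and the last step assumes `Chainer.configuration.train` is writable (as in upstream Chainer) so that the stored population statistics are used:

```ruby
require 'chainer'

bn = Chainer::Links::Normalization::BatchNormalization.new(3)
x  = Chainer::Variable.new(Numo::DFloat.new(8, 3).rand)

# Training mode: normalize by batch statistics and update the moving averages.
y_train = bn.(x)

# Fine-tuning mode: accumulate exact population statistics; call
# start_finetuning first when re-entering this mode.
bn.start_finetuning
y_tune = bn.(x, finetune: true)

# Testing mode: with the train flag off, the stored avg_mean/avg_var are used.
Chainer.configuration.train = false
y_eval = bn.(x)
```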
data/lib/chainer/optimizer.rb
CHANGED
@@ -81,7 +81,7 @@ module Chainer
       # try to initialize the state to retrieve state entries
       @state = {}
       self_copy = self.dup
-      arr = Numo::
+      arr = Numo::SFloat.new(1)
       self_copy.init_state(Chainer::Variable.new(arr, grad: arr))
       @state.keys.each do |key|
         @state[key] = serializer.(key.to_s, nil)
@@ -104,4 +104,43 @@ module Chainer
       @state.select! { |_, v| v.kind_of?(Numo::NArray) }
     end
   end
+
+  class HyperparameterProxy
+    def initialize(obj, attr_name)
+      obj.class.class_eval do
+        obj.class.send(:define_method, attr_name) do
+          self.instance_variable_get(:@hyperparam).instance_variable_get("@#{attr_name}")
+        end
+
+        obj.class.send(:define_method, "#{attr_name}=") do |val|
+          self.instance_variable_get(:@hyperparam).instance_variable_set("@#{attr_name}", val)
+        end
+      end
+    end
+  end
+
+  # Optimizer/UpdateRule hook function for weight decay regularization.
+  #
+  # This hook function adds a scaled parameter to the corresponding gradient.
+  # It can be used as a regularization.
+  #
+  # @param [Float] rate Coefficient for the weight decay
+  class WeightDecay
+    def self.name
+      "WeightDecay"
+    end
+
+    def self.call_for_each_param
+      true
+    end
+
+    def initialize(rate)
+      @rate = rate
+    end
+
+    def call(rule, param)
+      return if param.data.nil? || param.grad.nil?
+      param.grad += @rate * param.data
+    end
+  end
 end
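`HyperparameterProxy` defines reader/writer methods on an optimizer that delegate to its `@hyperparam` object (the new `MomentumSGD` below uses it for `lr` and `momentum`), and the `WeightDecay` hook adds `rate * data` to each parameter's gradient before the update. A small sketch of the hook's effect in isolation; the `Struct` stands in for a parameter and the unused `rule` argument is passed as `nil`, both purely for illustration:

```ruby
require 'chainer'

FakeParam = Struct.new(:data, :grad)  # illustrative stand-in for a parameter

param = FakeParam.new(Numo::SFloat[1.0, -2.0], Numo::SFloat[0.1, 0.1])
hook  = Chainer::WeightDecay.new(0.0005)

hook.call(nil, param)
p param.grad  # => grad + 0.0005 * data, i.e. roughly [0.1005, 0.099]
```

In practice the hook would be attached to an optimizer rather than called directly; the direct call above just isolates the arithmetic.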
data/lib/chainer/optimizers/momentum_sgd.rb
ADDED
@@ -0,0 +1,49 @@
+module Chainer
+  module Optimizers
+    # Update rule for the classical momentum SGD
+    class MomentumSGDRule < UpdateRule
+      def initialize(parent_hyperparam: nil, lr: nil, momentum: nil)
+        hyperparam = Hyperparameter.new
+        hyperparam.instance_variable_set('@lr', 0.01)
+        hyperparam.instance_variable_set('@momentum', 0.9)
+
+        super(parent_hyperparam: parent_hyperparam || hyperparam)
+
+        @hyperparam.instance_variable_set('@lr', lr) if lr
+        @hyperparam.instance_variable_set('@momentum', momentum) if momentum
+      end
+
+      def init_state(param)
+        @state[:v] = param.data.new_zeros
+      end
+
+      def update_core_cpu(param)
+        grad = param.grad
+        return if grad.nil?
+
+        v = @state[:v]
+        v *= @hyperparam.momentum
+        v -= @hyperparam.lr * grad
+        param.data += v
+      end
+    end
+
+    # Momentum SGD optimizer
+    class MomentumSGD < GradientMethod
+      attr_accessor :lr, :momentum
+      # @param [Float] lr Learning rate
+      # @param [Float] momentum Exponential decay rate of the first order moment
+      def initialize(lr: nil, momentum: nil)
+        super()
+        @hyperparam.instance_variable_set('@lr', lr || 0.01)
+        @hyperparam.instance_variable_set('@momentum', momentum || 0.9)
+        Chainer::HyperparameterProxy.new(self, "lr")
+        Chainer::HyperparameterProxy.new(self, "momentum")
+      end
+
+      def create_update_rule
+        MomentumSGDRule.new(parent_hyperparam: @hyperparam)
+      end
+    end
+  end
+end
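The update rule above is classical momentum: `v = momentum * v - lr * grad`, then `param += v`. A rough sketch of wiring it up, reusing the new `Convolution2D` link as a stand-in model; the `setup` call follows the pattern of the gem's bundled example scripts and is an assumption here:

```ruby
require 'chainer'

model = Chainer::Links::Connection::Convolution2D.new(3, 7, 5)

optimizer = Chainer::Optimizers::MomentumSGD.new(lr: 0.05, momentum: 0.9)
optimizer.setup(model)

# lr and momentum are exposed through HyperparameterProxy, so they read
# (and write) like plain accessors.
p optimizer.lr      # => 0.05
optimizer.lr = 0.01
```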
data/lib/chainer/parameter.rb
CHANGED
@@ -15,7 +15,7 @@ module Chainer
       else
         super(name: name)
         @initializer = initializer
-        dtype = initializer.respond_to?(:dtype) ? initializer.dtype : '
+        dtype = initializer.respond_to?(:dtype) ? initializer.dtype : 'SFloat'
         @grad_initializer = Chainer::Initializers.nan()
       end
     else
data/lib/chainer/serializers/marshal.rb
CHANGED
@@ -3,10 +3,14 @@ module Chainer
     class MarshalSerializer < Chainer::Serializer
       attr_accessor :target, :path
 
-
+      # @param [string] file_path Target file path
+      # @param [Object] obj Object to be serialized
+      def self.save_file(file_path, obj)
         s = self.new
         s.save(obj)
-
+        File.open(file_path, 'wb') do |f|
+          Marshal.dump(s.target, f)
+        end
       end
 
       def initialize(target: nil, path: "")
@@ -24,7 +28,7 @@ module Chainer
         arr = Numo::Bit[1]
       elsif value.is_a?(FalseClass)
         arr = Numo::Bit[0]
-      elsif value.instance_of?(String)
+      elsif value.instance_of?(String) || value.nil?
         arr = value
       else
         arr = Numo::NArray.cast(value)
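With this change `save_file` actually writes the serialized target to disk via `Marshal.dump`, and `nil` values now pass through serialization unchanged. A one-line usage sketch; the file name and the model link are illustrative:

```ruby
require 'chainer'

model = Chainer::Links::Connection::Convolution2D.new(3, 7, 5)

# Serializes the link and writes the result with Marshal.dump.
Chainer::Serializers::MarshalSerializer.save_file('conv2d.model', model)
```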
data/lib/chainer/testing/array.rb
ADDED
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+module Chainer
+  module Testing
+    def assert_allclose(expect, actual, atol: 1e-5, rtol: 1e-4)
+      # Asserts if some corresponding element of x and y differs too much.
+      #
+      # This function can handle both CPU and GPU arrays simultaneously.
+      #
+      # Args:
+      #   expect: Left-hand-side array.
+      #   actual: Right-hand-side array.
+      #   atol (float): Absolute tolerance.
+      #   rtol (float): Relative tolerance.
+      #
+      expect = Utils::Array.force_array(expect)
+      actual = Utils::Array.force_array(actual)
+
+      # If the expected value is a 0-dim array, extend its dimensions to match the actual array.
+      if (expect.shape != actual.shape) and (expect.ndim == 0)
+        expect = actual.class.new(actual.shape).fill(expect.to_f)
+      end
+
+      actual.each_with_index{|actual_val, *i|
+        if (expect[*i].to_f - actual_val.to_f).abs > atol + rtol * expect[*i].abs
+          raise "assert_allclose Error\n expect: #{expect.inspect}\n actual : #{actual.inspect}\n (#{i})=> #{(expect - actual).abs.max()} > #{atol + rtol * expect[*i].abs}"
+        end
+      }
+    end
+    module_function :assert_allclose
+  end
+end
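`Chainer::Testing.assert_allclose` raises unless every element of `actual` lies within `atol + rtol * |expect|` of the corresponding element of `expect`. A short illustrative check (the explicit `require` of the testing helper is an assumption, in case the gem's entry point does not load it):

```ruby
require 'chainer'
require 'chainer/testing/array'

expect = Numo::DFloat[1.0, 2.0, 3.0]

# Passes: the difference (1e-5) is within atol + rtol * |expect|.
Chainer::Testing.assert_allclose(expect, Numo::DFloat[1.0, 2.00001, 3.0])

# Raises: 0.1 is far outside the default tolerances.
begin
  Chainer::Testing.assert_allclose(expect, Numo::DFloat[1.0, 2.1, 3.0])
rescue RuntimeError => e
  puts e.message.lines.first  # => "assert_allclose Error"
end
```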