red-chainer 0.2.1 → 0.3.0
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/examples/cifar/models/vgg.rb +84 -0
- data/examples/cifar/train_cifar.rb +70 -0
- data/examples/iris.rb +103 -0
- data/lib/chainer.rb +17 -0
- data/lib/chainer/configuration.rb +2 -1
- data/lib/chainer/cuda.rb +18 -0
- data/lib/chainer/dataset/convert.rb +30 -9
- data/lib/chainer/datasets/cifar.rb +56 -0
- data/lib/chainer/datasets/mnist.rb +3 -3
- data/lib/chainer/datasets/tuple_dataset.rb +3 -1
- data/lib/chainer/function.rb +1 -0
- data/lib/chainer/functions/activation/leaky_relu.rb +4 -4
- data/lib/chainer/functions/activation/log_softmax.rb +4 -4
- data/lib/chainer/functions/activation/relu.rb +3 -4
- data/lib/chainer/functions/activation/sigmoid.rb +4 -4
- data/lib/chainer/functions/activation/tanh.rb +5 -5
- data/lib/chainer/functions/connection/convolution_2d.rb +92 -0
- data/lib/chainer/functions/connection/linear.rb +1 -1
- data/lib/chainer/functions/loss/mean_squared_error.rb +34 -0
- data/lib/chainer/functions/loss/softmax_cross_entropy.rb +67 -40
- data/lib/chainer/functions/math/identity.rb +26 -0
- data/lib/chainer/functions/noise/dropout.rb +45 -0
- data/lib/chainer/functions/normalization/batch_normalization.rb +136 -0
- data/lib/chainer/functions/pooling/max_pooling_2d.rb +57 -0
- data/lib/chainer/functions/pooling/pooling_2d.rb +20 -0
- data/lib/chainer/gradient_check.rb +240 -0
- data/lib/chainer/initializer.rb +2 -0
- data/lib/chainer/initializers/constant.rb +1 -1
- data/lib/chainer/initializers/init.rb +5 -1
- data/lib/chainer/initializers/normal.rb +1 -1
- data/lib/chainer/iterators/serial_iterator.rb +1 -1
- data/lib/chainer/link.rb +11 -0
- data/lib/chainer/links/connection/convolution_2d.rb +98 -0
- data/lib/chainer/links/normalization/batch_normalization.rb +106 -0
- data/lib/chainer/optimizer.rb +40 -1
- data/lib/chainer/optimizers/momentum_sgd.rb +49 -0
- data/lib/chainer/parameter.rb +1 -1
- data/lib/chainer/serializers/marshal.rb +7 -3
- data/lib/chainer/testing/array.rb +32 -0
- data/lib/chainer/training/extensions/exponential_shift.rb +78 -0
- data/lib/chainer/training/extensions/snapshot.rb +1 -1
- data/lib/chainer/training/standard_updater.rb +4 -0
- data/lib/chainer/training/trainer.rb +1 -1
- data/lib/chainer/utils/array.rb +13 -2
- data/lib/chainer/utils/conv.rb +59 -0
- data/lib/chainer/utils/math.rb +72 -0
- data/lib/chainer/utils/variable.rb +7 -3
- data/lib/chainer/version.rb +1 -1
- data/red-chainer.gemspec +1 -0
- metadata +37 -3
data/lib/chainer/initializer.rb
CHANGED
@@ -1,7 +1,11 @@
 module Chainer
   module Initializers
     def self.generate_array(initializer, shape)
-      array = Numo::SFloat.new(shape).rand
+      klass = Numo::SFloat
+      if initializer.respond_to?(:dtype) && initializer.dtype
+        klass = initializer.dtype
+      end
+      array = klass.new(shape).rand
       initializer.(array)
     end
 
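For orientation, a minimal usage sketch of the new dtype handling (hypothetical call site; it assumes the dtype accessor added to the initializer classes in this release):

  init = Chainer::Initializers.get_initializer(0)          # constant initializer filling with 0
  init.dtype = Numo::DFloat if init.respond_to?(:dtype=)   # assumption: dtype is writable, as the BatchNormalization link below does
  w = Chainer::Initializers.generate_array(init, [3, 3])
  # w is built as a Numo::DFloat here; without a dtype it falls back to Numo::SFloat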
data/lib/chainer/link.rb
CHANGED
@@ -39,6 +39,17 @@ module Chainer
       end
     end
 
+    # Registers an attribute of a given name as a persistent value.
+    # This is a convenient method to register an existing attribute as a persistent value.
+    # If `name` has been already registered as a parameter,
+    # this method removes it from the list of parameter names and re-registers it as a persistent value.
+    #
+    # @param [string] name Name of the attribute to be registered.
+    def register_persistent(name)
+      @persistent << name
+      @params.delete(name)
+    end
+
     def params(include_uninit: true)
       @params.map do |name|
         data = self.instance_variable_get(name).data
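A minimal sketch of how a link subclass can use the new register_persistent (hypothetical MyLink; the pattern mirrors the BatchNormalization link below):

  class MyLink < Chainer::Link
    def initialize
      super()
      # serialized with the link, but not treated as a learnable parameter
      @count = 0
      register_persistent('count')
    end
  end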
data/lib/chainer/links/connection/convolution_2d.rb
ADDED
@@ -0,0 +1,98 @@
+module Chainer
+  module Links
+    module Connection
+      class Convolution2D < ::Chainer::Link
+        # Two-dimensional convolutional layer.
+        #
+        # This link wraps the :func:`chainer.functions.convolution_2d` function
+        # and holds the filter weight and bias vector as parameters.
+        #
+        # @param [integer or nil] in_channels Number of channels of input arrays.
+        #   If `nil`, parameter initialization will be deferred until the first forward data pass at which time the size will be determined.
+        # @param [integer] out_channels Number of channels of output arrays.
+        # @param [integer or 2-d int array] ksize Size of filters (a.k.a. kernels).
+        # @param [integer or 2-d int array] stride Stride of filter applications.
+        # @param [integer or 2-d int array] pad Spatial padding width for input arrays.
+        # @param [boolean] nobias If `true`, then this link does not use the bias term.
+        # @param [Numo::NArray] initialW Initial weight value. If `nil`, the default initializer is used.
+        # @param [Numo::NArray] initial_bias Initial bias value. If `nil`, the bias is set to 0.
+        #
+        # Example
+        # There are several ways to make a Convolution2D link.
+        # Let an input vector `x` be:
+        # > x = Numo::DFloat.new(1, 3, 10, 10).seq
+        #
+        # 1. Give the first three arguments explicitly:
+        # > l = Chainer::Links::Connection::Convolution2D.new(3, 7, 5)
+        # > y = l.(x)
+        # > y.shape
+        # [1, 7, 6, 6]
+        #
+        # 2. Omit `in_channels` or fill it with `nil`:
+        # The below two cases are the same.
+        #
+        # > l = Chainer::Links::Connection::Convolution2D.new(7, 5)
+        # > y = l.(x)
+        # > y.shape
+        # [1, 7, 6, 6]
+        #
+        # > l = Chainer::Links::Connection::Convolution2D.new(nil, 7, 5)
+        # > y = l.(x)
+        # > y.shape
+        # [1, 7, 6, 6]
+        #
+        # When you omit the first argument, you need to specify the other subsequent arguments from `stride` as keyword auguments.
+        #
+        # > l = Chainer::Links::Connection::Convolution2D.new(7, 5, stride: 1, pad: 0)
+        # > y = l.(x)
+        # > y.shape
+        # [1, 7, 6, 6]
+        def initialize(in_channels, out_channels, ksize=nil, stride: 1, pad: 0, nobias: false, initial_w: nil, initial_bias: nil)
+          super()
+          if ksize.nil?
+            out_channels, ksize, in_channels = in_channels, out_channels, nil
+          end
+
+          @ksize = ksize
+          @stride = stride.is_a?(Array) ? stride : [stride, stride]
+          @pad = pad.is_a?(Array) ? pad : [pad, pad]
+          @out_channels = out_channels
+
+          init_scope do
+            w_initializer = Chainer::Initializers.get_initializer(initial_w)
+            @w = Chainer::Parameter.new(initializer: w_initializer)
+            if in_channels
+              initialize_params(in_channels)
+            end
+
+            if nobias
+              @b = nil
+            else
+              initial_bias = 0 if initial_bias.nil?
+              bias_initializer = Chainer::Initializers.get_initializer(initial_bias)
+              @b = Chainer::Parameter.new(initializer: bias_initializer, shape: out_channels)
+            end
+          end
+        end
+
+        # Applies the convolution layer.
+        # @param [Chainer::Variable] x Input image.
+        # @return [Chainer::Variable] Output of the convolution.
+        def call(x)
+          initialize_params(x.shape[1]) if @w.data.nil?
+          Chainer::Functions::Connection::Convolution2DFunction.convolution_2d(x, @w, b: @b, stride: @stride, pad: @pad)
+        end
+
+        private
+
+        def initialize_params(in_channels)
+          kh, kw = @ksize.is_a?(Array) ? @ksize : [@ksize, @ksize]
+          w_shape = [@out_channels, in_channels, kh, kw]
+          @w.init(w_shape)
+        end
+      end
+    end
+  end
+end
+
+
data/lib/chainer/links/normalization/batch_normalization.rb
ADDED
@@ -0,0 +1,106 @@
+module Chainer
+  module Links
+    module Normalization
+      class BatchNormalization < Chainer::Link
+        # Batch normalization layer on outputs of linear or convolution functions.
+        #
+        # It runs in three modes: training mode, fine-tuning mode, and testing mode.
+        # In training mode, it normalizes the input by *batch statistics*. It also
+        # maintains approximated population statistics by moving averages, which can
+        # be used for instant evaluation in testing mode.
+        #
+        # In fine-tuning mode, it accumulates the input to compute *population
+        # statistics*. In order to correctly compute the population statistics, a
+        # user must use this mode to feed mini-batches running through whole training dataset.
+        #
+        # In testing mode, it uses pre-computed population statistics to normalize the input variable.
+        # The population statistics is approximated if it is computed by training mode,
+        # or accurate if it is correctly computed by fine-tuning mode.
+        #
+        # @param [integer or int array] size Size (or shape) of channel dimensions.
+        # @param [float] decay Decay rate of moving average. It is used on training.
+        # @param [float] eps Epsilon value for numerical stability.
+        # @param [Numo::NArray.dtype] dtype Type to use in computing.
+        # @param [boolean] use_gamma If `true`, use scaling parameter. Otherwise, use unit(1) which makes no effect.
+        # @param [boolean] use_beta If `true`, use shifting parameter. Otherwise, use unit(0) which makes no effect.
+        def initialize(size, decay: 0.9, eps: 2e-5, dtype: Numo::DFloat, use_gamma: true, use_beta: true, initial_gamma: nil, initial_beta: nil)
+          super()
+          @avg_mean = dtype.zeros(size)
+          register_persistent('avg_mean')
+          @avg_var = dtype.zeros(size)
+          register_persistent('avg_var')
+          @n = 0
+          register_persistent('n')
+          @decay = decay
+          @eps = eps
+
+          init_scope do
+            if use_gamma
+              initial_gamma = 1 if initial_gamma.nil?
+              initial_gamma = Chainer::Initializers.get_initializer(initial_gamma)
+              initial_gamma.dtype = dtype
+              @gamma = Chainer::Parameter.new(initializer: initial_gamma, shape: size)
+            end
+            if use_beta
+              initial_beta = 0 if initial_beta.nil?
+              initial_beta = Chainer::Initializers.get_initializer(initial_beta)
+              initial_beta.dtype = dtype
+              @beta = Chainer::Parameter.new(initializer: initial_beta, shape: size)
+            end
+          end
+        end
+
+        # Invokes the forward propagation of BatchNormalization.
+        # In training mode, the BatchNormalization computes moving averages of
+        # mean and variance for evaluatino during training, and normalizes the input using batch statistics.
+        # @param [Chainer::Variable] x Input variable.
+        # @param [boolean] finetune If it is in the training mode and `finetune` is `True`,
+        #   BatchNormalization runs in fine-tuning mode;
+        #   it accumulates the input array to compute population statistics for normalization,
+        #   and normalizes the input using batch statistics.
+        def call(x, finetune: false)
+          if self.instance_variable_defined?(:@gamma)
+            gamma = @gamma
+          else
+            gamma = Chainer::Variable.new(x.data.class.ones(@avg_mean.shape))
+          end
+
+          if self.instance_variable_defined?(:@beta)
+            beta = @beta
+          else
+            beta = Chainer::Variable.new(x.data.class.zeros(*@avg_mean.shape))
+          end
+
+          if Chainer.configuration.train
+            if finetune
+              @n += 1
+              decay = 1.0 - 1.0 / @n
+            else
+              decay = @decay
+            end
+
+            func = Chainer::Functions::Normalization::BatchNormalizationFunction.new(eps: @eps, mean: @avg_mean, var: @avg_var, decay: decay)
+            ret = func.(x, gamma, beta)
+
+            @avg_mean[false] = func.running_mean
+            @avg_var[false] = func.running_var
+          else
+            mean = Chainer::Variable(@avg_mean)
+            var = Chainer::Variable(@avg_var)
+            ret = Chainer::Functions::Normalization::BatchNormalizationFunction.fixed_batch_normalization(x, gamma, beta, mean, var, eps: @eps)
+          end
+
+          ret
+        end
+
+        # Resets the population count for collecting population statistics.
+        # This method can be skipped if it is the first time to use the fine-tuning mode.
+        # Otherwise, this method should be called before starting the fine-tuning mode again.
+        def start_finetuning
+          @n = 0
+        end
+      end
+    end
+  end
+end
+
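A minimal usage sketch (hypothetical; it assumes Chainer.configuration.train can be toggled by the caller):

  bn = Chainer::Links::Normalization::BatchNormalization.new(3)
  x  = Chainer::Variable.new(Numo::DFloat.new(10, 3).rand)
  y_train = bn.(x)                     # training mode: batch statistics, moving averages updated
  Chainer.configuration.train = false  # assumption: the train flag is writable on the configuration
  y_test  = bn.(x)                     # testing mode: uses the stored @avg_mean / @avg_var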
data/lib/chainer/optimizer.rb
CHANGED
@@ -81,7 +81,7 @@ module Chainer
       # try to initialize the state to retrieve state entries
       @state = {}
       self_copy = self.dup
-      arr = Numo::
+      arr = Numo::SFloat.new(1)
       self_copy.init_state(Chainer::Variable.new(arr, grad: arr))
       @state.keys.each do |key|
         @state[key] = serializer.(key.to_s, nil)
@@ -104,4 +104,43 @@ module Chainer
       @state.select! { |_, v| v.kind_of?(Numo::NArray) }
     end
   end
+
+  class HyperparameterProxy
+    def initialize(obj, attr_name)
+      obj.class.class_eval do
+        obj.class.send(:define_method, attr_name) do
+          self.instance_variable_get(:@hyperparam).instance_variable_get("@#{attr_name}")
+        end
+
+        obj.class.send(:define_method, "#{attr_name}=") do |val|
+          self.instance_variable_get(:@hyperparam).instance_variable_set("@#{attr_name}", val)
+        end
+      end
+    end
+  end
+
+  # Optimizer/UpdateRule hook function for weight decay regularization
+  #
+  # This hook function adds a scaled parameter to the correspondeing gradient
+  # It can be used as a regularization
+  #
+  # @param [Float] rate Coefficient for the weight decay
+  class WeightDecay
+    def self.name
+      "WeightDecay"
+    end
+
+    def self.call_for_each_param
+      true
+    end
+
+    def initialize(rate)
+      @rate = rate
+    end
+
+    def call(rule, param)
+      return if param.data.nil? || param.grad.nil?
+      param.grad += @rate * param.data
+    end
+  end
 end
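A minimal sketch of wiring the new weight-decay hook into an optimizer (hypothetical; setup and add_hook are assumed to behave as in Python Chainer):

  optimizer = Chainer::Optimizers::MomentumSGD.new(lr: 0.01)
  optimizer.setup(model)                              # model: any Chainer::Link with parameters (assumed API)
  optimizer.add_hook(Chainer::WeightDecay.new(1e-4))  # adds rate * param.data to each param.grad (assumed add_hook API)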
data/lib/chainer/optimizers/momentum_sgd.rb
ADDED
@@ -0,0 +1,49 @@
+module Chainer
+  module Optimizers
+    # Update rule for the classical momentum SGD
+    class MomentumSGDRule < UpdateRule
+      def initialize(parent_hyperparam: nil, lr: nil, mementum: nil)
+        hyperparam = Hyperparameter.new
+        hyperparam.instance_variable_set('@lr', 0.01)
+        hyperparam.instance_variable_set('@momentum', 0.9)
+
+        super(parent_hyperparam: parent_hyperparam || hyperparam)
+
+        @hyperparam.instance_variable_set('@lr', lr) if lr
+        @hyperparam.instance_variable_set('@mementum', mementum) if mementum
+      end
+
+      def init_state(param)
+        @state[:v] = param.data.new_zeros
+      end
+
+      def update_core_cpu(param)
+        grad = param.grad
+        return if grad.nil?
+
+        v = @state[:v]
+        v *= @hyperparam.momentum
+        v -= @hyperparam.lr * grad
+        param.data += v
+      end
+    end
+
+    # Momentum SGD optimizer
+    class MomentumSGD < GradientMethod
+      attr_accessor :lr, :momentum
+      # @param [Float] lr Learning rate
+      # @param [Float] momentum Exponential decay rate of the first order moment
+      def initialize(lr: nil, momentum: nil)
+        super()
+        @hyperparam.instance_variable_set('@lr', lr || 0.01)
+        @hyperparam.instance_variable_set('@momentum', momentum || 0.9)
+        Chainer::HyperparameterProxy.new(self, "lr")
+        Chainer::HyperparameterProxy.new(self, "momentum")
+      end
+
+      def create_update_rule
+        MomentumSGDRule.new(parent_hyperparam: @hyperparam)
+      end
+    end
+  end
+end
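A minimal construction sketch (hypothetical usage): hyperparameters live in a Hyperparameter object and are exposed through HyperparameterProxy, so lr and momentum read and write the same values the update rule sees.

  optimizer = Chainer::Optimizers::MomentumSGD.new(lr: 0.05)
  optimizer.lr        # => 0.05, read through HyperparameterProxy
  optimizer.momentum  # => 0.9, the default set in initialize
  optimizer.lr = 0.01 # written back into @hyperparam and picked up by MomentumSGDRule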
data/lib/chainer/parameter.rb
CHANGED
@@ -15,7 +15,7 @@ module Chainer
       else
         super(name: name)
         @initializer = initializer
-        dtype = initializer.respond_to?(:dtype) ? initializer.dtype : '
+        dtype = initializer.respond_to?(:dtype) ? initializer.dtype : 'SFloat'
         @grad_initializer = Chainer::Initializers.nan()
       end
     else
data/lib/chainer/serializers/marshal.rb
CHANGED
@@ -3,10 +3,14 @@ module Chainer
     class MarshalSerializer < Chainer::Serializer
       attr_accessor :target, :path
 
-
+      # @param [string] file_path Target file path
+      # @param [Object] obj Object to be serialized
+      def self.save_file(file_path, obj)
         s = self.new
         s.save(obj)
-
+        File.open(file_path, 'wb') do |f|
+          Marshal.dump(s.target, f)
+        end
       end
 
       def initialize(target: nil, path: "")
@@ -24,7 +28,7 @@ module Chainer
           arr = Numo::Bit[1]
         elsif value.is_a?(FalseClass)
          arr = Numo::Bit[0]
-        elsif value.instance_of?(String)
+        elsif value.instance_of?(String) || value.nil?
          arr = value
        else
          arr = Numo::NArray.cast(value)
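With this change save_file actually writes the marshalled tree to disk; a minimal usage sketch (hypothetical file name):

  # serialize a trained link (or any serializable object) to disk via Marshal.dump
  Chainer::Serializers::MarshalSerializer.save_file('model.marshal', model)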
data/lib/chainer/testing/array.rb
ADDED
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+module Chainer
+  module Testing
+    def assert_allclose(expect, actual, atol: 1e-5, rtol: 1e-4)
+      # Asserts if some corresponding element of x and y differs too much.
+      #
+      # This function can handle both CPU and GPU arrays simultaneously.
+      #
+      # Args:
+      #   expect: Left-hand-side array.
+      #   actual: Right-hand-side array.
+      #   atol (float): Absolute tolerance.
+      #   rtol (float): Relative tolerance.
+      #
+      expect = Utils::Array.force_array(expect)
+      actual = Utils::Array.force_array(actual)
+
+      # If the expected is 0-dim arrary, extend the dimension to the actual.
+      if (expect.shape != actual.shape) and (expect.ndim == 0)
+        expect = actual.class.new(actual.shape).fill(expect.to_f)
+      end
+
+      actual.each_with_index{|actual_val, *i|
+        if (expect[*i].to_f - actual_val.to_f).abs > atol + rtol * expect[*i].abs
+          raise "assert_allclose Error\n expect: #{expect.inspect}\n actual : #{actual.inspect}\n (#{i})=> #{(expect - actual).abs.max()} > #{atol + rtol * expect[*i].abs}"
+        end
+      }
+    end
+    module_function :assert_allclose
+  end
+end
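A minimal sketch of the new test helper (hypothetical values): it raises unless every element of actual is within atol + rtol * |expect| of the corresponding element of expect.

  expect = Numo::SFloat[1.0, 2.0, 3.0]
  actual = Numo::SFloat[1.00001, 2.0, 3.0]
  Chainer::Testing.assert_allclose(expect, actual)        # passes with the default tolerances
  Chainer::Testing.assert_allclose(expect, actual * 2.0)  # raises "assert_allclose Error ..."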