ruby-dnn 0.9.4 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +39 -3
- data/Rakefile +6 -0
- data/examples/cifar100_example.rb +71 -0
- data/examples/cifar10_example.rb +2 -1
- data/examples/iris_example.rb +2 -1
- data/examples/mnist_conv2d_example.rb +2 -1
- data/examples/mnist_example.rb +2 -3
- data/examples/mnist_lstm_example.rb +2 -1
- data/ext/cifar_loader/cifar_loader.c +77 -0
- data/ext/cifar_loader/extconf.rb +3 -0
- data/lib/dnn.rb +1 -0
- data/lib/dnn/{lib/cifar10.rb → cifar10.rb} +9 -11
- data/lib/dnn/cifar100.rb +49 -0
- data/lib/dnn/core/activations.rb +28 -24
- data/lib/dnn/core/cnn_layers.rb +216 -94
- data/lib/dnn/core/dataset.rb +21 -5
- data/lib/dnn/core/initializers.rb +3 -3
- data/lib/dnn/core/layers.rb +81 -150
- data/lib/dnn/core/losses.rb +88 -49
- data/lib/dnn/core/model.rb +97 -74
- data/lib/dnn/core/normalizations.rb +72 -0
- data/lib/dnn/core/optimizers.rb +171 -78
- data/lib/dnn/core/regularizers.rb +92 -22
- data/lib/dnn/core/rnn_layers.rb +146 -121
- data/lib/dnn/core/utils.rb +4 -3
- data/lib/dnn/{lib/downloader.rb → downloader.rb} +5 -1
- data/lib/dnn/{lib/image.rb → image.rb} +1 -1
- data/lib/dnn/{lib/iris.rb → iris.rb} +1 -1
- data/lib/dnn/{lib/mnist.rb → mnist.rb} +4 -3
- data/lib/dnn/version.rb +1 -1
- data/ruby-dnn.gemspec +1 -1
- metadata +13 -12
- data/API-Reference.ja.md +0 -978
- data/LIB-API-Reference.ja.md +0 -97
- data/ext/cifar10_loader/cifar10_loader.c +0 -44
- data/ext/cifar10_loader/extconf.rb +0 -3
data/lib/dnn/core/model.rb
CHANGED
@@ -22,14 +22,14 @@ module DNN
     # @return [DNN::Model]
     def self.load_json(json_str)
       hash = JSON.parse(json_str, symbolize_names: true)
-      model = self.
-      model.compile(Utils.
+      model = self.from_hash(hash)
+      model.compile(Utils.from_hash(hash[:optimizer]), Utils.from_hash(hash[:loss]))
       model
     end
 
-    def self.
+    def self.from_hash(hash)
       model = self.new
-      model.layers = hash[:layers].map { |hash_layer| Utils.
+      model.layers = hash[:layers].map { |hash_layer| Utils.from_hash(hash_layer) }
       model
     end
 
@@ -97,63 +97,61 @@ module DNN
     # @param [DNN::Layers::Layer] layer Layer to add to the model.
     # @return [DNN::Model] return self.
     def <<(layer)
-
-
-      # raise TypeError.new("layer is not an instance of the DNN::Layers::Layer class or DNN::Model class.")
-      # end
-      unless layer.is_a?(Layers::Layer)
-        raise TypeError.new("layer:#{layer.class.name} is not an instance of the DNN::Layers::Layer class.")
+      if !layer.is_a?(Layers::Layer) && !layer.is_a?(Model)
+        raise TypeError.new("layer is not an instance of the DNN::Layers::Layer class or DNN::Model class.")
       end
       @layers << layer
       self
     end
 
-    # Set optimizer and
+    # Set optimizer and loss_func to model and build all layers.
     # @param [DNN::Optimizers::Optimizer] optimizer Optimizer to use for learning.
-    # @param [DNN::Losses::Loss]
-    def compile(optimizer,
+    # @param [DNN::Losses::Loss] loss_func Loss function to use for learning.
+    def compile(optimizer, loss_func)
       raise DNN_Error.new("The model is already compiled.") if compiled?
       unless optimizer.is_a?(Optimizers::Optimizer)
         raise TypeError.new("optimizer:#{optimizer.class} is not an instance of DNN::Optimizers::Optimizer class.")
       end
-      unless
-        raise TypeError.new("
+      unless loss_func.is_a?(Losses::Loss)
+        raise TypeError.new("loss_func:#{loss_func.class} is not an instance of DNN::Losses::Loss class.")
       end
       @compiled = true
       layers_check
       @optimizer = optimizer
-      @
+      @loss_func = loss_func
       build
       layers_shape_check
     end
 
-    # Set optimizer and
+    # Set optimizer and loss_func to model and recompile. But does not build layers.
     # @param [DNN::Optimizers::Optimizer] optimizer Optimizer to use for learning.
-    # @param [DNN::Losses::Loss]
-    def recompile(optimizer,
+    # @param [DNN::Losses::Loss] loss_func Loss function to use for learning.
+    def recompile(optimizer, loss_func)
       unless optimizer.is_a?(Optimizers::Optimizer)
         raise TypeError.new("optimizer:#{optimizer.class} is not an instance of DNN::Optimizers::Optimizer class.")
       end
-      unless
-        raise TypeError.new("
+      unless loss_func.is_a?(Losses::Loss)
+        raise TypeError.new("loss_func:#{loss_func.class} is not an instance of DNN::Losses::Loss class.")
      end
       @compiled = true
       layers_check
       @optimizer = optimizer
-      @
+      @loss_func = loss_func
       layers_shape_check
     end
 
     def build(super_model = nil)
       @super_model = super_model
       shape = if super_model
-        super_model.output_shape
+        super_model.get_prev_layer(self).output_shape
       else
         @layers.first.build
       end
-      @layers[1..-1]
+      layers = super_model ? @layers : @layers[1..-1]
+      layers.each do |layer|
         if layer.is_a?(Model)
           layer.build(self)
+          layer.recompile(@optimizer, @loss_func)
         else
           layer.build(shape)
         end
@@ -174,13 +172,13 @@ module DNN
     # @return [DNN::Optimizers::Optimizer] optimizer Return the optimizer to use for learning.
     def optimizer
       raise DNN_Error.new("The model is not compiled.") unless compiled?
-      @optimizer
+      @optimizer
     end
 
     # @return [DNN::Losses::Loss] loss Return the loss to use for learning.
-    def
+    def loss_func
       raise DNN_Error.new("The model is not compiled.") unless compiled?
-      @
+      @loss_func
     end
 
     # @return [Bool] Returns whether the model is learning.
@@ -195,25 +193,31 @@ module DNN
     # @param [Integer] epochs Number of training.
     # @param [Integer] batch_size Batch size used for one training.
     # @param [Array or NilClass] test If you to test the model for every 1 epoch,
-    #
+    #                                 specify [x_test, y_test]. Don't test to the model, specify nil.
     # @param [Bool] verbose Set true to display the log. If false is set, the log is not displayed.
-    # @param [
-    # @
+    # @param [Lambda] before_epoch_cbk Process performed before one training.
+    # @param [Lambda] after_epoch_cbk Process performed after one training.
+    # @param [Lambda] before_batch_cbk Set the proc to be performed before batch processing.
+    # @param [Lambda] after_batch_cbk Set the proc to be performed after batch processing.
     def train(x, y, epochs,
               batch_size: 1,
               test: nil,
               verbose: true,
-
-
+              before_epoch_cbk: nil,
+              after_epoch_cbk: nil,
+              before_batch_cbk: nil,
+              after_batch_cbk: nil)
       raise DNN_Error.new("The model is not compiled.") unless compiled?
       check_xy_type(x, y)
       dataset = Dataset.new(x, y)
       num_train_datas = x.shape[0]
       (1..epochs).each do |epoch|
+        before_epoch_cbk.call(epoch) if before_epoch_cbk
         puts "【 epoch #{epoch}/#{epochs} 】" if verbose
         (num_train_datas.to_f / batch_size).ceil.times do |index|
-          x_batch, y_batch = dataset.
-          loss_value = train_on_batch(x_batch, y_batch,
+          x_batch, y_batch = dataset.next_batch(batch_size)
+          loss_value = train_on_batch(x_batch, y_batch,
+                                      before_batch_cbk: before_batch_cbk, after_batch_cbk: after_batch_cbk)
           if loss_value.is_a?(Numo::SFloat)
             loss_value = loss_value.mean
           elsif loss_value.nan?
@@ -236,11 +240,12 @@ module DNN
           print log if verbose
         end
         if verbose && test
-          acc = accurate(test[0], test[1], batch_size,
-
+          acc, test_loss = accurate(test[0], test[1], batch_size,
+                                    before_batch_cbk: before_batch_cbk, after_batch_cbk: after_batch_cbk)
+          print " accurate: #{acc}, test loss: #{sprintf('%.8f', test_loss)}"
         end
         puts "" if verbose
-
+        after_epoch_cbk.call(epoch) if after_epoch_cbk
       end
     end
 
@@ -248,51 +253,58 @@ module DNN
     # Compile the model before use this method.
     # @param [Numo::SFloat] x Input training data.
     # @param [Numo::SFloat] y Output training data.
+    # @param [Lambda] before_batch_cbk Set the proc to be performed before batch processing.
+    # @param [Lambda] after_batch_cbk Set the proc to be performed after batch processing.
     # @return [Float | Numo::SFloat] Return loss value in the form of Float or Numo::SFloat.
-
-    def train_on_batch(x, y, &batch_proc)
+    def train_on_batch(x, y, before_batch_cbk: nil, after_batch_cbk: nil)
       raise DNN_Error.new("The model is not compiled.") unless compiled?
       check_xy_type(x, y)
       input_data_shape_check(x, y)
-      x, y =
-
-      loss_value = @
-
-      backward(
-      @loss.regularizes_backward(get_all_layers)
+      x, y = before_batch_cbk.call(x, y, true) if before_batch_cbk
+      x = forward(x, true)
+      loss_value = @loss_func.forward(x, y, get_all_layers)
+      dy = @loss_func.backward(y, get_all_layers)
+      backward(dy)
       update
+      after_batch_cbk.call(loss_value, true) if after_batch_cbk
       loss_value
     end
 
     # Evaluate model and get accurate of test data.
     # @param [Numo::SFloat] x Input test data.
     # @param [Numo::SFloat] y Output test data.
-    # @
-
+    # @param [Lambda] before_batch_cbk Set the proc to be performed before batch processing.
+    # @param [Lambda] after_batch_cbk Set the proc to be performed after batch processing.
+    # @return [Array] Returns the test data accurate and mean loss in the form [accurate, mean_loss].
+    def accurate(x, y, batch_size = 100, before_batch_cbk: nil, after_batch_cbk: nil)
       check_xy_type(x, y)
       input_data_shape_check(x, y)
       batch_size = batch_size >= x.shape[0] ? x.shape[0] : batch_size
+      dataset = Dataset.new(x, y, false)
       correct = 0
+      sum_loss = 0
       (x.shape[0].to_f / batch_size).ceil.times do |i|
-        x_batch =
-        y_batch =
-
-
-          break if k >= x.shape[0]
-          x_batch[j, false] = x[k, false]
-          y_batch[j, false] = y[k, false]
-        end
-        x_batch, y_batch = batch_proc.call(x_batch, y_batch) if batch_proc
-        out = forward(x_batch, false)
+        x_batch, y_batch = dataset.next_batch(batch_size)
+        x_batch, y_batch = before_batch_cbk.call(x_batch, y_batch, true) if before_batch_cbk
+        x_batch = forward(x_batch, false)
+        sigmoid = Sigmoid.new
         batch_size.times do |j|
           if @layers.last.output_shape == [1]
-
+            if @loss_func.is_a?(SigmoidCrossEntropy)
+              correct += 1 if sigmoid.forward(x_batch[j, 0]).round == y_batch[j, 0].round
+            else
+              correct += 1 if x_batch[j, 0].round == y_batch[j, 0].round
+            end
           else
-            correct += 1 if
+            correct += 1 if x_batch[j, true].max_index == y_batch[j, true].max_index
           end
         end
+        loss_value = @loss_func.forward(x_batch, y_batch, get_all_layers)
+        after_batch_cbk.call(loss_value, false) if after_batch_cbk
+        sum_loss += loss_value.is_a?(Numo::SFloat) ? loss_value.mean : loss_value
       end
-
+      mean_loss = sum_loss / batch_size
+      [correct.to_f / x.shape[0], mean_loss]
     end
 
     # Predict data.
@@ -310,6 +322,17 @@ module DNN
       predict(x.reshape(1, *x.shape))[0, false]
     end
 
+    # Get loss value.
+    # @param [Numo::SFloat] x Input data.
+    # @param [Numo::SFloat] y Output data.
+    # @return [Float | Numo::SFloat] Return loss value in the form of Float or Numo::SFloat.
+    def loss(x, y)
+      check_xy_type(x, y)
+      input_data_shape_check(x, y)
+      x = forward(x, false)
+      @loss_func.forward(x, y, get_all_layers)
+    end
+
     # @return [DNN::Model] Copy this model.
     def copy
       Marshal.load(Marshal.dump(self))
@@ -334,35 +357,35 @@ module DNN
       }.flatten
     end
 
-    # TODO
-    # It is not good to write the Layer class name directly in the Model class. I will fix it later.
     def forward(x, learning_phase)
       @layers.each do |layer|
-        x = if layer.is_a?(
+        x = if layer.is_a?(Model)
           layer.forward(x, learning_phase)
         else
+          layer.learning_phase = learning_phase
           layer.forward(x)
         end
       end
       x
     end
 
-    def backward(
+    def backward(dy)
       @layers.reverse.each do |layer|
-
+        dy = layer.backward(dy)
       end
-
+      dy
     end
 
     def update
       return unless @trainable
-      @layers.
-        if layer.is_a?(
-          layer.
-
-          layer
+      all_trainable_layers = @layers.map { |layer|
+        if layer.is_a?(Model)
+          layer.trainable ? layer.get_all_layers : nil
+        else
+          layer
         end
-
+      }.flatten.compact.uniq
+      @optimizer.update(all_trainable_layers)
     end
 
     def get_prev_layer(layer)
@@ -385,13 +408,13 @@ module DNN
 
     def to_hash
       hash_layers = @layers.map { |layer| layer.to_hash }
-      {class: Model.name, layers: hash_layers, optimizer: @optimizer.to_hash, loss: @
+      {class: Model.name, layers: hash_layers, optimizer: @optimizer.to_hash, loss: @loss_func.to_hash}
     end
 
     private
 
     def layers_check
-
+      if !@layers.first.is_a?(Layers::InputLayer) && !@super_model
         raise TypeError.new("The first layer is not an InputLayer.")
       end
     end
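Taken together, the model.rb changes rename the loss argument of `compile`/`recompile` to `loss_func`, replace the old `&batch_proc` block with explicit `before_epoch_cbk`/`after_epoch_cbk`/`before_batch_cbk`/`after_batch_cbk` lambda callbacks, and make `accurate` return `[accuracy, mean_loss]`. A minimal usage sketch against the 0.10.0 API is below; the layer, optimizer, and loss class names and the data variables are illustrative (based on the bundled examples), not part of this diff.

```ruby
# Sketch only: x_train/y_train/x_test/y_test are assumed to be Numo::SFloat
# arrays prepared elsewhere (e.g. by the bundled mnist_example.rb).
require "dnn"
include DNN::Layers
include DNN::Activations
include DNN::Optimizers
include DNN::Losses

model = DNN::Model.new
model << InputLayer.new(784)
model << Dense.new(256)
model << ReLU.new
model << Dense.new(10)

# compile now takes the loss object as its loss_func argument.
model.compile(Adam.new, SoftmaxCrossEntropy.new)

# train now takes lambda callbacks instead of a &batch_proc block.
model.train(x_train, y_train, 10,
            batch_size: 100,
            test: [x_test, y_test],
            after_epoch_cbk: ->(epoch) { puts "finished epoch #{epoch}" })

# accurate now returns [accuracy, mean_loss] instead of a single value.
acc, mean_loss = model.accurate(x_test, y_test)
```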
data/lib/dnn/core/normalizations.rb
ADDED
@@ -0,0 +1,72 @@
+module DNN
+  module Layers
+
+    class BatchNormalization < HasParamLayer
+      # @return [Integer] The axis to normalization.
+      attr_reader :axis
+      # @return [Float] Exponential moving average of mean and variance.
+      attr_accessor :momentum
+      # @return [Float] Value to avoid division by zero.
+      attr_accessor :eps
+
+      def self.from_hash(hash)
+        self.new(axis: hash[:axis], momentum: hash[:momentum])
+      end
+
+      # @param [integer] axis The axis to normalization.
+      # @param [Float] momentum Exponential moving average of mean and variance.
+      # @param [Float] eps Value to avoid division by zero.
+      def initialize(axis: 0, momentum: 0.9, eps: 1e-7)
+        super()
+        @axis = axis
+        @momentum = momentum
+        @eps = eps
+      end
+
+      def build(input_shape)
+        super
+        @params[:gamma] = @gamma = Param.new(Xumo::SFloat.ones(*output_shape), 0)
+        @params[:beta] = @beta = Param.new(Xumo::SFloat.zeros(*output_shape), 0)
+        @params[:running_mean] = @running_mean = Param.new(Xumo::SFloat.zeros(*output_shape))
+        @params[:running_var] = @running_var = Param.new(Xumo::SFloat.zeros(*output_shape))
+      end
+
+      def forward(x)
+        if learning_phase
+          mean = x.mean(axis: @axis, keepdims: true)
+          @xc = x - mean
+          var = (@xc**2).mean(axis: @axis, keepdims: true)
+          @std = NMath.sqrt(var + @eps)
+          xn = @xc / @std
+          @xn = xn
+          @running_mean.data = @momentum * @running_mean.data + (1 - @momentum) * mean
+          @running_var.data = @momentum * @running_var.data + (1 - @momentum) * var
+        else
+          xc = x - @running_mean.data
+          xn = xc / NMath.sqrt(@running_var.data + @eps)
+        end
+        @gamma.data * xn + @beta.data
+      end
+
+      def backward(dy)
+        batch_size = dy.shape[@axis]
+        if @trainable
+          @beta.grad = dy.sum(axis: @axis, keepdims: true)
+          @gamma.grad = (@xn * dy).sum(axis: @axis, keepdims: true)
+        end
+        dxn = @gamma.data * dy
+        dxc = dxn / @std
+        dstd = -((dxn * @xc) / (@std**2)).sum(axis: @axis, keepdims: true)
+        dvar = 0.5 * dstd / @std
+        dxc += (2.0 / batch_size) * @xc * dvar
+        dmean = dxc.sum(axis: @axis, keepdims: true)
+        dxc - dmean / batch_size
+      end
+
+      def to_hash
+        super({axis: @axis, momentum: @momentum, eps: @eps})
+      end
+    end
+
+  end
+end
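The new `BatchNormalization` layer normalizes with batch statistics while `learning_phase` is true, tracks `running_mean`/`running_var` as non-trainable params, and uses those running statistics at inference. A hedged placement sketch follows; the surrounding layer class names are assumptions taken from the bundled examples, while the `axis:`, `momentum:`, and `eps:` keywords come from this diff.

```ruby
# Sketch: inserting the new BatchNormalization layer between a Dense layer
# and its activation. Only the BatchNormalization keywords are from the diff.
model = DNN::Model.new
model << DNN::Layers::InputLayer.new(784)
model << DNN::Layers::Dense.new(256)
model << DNN::Layers::BatchNormalization.new(momentum: 0.9, eps: 1e-7)
model << DNN::Activations::ReLU.new
model << DNN::Layers::Dense.new(10)
```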
data/lib/dnn/core/optimizers.rb
CHANGED
@@ -3,16 +3,22 @@ module DNN
 
     # Super class of all optimizer classes.
     class Optimizer
+      # @return [Float] Return the Learning rate.
       attr_accessor :learning_rate
 
       def initialize(learning_rate)
         @learning_rate = learning_rate
       end
 
-      # Update
-
-
-
+      # Update layers has param.
+      def update(layers)
+        target_params = layers.select { |layer| layer.is_a?(HasParamLayer) && layer.trainable }
+                              .map { |layer| layer.params.values }.flatten
+                              .select { |param| param.grad }
+        target_params.each do |param|
+          update_param(param)
+          param.grad = 0
+        end
       end
 
       def to_hash(merge_hash = nil)
@@ -20,169 +26,256 @@ module DNN
         hash.merge!(merge_hash) if merge_hash
         hash
       end
+
+      # Update param.
+      # Classes that inherit from this class must implement this method.
+      private def update_param(param)
+        raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'update_param'")
+      end
     end
 
 
     class SGD < Optimizer
+      # @return [Float] Return the momentum coefficient.
       attr_accessor :momentum
 
-      def self.
+      def self.from_hash(hash)
         self.new(hash[:learning_rate], momentum: hash[:momentum])
       end
 
+      # @param [Float] learning_rate Learning rate.
+      # @param [Float] momentum momentum coefficient.
       def initialize(learning_rate = 0.01, momentum: 0)
         super(learning_rate)
         @momentum = momentum
         @v = {}
       end
-
-      def update(params)
-        params.select { |key, param| param.grad }.each_value do |param|
-          amount = param.grad * @learning_rate
-          if @momentum > 0
-            @v[param] ||= 0
-            amount += @momentum * @v[param]
-            @v[param] = amount
-          end
-          param.data -= amount
-        end
-      end
 
       def to_hash
-        super(
+        super(momentum: @momentum)
+      end
+
+      private def update_param(param)
+        amount = param.grad * @learning_rate
+        if @momentum > 0
+          @v[param] ||= 0
+          amount += @momentum * @v[param]
+          @v[param] = amount
+        end
+        param.data -= amount
       end
     end
 
 
-    class Nesterov <
-
+    class Nesterov < Optimizer
+      attr_accessor :momentum
+
+      def self.from_hash(hash)
         self.new(hash[:learning_rate], momentum: hash[:momentum])
       end
 
+      # @param [Float] learning_rate Learning rate.
+      # @param [Float] momentum momentum coefficient.
       def initialize(learning_rate = 0.01, momentum: 0.9)
-        super(learning_rate
+        super(learning_rate)
+        @momentum = momentum
+        @v = {}
+      end
+
+      def to_hash
+        super(momentum: @momentum)
       end
 
-      def
-
-
-
-
-          param.data = (param.data + @momentum**2 * @v[param]) - (1 + @momentum) * amount
-        end
+      private def update_param(param)
+        @v[param] ||= 0
+        amount = param.grad * @learning_rate
+        @v[param] = @v[param] * @momentum - amount
+        param.data = (param.data + @momentum**2 * @v[param]) - (1 + @momentum) * amount
       end
     end
 
 
     class AdaGrad < Optimizer
-
+      # @return [Float] Return the eps value.
+      attr_accessor :eps
+
+      # @param [Float] learning_rate Learning rate.
+      # @param [Float] eps Value to avoid division by zero.
+      def initialize(learning_rate = 0.01, eps: 1e-7)
         super(learning_rate)
+        @eps = eps
         @g = {}
       end
 
-      def self.
-        self.new(hash[:learning_rate])
+      def self.from_hash(hash)
+        self.new(hash[:learning_rate], eps: hash[:eps])
       end
 
-      def
-
-
-
-
-
+      private def update_param(param)
+        @g[param] ||= 0
+        @g[param] += param.grad**2
+        param.data -= (@learning_rate / NMath.sqrt(@g[param] + @eps)) * param.grad
+      end
+
+      def to_hash
+        super(eps: @eps)
       end
     end
 
-
+
     class RMSProp < Optimizer
+      # @return [Float] Return the alpha value.
       attr_accessor :alpha
+      # @return [Float] Return the eps value.
+      attr_accessor :eps
 
-      def self.
-        self.new(hash[:learning_rate], alpha: hash[:alpha])
+      def self.from_hash(hash)
+        self.new(hash[:learning_rate], alpha: hash[:alpha], eps: hash[:eps])
       end
-
-
+
+      # @param [Float] learning_rate Learning rate.
+      # @param [Float] alpha Moving average index of past slopes.
+      # @param [Float] eps Value to avoid division by zero.
+      def initialize(learning_rate = 0.001, alpha: 0.9, eps: 1e-7)
         super(learning_rate)
         @alpha = alpha
+        @eps = eps
         @g = {}
       end
-
-      def update(params)
-        params.select { |key, param| param.grad }.each_value do |param|
-          @g[param] ||= 0
-          @g[param] = @alpha * @g[param] + (1 - @alpha) * param.grad**2
-          param.data -= (@learning_rate / NMath.sqrt(@g[param] + 1e-7)) * param.grad
-        end
-      end
 
       def to_hash
-        super(
+        super(alpha: @alpha, eps: @eps)
+      end
+
+      private def update_param(param)
+        @g[param] ||= 0
+        @g[param] = @alpha * @g[param] + (1 - @alpha) * param.grad**2
+        param.data -= (@learning_rate / NMath.sqrt(@g[param] + @eps)) * param.grad
       end
     end
 
 
     class AdaDelta < Optimizer
+      # @return [Float] Return the rho value.
       attr_accessor :rho
+      # @return [Float] Return the eps value.
+      attr_accessor :eps
 
-      def self.
-        self.new(rho: hash[:rho])
+      def self.from_hash(hash)
+        self.new(rho: hash[:rho], eps: hash[:eps])
       end
 
-
+      # @param [Float] rho Moving average index of past slopes.
+      # @param [Float] eps Value to avoid division by zero.
+      def initialize(rho: 0.95, eps: 1e-6)
         super(nil)
         @rho = rho
+        @eps = eps
         @h = {}
         @s = {}
       end
 
-      def
-
-        @h[param] ||= Xumo::SFloat.zeros(*param.data.shape)
-        @s[param] ||= Xumo::SFloat.zeros(*param.data.shape)
-        @h[param] = @rho * @h[param] + (1 - @rho) * param.grad**2
-        v = (NMath.sqrt(@s[param] + 1e-6) / NMath.sqrt(@h[param] + 1e-6)) * param.grad
-        @s[param] = @rho * @s[param] + (1 - @rho) * v**2
-        param.data -= v
-      end
+      def to_hash
+        super(rho: @rho, eps: @eps)
       end
 
-      def
-
+      private def update_param(param)
+        @h[param] ||= Xumo::SFloat.zeros(*param.data.shape)
+        @s[param] ||= Xumo::SFloat.zeros(*param.data.shape)
+        @h[param] = @rho * @h[param] + (1 - @rho) * param.grad**2
+        v = (NMath.sqrt(@s[param] + @eps) / NMath.sqrt(@h[param] + @eps)) * param.grad
+        @s[param] = @rho * @s[param] + (1 - @rho) * v**2
+        param.data -= v
       end
     end
 
 
     class Adam < Optimizer
+      # @return [Float] Return the alpha value.
+      attr_accessor :alpha
+      # @return [Float] Return the beta1 value.
       attr_accessor :beta1
+      # @return [Float] Return the beta2 value.
       attr_accessor :beta2
+      # @return [Float] Return the eps value.
+      attr_accessor :eps
 
-      def self.
-        self.new(hash[:
+      def self.from_hash(hash)
+        self.new(alpha: hash[:alpha], beta1: hash[:beta1], beta2: hash[:beta2], eps: hash[:eps])
       end
 
-
-
+      # @param [Float] alpha Value used to calculate learning rate.
+      # @param [Float] beta1 Moving average index of beta1.
+      # @param [Float] beta2 Moving average index of beta2.
+      # @param [Float] eps Value to avoid division by zero.
+      def initialize(alpha: 0.001, beta1: 0.9, beta2: 0.999, eps: 1e-7)
+        super(nil)
+        @alpha = alpha
         @beta1 = beta1
         @beta2 = beta2
+        @eps = eps
         @iter = 0
         @m = {}
         @v = {}
       end
 
-      def update(
+      def update(layers)
         @iter += 1
-
-
-
-
-
-
-          param.
+        learning_rate = @alpha * Math.sqrt(1 - @beta2**@iter) / (1 - @beta1**@iter)
+        target_params = layers.select { |layer| layer.is_a?(HasParamLayer) && layer.trainable }
+                              .map { |layer| layer.params.values }.flatten
+                              .select { |param| param.grad }
+        target_params.each do |param|
+          update_param(param, learning_rate)
+          param.grad = 0
         end
       end
 
       def to_hash
-        super(
+        super(alpha: @alpha, beta1: @beta1, beta2: @beta2, eps: @eps)
+      end
+
+      private def update_param(param, learning_rate)
+        @m[param] ||= 0
+        @v[param] ||= 0
+        @m[param] += (1 - @beta1) * (param.grad - @m[param])
+        @v[param] += (1 - @beta2) * (param.grad**2 - @v[param])
+        param.data -= learning_rate * @m[param] / NMath.sqrt(@v[param] + @eps)
+      end
+    end
+
+
+    class RMSPropGraves < Optimizer
+      # @return [Float] Return the alpha value.
+      attr_accessor :alpha
+      # @return [Float] Return the eps value.
+      attr_accessor :eps
+
+      def self.from_hash(hash)
+        self.new(hash[:learning_rate], alpha: hash[:alpha], eps: hash[:eps])
+      end
+
+      # @param [Float] learning_rate Learning rate.
+      # @param [Float] alpha Moving average index of past slopes.
+      # @param [Float] eps Value to avoid division by zero.
+      def initialize(learning_rate = 0.0001, alpha: 0.95, eps: 0.0001)
+        super(learning_rate)
+        @alpha = alpha
+        @eps = eps
+        @m = {}
+        @v = {}
+      end
+
+      def to_hash
+        super(alpha: @alpha, eps: @eps)
+      end
+
+      private def update_param(param)
+        @m[param] ||= 0
+        @v[param] ||= 0
+        @m[param] = @alpha * @m[param] + (1 - @alpha) * param.grad
+        @v[param] = @alpha * @v[param] + (1 - @alpha) * param.grad**2
+        param.data -= (@learning_rate / NMath.sqrt(@v[param] - @m[param]**2 + @eps)) * param.grad
       end
     end
 
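The optimizer rework moves the iteration logic into `Optimizer#update(layers)`, which now receives layers rather than a params hash, filters the trainable `HasParamLayer` params itself, and delegates to a private `update_param(param)` hook (Adam overrides `update` to also compute its per-iteration learning rate). Under 0.10.0 a custom optimizer therefore only needs that one hook. The sketch below relies only on the contract shown in this diff; the class name `PlainSGD` is made up for illustration.

```ruby
module DNN
  module Optimizers
    # Illustrative custom optimizer under the new 0.10.0 contract:
    # Optimizer#update(layers) collects trainable params and calls the
    # private update_param hook, so a subclass only implements that hook.
    class PlainSGD < Optimizer
      def initialize(learning_rate = 0.01)
        super(learning_rate)
      end

      private def update_param(param)
        param.data -= @learning_rate * param.grad
      end
    end
  end
end

# The built-in optimizers now expose and serialize their hyperparameters, e.g.:
#   DNN::Optimizers::Adam.new(alpha: 0.001, beta1: 0.9, beta2: 0.999, eps: 1e-7)
#   DNN::Optimizers::RMSPropGraves.new(0.0001, alpha: 0.95, eps: 0.0001)
```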