ruby-dnn 0.9.4 → 0.10.0

This diff shows the changes between the publicly released contents of these two package versions, as they appear in their public registry. It is provided for informational purposes only.
@@ -22,14 +22,14 @@ module DNN
  # @return [DNN::Model]
  def self.load_json(json_str)
  hash = JSON.parse(json_str, symbolize_names: true)
- model = self.load_hash(hash)
- model.compile(Utils.load_hash(hash[:optimizer]), Utils.load_hash(hash[:loss]))
+ model = self.from_hash(hash)
+ model.compile(Utils.from_hash(hash[:optimizer]), Utils.from_hash(hash[:loss]))
  model
  end

- def self.load_hash(hash)
+ def self.from_hash(hash)
  model = self.new
- model.layers = hash[:layers].map { |hash_layer| Utils.load_hash(hash_layer) }
+ model.layers = hash[:layers].map { |hash_layer| Utils.from_hash(hash_layer) }
  model
  end

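Usage note (not part of the diff): the load_hash → from_hash rename is internal to deserialization; the public entry point is still Model.load_json, which now also re-compiles the model with the optimizer and loss stored in the JSON. A minimal round-trip sketch, assuming a model previously serialized with the matching Model#to_json and written to a file (the file name here is illustrative):

  require "dnn"

  json_str = File.read("trained_model.json")
  model = DNN::Model.load_json(json_str)   # calls Model.from_hash / Utils.from_hash internally
  # The returned model is already compiled, so it can be used for predict or further training.
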
@@ -97,63 +97,61 @@ module DNN
  # @param [DNN::Layers::Layer] layer Layer to add to the model.
  # @return [DNN::Model] return self.
  def <<(layer)
- # Due to a bug in saving nested models, temporarily prohibit model nesting.
- # if !layer.is_a?(Layers::Layer) && !layer.is_a?(Model)
- # raise TypeError.new("layer is not an instance of the DNN::Layers::Layer class or DNN::Model class.")
- # end
- unless layer.is_a?(Layers::Layer)
- raise TypeError.new("layer:#{layer.class.name} is not an instance of the DNN::Layers::Layer class.")
+ if !layer.is_a?(Layers::Layer) && !layer.is_a?(Model)
+ raise TypeError.new("layer is not an instance of the DNN::Layers::Layer class or DNN::Model class.")
  end
  @layers << layer
  self
  end

- # Set optimizer and loss to model and build all layers.
+ # Set optimizer and loss_func to model and build all layers.
  # @param [DNN::Optimizers::Optimizer] optimizer Optimizer to use for learning.
- # @param [DNN::Losses::Loss] loss Lptimizer to use for learning.
- def compile(optimizer, loss)
+ # @param [DNN::Losses::Loss] loss_func Loss function to use for learning.
+ def compile(optimizer, loss_func)
  raise DNN_Error.new("The model is already compiled.") if compiled?
  unless optimizer.is_a?(Optimizers::Optimizer)
  raise TypeError.new("optimizer:#{optimizer.class} is not an instance of DNN::Optimizers::Optimizer class.")
  end
- unless loss.is_a?(Losses::Loss)
- raise TypeError.new("loss:#{loss.class} is not an instance of DNN::Losses::Loss class.")
+ unless loss_func.is_a?(Losses::Loss)
+ raise TypeError.new("loss_func:#{loss_func.class} is not an instance of DNN::Losses::Loss class.")
  end
  @compiled = true
  layers_check
  @optimizer = optimizer
- @loss = loss
+ @loss_func = loss_func
  build
  layers_shape_check
  end

- # Set optimizer and loss to model and recompile. But does not build layers.
+ # Set optimizer and loss_func to model and recompile. But does not build layers.
  # @param [DNN::Optimizers::Optimizer] optimizer Optimizer to use for learning.
- # @param [DNN::Losses::Loss] loss Lptimizer to use for learning.
- def recompile(optimizer, loss)
+ # @param [DNN::Losses::Loss] loss_func Loss function to use for learning.
+ def recompile(optimizer, loss_func)
  unless optimizer.is_a?(Optimizers::Optimizer)
  raise TypeError.new("optimizer:#{optimizer.class} is not an instance of DNN::Optimizers::Optimizer class.")
  end
- unless loss.is_a?(Losses::Loss)
- raise TypeError.new("loss:#{loss.class} is not an instance of DNN::Losses::Loss class.")
+ unless loss_func.is_a?(Losses::Loss)
+ raise TypeError.new("loss_func:#{loss_func.class} is not an instance of DNN::Losses::Loss class.")
  end
  @compiled = true
  layers_check
  @optimizer = optimizer
- @loss = loss
+ @loss_func = loss_func
  layers_shape_check
  end

  def build(super_model = nil)
  @super_model = super_model
  shape = if super_model
- super_model.output_shape
+ super_model.get_prev_layer(self).output_shape
  else
  @layers.first.build
  end
- @layers[1..-1].each do |layer|
+ layers = super_model ? @layers : @layers[1..-1]
+ layers.each do |layer|
  if layer.is_a?(Model)
  layer.build(self)
+ layer.recompile(@optimizer, @loss_func)
  else
  layer.build(shape)
  end
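Usage note (not part of the diff): because << accepts DNN::Model instances again, sub-models can be nested directly, and build now wires a nested model to its previous layer via get_prev_layer and recompiles it with the parent's optimizer and loss_func. A sketch of the nesting this re-enables (layer, optimizer and loss class names are assumed from ruby-dnn's standard API; sizes are illustrative):

  sub_model = DNN::Model.new
  sub_model << DNN::Layers::Dense.new(64)

  model = DNN::Model.new
  model << DNN::Layers::InputLayer.new(784)
  model << sub_model                      # passes the relaxed type check
  model << DNN::Layers::Dense.new(10)
  model.compile(DNN::Optimizers::SGD.new, DNN::Losses::SoftmaxCrossEntropy.new)
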
@@ -174,13 +172,13 @@ module DNN
  # @return [DNN::Optimizers::Optimizer] optimizer Return the optimizer to use for learning.
  def optimizer
  raise DNN_Error.new("The model is not compiled.") unless compiled?
- @optimizer ? @optimizer : @super_model.optimizer
+ @optimizer
  end

  # @return [DNN::Losses::Loss] loss Return the loss to use for learning.
- def loss
+ def loss_func
  raise DNN_Error.new("The model is not compiled.") unless compiled?
- @loss ? @loss : @super_model.loss
+ @loss_func
  end

  # @return [Bool] Returns whether the model is learning.
@@ -195,25 +193,31 @@ module DNN
  # @param [Integer] epochs Number of training.
  # @param [Integer] batch_size Batch size used for one training.
  # @param [Array or NilClass] test If you to test the model for every 1 epoch,
- # specify [x_test, y_test]. Don't test to the model, specify nil.
+ # specify [x_test, y_test]. Don't test to the model, specify nil.
  # @param [Bool] verbose Set true to display the log. If false is set, the log is not displayed.
- # @param [Proc] batch_proc Set proc to process per batch.
- # @yield [epoch] Process performed before one training.
+ # @param [Lambda] before_epoch_cbk Process performed before one training.
+ # @param [Lambda] after_epoch_cbk Process performed after one training.
+ # @param [Lambda] before_batch_cbk Set the proc to be performed before batch processing.
+ # @param [Lambda] after_batch_cbk Set the proc to be performed after batch processing.
  def train(x, y, epochs,
  batch_size: 1,
  test: nil,
  verbose: true,
- batch_proc: nil,
- &epoch_proc)
+ before_epoch_cbk: nil,
+ after_epoch_cbk: nil,
+ before_batch_cbk: nil,
+ after_batch_cbk: nil)
  raise DNN_Error.new("The model is not compiled.") unless compiled?
  check_xy_type(x, y)
  dataset = Dataset.new(x, y)
  num_train_datas = x.shape[0]
  (1..epochs).each do |epoch|
+ before_epoch_cbk.call(epoch) if before_epoch_cbk
  puts "【 epoch #{epoch}/#{epochs} 】" if verbose
  (num_train_datas.to_f / batch_size).ceil.times do |index|
- x_batch, y_batch = dataset.get_batch(batch_size)
- loss_value = train_on_batch(x_batch, y_batch, &batch_proc)
+ x_batch, y_batch = dataset.next_batch(batch_size)
+ loss_value = train_on_batch(x_batch, y_batch,
+ before_batch_cbk: before_batch_cbk, after_batch_cbk: after_batch_cbk)
  if loss_value.is_a?(Numo::SFloat)
  loss_value = loss_value.mean
  elsif loss_value.nan?
@@ -236,11 +240,12 @@ module DNN
  print log if verbose
  end
  if verbose && test
- acc = accurate(test[0], test[1], batch_size, &batch_proc)
- print " accurate: #{acc}"
+ acc, test_loss = accurate(test[0], test[1], batch_size,
+ before_batch_cbk: before_batch_cbk, after_batch_cbk: after_batch_cbk)
+ print " accurate: #{acc}, test loss: #{sprintf('%.8f', test_loss)}"
  end
  puts "" if verbose
- epoch_proc.call(epoch) if epoch_proc
+ after_epoch_cbk.call(epoch) if after_epoch_cbk
  end
  end

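Migration note (not part of the diff): train no longer takes batch_proc:/&epoch_proc; the four keyword callbacks replace them. Their signatures follow the call sites above: before_epoch_cbk and after_epoch_cbk receive the epoch number, before_batch_cbk receives (x, y, learning_phase) and must return the (possibly modified) [x, y] pair, and after_batch_cbk receives (loss_value, learning_phase). A call-shape sketch (data variables and the preprocessing shown are illustrative):

  model.train(x_train, y_train, 10,
              batch_size: 128,
              test: [x_test, y_test],
              before_epoch_cbk: ->(epoch) { puts "starting epoch #{epoch}" },
              after_epoch_cbk:  ->(epoch) { },
              before_batch_cbk: ->(x, y, learning_phase) { [x / 255.0, y] },
              after_batch_cbk:  ->(loss, learning_phase) { })
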
@@ -248,51 +253,58 @@ module DNN
  # Compile the model before use this method.
  # @param [Numo::SFloat] x Input training data.
  # @param [Numo::SFloat] y Output training data.
+ # @param [Lambda] before_batch_cbk Set the proc to be performed before batch processing.
+ # @param [Lambda] after_batch_cbk Set the proc to be performed after batch processing.
  # @return [Float | Numo::SFloat] Return loss value in the form of Float or Numo::SFloat.
- # @yield [x, y] batch_proc Set proc to process per batch.
- def train_on_batch(x, y, &batch_proc)
+ def train_on_batch(x, y, before_batch_cbk: nil, after_batch_cbk: nil)
  raise DNN_Error.new("The model is not compiled.") unless compiled?
  check_xy_type(x, y)
  input_data_shape_check(x, y)
- x, y = batch_proc.call(x, y) if batch_proc
- out = forward(x, true)
- loss_value = @loss.forward(out, y, get_all_layers)
- dout = @loss.backward(y)
- backward(dout)
- @loss.regularizes_backward(get_all_layers)
+ x, y = before_batch_cbk.call(x, y, true) if before_batch_cbk
+ x = forward(x, true)
+ loss_value = @loss_func.forward(x, y, get_all_layers)
+ dy = @loss_func.backward(y, get_all_layers)
+ backward(dy)
  update
+ after_batch_cbk.call(loss_value, true) if after_batch_cbk
  loss_value
  end

  # Evaluate model and get accurate of test data.
  # @param [Numo::SFloat] x Input test data.
  # @param [Numo::SFloat] y Output test data.
- # @yield [x, y] batch_proc Set proc to process per batch.
- def accurate(x, y, batch_size = 100, &batch_proc)
+ # @param [Lambda] before_batch_cbk Set the proc to be performed before batch processing.
+ # @param [Lambda] after_batch_cbk Set the proc to be performed after batch processing.
+ # @return [Array] Returns the test data accurate and mean loss in the form [accurate, mean_loss].
+ def accurate(x, y, batch_size = 100, before_batch_cbk: nil, after_batch_cbk: nil)
  check_xy_type(x, y)
  input_data_shape_check(x, y)
  batch_size = batch_size >= x.shape[0] ? x.shape[0] : batch_size
+ dataset = Dataset.new(x, y, false)
  correct = 0
+ sum_loss = 0
  (x.shape[0].to_f / batch_size).ceil.times do |i|
- x_batch = Xumo::SFloat.zeros(batch_size, *x.shape[1..-1])
- y_batch = Xumo::SFloat.zeros(batch_size, *y.shape[1..-1])
- batch_size.times do |j|
- k = i * batch_size + j
- break if k >= x.shape[0]
- x_batch[j, false] = x[k, false]
- y_batch[j, false] = y[k, false]
- end
- x_batch, y_batch = batch_proc.call(x_batch, y_batch) if batch_proc
- out = forward(x_batch, false)
+ x_batch, y_batch = dataset.next_batch(batch_size)
+ x_batch, y_batch = before_batch_cbk.call(x_batch, y_batch, true) if before_batch_cbk
+ x_batch = forward(x_batch, false)
+ sigmoid = Sigmoid.new
  batch_size.times do |j|
  if @layers.last.output_shape == [1]
- correct += 1 if out[j, 0].round == y_batch[j, 0].round
+ if @loss_func.is_a?(SigmoidCrossEntropy)
+ correct += 1 if sigmoid.forward(x_batch[j, 0]).round == y_batch[j, 0].round
+ else
+ correct += 1 if x_batch[j, 0].round == y_batch[j, 0].round
+ end
  else
- correct += 1 if out[j, true].max_index == y_batch[j, true].max_index
+ correct += 1 if x_batch[j, true].max_index == y_batch[j, true].max_index
  end
  end
+ loss_value = @loss_func.forward(x_batch, y_batch, get_all_layers)
+ after_batch_cbk.call(loss_value, false) if after_batch_cbk
+ sum_loss += loss_value.is_a?(Numo::SFloat) ? loss_value.mean : loss_value
  end
- correct.to_f / x.shape[0]
+ mean_loss = sum_loss / batch_size
+ [correct.to_f / x.shape[0], mean_loss]
  end

  # Predict data.
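Migration note (not part of the diff): accurate now returns a two-element array instead of a bare Float, so existing callers should destructure the result:

  acc, test_loss = model.accurate(x_test, y_test, 100)
  puts "accurate: #{acc}, test loss: #{test_loss}"
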
@@ -310,6 +322,17 @@ module DNN
  predict(x.reshape(1, *x.shape))[0, false]
  end

+ # Get loss value.
+ # @param [Numo::SFloat] x Input data.
+ # @param [Numo::SFloat] y Output data.
+ # @return [Float | Numo::SFloat] Return loss value in the form of Float or Numo::SFloat.
+ def loss(x, y)
+ check_xy_type(x, y)
+ input_data_shape_check(x, y)
+ x = forward(x, false)
+ @loss_func.forward(x, y, get_all_layers)
+ end
+
  # @return [DNN::Model] Copy this model.
  def copy
  Marshal.load(Marshal.dump(self))
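Usage note (not part of the diff): the new Model#loss computes the loss over given data with a forward pass only, without updating parameters (note that the former loss reader is now loss_func). For example:

  val_loss = model.loss(x_test, y_test)
  val_loss = val_loss.mean if val_loss.is_a?(Numo::SFloat)   # mirror the averaging done in train
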
@@ -334,35 +357,35 @@ module DNN
  }.flatten
  end

- # TODO
- # It is not good to write the Layer class name directly in the Model class. I will fix it later.
  def forward(x, learning_phase)
  @layers.each do |layer|
- x = if layer.is_a?(Layers::Dropout) || layer.is_a?(Layers::BatchNormalization) || layer.is_a?(Model)
+ x = if layer.is_a?(Model)
  layer.forward(x, learning_phase)
  else
+ layer.learning_phase = learning_phase
  layer.forward(x)
  end
  end
  x
  end

- def backward(dout)
+ def backward(dy)
  @layers.reverse.each do |layer|
- dout = layer.backward(dout)
+ dy = layer.backward(dy)
  end
- dout
+ dy
  end

  def update
  return unless @trainable
- @layers.each do |layer|
- if layer.is_a?(Layers::HasParamLayer)
- layer.update(@optimizer)
- elsif layer.is_a?(Model)
- layer.update
+ all_trainable_layers = @layers.map { |layer|
+ if layer.is_a?(Model)
+ layer.trainable ? layer.get_all_layers : nil
+ else
+ layer
  end
- end
+ }.flatten.compact.uniq
+ @optimizer.update(all_trainable_layers)
  end

  def get_prev_layer(layer)
@@ -385,13 +408,13 @@

  def to_hash
  hash_layers = @layers.map { |layer| layer.to_hash }
- {class: Model.name, layers: hash_layers, optimizer: @optimizer.to_hash, loss: @loss.to_hash}
+ {class: Model.name, layers: hash_layers, optimizer: @optimizer.to_hash, loss: @loss_func.to_hash}
  end

  private

  def layers_check
- unless @layers.first.is_a?(Layers::InputLayer)
+ if !@layers.first.is_a?(Layers::InputLayer) && !@super_model
  raise TypeError.new("The first layer is not an InputLayer.")
  end
  end
@@ -0,0 +1,72 @@
+ module DNN
+ module Layers
+
+ class BatchNormalization < HasParamLayer
+ # @return [Integer] The axis to normalization.
+ attr_reader :axis
+ # @return [Float] Exponential moving average of mean and variance.
+ attr_accessor :momentum
+ # @return [Float] Value to avoid division by zero.
+ attr_accessor :eps
+
+ def self.from_hash(hash)
+ self.new(axis: hash[:axis], momentum: hash[:momentum])
+ end
+
+ # @param [integer] axis The axis to normalization.
+ # @param [Float] momentum Exponential moving average of mean and variance.
+ # @param [Float] eps Value to avoid division by zero.
+ def initialize(axis: 0, momentum: 0.9, eps: 1e-7)
+ super()
+ @axis = axis
+ @momentum = momentum
+ @eps = eps
+ end
+
+ def build(input_shape)
+ super
+ @params[:gamma] = @gamma = Param.new(Xumo::SFloat.ones(*output_shape), 0)
+ @params[:beta] = @beta = Param.new(Xumo::SFloat.zeros(*output_shape), 0)
+ @params[:running_mean] = @running_mean = Param.new(Xumo::SFloat.zeros(*output_shape))
+ @params[:running_var] = @running_var = Param.new(Xumo::SFloat.zeros(*output_shape))
+ end
+
+ def forward(x)
+ if learning_phase
+ mean = x.mean(axis: @axis, keepdims: true)
+ @xc = x - mean
+ var = (@xc**2).mean(axis: @axis, keepdims: true)
+ @std = NMath.sqrt(var + @eps)
+ xn = @xc / @std
+ @xn = xn
+ @running_mean.data = @momentum * @running_mean.data + (1 - @momentum) * mean
+ @running_var.data = @momentum * @running_var.data + (1 - @momentum) * var
+ else
+ xc = x - @running_mean.data
+ xn = xc / NMath.sqrt(@running_var.data + @eps)
+ end
+ @gamma.data * xn + @beta.data
+ end
+
+ def backward(dy)
+ batch_size = dy.shape[@axis]
+ if @trainable
+ @beta.grad = dy.sum(axis: @axis, keepdims: true)
+ @gamma.grad = (@xn * dy).sum(axis: @axis, keepdims: true)
+ end
+ dxn = @gamma.data * dy
+ dxc = dxn / @std
+ dstd = -((dxn * @xc) / (@std**2)).sum(axis: @axis, keepdims: true)
+ dvar = 0.5 * dstd / @std
+ dxc += (2.0 / batch_size) * @xc * dvar
+ dmean = dxc.sum(axis: @axis, keepdims: true)
+ dxc - dmean / batch_size
+ end
+
+ def to_hash
+ super({axis: @axis, momentum: @momentum, eps: @eps})
+ end
+ end
+
+ end
+ end
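Usage note (not part of the diff): BatchNormalization is now a HasParamLayer in its own file and reads learning_phase from the layer itself rather than taking it as a forward argument (see the Model#forward change above). A usage sketch (the surrounding InputLayer/Dense classes are assumed from ruby-dnn's standard API; sizes are illustrative):

  model = DNN::Model.new
  model << DNN::Layers::InputLayer.new(784)
  model << DNN::Layers::Dense.new(256)
  model << DNN::Layers::BatchNormalization.new(momentum: 0.9, eps: 1e-7)
  model << DNN::Layers::Dense.new(10)
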
@@ -3,16 +3,22 @@ module DNN

  # Super class of all optimizer classes.
  class Optimizer
+ # @return [Float] Return the Learning rate.
  attr_accessor :learning_rate

  def initialize(learning_rate)
  @learning_rate = learning_rate
  end

- # Update params.
- # Classes that inherit from this class must implement this method.
- def update(params)
- raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'update'")
+ # Update layers has param.
+ def update(layers)
+ target_params = layers.select { |layer| layer.is_a?(HasParamLayer) && layer.trainable }
+ .map { |layer| layer.params.values }.flatten
+ .select { |param| param.grad }
+ target_params.each do |param|
+ update_param(param)
+ param.grad = 0
+ end
  end

  def to_hash(merge_hash = nil)
@@ -20,169 +26,256 @@ module DNN
  hash.merge!(merge_hash) if merge_hash
  hash
  end
+
+ # Update param.
+ # Classes that inherit from this class must implement this method.
+ private def update_param(param)
+ raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'update_param'")
+ end
  end


  class SGD < Optimizer
+ # @return [Float] Return the momentum coefficient.
  attr_accessor :momentum

- def self.load_hash(hash)
+ def self.from_hash(hash)
  self.new(hash[:learning_rate], momentum: hash[:momentum])
  end

+ # @param [Float] learning_rate Learning rate.
+ # @param [Float] momentum momentum coefficient.
  def initialize(learning_rate = 0.01, momentum: 0)
  super(learning_rate)
  @momentum = momentum
  @v = {}
  end
-
- def update(params)
- params.select { |key, param| param.grad }.each_value do |param|
- amount = param.grad * @learning_rate
- if @momentum > 0
- @v[param] ||= 0
- amount += @momentum * @v[param]
- @v[param] = amount
- end
- param.data -= amount
- end
- end

  def to_hash
- super({momentum: @momentum})
+ super(momentum: @momentum)
+ end
+
+ private def update_param(param)
+ amount = param.grad * @learning_rate
+ if @momentum > 0
+ @v[param] ||= 0
+ amount += @momentum * @v[param]
+ @v[param] = amount
+ end
+ param.data -= amount
  end
  end


- class Nesterov < SGD
- def self.load_hash(hash)
+ class Nesterov < Optimizer
+ attr_accessor :momentum
+
+ def self.from_hash(hash)
  self.new(hash[:learning_rate], momentum: hash[:momentum])
  end

+ # @param [Float] learning_rate Learning rate.
+ # @param [Float] momentum momentum coefficient.
  def initialize(learning_rate = 0.01, momentum: 0.9)
- super(learning_rate, momentum: momentum)
+ super(learning_rate)
+ @momentum = momentum
+ @v = {}
+ end
+
+ def to_hash
+ super(momentum: @momentum)
  end

- def update(params)
- params.select { |key, param| param.grad }.each_value do |param|
- @v[param] ||= 0
- amount = param.grad * @learning_rate
- @v[param] = @v[param] * @momentum - amount
- param.data = (param.data + @momentum**2 * @v[param]) - (1 + @momentum) * amount
- end
+ private def update_param(param)
+ @v[param] ||= 0
+ amount = param.grad * @learning_rate
+ @v[param] = @v[param] * @momentum - amount
+ param.data = (param.data + @momentum**2 * @v[param]) - (1 + @momentum) * amount
  end
  end


  class AdaGrad < Optimizer
- def initialize(learning_rate = 0.01)
+ # @return [Float] Return the eps value.
+ attr_accessor :eps
+
+ # @param [Float] learning_rate Learning rate.
+ # @param [Float] eps Value to avoid division by zero.
+ def initialize(learning_rate = 0.01, eps: 1e-7)
  super(learning_rate)
+ @eps = eps
  @g = {}
  end

- def self.load_hash(hash)
- self.new(hash[:learning_rate])
+ def self.from_hash(hash)
+ self.new(hash[:learning_rate], eps: hash[:eps])
  end

- def update(params)
- params.select { |key, param| param.grad }.each_value do |param|
- @g[param] ||= 0
- @g[param] += param.grad**2
- param.data -= (@learning_rate / NMath.sqrt(@g[param] + 1e-7)) * param.grad
- end
+ private def update_param(param)
+ @g[param] ||= 0
+ @g[param] += param.grad**2
+ param.data -= (@learning_rate / NMath.sqrt(@g[param] + @eps)) * param.grad
+ end
+
+ def to_hash
+ super(eps: @eps)
  end
  end

-
+
  class RMSProp < Optimizer
+ # @return [Float] Return the alpha value.
  attr_accessor :alpha
+ # @return [Float] Return the eps value.
+ attr_accessor :eps

- def self.load_hash(hash)
- self.new(hash[:learning_rate], alpha: hash[:alpha])
+ def self.from_hash(hash)
+ self.new(hash[:learning_rate], alpha: hash[:alpha], eps: hash[:eps])
  end
-
- def initialize(learning_rate = 0.001, alpha: 0.9)
+
+ # @param [Float] learning_rate Learning rate.
+ # @param [Float] alpha Moving average index of past slopes.
+ # @param [Float] eps Value to avoid division by zero.
+ def initialize(learning_rate = 0.001, alpha: 0.9, eps: 1e-7)
  super(learning_rate)
  @alpha = alpha
+ @eps = eps
  @g = {}
  end
-
- def update(params)
- params.select { |key, param| param.grad }.each_value do |param|
- @g[param] ||= 0
- @g[param] = @alpha * @g[param] + (1 - @alpha) * param.grad**2
- param.data -= (@learning_rate / NMath.sqrt(@g[param] + 1e-7)) * param.grad
- end
- end

  def to_hash
- super({alpha: @alpha})
+ super(alpha: @alpha, eps: @eps)
+ end
+
+ private def update_param(param)
+ @g[param] ||= 0
+ @g[param] = @alpha * @g[param] + (1 - @alpha) * param.grad**2
+ param.data -= (@learning_rate / NMath.sqrt(@g[param] + @eps)) * param.grad
  end
  end


  class AdaDelta < Optimizer
+ # @return [Float] Return the rho value.
  attr_accessor :rho
+ # @return [Float] Return the eps value.
+ attr_accessor :eps

- def self.load_hash(hash)
- self.new(rho: hash[:rho])
+ def self.from_hash(hash)
+ self.new(rho: hash[:rho], eps: hash[:eps])
  end

- def initialize(rho: 0.95)
+ # @param [Float] rho Moving average index of past slopes.
+ # @param [Float] eps Value to avoid division by zero.
+ def initialize(rho: 0.95, eps: 1e-6)
  super(nil)
  @rho = rho
+ @eps = eps
  @h = {}
  @s = {}
  end

- def update(params)
- params.select { |key, param| param.grad }.each_value do |param|
- @h[param] ||= Xumo::SFloat.zeros(*param.data.shape)
- @s[param] ||= Xumo::SFloat.zeros(*param.data.shape)
- @h[param] = @rho * @h[param] + (1 - @rho) * param.grad**2
- v = (NMath.sqrt(@s[param] + 1e-6) / NMath.sqrt(@h[param] + 1e-6)) * param.grad
- @s[param] = @rho * @s[param] + (1 - @rho) * v**2
- param.data -= v
- end
+ def to_hash
+ super(rho: @rho, eps: @eps)
  end

- def to_hash
- super({rho: @rho})
+ private def update_param(param)
+ @h[param] ||= Xumo::SFloat.zeros(*param.data.shape)
+ @s[param] ||= Xumo::SFloat.zeros(*param.data.shape)
+ @h[param] = @rho * @h[param] + (1 - @rho) * param.grad**2
+ v = (NMath.sqrt(@s[param] + @eps) / NMath.sqrt(@h[param] + @eps)) * param.grad
+ @s[param] = @rho * @s[param] + (1 - @rho) * v**2
+ param.data -= v
  end
  end


  class Adam < Optimizer
+ # @return [Float] Return the alpha value.
+ attr_accessor :alpha
+ # @return [Float] Return the beta1 value.
  attr_accessor :beta1
+ # @return [Float] Return the beta2 value.
  attr_accessor :beta2
+ # @return [Float] Return the eps value.
+ attr_accessor :eps

- def self.load_hash(hash)
- self.new(hash[:learning_rate], beta1: hash[:beta1], beta2: hash[:beta2])
+ def self.from_hash(hash)
+ self.new(alpha: hash[:alpha], beta1: hash[:beta1], beta2: hash[:beta2], eps: hash[:eps])
  end

- def initialize(learning_rate = 0.001, beta1: 0.9, beta2: 0.999)
- super(learning_rate)
+ # @param [Float] alpha Value used to calculate learning rate.
+ # @param [Float] beta1 Moving average index of beta1.
+ # @param [Float] beta2 Moving average index of beta2.
+ # @param [Float] eps Value to avoid division by zero.
+ def initialize(alpha: 0.001, beta1: 0.9, beta2: 0.999, eps: 1e-7)
+ super(nil)
+ @alpha = alpha
  @beta1 = beta1
  @beta2 = beta2
+ @eps = eps
  @iter = 0
  @m = {}
  @v = {}
  end

- def update(params)
+ def update(layers)
  @iter += 1
- lr = @learning_rate * Math.sqrt(1 - @beta2**@iter) / (1 - @beta1**@iter)
- params.select { |key, param| param.grad }.each_value do |param|
- @m[param] ||= 0
- @v[param] ||= 0
- @m[param] += (1 - @beta1) * (param.grad - @m[param])
- @v[param] += (1 - @beta2) * (param.grad**2 - @v[param])
- param.data -= lr * @m[param] / NMath.sqrt(@v[param] + 1e-7)
+ learning_rate = @alpha * Math.sqrt(1 - @beta2**@iter) / (1 - @beta1**@iter)
+ target_params = layers.select { |layer| layer.is_a?(HasParamLayer) && layer.trainable }
+ .map { |layer| layer.params.values }.flatten
+ .select { |param| param.grad }
+ target_params.each do |param|
+ update_param(param, learning_rate)
+ param.grad = 0
  end
  end

  def to_hash
- super({beta1: @beta1, beta2: @beta2})
+ super(alpha: @alpha, beta1: @beta1, beta2: @beta2, eps: @eps)
+ end
+
+ private def update_param(param, learning_rate)
+ @m[param] ||= 0
+ @v[param] ||= 0
+ @m[param] += (1 - @beta1) * (param.grad - @m[param])
+ @v[param] += (1 - @beta2) * (param.grad**2 - @v[param])
+ param.data -= learning_rate * @m[param] / NMath.sqrt(@v[param] + @eps)
+ end
+ end
+
+
+ class RMSPropGraves < Optimizer
+ # @return [Float] Return the alpha value.
+ attr_accessor :alpha
+ # @return [Float] Return the eps value.
+ attr_accessor :eps
+
+ def self.from_hash(hash)
+ self.new(hash[:learning_rate], alpha: hash[:alpha], eps: hash[:eps])
+ end
+
+ # @param [Float] learning_rate Learning rate.
+ # @param [Float] alpha Moving average index of past slopes.
+ # @param [Float] eps Value to avoid division by zero.
+ def initialize(learning_rate = 0.0001, alpha: 0.95, eps: 0.0001)
+ super(learning_rate)
+ @alpha = alpha
+ @eps = eps
+ @m = {}
+ @v = {}
+ end
+
+ def to_hash
+ super(alpha: @alpha, eps: @eps)
+ end
+
+ private def update_param(param)
+ @m[param] ||= 0
+ @v[param] ||= 0
+ @m[param] = @alpha * @m[param] + (1 - @alpha) * param.grad
+ @v[param] = @alpha * @v[param] + (1 - @alpha) * param.grad**2
+ param.data -= (@learning_rate / NMath.sqrt(@v[param] - @m[param]**2 + @eps)) * param.grad
  end
  end
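Usage note (not part of the diff): under the reworked base class, Optimizer#update receives the trainable layers, collects their params, and resets each grad after applying it, so a subclass normally only overrides the private update_param hook (Adam overrides update itself because it needs the bias-corrected per-iteration rate). A minimal custom optimizer following that contract; the class below is an illustrative sketch, not part of the gem:

  module DNN
    module Optimizers
      # Plain gradient descent with a fixed step size. It relies entirely on the
      # base-class update(layers) loop and only supplies the per-param rule.
      class PlainSGD < Optimizer
        def self.from_hash(hash)
          self.new(hash[:learning_rate])
        end

        def initialize(learning_rate = 0.01)
          super(learning_rate)
        end

        private def update_param(param)
          param.data -= @learning_rate * param.grad
        end
      end
    end
  end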