ruby-dnn 0.10.1 → 0.10.2

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,72 +1,72 @@
-module DNN
-  module Layers
-
-    class BatchNormalization < HasParamLayer
-      # @return [Integer] The axis to normalization.
-      attr_reader :axis
-      # @return [Float] Exponential moving average of mean and variance.
-      attr_accessor :momentum
-      # @return [Float] Value to avoid division by zero.
-      attr_accessor :eps
-
-      def self.from_hash(hash)
-        self.new(axis: hash[:axis], momentum: hash[:momentum])
-      end
-
-      # @param [integer] axis The axis to normalization.
-      # @param [Float] momentum Exponential moving average of mean and variance.
-      # @param [Float] eps Value to avoid division by zero.
-      def initialize(axis: 0, momentum: 0.9, eps: 1e-7)
-        super()
-        @axis = axis
-        @momentum = momentum
-        @eps = eps
-      end
-
-      def build(input_shape)
-        super
-        @params[:gamma] = @gamma = Param.new(Xumo::SFloat.ones(*output_shape), 0)
-        @params[:beta] = @beta = Param.new(Xumo::SFloat.zeros(*output_shape), 0)
-        @params[:running_mean] = @running_mean = Param.new(Xumo::SFloat.zeros(*output_shape))
-        @params[:running_var] = @running_var = Param.new(Xumo::SFloat.zeros(*output_shape))
-      end
-
-      def forward(x)
-        if learning_phase
-          mean = x.mean(axis: @axis, keepdims: true)
-          @xc = x - mean
-          var = (@xc**2).mean(axis: @axis, keepdims: true)
-          @std = NMath.sqrt(var + @eps)
-          xn = @xc / @std
-          @xn = xn
-          @running_mean.data = @momentum * @running_mean.data + (1 - @momentum) * mean
-          @running_var.data = @momentum * @running_var.data + (1 - @momentum) * var
-        else
-          xc = x - @running_mean.data
-          xn = xc / NMath.sqrt(@running_var.data + @eps)
-        end
-        @gamma.data * xn + @beta.data
-      end
-
-      def backward(dy)
-        batch_size = dy.shape[@axis]
-        if @trainable
-          @beta.grad = dy.sum(axis: @axis, keepdims: true)
-          @gamma.grad = (@xn * dy).sum(axis: @axis, keepdims: true)
-        end
-        dxn = @gamma.data * dy
-        dxc = dxn / @std
-        dstd = -((dxn * @xc) / (@std**2)).sum(axis: @axis, keepdims: true)
-        dvar = 0.5 * dstd / @std
-        dxc += (2.0 / batch_size) * @xc * dvar
-        dmean = dxc.sum(axis: @axis, keepdims: true)
-        dxc - dmean / batch_size
-      end
-
-      def to_hash
-        super({axis: @axis, momentum: @momentum, eps: @eps})
-      end
-    end
-
-  end
-end
+module DNN
+  module Layers
+
+    class BatchNormalization < HasParamLayer
+      # @return [Integer] The axis to normalization.
+      attr_reader :axis
+      # @return [Float] Exponential moving average of mean and variance.
+      attr_accessor :momentum
+      # @return [Float] Value to avoid division by zero.
+      attr_accessor :eps
+
+      def self.from_hash(hash)
+        self.new(axis: hash[:axis], momentum: hash[:momentum])
+      end
+
+      # @param [integer] axis The axis to normalization.
+      # @param [Float] momentum Exponential moving average of mean and variance.
+      # @param [Float] eps Value to avoid division by zero.
+      def initialize(axis: 0, momentum: 0.9, eps: 1e-7)
+        super()
+        @axis = axis
+        @momentum = momentum
+        @eps = eps
+      end
+
+      def build(input_shape)
+        super
+        @params[:gamma] = @gamma = Param.new(Xumo::SFloat.ones(*output_shape), 0)
+        @params[:beta] = @beta = Param.new(Xumo::SFloat.zeros(*output_shape), 0)
+        @params[:running_mean] = @running_mean = Param.new(Xumo::SFloat.zeros(*output_shape))
+        @params[:running_var] = @running_var = Param.new(Xumo::SFloat.zeros(*output_shape))
+      end
+
+      def forward(x)
+        if learning_phase
+          mean = x.mean(axis: @axis, keepdims: true)
+          @xc = x - mean
+          var = (@xc**2).mean(axis: @axis, keepdims: true)
+          @std = NMath.sqrt(var + @eps)
+          xn = @xc / @std
+          @xn = xn
+          @running_mean.data = @momentum * @running_mean.data + (1 - @momentum) * mean
+          @running_var.data = @momentum * @running_var.data + (1 - @momentum) * var
+        else
+          xc = x - @running_mean.data
+          xn = xc / NMath.sqrt(@running_var.data + @eps)
+        end
+        @gamma.data * xn + @beta.data
+      end
+
+      def backward(dy)
+        batch_size = dy.shape[@axis]
+        if @trainable
+          @beta.grad = dy.sum(axis: @axis, keepdims: true)
+          @gamma.grad = (@xn * dy).sum(axis: @axis, keepdims: true)
+        end
+        dxn = @gamma.data * dy
+        dxc = dxn / @std
+        dstd = -((dxn * @xc) / (@std**2)).sum(axis: @axis, keepdims: true)
+        dvar = 0.5 * dstd / @std
+        dxc += (2.0 / batch_size) * @xc * dvar
+        dmean = dxc.sum(axis: @axis, keepdims: true)
+        dxc - dmean / batch_size
+      end
+
+      def to_hash
+        super({axis: @axis, momentum: @momentum, eps: @eps})
+      end
+    end
+
+  end
+end
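
For reference, a minimal standalone sketch of the training-time forward pass shown above. It is not part of the gem or of this diff: it uses Numo::NArray directly instead of the gem's Xumo alias, and the batch shape, gamma, and beta values are illustrative assumptions rather than anything taken from a real model.

require "numo/narray"

x     = Numo::SFloat.new(8, 3).rand           # hypothetical mini-batch: 8 samples, 3 features
gamma = Numo::SFloat.ones(3)                  # scale, as initialized in #build
beta  = Numo::SFloat.zeros(3)                 # shift, as initialized in #build
eps   = 1e-7

mean = x.mean(axis: 0, keepdims: true)        # mean over the batch axis (axis: 0, the default)
xc   = x - mean                               # centered input (@xc)
var  = (xc**2).mean(axis: 0, keepdims: true)  # variance over the batch axis
std  = Numo::NMath.sqrt(var + eps)            # eps avoids division by zero (@std)
xn   = xc / std                               # normalized input (@xn)
y    = gamma * xn + beta                      # scale and shift, as in the last line of #forward

At inference time (learning_phase false), the same normalization is applied with the stored @running_mean and @running_var in place of the batch statistics, as in the else branch of #forward.
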
@@ -1,283 +1,283 @@
-module DNN
-  module Optimizers
-
-    # Super class of all optimizer classes.
-    class Optimizer
-      # @return [Float] Return the Learning rate.
-      attr_accessor :learning_rate
-
-      def initialize(learning_rate)
-        @learning_rate = learning_rate
-      end
-
-      # Update layers has param.
-      def update(layers)
-        target_params = layers.select { |layer| layer.is_a?(HasParamLayer) && layer.trainable }
-                              .map { |layer| layer.params.values }.flatten
-                              .select { |param| param.grad }
-        target_params.each do |param|
-          update_param(param)
-          param.grad = 0
-        end
-      end
-
-      def to_hash(merge_hash = nil)
-        hash = {class: self.class.name, learning_rate: @learning_rate}
-        hash.merge!(merge_hash) if merge_hash
-        hash
-      end
-
-      # Update param.
-      # Classes that inherit from this class must implement this method.
-      private def update_param(param)
-        raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'update_param'")
-      end
-    end
-
-
-    class SGD < Optimizer
-      # @return [Float] Return the momentum coefficient.
-      attr_accessor :momentum
-
-      def self.from_hash(hash)
-        self.new(hash[:learning_rate], momentum: hash[:momentum])
-      end
-
-      # @param [Float] learning_rate Learning rate.
-      # @param [Float] momentum momentum coefficient.
-      def initialize(learning_rate = 0.01, momentum: 0)
-        super(learning_rate)
-        @momentum = momentum
-        @v = {}
-      end
-
-      def to_hash
-        super(momentum: @momentum)
-      end
-
-      private def update_param(param)
-        amount = param.grad * @learning_rate
-        if @momentum > 0
-          @v[param] ||= 0
-          amount += @momentum * @v[param]
-          @v[param] = amount
-        end
-        param.data -= amount
-      end
-    end
-
-
-    class Nesterov < Optimizer
-      attr_accessor :momentum
-
-      def self.from_hash(hash)
-        self.new(hash[:learning_rate], momentum: hash[:momentum])
-      end
-
-      # @param [Float] learning_rate Learning rate.
-      # @param [Float] momentum momentum coefficient.
-      def initialize(learning_rate = 0.01, momentum: 0.9)
-        super(learning_rate)
-        @momentum = momentum
-        @v = {}
-      end
-
-      def to_hash
-        super(momentum: @momentum)
-      end
-
-      private def update_param(param)
-        @v[param] ||= 0
-        amount = param.grad * @learning_rate
-        @v[param] = @v[param] * @momentum - amount
-        param.data = (param.data + @momentum**2 * @v[param]) - (1 + @momentum) * amount
-      end
-    end
-
-
-    class AdaGrad < Optimizer
-      # @return [Float] Return the eps value.
-      attr_accessor :eps
-
-      # @param [Float] learning_rate Learning rate.
-      # @param [Float] eps Value to avoid division by zero.
-      def initialize(learning_rate = 0.01, eps: 1e-7)
-        super(learning_rate)
-        @eps = eps
-        @g = {}
-      end
-
-      def self.from_hash(hash)
-        self.new(hash[:learning_rate], eps: hash[:eps])
-      end
-
-      private def update_param(param)
-        @g[param] ||= 0
-        @g[param] += param.grad**2
-        param.data -= (@learning_rate / NMath.sqrt(@g[param] + @eps)) * param.grad
-      end
-
-      def to_hash
-        super(eps: @eps)
-      end
-    end
-
-
-    class RMSProp < Optimizer
-      # @return [Float] Return the alpha value.
-      attr_accessor :alpha
-      # @return [Float] Return the eps value.
-      attr_accessor :eps
-
-      def self.from_hash(hash)
-        self.new(hash[:learning_rate], alpha: hash[:alpha], eps: hash[:eps])
-      end
-
-      # @param [Float] learning_rate Learning rate.
-      # @param [Float] alpha Moving average index of past slopes.
-      # @param [Float] eps Value to avoid division by zero.
-      def initialize(learning_rate = 0.001, alpha: 0.9, eps: 1e-7)
-        super(learning_rate)
-        @alpha = alpha
-        @eps = eps
-        @g = {}
-      end
-
-      def to_hash
-        super(alpha: @alpha, eps: @eps)
-      end
-
-      private def update_param(param)
-        @g[param] ||= 0
-        @g[param] = @alpha * @g[param] + (1 - @alpha) * param.grad**2
-        param.data -= (@learning_rate / NMath.sqrt(@g[param] + @eps)) * param.grad
-      end
-    end
-
-
-    class AdaDelta < Optimizer
-      # @return [Float] Return the rho value.
-      attr_accessor :rho
-      # @return [Float] Return the eps value.
-      attr_accessor :eps
-
-      def self.from_hash(hash)
-        self.new(rho: hash[:rho], eps: hash[:eps])
-      end
-
-      # @param [Float] rho Moving average index of past slopes.
-      # @param [Float] eps Value to avoid division by zero.
-      def initialize(rho: 0.95, eps: 1e-6)
-        super(nil)
-        @rho = rho
-        @eps = eps
-        @h = {}
-        @s = {}
-      end
-
-      def to_hash
-        super(rho: @rho, eps: @eps)
-      end
-
-      private def update_param(param)
-        @h[param] ||= Xumo::SFloat.zeros(*param.data.shape)
-        @s[param] ||= Xumo::SFloat.zeros(*param.data.shape)
-        @h[param] = @rho * @h[param] + (1 - @rho) * param.grad**2
-        v = (NMath.sqrt(@s[param] + @eps) / NMath.sqrt(@h[param] + @eps)) * param.grad
-        @s[param] = @rho * @s[param] + (1 - @rho) * v**2
-        param.data -= v
-      end
-    end
-
-
-    class Adam < Optimizer
-      # @return [Float] Return the alpha value.
-      attr_accessor :alpha
-      # @return [Float] Return the beta1 value.
-      attr_accessor :beta1
-      # @return [Float] Return the beta2 value.
-      attr_accessor :beta2
-      # @return [Float] Return the eps value.
-      attr_accessor :eps
-
-      def self.from_hash(hash)
-        self.new(alpha: hash[:alpha], beta1: hash[:beta1], beta2: hash[:beta2], eps: hash[:eps])
-      end
-
-      # @param [Float] alpha Value used to calculate learning rate.
-      # @param [Float] beta1 Moving average index of beta1.
-      # @param [Float] beta2 Moving average index of beta2.
-      # @param [Float] eps Value to avoid division by zero.
-      def initialize(alpha: 0.001, beta1: 0.9, beta2: 0.999, eps: 1e-7)
-        super(nil)
-        @alpha = alpha
-        @beta1 = beta1
-        @beta2 = beta2
-        @eps = eps
-        @iter = 0
-        @m = {}
-        @v = {}
-      end
-
-      def update(layers)
-        @iter += 1
-        learning_rate = @alpha * Math.sqrt(1 - @beta2**@iter) / (1 - @beta1**@iter)
-        target_params = layers.select { |layer| layer.is_a?(HasParamLayer) && layer.trainable }
-                              .map { |layer| layer.params.values }.flatten
-                              .select { |param| param.grad }
-        target_params.each do |param|
-          update_param(param, learning_rate)
-          param.grad = 0
-        end
-      end
-
-      def to_hash
-        super(alpha: @alpha, beta1: @beta1, beta2: @beta2, eps: @eps)
-      end
-
-      private def update_param(param, learning_rate)
-        @m[param] ||= 0
-        @v[param] ||= 0
-        @m[param] += (1 - @beta1) * (param.grad - @m[param])
-        @v[param] += (1 - @beta2) * (param.grad**2 - @v[param])
-        param.data -= learning_rate * @m[param] / NMath.sqrt(@v[param] + @eps)
-      end
-    end
-
-
-    class RMSPropGraves < Optimizer
-      # @return [Float] Return the alpha value.
-      attr_accessor :alpha
-      # @return [Float] Return the eps value.
-      attr_accessor :eps
-
-      def self.from_hash(hash)
-        self.new(hash[:learning_rate], alpha: hash[:alpha], eps: hash[:eps])
-      end
-
-      # @param [Float] learning_rate Learning rate.
-      # @param [Float] alpha Moving average index of past slopes.
-      # @param [Float] eps Value to avoid division by zero.
-      def initialize(learning_rate = 0.0001, alpha: 0.95, eps: 0.0001)
-        super(learning_rate)
-        @alpha = alpha
-        @eps = eps
-        @m = {}
-        @v = {}
-      end
-
-      def to_hash
-        super(alpha: @alpha, eps: @eps)
-      end
-
-      private def update_param(param)
-        @m[param] ||= 0
-        @v[param] ||= 0
-        @m[param] = @alpha * @m[param] + (1 - @alpha) * param.grad
-        @v[param] = @alpha * @v[param] + (1 - @alpha) * param.grad**2
-        param.data -= (@learning_rate / NMath.sqrt(@v[param] - @m[param]**2 + @eps)) * param.grad
-      end
-    end
-
-  end
-end
+module DNN
+  module Optimizers
+
+    # Super class of all optimizer classes.
+    class Optimizer
+      # @return [Float] Return the Learning rate.
+      attr_accessor :learning_rate
+
+      def initialize(learning_rate)
+        @learning_rate = learning_rate
+      end
+
+      # Update layers has param.
+      def update(layers)
+        target_params = layers.select { |layer| layer.is_a?(HasParamLayer) && layer.trainable }
+                              .map { |layer| layer.params.values }.flatten
+                              .select { |param| param.grad }
+        target_params.each do |param|
+          update_param(param)
+          param.grad = 0
+        end
+      end
+
+      def to_hash(merge_hash = nil)
+        hash = {class: self.class.name, learning_rate: @learning_rate}
+        hash.merge!(merge_hash) if merge_hash
+        hash
+      end
+
+      # Update param.
+      # Classes that inherit from this class must implement this method.
+      private def update_param(param)
+        raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'update_param'")
+      end
+    end
+
+
+    class SGD < Optimizer
+      # @return [Float] Return the momentum coefficient.
+      attr_accessor :momentum
+
+      def self.from_hash(hash)
+        self.new(hash[:learning_rate], momentum: hash[:momentum])
+      end
+
+      # @param [Float] learning_rate Learning rate.
+      # @param [Float] momentum momentum coefficient.
+      def initialize(learning_rate = 0.01, momentum: 0)
+        super(learning_rate)
+        @momentum = momentum
+        @v = {}
+      end
+
+      def to_hash
+        super(momentum: @momentum)
+      end
+
+      private def update_param(param)
+        amount = param.grad * @learning_rate
+        if @momentum > 0
+          @v[param] ||= 0
+          amount += @momentum * @v[param]
+          @v[param] = amount
+        end
+        param.data -= amount
+      end
+    end
+
+
+    class Nesterov < Optimizer
+      attr_accessor :momentum
+
+      def self.from_hash(hash)
+        self.new(hash[:learning_rate], momentum: hash[:momentum])
+      end
+
+      # @param [Float] learning_rate Learning rate.
+      # @param [Float] momentum momentum coefficient.
+      def initialize(learning_rate = 0.01, momentum: 0.9)
+        super(learning_rate)
+        @momentum = momentum
+        @v = {}
+      end
+
+      def to_hash
+        super(momentum: @momentum)
+      end
+
+      private def update_param(param)
+        @v[param] ||= 0
+        amount = param.grad * @learning_rate
+        @v[param] = @v[param] * @momentum - amount
+        param.data = (param.data + @momentum**2 * @v[param]) - (1 + @momentum) * amount
+      end
+    end
+
+
+    class AdaGrad < Optimizer
+      # @return [Float] Return the eps value.
+      attr_accessor :eps
+
+      # @param [Float] learning_rate Learning rate.
+      # @param [Float] eps Value to avoid division by zero.
+      def initialize(learning_rate = 0.01, eps: 1e-7)
+        super(learning_rate)
+        @eps = eps
+        @g = {}
+      end
+
+      def self.from_hash(hash)
+        self.new(hash[:learning_rate], eps: hash[:eps])
+      end
+
+      private def update_param(param)
+        @g[param] ||= 0
+        @g[param] += param.grad**2
+        param.data -= (@learning_rate / NMath.sqrt(@g[param] + @eps)) * param.grad
+      end
+
+      def to_hash
+        super(eps: @eps)
+      end
+    end
+
+
+    class RMSProp < Optimizer
+      # @return [Float] Return the alpha value.
+      attr_accessor :alpha
+      # @return [Float] Return the eps value.
+      attr_accessor :eps
+
+      def self.from_hash(hash)
+        self.new(hash[:learning_rate], alpha: hash[:alpha], eps: hash[:eps])
+      end
+
+      # @param [Float] learning_rate Learning rate.
+      # @param [Float] alpha Moving average index of past slopes.
+      # @param [Float] eps Value to avoid division by zero.
+      def initialize(learning_rate = 0.001, alpha: 0.9, eps: 1e-7)
+        super(learning_rate)
+        @alpha = alpha
+        @eps = eps
+        @g = {}
+      end
+
+      def to_hash
+        super(alpha: @alpha, eps: @eps)
+      end
+
+      private def update_param(param)
+        @g[param] ||= 0
+        @g[param] = @alpha * @g[param] + (1 - @alpha) * param.grad**2
+        param.data -= (@learning_rate / NMath.sqrt(@g[param] + @eps)) * param.grad
+      end
+    end
+
+
+    class AdaDelta < Optimizer
+      # @return [Float] Return the rho value.
+      attr_accessor :rho
+      # @return [Float] Return the eps value.
+      attr_accessor :eps
+
+      def self.from_hash(hash)
+        self.new(rho: hash[:rho], eps: hash[:eps])
+      end
+
+      # @param [Float] rho Moving average index of past slopes.
+      # @param [Float] eps Value to avoid division by zero.
+      def initialize(rho: 0.95, eps: 1e-6)
+        super(nil)
+        @rho = rho
+        @eps = eps
+        @h = {}
+        @s = {}
+      end
+
+      def to_hash
+        super(rho: @rho, eps: @eps)
+      end
+
+      private def update_param(param)
+        @h[param] ||= Xumo::SFloat.zeros(*param.data.shape)
+        @s[param] ||= Xumo::SFloat.zeros(*param.data.shape)
+        @h[param] = @rho * @h[param] + (1 - @rho) * param.grad**2
+        v = (NMath.sqrt(@s[param] + @eps) / NMath.sqrt(@h[param] + @eps)) * param.grad
+        @s[param] = @rho * @s[param] + (1 - @rho) * v**2
+        param.data -= v
+      end
+    end
+
+
+    class Adam < Optimizer
+      # @return [Float] Return the alpha value.
+      attr_accessor :alpha
+      # @return [Float] Return the beta1 value.
+      attr_accessor :beta1
+      # @return [Float] Return the beta2 value.
+      attr_accessor :beta2
+      # @return [Float] Return the eps value.
+      attr_accessor :eps
+
+      def self.from_hash(hash)
+        self.new(alpha: hash[:alpha], beta1: hash[:beta1], beta2: hash[:beta2], eps: hash[:eps])
+      end
+
+      # @param [Float] alpha Value used to calculate learning rate.
+      # @param [Float] beta1 Moving average index of beta1.
+      # @param [Float] beta2 Moving average index of beta2.
+      # @param [Float] eps Value to avoid division by zero.
+      def initialize(alpha: 0.001, beta1: 0.9, beta2: 0.999, eps: 1e-7)
+        super(nil)
+        @alpha = alpha
+        @beta1 = beta1
+        @beta2 = beta2
+        @eps = eps
+        @iter = 0
+        @m = {}
+        @v = {}
+      end
+
+      def update(layers)
+        @iter += 1
+        learning_rate = @alpha * Math.sqrt(1 - @beta2**@iter) / (1 - @beta1**@iter)
+        target_params = layers.select { |layer| layer.is_a?(HasParamLayer) && layer.trainable }
+                              .map { |layer| layer.params.values }.flatten
+                              .select { |param| param.grad }
+        target_params.each do |param|
+          update_param(param, learning_rate)
+          param.grad = 0
+        end
+      end
+
+      def to_hash
+        super(alpha: @alpha, beta1: @beta1, beta2: @beta2, eps: @eps)
+      end
+
+      private def update_param(param, learning_rate)
+        @m[param] ||= 0
+        @v[param] ||= 0
+        @m[param] += (1 - @beta1) * (param.grad - @m[param])
+        @v[param] += (1 - @beta2) * (param.grad**2 - @v[param])
+        param.data -= learning_rate * @m[param] / NMath.sqrt(@v[param] + @eps)
+      end
+    end
+
+
+    class RMSPropGraves < Optimizer
+      # @return [Float] Return the alpha value.
+      attr_accessor :alpha
+      # @return [Float] Return the eps value.
+      attr_accessor :eps
+
+      def self.from_hash(hash)
+        self.new(hash[:learning_rate], alpha: hash[:alpha], eps: hash[:eps])
+      end
+
+      # @param [Float] learning_rate Learning rate.
+      # @param [Float] alpha Moving average index of past slopes.
+      # @param [Float] eps Value to avoid division by zero.
+      def initialize(learning_rate = 0.0001, alpha: 0.95, eps: 0.0001)
+        super(learning_rate)
+        @alpha = alpha
+        @eps = eps
+        @m = {}
+        @v = {}
+      end
+
+      def to_hash
+        super(alpha: @alpha, eps: @eps)
+      end
+
+      private def update_param(param)
+        @m[param] ||= 0
+        @v[param] ||= 0
+        @m[param] = @alpha * @m[param] + (1 - @alpha) * param.grad
+        @v[param] = @alpha * @v[param] + (1 - @alpha) * param.grad**2
+        param.data -= (@learning_rate / NMath.sqrt(@v[param] - @m[param]**2 + @eps)) * param.grad
+      end
+    end
+
+  end
+end
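
For reference, a minimal standalone sketch of the Adam update implemented above. It is not part of the gem or of this diff: it uses Numo::NArray in place of the Xumo alias, and the parameter and gradient values are made up for illustration rather than coming from real Param objects.

require "numo/narray"

alpha, beta1, beta2, eps = 0.001, 0.9, 0.999, 1e-7
m, v, iter = 0, 0, 0                           # first/second moment caches and step counter

data = Numo::SFloat[1.0, -2.0, 3.0]            # hypothetical parameter values (param.data)
grad = Numo::SFloat[0.1, -0.3, 0.2]            # hypothetical gradient (param.grad)

iter += 1
lr = alpha * Math.sqrt(1 - beta2**iter) / (1 - beta1**iter)  # bias-corrected step size from Adam#update
m += (1 - beta1) * (grad - m)                  # running mean of the gradient
v += (1 - beta2) * (grad**2 - v)               # running mean of the squared gradient
data -= lr * m / Numo::NMath.sqrt(v + eps)     # the step applied in Adam#update_param

Running the last five lines once per training step mirrors what Adam#update and Adam#update_param do for each trainable Param.
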