ruby-dnn 0.9.4 → 0.10.0

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
@@ -1,18 +1,34 @@
+ # This class manages input datas and output datas together.
  class DNN::Dataset
- def initialize(x_datas, y_datas)
+ # @param [Numo::SFloat] x_datas input datas.
+ # @param [Numo::SFloat] y_datas output datas.
+ # @param [Bool] random Set true to return batches randomly. Setting false returns batches in order of index.
+ def initialize(x_datas, y_datas, random = true)
  @x_datas = x_datas
  @y_datas = y_datas
+ @random = random
  @num_datas = x_datas.shape[0]
- @indexes = @num_datas.times.to_a.shuffle
+ reset_indexs
  end

- def get_batch(batch_size)
+ # Return the next batch.
+ # If the number of remaining data < batch size, if random = true, shuffle the data again and return a batch.
+ # If random = false, all remaining data will be returned regardless of the batch size.
+ def next_batch(batch_size)
  if @indexes.length < batch_size
- @indexes = @num_datas.times.to_a.shuffle
+ batch_indexes = @indexes unless @random
+ reset_indexs
+ batch_indexes = @indexes.shift(batch_size) if @random
+ else
+ batch_indexes = @indexes.shift(batch_size)
  end
- batch_indexes = @indexes.shift(batch_size)
  x_batch = @x_datas[batch_indexes, false]
  y_batch = @y_datas[batch_indexes, false]
  [x_batch, y_batch]
  end
+
+ private def reset_indexs
+ @indexes = @num_datas.times.to_a
+ @indexes.shuffle! if @random
+ end
  end
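The Dataset hunk above renames get_batch to next_batch, adds the random flag, and moves index handling into a private reset_indexs helper. A minimal usage sketch (the sample data and shapes here are illustrative, not taken from the gem):

    # Numo::SFloat is the array type named in the doc comments above.
    x = Numo::SFloat.new(100, 10).rand
    y = Numo::SFloat.new(100, 1).rand

    shuffled = DNN::Dataset.new(x, y, true)     # random = true reshuffles when indexes run low
    x_batch, y_batch = shuffled.next_batch(32)  # 0.9.4 called this get_batch(32)

    ordered = DNN::Dataset.new(x, y, false)     # random = false walks the data in index order
    x_batch, y_batch = ordered.next_batch(32)   # a short final batch returns all remaining data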
@@ -28,7 +28,7 @@ module DNN
  class Const < Initializer
  attr_reader :const

- def self.load_hash(hash)
+ def self.from_hash(hash)
  self.new(hash[:const])
  end

@@ -51,7 +51,7 @@ module DNN
  attr_reader :mean
  attr_reader :std

- def self.load_hash(hash)
+ def self.from_hash(hash)
  self.new(hash[:mean], hash[:std], hash[:seed])
  end

@@ -76,7 +76,7 @@ module DNN
  attr_reader :min
  attr_reader :max

- def self.load_hash(hash)
+ def self.from_hash(hash)
  self.new(hash[:min], hash[:max], hash[:seed])
  end

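The three initializer hunks above are part of a blanket rename of the deserialization hook from load_hash to from_hash. A round-trip sketch, assuming DNN::Utils.from_hash dispatches on the :class key the way Utils.load_hash did in 0.9.4:

    hash = DNN::Initializers::RandomNormal.new.to_hash   # serialized by the existing to_hash
    init = DNN::Utils.from_hash(hash)                    # 0.9.4 code called Utils.load_hash(hash)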
@@ -3,6 +3,9 @@ module DNN

  # Super class of all optimizer classes.
  class Layer
+ # @return [Bool] learning_phase Return the true if learning.
+ attr_accessor :learning_phase
+ # @return [Array] Return the shape of the input data.
  attr_reader :input_shape

  def initialize
@@ -10,12 +13,15 @@ module DNN
  end

  # Build the layer.
+ # @param [Array] input_shape Setting the shape of the input data.
  def build(input_shape)
  @input_shape = input_shape
+ @learning_phase = true
  @built = true
  end

  # Does the layer have already been built?
+ # @return [Bool] If layer have already been built then return true.
  def built?
  @built
  end
@@ -26,10 +32,13 @@ module DNN
  end

  # Backward propagation.
- def backward(dout)
- raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'update'")
+ def backward(dy)
+ raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'backward'")
  end

+ # Please reimplement this method as needed.
+ # The default implementation return input_shape.
+ # @return [Array] Return the shape of the output data.
  def output_shape
  @input_shape
  end
@@ -55,31 +64,11 @@ module DNN
  @params = {}
  @trainable = true
  end
-
- def build(input_shape)
- @input_shape = input_shape
- unless @built
- @built = true
- init_params
- end
- end
-
- # Update the parameters.
- def update(optimizer)
- optimizer.update(@params) if @trainable
- end
-
- private
-
- # Initialize of the parameters.
- def init_params
- raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'init_params'")
- end
  end


  class InputLayer < Layer
- def self.load_hash(hash)
+ def self.from_hash(hash)
  self.new(hash[:input_shape])
  end

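In the layer hunks above, learning_phase becomes an attribute set by Layer#build, backward now takes dy instead of dout, and HasParamLayer loses its build/update/init_params template methods. A sketch of a custom layer against the reworked base class (the DNN::Layers namespace and the pass-through behaviour are assumptions for illustration):

    class PassThrough < DNN::Layers::Layer
      def forward(x)
        x
      end

      def backward(dy)   # the argument was named dout in 0.9.4
        dy
      end

      # output_shape defaults to input_shape, so no override is needed here.
    end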
@@ -97,8 +86,8 @@ module DNN
  x
  end

- def backward(dout)
- dout
+ def backward(dy)
+ dy
  end

  def to_hash
@@ -109,44 +98,42 @@ module DNN

  # It is a superclass of all connection layers.
  class Connection < HasParamLayer
- # @return [DNN::Initializers] weight initializer.
+ # @return [DNN::Initializers::Initializer] Weight initializer.
  attr_reader :weight_initializer
- # @return [DNN::Initializers] bias initializer.
+ # @return [DNN::Initializers::Initializer] Bias initializer.
  attr_reader :bias_initializer
- # @return [Float] L1 regularization.
- attr_reader :l1_lambda
- # @return [Float] L2 regularization.
- attr_reader :l2_lambda
-
- # @param [DNN::Initializers] weight_initializer weight initializer.
- # @param [DNN::Initializers] bias_initializer bias initializer.
- # @param [Float] l1_lambda L1 regularization
- # @param [Float] l2_lambda L2 regularization
+ # @return [DNN::Regularizers::Regularizer] Weight regularization.
+ attr_reader :weight_regularizer
+ # @return [DNN::Regularizers::Regularizer] Bias regularization.
+ attr_reader :bias_regularizer
+
+ # @param [DNN::Initializers::Initializer] weight_initializer Weight initializer.
+ # @param [DNN::Initializers::Initializer] bias_initializer Bias initializer.
+ # @param [DNN::Regularizers::Regularizer] weight_regularizer Weight regularization.
+ # @param [DNN::Regularizers::Regularizer] bias_regularizer Bias regularization.
  # @param [Bool] use_bias whether to use bias.
  def initialize(weight_initializer: Initializers::RandomNormal.new,
  bias_initializer: Initializers::Zeros.new,
- l1_lambda: 0,
- l2_lambda: 0,
+ weight_regularizer: nil,
+ bias_regularizer: nil,
  use_bias: true)
  super()
  @weight_initializer = weight_initializer
  @bias_initializer = bias_initializer
- @l1_lambda = l1_lambda
- @l2_lambda = l2_lambda
- @params[:weight] = @weight = Param.new
- # For compatibility on or before with v0.9.3, setting use_bias to nil use bias.
- # Therefore, setting use_bias to nil is deprecated.
- if use_bias || use_bias == nil
- @params[:bias] = @bias = Param.new
+ @weight_regularizer = weight_regularizer
+ @bias_regularizer = bias_regularizer
+ @params[:weight] = @weight = Param.new(nil, 0)
+ if use_bias
+ @params[:bias] = @bias = Param.new(nil, 0)
  else
- @params[:bias] = @bias = nil
+ @bias = nil
  end
  end

  def regularizers
  regularizers = []
- regularizers << Lasso.new(@l1_lambda, @weight) if @l1_lambda > 0
- regularizers << Ridge.new(@l2_lambda, @weight) if @l2_lambda > 0
+ regularizers << @weight_regularizer if @weight_regularizer
+ regularizers << @bias_regularizer if @bias_regularizer
  regularizers
  end

@@ -158,15 +145,18 @@ module DNN
  def to_hash(merge_hash)
  super({weight_initializer: @weight_initializer.to_hash,
  bias_initializer: @bias_initializer.to_hash,
- l1_lambda: @l1_lambda,
- l2_lambda: @l2_lambda}.merge(merge_hash))
+ weight_regularizer: @weight_regularizer&.to_hash,
+ bias_regularizer: @bias_regularizer&.to_hash,
+ use_bias: use_bias}.merge(merge_hash))
  end

- private
-
- def init_params
+ private def init_weight_and_bias
  @weight_initializer.init_param(self, @weight)
- @bias_initializer.init_param(self, @bias) if @bias
+ @weight_regularizer.param = @weight if @weight_regularizer
+ if @bias
+ @bias_initializer.init_param(self, @bias)
+ @bias_regularizer.param = @bias if @bias_regularizer
+ end
  end
  end

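Connection drops the l1_lambda/l2_lambda keywords in favour of regularizer objects, which the layer now wires to its params in init_weight_and_bias. A migration sketch (the Lasso class is the one referenced in the removed 0.9.4 code; its 0.10.0 constructor is assumed here to take only the coefficient, since the layer now assigns regularizer.param itself):

    # 0.9.4:  DNN::Layers::Dense.new(64, l1_lambda: 0.01)
    # 0.10.0:
    dense = DNN::Layers::Dense.new(64,
                                   weight_regularizer: DNN::Regularizers::Lasso.new(0.01),
                                   use_bias: true)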
@@ -176,12 +166,12 @@ module DNN
  # @return [Integer] number of nodes.
  attr_reader :num_nodes

- def self.load_hash(hash)
+ def self.from_hash(hash)
  self.new(hash[:num_nodes],
- weight_initializer: Utils.load_hash(hash[:weight_initializer]),
- bias_initializer: Utils.load_hash(hash[:bias_initializer]),
- l1_lambda: hash[:l1_lambda],
- l2_lambda: hash[:l2_lambda],
+ weight_initializer: Utils.from_hash(hash[:weight_initializer]),
+ bias_initializer: Utils.from_hash(hash[:bias_initializer]),
+ weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
+ bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
  use_bias: hash[:use_bias])
  end

@@ -189,25 +179,35 @@ module DNN
  def initialize(num_nodes,
  weight_initializer: Initializers::RandomNormal.new,
  bias_initializer: Initializers::Zeros.new,
- l1_lambda: 0,
- l2_lambda: 0,
+ weight_regularizer: nil,
+ bias_regularizer: nil,
  use_bias: true)
  super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
- l1_lambda: l1_lambda, l2_lambda: l2_lambda, use_bias: use_bias)
+ weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
  @num_nodes = num_nodes
  end
-
+
+ def build(input_shape)
+ super
+ num_prev_nodes = input_shape[0]
+ @weight.data = Xumo::SFloat.new(num_prev_nodes, @num_nodes)
+ @bias.data = Xumo::SFloat.new(@num_nodes) if @bias
+ init_weight_and_bias
+ end
+
  def forward(x)
  @x = x
- out = x.dot(@weight.data)
- out += @bias.data if @bias
- out
+ y = x.dot(@weight.data)
+ y += @bias.data if @bias
+ y
  end

- def backward(dout)
- @weight.grad = @x.transpose.dot(dout)
- @bias.grad = dout.sum(0) if @bias
- dout.dot(@weight.data.transpose)
+ def backward(dy)
+ if @trainable
+ @weight.grad += @x.transpose.dot(dy)
+ @bias.grad += dy.sum(0) if @bias
+ end
+ dy.dot(@weight.data.transpose)
  end

  def output_shape
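Dense now allocates its weight and bias in build rather than in the removed init_params, and accumulates gradients with += only when @trainable. A minimal shape check, assuming the DNN::Layers namespace (Xumo is the array backend used throughout the code above):

    dense = DNN::Layers::Dense.new(3)
    dense.build([2])                   # allocates a 2x3 weight and a bias of length 3
    x = Xumo::SFloat.new(4, 2).rand    # batch of 4 samples with 2 features
    y = dense.forward(x)               # => Xumo::SFloat of shape [4, 3]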
@@ -217,17 +217,6 @@ module DNN
  def to_hash
  super({num_nodes: @num_nodes})
  end
-
- private
-
- # TODO
- # Change writing super() other than the first.
- def init_params
- num_prev_nodes = @input_shape[0]
- @weight.data = Xumo::SFloat.new(num_prev_nodes, @num_nodes)
- @bias.data = Xumo::SFloat.new(@num_nodes) if @bias
- super()
- end
  end


@@ -236,8 +225,8 @@ module DNN
  x.reshape(x.shape[0], *output_shape)
  end

- def backward(dout)
- dout.reshape(dout.shape[0], *@input_shape)
+ def backward(dy)
+ dy.reshape(dy.shape[0], *@input_shape)
  end

  def output_shape
@@ -247,7 +236,7 @@ module DNN


  class Reshape < Layer
- def self.load_hash(hash)
+ def self.from_hash(hash)
  self.new(hash[:output_shape])
  end

@@ -260,8 +249,8 @@ module DNN
  x.reshape(x.shape[0], *@output_shape)
  end

- def backward(dout)
- dout.reshape(dout.shape[0], *@input_shape)
+ def backward(dy)
+ dy.reshape(dy.shape[0], *@input_shape)
  end

  def output_shape
@@ -276,11 +265,11 @@ module DNN

  class Dropout < Layer
  # @return [Float] dropout ratio.
- attr_reader :dropout_ratio
+ attr_accessor :dropout_ratio
  # @return [Float] Use 'weight scaling inference rule'.
  attr_reader :use_scale

- def self.load_hash(hash)
+ def self.from_hash(hash)
  self.new(hash[:dropout_ratio], seed: hash[:seed], use_scale: hash[:use_scale])
  end

@@ -292,7 +281,7 @@ module DNN
  @mask = nil
  end

- def forward(x, learning_phase)
+ def forward(x)
  if learning_phase
  Xumo::SFloat.srand(@seed)
  @mask = Xumo::SFloat.ones(*x.shape).rand < @dropout_ratio
@@ -303,9 +292,9 @@ module DNN
  x
  end

- def backward(dout)
- dout[@mask] = 0
- dout
+ def backward(dy)
+ dy[@mask] = 0
+ dy
  end

  def to_hash
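Dropout#forward no longer receives learning_phase as an argument; the flag is now the learning_phase attribute that Layer#build initializes to true. A sketch of toggling it for inference (namespace and shapes assumed):

    dropout = DNN::Layers::Dropout.new(0.5)
    dropout.build([10])                 # learning_phase is set to true here
    x = Xumo::SFloat.new(4, 10).rand
    train_out = dropout.forward(x)      # applies the random mask
    dropout.learning_phase = false
    infer_out = dropout.forward(x)      # inference path, no masking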
@@ -313,64 +302,6 @@ module DNN
  end
  end

-
- class BatchNormalization < HasParamLayer
- # @return [Float] Exponential moving average of mean and variance.
- attr_reader :momentum
-
- def self.load_hash(hash)
- self.new(momentum: hash[:momentum])
- end
-
- # @param [Float] momentum Exponential moving average of mean and variance.
- def initialize(momentum: 0.9)
- super()
- @momentum = momentum
- end
-
- def forward(x, learning_phase)
- if learning_phase
- mean = x.mean(0)
- @xc = x - mean
- var = (@xc**2).mean(0)
- @std = NMath.sqrt(var + 1e-7)
- xn = @xc / @std
- @xn = xn
- @running_mean.data = @momentum * @running_mean.data + (1 - @momentum) * mean
- @running_var.data = @momentum * @running_var.data + (1 - @momentum) * var
- else
- xc = x - @running_mean.data
- xn = xc / NMath.sqrt(@running_var.data + 1e-7)
- end
- @gamma.data * xn + @beta.data
- end
-
- def backward(dout)
- batch_size = dout.shape[0]
- @beta.grad = dout.sum(0)
- @gamma.grad = (@xn * dout).sum(0)
- dxn = @gamma.data * dout
- dxc = dxn / @std
- dstd = -((dxn * @xc) / (@std**2)).sum(0)
- dvar = 0.5 * dstd / @std
- dxc += (2.0 / batch_size) * @xc * dvar
- dmean = dxc.sum(0)
- dxc - dmean / batch_size
- end
-
- def to_hash
- super({momentum: @momentum})
- end
-
- private
-
- def init_params
- @params[:gamma] = @gamma = Param.new(Xumo::SFloat.ones(*output_shape))
- @params[:beta] = @beta = Param.new(Xumo::SFloat.zeros(*output_shape))
- @params[:running_mean] = @running_mean = Param.new(Xumo::SFloat.zeros(*output_shape))
- @params[:running_var] = @running_var = Param.new(Xumo::SFloat.zeros(*output_shape))
- end
- end
  end

  end
@@ -2,8 +2,8 @@ module DNN
  module Losses

  class Loss
- def forward(out, y, layers)
- loss_value = loss(out, y)
+ def forward(x, y, layers)
+ loss_value = forward_loss(x, y)
  regularizers = layers.select { |layer| layer.is_a?(Connection) }
  .map { |layer| layer.regularizers }.flatten

@@ -13,122 +13,161 @@ module DNN
  loss_value
  end

- def backward(y)
- raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'backward'")
- end
-
- def regularizes_backward(layers)
+ def backward(y, layers)
  layers.select { |layer| layer.is_a?(Connection) }.each do |layer|
  layer.regularizers.each do |regularizer|
  regularizer.backward
  end
  end
+ backward_loss(y)
  end

- def to_hash
- {class: self.class.name}
+ def to_hash(merge_hash = nil)
+ hash = {class: self.class.name}
+ hash.merge!(merge_hash) if merge_hash
+ hash
  end

  private

- def loss(out, y)
- raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'loss'")
+ def forward_loss(x, y)
+ raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'forward_loss'")
+ end
+
+ def backward_loss(y)
+ raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'backward_loss'")
  end
  end

  class MeanSquaredError < Loss
- def loss(out, y)
- @out = out
+ private
+
+ def forward_loss(x, y)
+ @x = x
  batch_size = y.shape[0]
- 0.5 * ((out - y)**2).sum / batch_size
+ 0.5 * ((x - y)**2).sum / batch_size
  end

- def backward(y)
- @out - y
+ def backward_loss(y)
+ @x - y
  end
  end


  class MeanAbsoluteError < Loss
- def loss(out, y)
- @out = out
+ private
+
+ def forward_loss(x, y)
+ @x = x
  batch_size = y.shape[0]
- (out - y).abs.sum / batch_size
+ (x - y).abs.sum / batch_size
  end

- def backward(y)
- dout = @out - y
- dout[dout >= 0] = 1
- dout[dout < 0] = -1
- dout
+ def backward_loss(y)
+ dy = @x - y
+ dy[dy >= 0] = 1
+ dy[dy < 0] = -1
+ dy
  end
  end


  class HuberLoss < Loss
- def forward(out, y, layers)
- @loss_value = super(out, y, layers)
+ def forward(x, y, layers)
+ @loss_value = super(x, y, layers)
  end

- def loss(out, y)
- @out = out
+ private
+
+ def forward_loss(x, y)
+ @x = x
  loss_value = loss_l1(y)
  loss_value > 1 ? loss_value : loss_l2(y)
  end

- def backward(y)
- dout = @out - y
+ def backward_loss(y)
+ dy = @x - y
  if @loss_value > 1
- dout[dout >= 0] = 1
- dout[dout < 0] = -1
+ dy[dy >= 0] = 1
+ dy[dy < 0] = -1
  end
- dout
+ dy
  end

- private
-
  def loss_l1(y)
  batch_size = y.shape[0]
- (@out - y).abs.sum / batch_size
+ (@x - y).abs.sum / batch_size
  end

  def loss_l2(y)
  batch_size = y.shape[0]
- 0.5 * ((@out - y)**2).sum / batch_size
+ 0.5 * ((@x - y)**2).sum / batch_size
  end
  end


  class SoftmaxCrossEntropy < Loss
+ # @return [Float] Return the eps value.
+ attr_accessor :eps
+
+ def self.from_hash(hash)
+ SoftmaxCrossEntropy.new(eps: hash[:eps])
+ end
+
  def self.softmax(x)
  NMath.exp(x) / NMath.exp(x).sum(1).reshape(x.shape[0], 1)
  end

- def loss(x, y)
- @out = SoftmaxCrossEntropy.softmax(x)
+ # @param [Float] eps Value to avoid nan.
+ def initialize(eps: 1e-7)
+ @eps = eps
+ end
+
+ def to_hash
+ super(eps: @eps)
+ end
+
+ private
+
+ def forward_loss(x, y)
+ @x = SoftmaxCrossEntropy.softmax(x)
  batch_size = y.shape[0]
- -(y * NMath.log(@out + 1e-7)).sum / batch_size
+ -(y * NMath.log(@x + @eps)).sum / batch_size
  end

- def backward(y)
- @out - y
+ def backward_loss(y)
+ @x - y
  end
  end


  class SigmoidCrossEntropy < Loss
- def initialize
- @sigmoid = Sigmoid.new
+ # @return [Float] Return the eps value.
+ attr_accessor :eps
+
+ def self.from_hash(hash)
+ SigmoidCrossEntropy.new(eps: hash[:eps])
  end

- def loss(x, y)
- @out = @sigmoid.forward(x)
+ # @param [Float] eps Value to avoid nan.
+ def initialize(eps: 1e-7)
+ @eps = eps
+ end
+
+ def to_hash
+ super(eps: @eps)
+ end
+
+ private
+
+ def forward_loss(x, y)
+ @x = Sigmoid.new.forward(x)
  batch_size = y.shape[0]
- -(y * NMath.log(@out + 1e-7) + (1 - y) * NMath.log(1 - @out + 1e-7))
+ -(y * NMath.log(@x) + (1 - y) * NMath.log(1 - @x))
  end

- def backward(y)
- @out - y
+ def backward_loss(y)
+ @x - y
  end
  end
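The Loss hunks split the public API (forward, backward) from private forward_loss/backward_loss hooks that concrete losses implement. A custom loss written against the new template, mirroring the MeanSquaredError implementation above (the class name is hypothetical):

    class MyMeanSquaredError < DNN::Losses::Loss
      private

      def forward_loss(x, y)
        @x = x
        0.5 * ((x - y)**2).sum / y.shape[0]
      end

      def backward_loss(y)
        @x - y
      end
    end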