ruby-dnn 0.9.4 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  module DNN
2
2
  module Layers
3
3
  # This module is used for convolution.
4
- module Conv2DModule
4
+ module Conv2D_Utils
5
5
  private
6
6
 
7
7
  # img[bsize, out_h, out_w, ch] to col[bsize * out_h * out_w, fil_h * fil_w * ch]
@@ -34,7 +34,7 @@ module DNN
34
34
  img
35
35
  end
36
36
 
37
- def padding(img, pad)
37
+ def zero_padding(img, pad)
38
38
  bsize, img_h, img_w, ch = img.shape
39
39
  img2 = Xumo::SFloat.zeros(bsize, img_h + pad[0], img_w + pad[1], ch)
40
40
  i_begin = pad[0] / 2
@@ -45,7 +45,7 @@ module DNN
45
45
  img2
46
46
  end
47
47
 
48
- def back_padding(img, pad)
48
+ def zero_padding_bwd(img, pad)
49
49
  i_begin = pad[0] / 2
50
50
  i_end = img.shape[1] - (pad[0] / 2.0).round
51
51
  j_begin = pad[1] / 2
@@ -53,22 +53,34 @@ module DNN
53
53
  img[true, i_begin...i_end, j_begin...j_end, true]
54
54
  end
55
55
 
56
- def out_size(prev_h, prev_w, fil_h, fil_w, strides)
57
- out_h = (prev_h - fil_h) / strides[0] + 1
58
- out_w = (prev_w - fil_w) / strides[1] + 1
56
+ def calc_conv2d_out_size(prev_h, prev_w, fil_h, fil_w, pad_h, pad_w, strides)
57
+ out_h = (prev_h + pad_h - fil_h) / strides[0] + 1
58
+ out_w = (prev_w + pad_w - fil_w) / strides[1] + 1
59
59
  [out_h, out_w]
60
60
  end
61
61
 
62
- def padding_size(prev_h, prev_w, out_h, out_w, strides)
62
+ def calc_deconv2d_out_size(prev_h, prev_w, fil_h, fil_w, pad_h, pad_w, strides)
63
+ out_h = (prev_h - 1) * strides[0] + fil_h - pad_h
64
+ out_w = (prev_w - 1) * strides[1] + fil_w - pad_w
65
+ [out_h, out_w]
66
+ end
67
+
68
+ def calc_padding_size(prev_h, prev_w, out_h, out_w, strides)
63
69
  pad_h = (prev_h.to_f / strides[0]).ceil - out_h
64
70
  pad_w = (prev_w.to_f / strides[1]).ceil - out_w
65
71
  [pad_h, pad_w]
66
72
  end
73
+
74
+ def calc_padding_size(prev_h, prev_w, out_h, out_w, strides)
75
+ pad_h = ((prev_h.to_f / strides[0]).ceil - out_h) * strides[0]
76
+ pad_w = ((prev_w.to_f / strides[1]).ceil - out_w) * strides[1]
77
+ [pad_h, pad_w]
78
+ end
67
79
  end
68
80
 
69
81
 
70
82
  class Conv2D < Connection
71
- include Conv2DModule
83
+ include Conv2D_Utils
72
84
 
73
85
  # @return [Integer] number of filters.
74
86
  attr_reader :num_filters
@@ -76,75 +88,81 @@ module DNN
76
88
  attr_reader :filter_size
77
89
  # @return [Array] Return stride length. stride length is of the form [height, width].
78
90
  attr_reader :strides
91
+ # @return [Array | Bool] Return padding size or whether to padding.
92
+ attr_reader :padding
79
93
 
80
- def self.load_hash(hash)
94
+ def self.from_hash(hash)
81
95
  Conv2D.new(hash[:num_filters], hash[:filter_size],
82
- weight_initializer: Utils.load_hash(hash[:weight_initializer]),
83
- bias_initializer: Utils.load_hash(hash[:bias_initializer]),
96
+ weight_initializer: Utils.from_hash(hash[:weight_initializer]),
97
+ bias_initializer: Utils.from_hash(hash[:bias_initializer]),
98
+ weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
99
+ bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
100
+ use_bias: hash[:use_bias],
84
101
  strides: hash[:strides],
85
- padding: hash[:padding],
86
- l1_lambda: hash[:l1_lambda],
87
- l2_lambda: hash[:l2_lambda],
88
- use_bias: hash[:use_bias])
102
+ padding: hash[:padding])
89
103
  end
90
104
 
91
- # @param [Integer] num_filters number of filters.
92
- # @param [Array or Integer] filter_size filter size. filter size is of the form [height, width].
93
- # @param [Array or Integer] strides stride length. stride length is of the form [height, width].
94
- # @param [Bool] padding Whether to padding.
105
+ # @param [Integer] num_filters Number of filters.
106
+ # @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
107
+ # @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
108
+ # @param [Array | Bool] padding Padding size or whether to padding. Padding size is of the form [height, width].
95
109
  def initialize(num_filters, filter_size,
96
110
  weight_initializer: Initializers::RandomNormal.new,
97
111
  bias_initializer: Initializers::Zeros.new,
112
+ weight_regularizer: nil,
113
+ bias_regularizer: nil,
114
+ use_bias: true,
98
115
  strides: 1,
99
- padding: false,
100
- l1_lambda: 0,
101
- l2_lambda: 0,
102
- use_bias: true)
116
+ padding: false)
103
117
  super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
104
- l1_lambda: l1_lambda, l2_lambda: l2_lambda, use_bias: use_bias)
118
+ weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
105
119
  @num_filters = num_filters
106
120
  @filter_size = filter_size.is_a?(Integer) ? [filter_size, filter_size] : filter_size
107
121
  @strides = strides.is_a?(Integer) ? [strides, strides] : strides
108
- @padding = padding
122
+ @padding = padding.is_a?(Integer) ? [padding, padding] : padding
109
123
  end
110
124
 
111
125
  def build(input_shape)
112
126
  super
113
- prev_h, prev_w = input_shape[0..1]
114
- @out_size = out_size(prev_h, prev_w, *@filter_size, @strides)
115
- if @padding
116
- @pad_size = padding_size(prev_h, prev_w, *@out_size, @strides)
117
- @out_size = [@out_size[0] + @pad_size[0], @out_size[1] + @pad_size[1]]
127
+ prev_h, prev_w, num_prev_filter = *input_shape
128
+ @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * num_prev_filter, @num_filters)
129
+ @bias.data = Xumo::SFloat.new(@num_filters) if @bias
130
+ init_weight_and_bias
131
+ if @padding == true
132
+ out_h, out_w = calc_conv2d_out_size(prev_h, prev_w, *@filter_size, 0, 0, @strides)
133
+ @pad_size = calc_padding_size(prev_h, prev_w, out_h, out_w, @strides)
134
+ elsif @padding.is_a?(Array)
135
+ @pad_size = @padding
136
+ else
137
+ @pad_size = [0, 0]
118
138
  end
139
+ @out_size = calc_conv2d_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
119
140
  end
120
141
 
121
142
  def forward(x)
122
- x = padding(x, @pad_size) if @padding
143
+ x = zero_padding(x, @pad_size) if @padding
123
144
  @x_shape = x.shape
124
145
  @col = im2col(x, *@out_size, *@filter_size, @strides)
125
- out = @col.dot(@weight.data)
126
- out += @bias.data if @bias
127
- out.reshape(x.shape[0], *@out_size, out.shape[3])
146
+ y = @col.dot(@weight.data)
147
+ y += @bias.data if @bias
148
+ y.reshape(x.shape[0], *@out_size, y.shape[3])
128
149
  end
129
150
 
130
- def backward(dout)
131
- dout = dout.reshape(dout.shape[0..2].reduce(:*), dout.shape[3])
132
- @weight.grad = @col.transpose.dot(dout)
133
- @bias.grad = dout.sum(0) if @bias
134
- dcol = dout.dot(@weight.data.transpose)
151
+ def backward(dy)
152
+ dy = dy.reshape(dy.shape[0..2].reduce(:*), dy.shape[3])
153
+ if @trainable
154
+ @weight.grad += @col.transpose.dot(dy)
155
+ @bias.grad += dy.sum(0) if @bias
156
+ end
157
+ dcol = dy.dot(@weight.data.transpose)
135
158
  dx = col2im(dcol, @x_shape, *@out_size, *@filter_size, @strides)
136
- @padding ? back_padding(dx, @pad_size) : dx
159
+ @padding ? zero_padding_bwd(dx, @pad_size) : dx
137
160
  end
138
161
 
139
162
  def output_shape
140
163
  [*@out_size, @num_filters]
141
164
  end
142
165
 
143
- # @return [Bool] whether to padding.
144
- def padding?
145
- @padding
146
- end
147
-
148
166
  # @return [Numo::SFloat] Convert weight to filter and return.
149
167
  def filters
150
168
  num_prev_filter = @input_shape[2]
@@ -163,35 +181,140 @@ module DNN
163
181
  strides: @strides,
164
182
  padding: @padding})
165
183
  end
166
-
167
- private
168
-
169
- def init_params
184
+ end
185
+
186
+
187
+ class Conv2D_Transpose < Connection
188
+ include Conv2D_Utils
189
+
190
+ # @return [Integer] number of filters.
191
+ attr_reader :num_filters
192
+ # @return [Array] Return filter size. filter size is of the form [height, width].
193
+ attr_reader :filter_size
194
+ # @return [Array] Return stride length. stride length is of the form [height, width].
195
+ attr_reader :strides
196
+ # @return [Array] Return padding size.
197
+ attr_reader :padding
198
+
199
+ def self.from_hash(hash)
200
+ Conv2D_Transpose.new(hash[:num_filters], hash[:filter_size],
201
+ weight_initializer: Utils.from_hash(hash[:weight_initializer]),
202
+ bias_initializer: Utils.from_hash(hash[:bias_initializer]),
203
+ weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
204
+ bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
205
+ use_bias: hash[:use_bias],
206
+ strides: hash[:strides],
207
+ padding: hash[:padding])
208
+ end
209
+
210
+ # @param [Integer] num_filters Number of filters.
211
+ # @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
212
+ # @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
213
+ # @param [Array] padding Padding size. Padding size is of the form [height, width].
214
+ def initialize(num_filters, filter_size,
215
+ weight_initializer: Initializers::RandomNormal.new,
216
+ bias_initializer: Initializers::Zeros.new,
217
+ weight_regularizer: nil,
218
+ bias_regularizer: nil,
219
+ use_bias: true,
220
+ strides: 1,
221
+ padding: false)
222
+ super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
223
+ weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
224
+ @num_filters = num_filters
225
+ @filter_size = filter_size.is_a?(Integer) ? [filter_size, filter_size] : filter_size
226
+ @strides = strides.is_a?(Integer) ? [strides, strides] : strides
227
+ @padding = padding.is_a?(Integer) ? [padding, padding] : padding
228
+ end
229
+
230
+ def build(input_shape)
231
+ super
232
+ prev_h, prev_w, num_prev_filter = *input_shape
233
+ @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * @num_filters, num_prev_filter)
234
+ @weight_initializer.init_param(self, @weight)
235
+ @weight_regularizer.param = @weight if @weight_regularizer
236
+ if @bias
237
+ @bias.data = Xumo::SFloat.new(@num_filters)
238
+ @bias_initializer.init_param(self, @bias)
239
+ @bias_regularizer.param = @bias if @bias_regularizer
240
+ end
241
+ if @padding == true
242
+ out_h, out_w = calc_deconv2d_out_size(prev_h, prev_w, *@filter_size, 0, 0, @strides)
243
+ @pad_size = calc_padding_size(out_h, out_w, prev_h, prev_w, @strides)
244
+ elsif @padding.is_a?(Array)
245
+ @pad_size = @padding
246
+ else
247
+ @pad_size = [0, 0]
248
+ end
249
+ @out_size = calc_deconv2d_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
250
+ end
251
+
252
+ def forward(x)
253
+ bsize = x.shape[0]
254
+ x = x.reshape(x.shape[0..2].reduce(:*), x.shape[3])
255
+ @x = x
256
+ col = x.dot(@weight.data.transpose)
257
+ img_shape = [bsize, @out_size[0] + @pad_size[0], @out_size[1] + @pad_size[1], @num_filters]
258
+ y = col2im(col, img_shape, *input_shape[0..1], *@filter_size, @strides)
259
+ y += @bias.data if @bias
260
+ @padding ? zero_padding_bwd(y, @pad_size) : y
261
+ end
262
+
263
+ def backward(dy)
264
+ dy = zero_padding(dy, @pad_size) if @padding
265
+ col = im2col(dy, *input_shape[0..1], *@filter_size, @strides)
266
+ if @trainable
267
+ @weight.grad += col.transpose.dot(@x)
268
+ @bias.grad += col.reshape(col.shape[0] * @filter_size.reduce(:*), @num_filters).sum(0) if @bias
269
+ end
270
+ dx = col.dot(@weight.data)
271
+ dx.reshape(dy.shape[0], *input_shape)
272
+ end
273
+
274
+ def output_shape
275
+ [*@out_size, @num_filters]
276
+ end
277
+
278
+ # @return [Numo::SFloat] Convert weight to filter and return.
279
+ def filters
170
280
  num_prev_filter = @input_shape[2]
171
- @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * num_prev_filter, @num_filters)
172
- @bias.data = Xumo::SFloat.new(@num_filters) if @bias
173
- super()
281
+ @weight.data.reshape(*@filter_size, @num_filters, num_prev_filter)
282
+ end
283
+
284
+ # @param [Numo::SFloat] filters Convert weight to filters and set.
285
+ def filters=(filters)
286
+ num_prev_filter = @input_shape[2]
287
+ @weight.data = filters.reshape(@filter_size.reduce(:*) * @num_filters, num_prev_filter)
288
+ end
289
+
290
+ def to_hash
291
+ super({num_filters: @num_filters,
292
+ filter_size: @filter_size,
293
+ strides: @strides,
294
+ padding: @padding})
174
295
  end
175
296
  end
176
297
 
177
298
 
178
299
  # Super class of all pooling2D class.
179
300
  class Pool2D < Layer
180
- include Conv2DModule
301
+ include Conv2D_Utils
181
302
 
182
- # @return [Array] Return pooling size. pooling size is of the form [height, width].
303
+ # @return [Array] Return pooling size. Pooling size is of the form [height, width].
183
304
  attr_reader :pool_size
184
- # @return [Array] Return stride length. stride length is of the form [height, width].
305
+ # @return [Array] Return stride length. Stride length is of the form [height, width].
185
306
  attr_reader :strides
307
+ # @return [Array | Bool] Return padding size or whether to padding.
308
+ attr_reader :padding
186
309
 
187
- def self.load_hash(pool2d_class, hash)
310
+ def self.from_hash(pool2d_class, hash)
188
311
  pool2d_class.new(hash[:pool_size], strides: hash[:strides], padding: hash[:padding])
189
312
  end
190
313
 
191
- # @param [Array or Integer] pool_size pooling size. pooling size is of the form [height, width].
192
- # @param [Array or Integer or NilClass] strides stride length. stride length is of the form [height, width].
314
+ # @param [Array | Integer] pool_size Pooling size. Pooling size is of the form [height, width].
315
+ # @param [Array | Integer | NilClass] strides stride length. Stride length is of the form [height, width].
193
316
  # If you set nil, treat pool_size as strides.
194
- # @param [Bool] padding Whether to padding.
317
+ # @param [Array | Bool] padding Padding size or whether to padding. Padding size is of the form [height, width].
195
318
  def initialize(pool_size, strides: nil, padding: false)
196
319
  super()
197
320
  @pool_size = pool_size.is_a?(Integer) ? [pool_size, pool_size] : pool_size
@@ -200,29 +323,28 @@ module DNN
200
323
  else
201
324
  @pool_size.clone
202
325
  end
203
- @padding = padding
326
+ @padding = padding.is_a?(Integer) ? [padding, padding] : padding
204
327
  end
205
328
 
206
329
  def build(input_shape)
207
330
  super
208
331
  prev_h, prev_w = input_shape[0..1]
209
332
  @num_channel = input_shape[2]
210
- @out_size = out_size(prev_h, prev_w, *@pool_size, @strides)
211
- if @padding
212
- @pad_size = padding_size(prev_h, prev_w, *@out_size, @strides)
213
- @out_size = [@out_size[0] + @pad_size[0], @out_size[1] + @pad_size[1]]
333
+ if @padding == true
334
+ out_h, out_w = calc_conv2d_out_size(prev_h, prev_w, *@pool_size, 0, 0, @strides)
335
+ @pad_size = calc_padding_size(prev_h, prev_w, out_h, out_w, @strides)
336
+ elsif @padding.is_a?(Array)
337
+ @pad_size = @padding
338
+ else
339
+ @pad_size = [0, 0]
214
340
  end
341
+ @out_size = calc_conv2d_out_size(prev_h, prev_w, *@pool_size, *@pad_size, @strides)
215
342
  end
216
343
 
217
344
  def output_shape
218
345
  [*@out_size, @num_channel]
219
346
  end
220
347
 
221
- # @return [Bool] whether to padding.
222
- def padding?
223
- @padding
224
- end
225
-
226
348
  def to_hash
227
349
  super({pool_size: @pool_size,
228
350
  strides: @strides,
@@ -232,12 +354,12 @@ module DNN
232
354
 
233
355
 
234
356
  class MaxPool2D < Pool2D
235
- def self.load_hash(hash)
236
- Pool2D.load_hash(self, hash)
357
+ def self.from_hash(hash)
358
+ Pool2D.from_hash(self, hash)
237
359
  end
238
360
 
239
361
  def forward(x)
240
- x = padding(x, @pad_size) if @padding
362
+ x = zero_padding(x, @pad_size) if @padding
241
363
  @x_shape = x.shape
242
364
  col = im2col(x, *@out_size, *@pool_size, @strides)
243
365
  col = col.reshape(x.shape[0] * @out_size.reduce(:*), @pool_size.reduce(:*), x.shape[3]).transpose(0, 2, 1)
@@ -246,23 +368,23 @@ module DNN
246
368
  col.max(1).reshape(x.shape[0], *@out_size, x.shape[3])
247
369
  end
248
370
 
249
- def backward(dout)
250
- dmax = Xumo::SFloat.zeros(dout.size * @pool_size.reduce(:*))
251
- dmax[@max_index] = dout.flatten
252
- dcol = dmax.reshape(dout.shape[0..2].reduce(:*), @pool_size.reduce(:*) * dout.shape[3])
371
+ def backward(dy)
372
+ dmax = Xumo::SFloat.zeros(dy.size * @pool_size.reduce(:*))
373
+ dmax[@max_index] = dy.flatten
374
+ dcol = dmax.reshape(dy.shape[0..2].reduce(:*), @pool_size.reduce(:*) * dy.shape[3])
253
375
  dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides)
254
- @padding ? back_padding(dx, @pad_size) : dx
376
+ @padding ? zero_padding_bwd(dx, @pad_size) : dx
255
377
  end
256
378
  end
257
379
 
258
380
 
259
381
  class AvgPool2D < Pool2D
260
- def self.load_hash(hash)
261
- Pool2D.load_hash(self, hash)
382
+ def self.from_hash(hash)
383
+ Pool2D.from_hash(self, hash)
262
384
  end
263
385
 
264
386
  def forward(x)
265
- x = padding(x, @pad_size) if @padding
387
+ x = zero_padding(x, @pad_size) if @padding
266
388
  @x_shape = x.shape
267
389
  col = im2col(x, *@out_size, *@pool_size, @strides)
268
390
  col = col.reshape(x.shape[0] * @out_size.reduce(:*), @pool_size.reduce(:*), x.shape[3]).transpose(0, 2, 1)
@@ -270,21 +392,23 @@ module DNN
270
392
  col.mean(1).reshape(x.shape[0], *@out_size, x.shape[3])
271
393
  end
272
394
 
273
- def backward(dout)
395
+ def backward(dy)
274
396
  row_length = @pool_size.reduce(:*)
275
- dout /= row_length
276
- davg = Xumo::SFloat.zeros(dout.size, row_length)
397
+ dy /= row_length
398
+ davg = Xumo::SFloat.zeros(dy.size, row_length)
277
399
  row_length.times do |i|
278
- davg[true, i] = dout.flatten
400
+ davg[true, i] = dy.flatten
279
401
  end
280
- dcol = davg.reshape(dout.shape[0..2].reduce(:*), dout.shape[3] * @pool_size.reduce(:*))
402
+ dcol = davg.reshape(dy.shape[0..2].reduce(:*), dy.shape[3] * @pool_size.reduce(:*))
281
403
  dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides)
282
- @padding ? back_padding(dx, @pad_size) : dx
404
+ @padding ? zero_padding_bwd(dx, @pad_size) : dx
283
405
  end
284
406
  end
285
407
 
286
408
 
287
409
  class UnPool2D < Layer
410
+ include Conv2D_Utils
411
+
288
412
  # @return [Array] Return unpooling size. unpooling size is of the form [height, width].
289
413
  attr_reader :unpool_size
290
414
 
@@ -294,7 +418,7 @@ module DNN
294
418
  @unpool_size = unpool_size.is_a?(Integer) ? [unpool_size, unpool_size] : unpool_size
295
419
  end
296
420
 
297
- def self.load_hash(hash)
421
+ def self.from_hash(hash)
298
422
  UnPool2D.new(hash[:unpool_size])
299
423
  end
300
424
 
@@ -308,8 +432,6 @@ module DNN
308
432
  @num_channel = input_shape[2]
309
433
  end
310
434
 
311
- include Conv2DModule
312
-
313
435
  def forward(x)
314
436
  @x_shape = x.shape
315
437
  unpool_h, unpool_w = @unpool_size
@@ -322,12 +444,12 @@ module DNN
322
444
  x2.reshape(x.shape[0], *@out_size, x.shape[3])
323
445
  end
324
446
 
325
- def backward(dout)
447
+ def backward(dy)
326
448
  in_size = input_shape[0..1]
327
- col = im2col(dout, *input_shape[0..1], *@unpool_size, @unpool_size)
328
- col = col.reshape(dout.shape[0] * in_size.reduce(:*), @unpool_size.reduce(:*), dout.shape[3]).transpose(0, 2, 1)
329
- .reshape(dout.shape[0] * in_size.reduce(:*) * dout.shape[3], @unpool_size.reduce(:*))
330
- col.sum(1).reshape(dout.shape[0], *in_size, dout.shape[3])
449
+ col = im2col(dy, *input_shape[0..1], *@unpool_size, @unpool_size)
450
+ col = col.reshape(dy.shape[0] * in_size.reduce(:*), @unpool_size.reduce(:*), dy.shape[3]).transpose(0, 2, 1)
451
+ .reshape(dy.shape[0] * in_size.reduce(:*) * dy.shape[3], @unpool_size.reduce(:*))
452
+ col.sum(1).reshape(dy.shape[0], *in_size, dy.shape[3])
331
453
  end
332
454
 
333
455
  def output_shape