ruby-dnn 0.9.4 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,7 @@
1
1
  module DNN
2
2
  module Layers
3
3
  # This module is used for convolution.
4
- module Conv2DModule
4
+ module Conv2D_Utils
5
5
  private
6
6
 
7
7
  # img[bsize, out_h, out_w, ch] to col[bsize * out_h * out_w, fil_h * fil_w * ch]
@@ -34,7 +34,7 @@ module DNN
34
34
  img
35
35
  end
36
36
 
37
- def padding(img, pad)
37
+ def zero_padding(img, pad)
38
38
  bsize, img_h, img_w, ch = img.shape
39
39
  img2 = Xumo::SFloat.zeros(bsize, img_h + pad[0], img_w + pad[1], ch)
40
40
  i_begin = pad[0] / 2
@@ -45,7 +45,7 @@ module DNN
45
45
  img2
46
46
  end
47
47
 
48
- def back_padding(img, pad)
48
+ def zero_padding_bwd(img, pad)
49
49
  i_begin = pad[0] / 2
50
50
  i_end = img.shape[1] - (pad[0] / 2.0).round
51
51
  j_begin = pad[1] / 2
@@ -53,22 +53,34 @@ module DNN
53
53
  img[true, i_begin...i_end, j_begin...j_end, true]
54
54
  end
55
55
 
56
- def out_size(prev_h, prev_w, fil_h, fil_w, strides)
57
- out_h = (prev_h - fil_h) / strides[0] + 1
58
- out_w = (prev_w - fil_w) / strides[1] + 1
56
+ def calc_conv2d_out_size(prev_h, prev_w, fil_h, fil_w, pad_h, pad_w, strides)
57
+ out_h = (prev_h + pad_h - fil_h) / strides[0] + 1
58
+ out_w = (prev_w + pad_w - fil_w) / strides[1] + 1
59
59
  [out_h, out_w]
60
60
  end
61
61
 
62
- def padding_size(prev_h, prev_w, out_h, out_w, strides)
62
+ def calc_deconv2d_out_size(prev_h, prev_w, fil_h, fil_w, pad_h, pad_w, strides)
63
+ out_h = (prev_h - 1) * strides[0] + fil_h - pad_h
64
+ out_w = (prev_w - 1) * strides[1] + fil_w - pad_w
65
+ [out_h, out_w]
66
+ end
67
+
68
+ def calc_padding_size(prev_h, prev_w, out_h, out_w, strides)
63
69
  pad_h = (prev_h.to_f / strides[0]).ceil - out_h
64
70
  pad_w = (prev_w.to_f / strides[1]).ceil - out_w
65
71
  [pad_h, pad_w]
66
72
  end
73
+
74
+ def calc_padding_size(prev_h, prev_w, out_h, out_w, strides)
75
+ pad_h = ((prev_h.to_f / strides[0]).ceil - out_h) * strides[0]
76
+ pad_w = ((prev_w.to_f / strides[1]).ceil - out_w) * strides[1]
77
+ [pad_h, pad_w]
78
+ end
67
79
  end
68
80
 
69
81
 
70
82
  class Conv2D < Connection
71
- include Conv2DModule
83
+ include Conv2D_Utils
72
84
 
73
85
  # @return [Integer] number of filters.
74
86
  attr_reader :num_filters
@@ -76,75 +88,81 @@ module DNN
76
88
  attr_reader :filter_size
77
89
  # @return [Array] Return stride length. stride length is of the form [height, width].
78
90
  attr_reader :strides
91
+ # @return [Array | Bool] Return padding size or whether to pad.
92
+ attr_reader :padding
79
93
 
80
- def self.load_hash(hash)
94
+ def self.from_hash(hash)
81
95
  Conv2D.new(hash[:num_filters], hash[:filter_size],
82
- weight_initializer: Utils.load_hash(hash[:weight_initializer]),
83
- bias_initializer: Utils.load_hash(hash[:bias_initializer]),
96
+ weight_initializer: Utils.from_hash(hash[:weight_initializer]),
97
+ bias_initializer: Utils.from_hash(hash[:bias_initializer]),
98
+ weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
99
+ bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
100
+ use_bias: hash[:use_bias],
84
101
  strides: hash[:strides],
85
- padding: hash[:padding],
86
- l1_lambda: hash[:l1_lambda],
87
- l2_lambda: hash[:l2_lambda],
88
- use_bias: hash[:use_bias])
102
+ padding: hash[:padding])
89
103
  end
90
104
 
91
- # @param [Integer] num_filters number of filters.
92
- # @param [Array or Integer] filter_size filter size. filter size is of the form [height, width].
93
- # @param [Array or Integer] strides stride length. stride length is of the form [height, width].
94
- # @param [Bool] padding Whether to padding.
105
+ # @param [Integer] num_filters Number of filters.
106
+ # @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
107
+ # @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
108
+ # @param [Array | Bool] padding Padding size or whether to pad. Padding size is of the form [height, width].
95
109
  def initialize(num_filters, filter_size,
96
110
  weight_initializer: Initializers::RandomNormal.new,
97
111
  bias_initializer: Initializers::Zeros.new,
112
+ weight_regularizer: nil,
113
+ bias_regularizer: nil,
114
+ use_bias: true,
98
115
  strides: 1,
99
- padding: false,
100
- l1_lambda: 0,
101
- l2_lambda: 0,
102
- use_bias: true)
116
+ padding: false)
103
117
  super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
104
- l1_lambda: l1_lambda, l2_lambda: l2_lambda, use_bias: use_bias)
118
+ weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
105
119
  @num_filters = num_filters
106
120
  @filter_size = filter_size.is_a?(Integer) ? [filter_size, filter_size] : filter_size
107
121
  @strides = strides.is_a?(Integer) ? [strides, strides] : strides
108
- @padding = padding
122
+ @padding = padding.is_a?(Integer) ? [padding, padding] : padding
109
123
  end
110
124
 
111
125
  def build(input_shape)
112
126
  super
113
- prev_h, prev_w = input_shape[0..1]
114
- @out_size = out_size(prev_h, prev_w, *@filter_size, @strides)
115
- if @padding
116
- @pad_size = padding_size(prev_h, prev_w, *@out_size, @strides)
117
- @out_size = [@out_size[0] + @pad_size[0], @out_size[1] + @pad_size[1]]
127
+ prev_h, prev_w, num_prev_filter = *input_shape
128
+ @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * num_prev_filter, @num_filters)
129
+ @bias.data = Xumo::SFloat.new(@num_filters) if @bias
130
+ init_weight_and_bias
131
+ if @padding == true
132
+ out_h, out_w = calc_conv2d_out_size(prev_h, prev_w, *@filter_size, 0, 0, @strides)
133
+ @pad_size = calc_padding_size(prev_h, prev_w, out_h, out_w, @strides)
134
+ elsif @padding.is_a?(Array)
135
+ @pad_size = @padding
136
+ else
137
+ @pad_size = [0, 0]
118
138
  end
139
+ @out_size = calc_conv2d_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
119
140
  end
120
141
 
121
142
  def forward(x)
122
- x = padding(x, @pad_size) if @padding
143
+ x = zero_padding(x, @pad_size) if @padding
123
144
  @x_shape = x.shape
124
145
  @col = im2col(x, *@out_size, *@filter_size, @strides)
125
- out = @col.dot(@weight.data)
126
- out += @bias.data if @bias
127
- out.reshape(x.shape[0], *@out_size, out.shape[3])
146
+ y = @col.dot(@weight.data)
147
+ y += @bias.data if @bias
148
+ y.reshape(x.shape[0], *@out_size, y.shape[3])
128
149
  end
129
150
 
130
- def backward(dout)
131
- dout = dout.reshape(dout.shape[0..2].reduce(:*), dout.shape[3])
132
- @weight.grad = @col.transpose.dot(dout)
133
- @bias.grad = dout.sum(0) if @bias
134
- dcol = dout.dot(@weight.data.transpose)
151
+ def backward(dy)
152
+ dy = dy.reshape(dy.shape[0..2].reduce(:*), dy.shape[3])
153
+ if @trainable
154
+ @weight.grad += @col.transpose.dot(dy)
155
+ @bias.grad += dy.sum(0) if @bias
156
+ end
157
+ dcol = dy.dot(@weight.data.transpose)
135
158
  dx = col2im(dcol, @x_shape, *@out_size, *@filter_size, @strides)
136
- @padding ? back_padding(dx, @pad_size) : dx
159
+ @padding ? zero_padding_bwd(dx, @pad_size) : dx
137
160
  end
138
161
 
139
162
  def output_shape
140
163
  [*@out_size, @num_filters]
141
164
  end
142
165
 
143
- # @return [Bool] whether to padding.
144
- def padding?
145
- @padding
146
- end
147
-
148
166
  # @return [Numo::SFloat] The weight converted to filter form.
149
167
  def filters
150
168
  num_prev_filter = @input_shape[2]
@@ -163,35 +181,140 @@ module DNN
163
181
  strides: @strides,
164
182
  padding: @padding})
165
183
  end
166
-
167
- private
168
-
169
- def init_params
184
+ end
185
+
186
+
187
+ class Conv2D_Transpose < Connection
188
+ include Conv2D_Utils
189
+
190
+ # @return [Integer] number of filters.
191
+ attr_reader :num_filters
192
+ # @return [Array] Return filter size. Filter size is of the form [height, width].
193
+ attr_reader :filter_size
194
+ # @return [Array] Return stride length. Stride length is of the form [height, width].
195
+ attr_reader :strides
196
+ # @return [Array | Bool] Return padding size or whether to pad.
197
+ attr_reader :padding
198
+
199
+ def self.from_hash(hash)
200
+ Conv2D_Transpose.new(hash[:num_filters], hash[:filter_size],
201
+ weight_initializer: Utils.from_hash(hash[:weight_initializer]),
202
+ bias_initializer: Utils.from_hash(hash[:bias_initializer]),
203
+ weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
204
+ bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
205
+ use_bias: hash[:use_bias],
206
+ strides: hash[:strides],
207
+ padding: hash[:padding])
208
+ end
209
+
210
+ # @param [Integer] num_filters Number of filters.
211
+ # @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
212
+ # @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
213
+ # @param [Array | Bool] padding Padding size or whether to pad. Padding size is of the form [height, width].
214
+ def initialize(num_filters, filter_size,
215
+ weight_initializer: Initializers::RandomNormal.new,
216
+ bias_initializer: Initializers::Zeros.new,
217
+ weight_regularizer: nil,
218
+ bias_regularizer: nil,
219
+ use_bias: true,
220
+ strides: 1,
221
+ padding: false)
222
+ super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
223
+ weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
224
+ @num_filters = num_filters
225
+ @filter_size = filter_size.is_a?(Integer) ? [filter_size, filter_size] : filter_size
226
+ @strides = strides.is_a?(Integer) ? [strides, strides] : strides
227
+ @padding = padding.is_a?(Integer) ? [padding, padding] : padding
228
+ end
229
+
230
+ def build(input_shape)
231
+ super
232
+ prev_h, prev_w, num_prev_filter = *input_shape
233
+ @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * @num_filters, num_prev_filter)
234
+ @weight_initializer.init_param(self, @weight)
235
+ @weight_regularizer.param = @weight if @weight_regularizer
236
+ if @bias
237
+ @bias.data = Xumo::SFloat.new(@num_filters)
238
+ @bias_initializer.init_param(self, @bias)
239
+ @bias_regularizer.param = @bias if @bias_regularizer
240
+ end
241
+ if @padding == true
242
+ out_h, out_w = calc_deconv2d_out_size(prev_h, prev_w, *@filter_size, 0, 0, @strides)
243
+ @pad_size = calc_padding_size(out_h, out_w, prev_h, prev_w, @strides)
244
+ elsif @padding.is_a?(Array)
245
+ @pad_size = @padding
246
+ else
247
+ @pad_size = [0, 0]
248
+ end
249
+ @out_size = calc_deconv2d_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
250
+ end
251
+
252
+ def forward(x)
253
+ bsize = x.shape[0]
254
+ x = x.reshape(x.shape[0..2].reduce(:*), x.shape[3])
255
+ @x = x
256
+ col = x.dot(@weight.data.transpose)
257
+ img_shape = [bsize, @out_size[0] + @pad_size[0], @out_size[1] + @pad_size[1], @num_filters]
258
+ y = col2im(col, img_shape, *input_shape[0..1], *@filter_size, @strides)
259
+ y += @bias.data if @bias
260
+ @padding ? zero_padding_bwd(y, @pad_size) : y
261
+ end
262
+
263
+ def backward(dy)
264
+ dy = zero_padding(dy, @pad_size) if @padding
265
+ col = im2col(dy, *input_shape[0..1], *@filter_size, @strides)
266
+ if @trainable
267
+ @weight.grad += col.transpose.dot(@x)
268
+ @bias.grad += col.reshape(col.shape[0] * @filter_size.reduce(:*), @num_filters).sum(0) if @bias
269
+ end
270
+ dx = col.dot(@weight.data)
271
+ dx.reshape(dy.shape[0], *input_shape)
272
+ end
273
+
274
+ def output_shape
275
+ [*@out_size, @num_filters]
276
+ end
277
+
278
+ # @return [Numo::SFloat] The weight converted to filter form.
279
+ def filters
170
280
  num_prev_filter = @input_shape[2]
171
- @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * num_prev_filter, @num_filters)
172
- @bias.data = Xumo::SFloat.new(@num_filters) if @bias
173
- super()
281
+ @weight.data.reshape(*@filter_size, @num_filters, num_prev_filter)
282
+ end
283
+
284
+ # @param [Numo::SFloat] filters Filters to convert to weight and set.
285
+ def filters=(filters)
286
+ num_prev_filter = @input_shape[2]
287
+ @weight.data = filters.reshape(@filter_size.reduce(:*) * @num_filters, num_prev_filter)
288
+ end
289
+
290
+ def to_hash
291
+ super({num_filters: @num_filters,
292
+ filter_size: @filter_size,
293
+ strides: @strides,
294
+ padding: @padding})
174
295
  end
175
296
  end
176
297
 
177
298
 
178
299
  # Superclass of all 2D pooling classes.
179
300
  class Pool2D < Layer
180
- include Conv2DModule
301
+ include Conv2D_Utils
181
302
 
182
- # @return [Array] Return pooling size. pooling size is of the form [height, width].
303
+ # @return [Array] Return pooling size. Pooling size is of the form [height, width].
183
304
  attr_reader :pool_size
184
- # @return [Array] Return stride length. stride length is of the form [height, width].
305
+ # @return [Array] Return stride length. Stride length is of the form [height, width].
185
306
  attr_reader :strides
307
+ # @return [Array | Bool] Return padding size or whether to pad.
308
+ attr_reader :padding
186
309
 
187
- def self.load_hash(pool2d_class, hash)
310
+ def self.from_hash(pool2d_class, hash)
188
311
  pool2d_class.new(hash[:pool_size], strides: hash[:strides], padding: hash[:padding])
189
312
  end
190
313
 
191
- # @param [Array or Integer] pool_size pooling size. pooling size is of the form [height, width].
192
- # @param [Array or Integer or NilClass] strides stride length. stride length is of the form [height, width].
314
+ # @param [Array | Integer] pool_size Pooling size. Pooling size is of the form [height, width].
315
+ # @param [Array | Integer | NilClass] strides Stride length. Stride length is of the form [height, width].
193
316
  # If you set nil, treat pool_size as strides.
194
- # @param [Bool] padding Whether to padding.
317
+ # @param [Array | Bool] padding Padding size or whether to pad. Padding size is of the form [height, width].
195
318
  def initialize(pool_size, strides: nil, padding: false)
196
319
  super()
197
320
  @pool_size = pool_size.is_a?(Integer) ? [pool_size, pool_size] : pool_size
@@ -200,29 +323,28 @@ module DNN
200
323
  else
201
324
  @pool_size.clone
202
325
  end
203
- @padding = padding
326
+ @padding = padding.is_a?(Integer) ? [padding, padding] : padding
204
327
  end
205
328
 
206
329
  def build(input_shape)
207
330
  super
208
331
  prev_h, prev_w = input_shape[0..1]
209
332
  @num_channel = input_shape[2]
210
- @out_size = out_size(prev_h, prev_w, *@pool_size, @strides)
211
- if @padding
212
- @pad_size = padding_size(prev_h, prev_w, *@out_size, @strides)
213
- @out_size = [@out_size[0] + @pad_size[0], @out_size[1] + @pad_size[1]]
333
+ if @padding == true
334
+ out_h, out_w = calc_conv2d_out_size(prev_h, prev_w, *@pool_size, 0, 0, @strides)
335
+ @pad_size = calc_padding_size(prev_h, prev_w, out_h, out_w, @strides)
336
+ elsif @padding.is_a?(Array)
337
+ @pad_size = @padding
338
+ else
339
+ @pad_size = [0, 0]
214
340
  end
341
+ @out_size = calc_conv2d_out_size(prev_h, prev_w, *@pool_size, *@pad_size, @strides)
215
342
  end
216
343
 
217
344
  def output_shape
218
345
  [*@out_size, @num_channel]
219
346
  end
220
347
 
221
- # @return [Bool] whether to padding.
222
- def padding?
223
- @padding
224
- end
225
-
226
348
  def to_hash
227
349
  super({pool_size: @pool_size,
228
350
  strides: @strides,
@@ -232,12 +354,12 @@ module DNN
232
354
 
233
355
 
234
356
  class MaxPool2D < Pool2D
235
- def self.load_hash(hash)
236
- Pool2D.load_hash(self, hash)
357
+ def self.from_hash(hash)
358
+ Pool2D.from_hash(self, hash)
237
359
  end
238
360
 
239
361
  def forward(x)
240
- x = padding(x, @pad_size) if @padding
362
+ x = zero_padding(x, @pad_size) if @padding
241
363
  @x_shape = x.shape
242
364
  col = im2col(x, *@out_size, *@pool_size, @strides)
243
365
  col = col.reshape(x.shape[0] * @out_size.reduce(:*), @pool_size.reduce(:*), x.shape[3]).transpose(0, 2, 1)
@@ -246,23 +368,23 @@ module DNN
246
368
  col.max(1).reshape(x.shape[0], *@out_size, x.shape[3])
247
369
  end
248
370
 
249
- def backward(dout)
250
- dmax = Xumo::SFloat.zeros(dout.size * @pool_size.reduce(:*))
251
- dmax[@max_index] = dout.flatten
252
- dcol = dmax.reshape(dout.shape[0..2].reduce(:*), @pool_size.reduce(:*) * dout.shape[3])
371
+ def backward(dy)
372
+ dmax = Xumo::SFloat.zeros(dy.size * @pool_size.reduce(:*))
373
+ dmax[@max_index] = dy.flatten
374
+ dcol = dmax.reshape(dy.shape[0..2].reduce(:*), @pool_size.reduce(:*) * dy.shape[3])
253
375
  dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides)
254
- @padding ? back_padding(dx, @pad_size) : dx
376
+ @padding ? zero_padding_bwd(dx, @pad_size) : dx
255
377
  end
256
378
  end
257
379
 
258
380
 
259
381
  class AvgPool2D < Pool2D
260
- def self.load_hash(hash)
261
- Pool2D.load_hash(self, hash)
382
+ def self.from_hash(hash)
383
+ Pool2D.from_hash(self, hash)
262
384
  end
263
385
 
264
386
  def forward(x)
265
- x = padding(x, @pad_size) if @padding
387
+ x = zero_padding(x, @pad_size) if @padding
266
388
  @x_shape = x.shape
267
389
  col = im2col(x, *@out_size, *@pool_size, @strides)
268
390
  col = col.reshape(x.shape[0] * @out_size.reduce(:*), @pool_size.reduce(:*), x.shape[3]).transpose(0, 2, 1)
@@ -270,21 +392,23 @@ module DNN
270
392
  col.mean(1).reshape(x.shape[0], *@out_size, x.shape[3])
271
393
  end
272
394
 
273
- def backward(dout)
395
+ def backward(dy)
274
396
  row_length = @pool_size.reduce(:*)
275
- dout /= row_length
276
- davg = Xumo::SFloat.zeros(dout.size, row_length)
397
+ dy /= row_length
398
+ davg = Xumo::SFloat.zeros(dy.size, row_length)
277
399
  row_length.times do |i|
278
- davg[true, i] = dout.flatten
400
+ davg[true, i] = dy.flatten
279
401
  end
280
- dcol = davg.reshape(dout.shape[0..2].reduce(:*), dout.shape[3] * @pool_size.reduce(:*))
402
+ dcol = davg.reshape(dy.shape[0..2].reduce(:*), dy.shape[3] * @pool_size.reduce(:*))
281
403
  dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides)
282
- @padding ? back_padding(dx, @pad_size) : dx
404
+ @padding ? zero_padding_bwd(dx, @pad_size) : dx
283
405
  end
284
406
  end
285
407
 
286
408
 
287
409
  class UnPool2D < Layer
410
+ include Conv2D_Utils
411
+
288
412
  # @return [Array] Return unpooling size. Unpooling size is of the form [height, width].
289
413
  attr_reader :unpool_size
290
414
 
@@ -294,7 +418,7 @@ module DNN
294
418
  @unpool_size = unpool_size.is_a?(Integer) ? [unpool_size, unpool_size] : unpool_size
295
419
  end
296
420
 
297
- def self.load_hash(hash)
421
+ def self.from_hash(hash)
298
422
  UnPool2D.new(hash[:unpool_size])
299
423
  end
300
424
 
@@ -308,8 +432,6 @@ module DNN
308
432
  @num_channel = input_shape[2]
309
433
  end
310
434
 
311
- include Conv2DModule
312
-
313
435
  def forward(x)
314
436
  @x_shape = x.shape
315
437
  unpool_h, unpool_w = @unpool_size
@@ -322,12 +444,12 @@ module DNN
322
444
  x2.reshape(x.shape[0], *@out_size, x.shape[3])
323
445
  end
324
446
 
325
- def backward(dout)
447
+ def backward(dy)
326
448
  in_size = input_shape[0..1]
327
- col = im2col(dout, *input_shape[0..1], *@unpool_size, @unpool_size)
328
- col = col.reshape(dout.shape[0] * in_size.reduce(:*), @unpool_size.reduce(:*), dout.shape[3]).transpose(0, 2, 1)
329
- .reshape(dout.shape[0] * in_size.reduce(:*) * dout.shape[3], @unpool_size.reduce(:*))
330
- col.sum(1).reshape(dout.shape[0], *in_size, dout.shape[3])
449
+ col = im2col(dy, *input_shape[0..1], *@unpool_size, @unpool_size)
450
+ col = col.reshape(dy.shape[0] * in_size.reduce(:*), @unpool_size.reduce(:*), dy.shape[3]).transpose(0, 2, 1)
451
+ .reshape(dy.shape[0] * in_size.reduce(:*) * dy.shape[3], @unpool_size.reduce(:*))
452
+ col.sum(1).reshape(dy.shape[0], *in_size, dy.shape[3])
331
453
  end
332
454
 
333
455
  def output_shape