ruby-dnn 0.10.1 → 0.10.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,148 +1,148 @@
1
- module DNN
2
- module Activations
3
-
4
- class Sigmoid < Layers::Layer
5
- def forward(x)
6
- @y = 1 / (1 + NMath.exp(-x))
7
- end
8
-
9
- def backward(dy)
10
- dy * (1 - @y) * @y
11
- end
12
- end
13
-
14
-
15
- class Tanh < Layers::Layer
16
- def forward(x)
17
- @y = NMath.tanh(x)
18
- end
19
-
20
- def backward(dy)
21
- dy * (1 - @y**2)
22
- end
23
- end
24
-
25
-
26
- class Softsign < Layers::Layer
27
- def forward(x)
28
- @x = x
29
- x / (1 + x.abs)
30
- end
31
-
32
- def backward(dy)
33
- dy * (1 / (1 + @x.abs)**2)
34
- end
35
- end
36
-
37
-
38
- class Softplus < Layers::Layer
39
- def forward(x)
40
- @x = x
41
- NMath.log(1 + NMath.exp(x))
42
- end
43
-
44
- def backward(dy)
45
- dy * (1 / (1 + NMath.exp(-@x)))
46
- end
47
- end
48
-
49
-
50
- class Swish < Layers::Layer
51
- def forward(x)
52
- @x = x
53
- @y = x * (1 / (1 + NMath.exp(-x)))
54
- end
55
-
56
- def backward(dy)
57
- dy * (@y + (1 / (1 + NMath.exp(-@x))) * (1 - @y))
58
- end
59
- end
60
-
61
-
62
- class ReLU < Layers::Layer
63
- def forward(x)
64
- @x = x.clone
65
- x[x < 0] = 0
66
- x
67
- end
68
-
69
- def backward(dy)
70
- @x[@x > 0] = 1
71
- @x[@x <= 0] = 0
72
- dy * @x
73
- end
74
- end
75
-
76
-
77
- class LeakyReLU < Layers::Layer
78
- # @return [Float] Return the alpha value.
79
- attr_reader :alpha
80
-
81
- def self.from_hash(hash)
82
- self.new(hash[:alpha])
83
- end
84
-
85
- # @param [Float] alpha The slope when the output value is negative.
86
- def initialize(alpha = 0.3)
87
- @alpha = alpha
88
- end
89
-
90
- def forward(x)
91
- @x = x.clone
92
- a = Xumo::SFloat.ones(x.shape)
93
- a[x <= 0] = @alpha
94
- x * a
95
- end
96
-
97
- def backward(dy)
98
- @x[@x > 0] = 1
99
- @x[@x <= 0] = @alpha
100
- dy * @x
101
- end
102
-
103
- def to_hash
104
- {class: self.class.name, alpha: alpha}
105
- end
106
- end
107
-
108
-
109
- class ELU < Layers::Layer
110
- # @return [Float] Return the alpha value.
111
- attr_reader :alpha
112
-
113
- def self.from_hash(hash)
114
- self.new(hash[:alpha])
115
- end
116
-
117
- # @param [Float] alpha The slope when the output value is negative.
118
- def initialize(alpha = 1.0)
119
- @alpha = alpha
120
- end
121
-
122
- def forward(x)
123
- @x = x
124
- x1 = Xumo::SFloat.zeros(x.shape)
125
- x1[x >= 0] = 1
126
- x1 *= x
127
- x2 = Xumo::SFloat.zeros(x.shape)
128
- x2[x < 0] = 1
129
- x2 *= @alpha * NMath.exp(x) - @alpha
130
- x1 + x2
131
- end
132
-
133
- def backward(dy)
134
- dx = Xumo::SFloat.ones(@x.shape)
135
- dx[@x < 0] = 0
136
- dx2 = Xumo::SFloat.zeros(@x.shape)
137
- dx2[@x < 0] = 1
138
- dx2 *= @alpha * NMath.exp(@x)
139
- dy * (dx + dx2)
140
- end
141
-
142
- def to_hash
143
- {class: self.class.name, alpha: @alpha}
144
- end
145
- end
146
-
147
- end
148
- end
1
+ module DNN
2
+ module Activations
3
+
4
+ class Sigmoid < Layers::Layer
5
+ def forward(x)
6
+ @y = 1 / (1 + NMath.exp(-x))
7
+ end
8
+
9
+ def backward(dy)
10
+ dy * (1 - @y) * @y
11
+ end
12
+ end
13
+
14
+
15
+ class Tanh < Layers::Layer
16
+ def forward(x)
17
+ @y = NMath.tanh(x)
18
+ end
19
+
20
+ def backward(dy)
21
+ dy * (1 - @y**2)
22
+ end
23
+ end
24
+
25
+
26
+ class Softsign < Layers::Layer
27
+ def forward(x)
28
+ @x = x
29
+ x / (1 + x.abs)
30
+ end
31
+
32
+ def backward(dy)
33
+ dy * (1 / (1 + @x.abs)**2)
34
+ end
35
+ end
36
+
37
+
38
+ class Softplus < Layers::Layer
39
+ def forward(x)
40
+ @x = x
41
+ NMath.log(1 + NMath.exp(x))
42
+ end
43
+
44
+ def backward(dy)
45
+ dy * (1 / (1 + NMath.exp(-@x)))
46
+ end
47
+ end
48
+
49
+
50
+ class Swish < Layers::Layer
51
+ def forward(x)
52
+ @x = x
53
+ @y = x * (1 / (1 + NMath.exp(-x)))
54
+ end
55
+
56
+ def backward(dy)
57
+ dy * (@y + (1 / (1 + NMath.exp(-@x))) * (1 - @y))
58
+ end
59
+ end
60
+
61
+
62
+ class ReLU < Layers::Layer
63
+ def forward(x)
64
+ @x = x.clone
65
+ x[x < 0] = 0
66
+ x
67
+ end
68
+
69
+ def backward(dy)
70
+ @x[@x > 0] = 1
71
+ @x[@x <= 0] = 0
72
+ dy * @x
73
+ end
74
+ end
75
+
76
+
77
+ class LeakyReLU < Layers::Layer
78
+ # @return [Float] Return the alpha value.
79
+ attr_reader :alpha
80
+
81
+ def self.from_hash(hash)
82
+ self.new(hash[:alpha])
83
+ end
84
+
85
+ # @param [Float] alpha The slope when the output value is negative.
86
+ def initialize(alpha = 0.3)
87
+ @alpha = alpha
88
+ end
89
+
90
+ def forward(x)
91
+ @x = x.clone
92
+ a = Xumo::SFloat.ones(x.shape)
93
+ a[x <= 0] = @alpha
94
+ x * a
95
+ end
96
+
97
+ def backward(dy)
98
+ @x[@x > 0] = 1
99
+ @x[@x <= 0] = @alpha
100
+ dy * @x
101
+ end
102
+
103
+ def to_hash
104
+ {class: self.class.name, alpha: alpha}
105
+ end
106
+ end
107
+
108
+
109
+ class ELU < Layers::Layer
110
+ # @return [Float] Return the alpha value.
111
+ attr_reader :alpha
112
+
113
+ def self.from_hash(hash)
114
+ self.new(hash[:alpha])
115
+ end
116
+
117
+ # @param [Float] alpha The slope when the output value is negative.
118
+ def initialize(alpha = 1.0)
119
+ @alpha = alpha
120
+ end
121
+
122
+ def forward(x)
123
+ @x = x
124
+ x1 = Xumo::SFloat.zeros(x.shape)
125
+ x1[x >= 0] = 1
126
+ x1 *= x
127
+ x2 = Xumo::SFloat.zeros(x.shape)
128
+ x2[x < 0] = 1
129
+ x2 *= @alpha * NMath.exp(x) - @alpha
130
+ x1 + x2
131
+ end
132
+
133
+ def backward(dy)
134
+ dx = Xumo::SFloat.ones(@x.shape)
135
+ dx[@x < 0] = 0
136
+ dx2 = Xumo::SFloat.zeros(@x.shape)
137
+ dx2[@x < 0] = 1
138
+ dx2 *= @alpha * NMath.exp(@x)
139
+ dy * (dx + dx2)
140
+ end
141
+
142
+ def to_hash
143
+ {class: self.class.name, alpha: @alpha}
144
+ end
145
+ end
146
+
147
+ end
148
+ end
@@ -1,464 +1,464 @@
1
- module DNN
2
- module Layers
3
- # This module is used for convolution.
4
- module Conv2D_Utils
5
- private
6
-
7
- # img[bsize, out_h, out_w, ch] to col[bsize * out_h * out_w, fil_h * fil_w * ch]
8
- def im2col(img, out_h, out_w, fil_h, fil_w, strides)
9
- bsize = img.shape[0]
10
- ch = img.shape[3]
11
- col = Xumo::SFloat.zeros(bsize, out_h, out_w, fil_h, fil_w, ch)
12
- (0...fil_h).each do |i|
13
- i_range = (i...(i + strides[0] * out_h)).step(strides[0]).to_a
14
- (0...fil_w).each do |j|
15
- j_range = (j...(j + strides[1] * out_w)).step(strides[1]).to_a
16
- col[true, true, true, i, j, true] = img[true, i_range, j_range, true]
17
- end
18
- end
19
- col.reshape(bsize * out_h * out_w, fil_h * fil_w * ch)
20
- end
21
-
22
- # col[bsize * out_h * out_w, fil_h * fil_w * ch] to img[bsize, out_h, out_w, ch]
23
- def col2im(col, img_shape, out_h, out_w, fil_h, fil_w, strides)
24
- bsize, img_h, img_w, ch = img_shape
25
- col = col.reshape(bsize, out_h, out_w, fil_h, fil_w, ch)
26
- img = Xumo::SFloat.zeros(bsize, img_h, img_w, ch)
27
- (0...fil_h).each do |i|
28
- i_range = (i...(i + strides[0] * out_h)).step(strides[0]).to_a
29
- (0...fil_w).each do |j|
30
- j_range = (j...(j + strides[1] * out_w)).step(strides[1]).to_a
31
- img[true, i_range, j_range, true] += col[true, true, true, i, j, true]
32
- end
33
- end
34
- img
35
- end
36
-
37
- def zero_padding(img, pad)
38
- bsize, img_h, img_w, ch = img.shape
39
- img2 = Xumo::SFloat.zeros(bsize, img_h + pad[0], img_w + pad[1], ch)
40
- i_begin = pad[0] / 2
41
- i_end = i_begin + img_h
42
- j_begin = pad[1] / 2
43
- j_end = j_begin + img_w
44
- img2[true, i_begin...i_end, j_begin...j_end, true] = img
45
- img2
46
- end
47
-
48
- def zero_padding_bwd(img, pad)
49
- i_begin = pad[0] / 2
50
- i_end = img.shape[1] - (pad[0] / 2.0).round
51
- j_begin = pad[1] / 2
52
- j_end = img.shape[2] - (pad[1] / 2.0).round
53
- img[true, i_begin...i_end, j_begin...j_end, true]
54
- end
55
-
56
- def calc_conv2d_out_size(prev_h, prev_w, fil_h, fil_w, pad_h, pad_w, strides)
57
- out_h = (prev_h + pad_h - fil_h) / strides[0] + 1
58
- out_w = (prev_w + pad_w - fil_w) / strides[1] + 1
59
- [out_h, out_w]
60
- end
61
-
62
- def calc_deconv2d_out_size(prev_h, prev_w, fil_h, fil_w, pad_h, pad_w, strides)
63
- out_h = (prev_h - 1) * strides[0] + fil_h - pad_h
64
- out_w = (prev_w - 1) * strides[1] + fil_w - pad_w
65
- [out_h, out_w]
66
- end
67
-
68
- def calc_padding_size(prev_h, prev_w, out_h, out_w, strides)
69
- pad_h = (prev_h.to_f / strides[0]).ceil - out_h
70
- pad_w = (prev_w.to_f / strides[1]).ceil - out_w
71
- [pad_h, pad_w]
72
- end
73
-
74
- def calc_padding_size(prev_h, prev_w, out_h, out_w, strides)
75
- pad_h = ((prev_h.to_f / strides[0]).ceil - out_h) * strides[0]
76
- pad_w = ((prev_w.to_f / strides[1]).ceil - out_w) * strides[1]
77
- [pad_h, pad_w]
78
- end
79
- end
80
-
81
-
82
- class Conv2D < Connection
83
- include Conv2D_Utils
84
-
85
- # @return [Integer] number of filters.
86
- attr_reader :num_filters
87
- # @return [Array] Return filter size. filter size is of the form [height, width].
88
- attr_reader :filter_size
89
- # @return [Array] Return stride length. stride length is of the form [height, width].
90
- attr_reader :strides
91
- # @return [Array | Bool] Return padding size or whether to padding.
92
- attr_reader :padding
93
-
94
- def self.from_hash(hash)
95
- Conv2D.new(hash[:num_filters], hash[:filter_size],
96
- weight_initializer: Utils.from_hash(hash[:weight_initializer]),
97
- bias_initializer: Utils.from_hash(hash[:bias_initializer]),
98
- weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
99
- bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
100
- use_bias: hash[:use_bias],
101
- strides: hash[:strides],
102
- padding: hash[:padding])
103
- end
104
-
105
- # @param [Integer] num_filters Number of filters.
106
- # @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
107
- # @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
108
- # @param [Array | Bool] padding Padding size or whether to padding. Padding size is of the form [height, width].
109
- def initialize(num_filters, filter_size,
110
- weight_initializer: Initializers::RandomNormal.new,
111
- bias_initializer: Initializers::Zeros.new,
112
- weight_regularizer: nil,
113
- bias_regularizer: nil,
114
- use_bias: true,
115
- strides: 1,
116
- padding: false)
117
- super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
118
- weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
119
- @num_filters = num_filters
120
- @filter_size = filter_size.is_a?(Integer) ? [filter_size, filter_size] : filter_size
121
- @strides = strides.is_a?(Integer) ? [strides, strides] : strides
122
- @padding = padding.is_a?(Integer) ? [padding, padding] : padding
123
- end
124
-
125
- def build(input_shape)
126
- super
127
- prev_h, prev_w, num_prev_filter = *input_shape
128
- @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * num_prev_filter, @num_filters)
129
- @bias.data = Xumo::SFloat.new(@num_filters) if @bias
130
- init_weight_and_bias
131
- if @padding == true
132
- out_h, out_w = calc_conv2d_out_size(prev_h, prev_w, *@filter_size, 0, 0, @strides)
133
- @pad_size = calc_padding_size(prev_h, prev_w, out_h, out_w, @strides)
134
- elsif @padding.is_a?(Array)
135
- @pad_size = @padding
136
- else
137
- @pad_size = [0, 0]
138
- end
139
- @out_size = calc_conv2d_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
140
- end
141
-
142
- def forward(x)
143
- x = zero_padding(x, @pad_size) if @padding
144
- @x_shape = x.shape
145
- @col = im2col(x, *@out_size, *@filter_size, @strides)
146
- y = @col.dot(@weight.data)
147
- y += @bias.data if @bias
148
- y.reshape(x.shape[0], *@out_size, y.shape[3])
149
- end
150
-
151
- def backward(dy)
152
- dy = dy.reshape(dy.shape[0..2].reduce(:*), dy.shape[3])
153
- if @trainable
154
- @weight.grad += @col.transpose.dot(dy)
155
- @bias.grad += dy.sum(0) if @bias
156
- end
157
- dcol = dy.dot(@weight.data.transpose)
158
- dx = col2im(dcol, @x_shape, *@out_size, *@filter_size, @strides)
159
- @padding ? zero_padding_bwd(dx, @pad_size) : dx
160
- end
161
-
162
- def output_shape
163
- [*@out_size, @num_filters]
164
- end
165
-
166
- # @return [Numo::SFloat] Convert weight to filter and return.
167
- def filters
168
- num_prev_filter = @input_shape[2]
169
- @weight.data.reshape(*@filter_size, num_prev_filter, @num_filters)
170
- end
171
-
172
- # @param [Numo::SFloat] filters Convert weight to filters and set.
173
- def filters=(filters)
174
- num_prev_filter = @input_shape[2]
175
- @weight.data = filters.reshape(@filter_size.reduce(:*) * num_prev_filter, @num_filters)
176
- end
177
-
178
- def to_hash
179
- super({num_filters: @num_filters,
180
- filter_size: @filter_size,
181
- strides: @strides,
182
- padding: @padding})
183
- end
184
- end
185
-
186
-
187
- class Conv2D_Transpose < Connection
188
- include Conv2D_Utils
189
-
190
- # @return [Integer] number of filters.
191
- attr_reader :num_filters
192
- # @return [Array] Return filter size. filter size is of the form [height, width].
193
- attr_reader :filter_size
194
- # @return [Array] Return stride length. stride length is of the form [height, width].
195
- attr_reader :strides
196
- # @return [Array] Return padding size.
197
- attr_reader :padding
198
-
199
- def self.from_hash(hash)
200
- Conv2D_Transpose.new(hash[:num_filters], hash[:filter_size],
201
- weight_initializer: Utils.from_hash(hash[:weight_initializer]),
202
- bias_initializer: Utils.from_hash(hash[:bias_initializer]),
203
- weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
204
- bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
205
- use_bias: hash[:use_bias],
206
- strides: hash[:strides],
207
- padding: hash[:padding])
208
- end
209
-
210
- # @param [Integer] num_filters Number of filters.
211
- # @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
212
- # @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
213
- # @param [Array] padding Padding size. Padding size is of the form [height, width].
214
- def initialize(num_filters, filter_size,
215
- weight_initializer: Initializers::RandomNormal.new,
216
- bias_initializer: Initializers::Zeros.new,
217
- weight_regularizer: nil,
218
- bias_regularizer: nil,
219
- use_bias: true,
220
- strides: 1,
221
- padding: false)
222
- super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
223
- weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
224
- @num_filters = num_filters
225
- @filter_size = filter_size.is_a?(Integer) ? [filter_size, filter_size] : filter_size
226
- @strides = strides.is_a?(Integer) ? [strides, strides] : strides
227
- @padding = padding.is_a?(Integer) ? [padding, padding] : padding
228
- end
229
-
230
- def build(input_shape)
231
- super
232
- prev_h, prev_w, num_prev_filter = *input_shape
233
- @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * @num_filters, num_prev_filter)
234
- @weight_initializer.init_param(self, @weight)
235
- @weight_regularizer.param = @weight if @weight_regularizer
236
- if @bias
237
- @bias.data = Xumo::SFloat.new(@num_filters)
238
- @bias_initializer.init_param(self, @bias)
239
- @bias_regularizer.param = @bias if @bias_regularizer
240
- end
241
- if @padding == true
242
- out_h, out_w = calc_deconv2d_out_size(prev_h, prev_w, *@filter_size, 0, 0, @strides)
243
- @pad_size = calc_padding_size(out_h, out_w, prev_h, prev_w, @strides)
244
- elsif @padding.is_a?(Array)
245
- @pad_size = @padding
246
- else
247
- @pad_size = [0, 0]
248
- end
249
- @out_size = calc_deconv2d_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
250
- end
251
-
252
- def forward(x)
253
- bsize = x.shape[0]
254
- x = x.reshape(x.shape[0..2].reduce(:*), x.shape[3])
255
- @x = x
256
- col = x.dot(@weight.data.transpose)
257
- img_shape = [bsize, @out_size[0] + @pad_size[0], @out_size[1] + @pad_size[1], @num_filters]
258
- y = col2im(col, img_shape, *input_shape[0..1], *@filter_size, @strides)
259
- y += @bias.data if @bias
260
- @padding ? zero_padding_bwd(y, @pad_size) : y
261
- end
262
-
263
- def backward(dy)
264
- dy = zero_padding(dy, @pad_size) if @padding
265
- col = im2col(dy, *input_shape[0..1], *@filter_size, @strides)
266
- if @trainable
267
- @weight.grad += col.transpose.dot(@x)
268
- @bias.grad += col.reshape(col.shape[0] * @filter_size.reduce(:*), @num_filters).sum(0) if @bias
269
- end
270
- dx = col.dot(@weight.data)
271
- dx.reshape(dy.shape[0], *input_shape)
272
- end
273
-
274
- def output_shape
275
- [*@out_size, @num_filters]
276
- end
277
-
278
- # @return [Numo::SFloat] Convert weight to filter and return.
279
- def filters
280
- num_prev_filter = @input_shape[2]
281
- @weight.data.reshape(*@filter_size, @num_filters, num_prev_filter)
282
- end
283
-
284
- # @param [Numo::SFloat] filters Convert weight to filters and set.
285
- def filters=(filters)
286
- num_prev_filter = @input_shape[2]
287
- @weight.data = filters.reshape(@filter_size.reduce(:*) * @num_filters, num_prev_filter)
288
- end
289
-
290
- def to_hash
291
- super({num_filters: @num_filters,
292
- filter_size: @filter_size,
293
- strides: @strides,
294
- padding: @padding})
295
- end
296
- end
297
-
298
-
299
- # Super class of all pooling2D class.
300
- class Pool2D < Layer
301
- include Conv2D_Utils
302
-
303
- # @return [Array] Return pooling size. Pooling size is of the form [height, width].
304
- attr_reader :pool_size
305
- # @return [Array] Return stride length. Stride length is of the form [height, width].
306
- attr_reader :strides
307
- # @return [Array | Bool] Return padding size or whether to padding.
308
- attr_reader :padding
309
-
310
- def self.from_hash(pool2d_class, hash)
311
- pool2d_class.new(hash[:pool_size], strides: hash[:strides], padding: hash[:padding])
312
- end
313
-
314
- # @param [Array | Integer] pool_size Pooling size. Pooling size is of the form [height, width].
315
- # @param [Array | Integer | NilClass] strides stride length. Stride length is of the form [height, width].
316
- # If you set nil, treat pool_size as strides.
317
- # @param [Array | Bool] padding Padding size or whether to padding. Padding size is of the form [height, width].
318
- def initialize(pool_size, strides: nil, padding: false)
319
- super()
320
- @pool_size = pool_size.is_a?(Integer) ? [pool_size, pool_size] : pool_size
321
- @strides = if strides
322
- strides.is_a?(Integer) ? [strides, strides] : strides
323
- else
324
- @pool_size.clone
325
- end
326
- @padding = padding.is_a?(Integer) ? [padding, padding] : padding
327
- end
328
-
329
- def build(input_shape)
330
- super
331
- prev_h, prev_w = input_shape[0..1]
332
- @num_channel = input_shape[2]
333
- if @padding == true
334
- out_h, out_w = calc_conv2d_out_size(prev_h, prev_w, *@pool_size, 0, 0, @strides)
335
- @pad_size = calc_padding_size(prev_h, prev_w, out_h, out_w, @strides)
336
- elsif @padding.is_a?(Array)
337
- @pad_size = @padding
338
- else
339
- @pad_size = [0, 0]
340
- end
341
- @out_size = calc_conv2d_out_size(prev_h, prev_w, *@pool_size, *@pad_size, @strides)
342
- end
343
-
344
- def output_shape
345
- [*@out_size, @num_channel]
346
- end
347
-
348
- def to_hash
349
- super({pool_size: @pool_size,
350
- strides: @strides,
351
- padding: @padding})
352
- end
353
- end
354
-
355
-
356
- class MaxPool2D < Pool2D
357
- def self.from_hash(hash)
358
- Pool2D.from_hash(self, hash)
359
- end
360
-
361
- def forward(x)
362
- x = zero_padding(x, @pad_size) if @padding
363
- @x_shape = x.shape
364
- col = im2col(x, *@out_size, *@pool_size, @strides)
365
- col = col.reshape(x.shape[0] * @out_size.reduce(:*), @pool_size.reduce(:*), x.shape[3]).transpose(0, 2, 1)
366
- .reshape(x.shape[0] * @out_size.reduce(:*) * x.shape[3], @pool_size.reduce(:*))
367
- @max_index = col.max_index(1)
368
- col.max(1).reshape(x.shape[0], *@out_size, x.shape[3])
369
- end
370
-
371
- def backward(dy)
372
- dmax = Xumo::SFloat.zeros(dy.size * @pool_size.reduce(:*))
373
- dmax[@max_index] = dy.flatten
374
- dcol = dmax.reshape(dy.shape[0..2].reduce(:*), @pool_size.reduce(:*) * dy.shape[3])
375
- dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides)
376
- @padding ? zero_padding_bwd(dx, @pad_size) : dx
377
- end
378
- end
379
-
380
-
381
- class AvgPool2D < Pool2D
382
- def self.from_hash(hash)
383
- Pool2D.from_hash(self, hash)
384
- end
385
-
386
- def forward(x)
387
- x = zero_padding(x, @pad_size) if @padding
388
- @x_shape = x.shape
389
- col = im2col(x, *@out_size, *@pool_size, @strides)
390
- col = col.reshape(x.shape[0] * @out_size.reduce(:*), @pool_size.reduce(:*), x.shape[3]).transpose(0, 2, 1)
391
- .reshape(x.shape[0] * @out_size.reduce(:*) * x.shape[3], @pool_size.reduce(:*))
392
- col.mean(1).reshape(x.shape[0], *@out_size, x.shape[3])
393
- end
394
-
395
- def backward(dy)
396
- row_length = @pool_size.reduce(:*)
397
- dy /= row_length
398
- davg = Xumo::SFloat.zeros(dy.size, row_length)
399
- row_length.times do |i|
400
- davg[true, i] = dy.flatten
401
- end
402
- dcol = davg.reshape(dy.shape[0..2].reduce(:*), dy.shape[3] * @pool_size.reduce(:*))
403
- dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides)
404
- @padding ? zero_padding_bwd(dx, @pad_size) : dx
405
- end
406
- end
407
-
408
-
409
- class UnPool2D < Layer
410
- include Conv2D_Utils
411
-
412
- # @return [Array] Return unpooling size. unpooling size is of the form [height, width].
413
- attr_reader :unpool_size
414
-
415
- # @param [Array or Integer] unpool_size Unpooling size. unpooling size is of the form [height, width].
416
- def initialize(unpool_size)
417
- super()
418
- @unpool_size = unpool_size.is_a?(Integer) ? [unpool_size, unpool_size] : unpool_size
419
- end
420
-
421
- def self.from_hash(hash)
422
- UnPool2D.new(hash[:unpool_size])
423
- end
424
-
425
- def build(input_shape)
426
- super
427
- prev_h, prev_w = input_shape[0..1]
428
- unpool_h, unpool_w = @unpool_size
429
- out_h = prev_h * unpool_h
430
- out_w = prev_w * unpool_w
431
- @out_size = [out_h, out_w]
432
- @num_channel = input_shape[2]
433
- end
434
-
435
- def forward(x)
436
- @x_shape = x.shape
437
- unpool_h, unpool_w = @unpool_size
438
- x2 = Xumo::SFloat.zeros(x.shape[0], x.shape[1], unpool_h, x.shape[2], unpool_w, @num_channel)
439
- unpool_h.times do |i|
440
- unpool_w.times do |j|
441
- x2[true, true, i, true, j, true] = x
442
- end
443
- end
444
- x2.reshape(x.shape[0], *@out_size, x.shape[3])
445
- end
446
-
447
- def backward(dy)
448
- in_size = input_shape[0..1]
449
- col = im2col(dy, *input_shape[0..1], *@unpool_size, @unpool_size)
450
- col = col.reshape(dy.shape[0] * in_size.reduce(:*), @unpool_size.reduce(:*), dy.shape[3]).transpose(0, 2, 1)
451
- .reshape(dy.shape[0] * in_size.reduce(:*) * dy.shape[3], @unpool_size.reduce(:*))
452
- col.sum(1).reshape(dy.shape[0], *in_size, dy.shape[3])
453
- end
454
-
455
- def output_shape
456
- [*@out_size, @num_channel]
457
- end
458
-
459
- def to_hash
460
- super({unpool_size: @unpool_size})
461
- end
462
- end
463
- end
464
- end
1
+ module DNN
2
+ module Layers
3
+ # This module is used for convolution.
4
+ module Conv2D_Utils
5
+ private
6
+
7
+ # img[bsize, out_h, out_w, ch] to col[bsize * out_h * out_w, fil_h * fil_w * ch]
8
+ def im2col(img, out_h, out_w, fil_h, fil_w, strides)
9
+ bsize = img.shape[0]
10
+ ch = img.shape[3]
11
+ col = Xumo::SFloat.zeros(bsize, out_h, out_w, fil_h, fil_w, ch)
12
+ (0...fil_h).each do |i|
13
+ i_range = (i...(i + strides[0] * out_h)).step(strides[0]).to_a
14
+ (0...fil_w).each do |j|
15
+ j_range = (j...(j + strides[1] * out_w)).step(strides[1]).to_a
16
+ col[true, true, true, i, j, true] = img[true, i_range, j_range, true]
17
+ end
18
+ end
19
+ col.reshape(bsize * out_h * out_w, fil_h * fil_w * ch)
20
+ end
21
+
22
+ # col[bsize * out_h * out_w, fil_h * fil_w * ch] to img[bsize, out_h, out_w, ch]
23
+ def col2im(col, img_shape, out_h, out_w, fil_h, fil_w, strides)
24
+ bsize, img_h, img_w, ch = img_shape
25
+ col = col.reshape(bsize, out_h, out_w, fil_h, fil_w, ch)
26
+ img = Xumo::SFloat.zeros(bsize, img_h, img_w, ch)
27
+ (0...fil_h).each do |i|
28
+ i_range = (i...(i + strides[0] * out_h)).step(strides[0]).to_a
29
+ (0...fil_w).each do |j|
30
+ j_range = (j...(j + strides[1] * out_w)).step(strides[1]).to_a
31
+ img[true, i_range, j_range, true] += col[true, true, true, i, j, true]
32
+ end
33
+ end
34
+ img
35
+ end
36
+
37
+ def zero_padding(img, pad)
38
+ bsize, img_h, img_w, ch = img.shape
39
+ img2 = Xumo::SFloat.zeros(bsize, img_h + pad[0], img_w + pad[1], ch)
40
+ i_begin = pad[0] / 2
41
+ i_end = i_begin + img_h
42
+ j_begin = pad[1] / 2
43
+ j_end = j_begin + img_w
44
+ img2[true, i_begin...i_end, j_begin...j_end, true] = img
45
+ img2
46
+ end
47
+
48
+ def zero_padding_bwd(img, pad)
49
+ i_begin = pad[0] / 2
50
+ i_end = img.shape[1] - (pad[0] / 2.0).round
51
+ j_begin = pad[1] / 2
52
+ j_end = img.shape[2] - (pad[1] / 2.0).round
53
+ img[true, i_begin...i_end, j_begin...j_end, true]
54
+ end
55
+
56
+ def calc_conv2d_out_size(prev_h, prev_w, fil_h, fil_w, pad_h, pad_w, strides)
57
+ out_h = (prev_h + pad_h - fil_h) / strides[0] + 1
58
+ out_w = (prev_w + pad_w - fil_w) / strides[1] + 1
59
+ [out_h, out_w]
60
+ end
61
+
62
+ def calc_deconv2d_out_size(prev_h, prev_w, fil_h, fil_w, pad_h, pad_w, strides)
63
+ out_h = (prev_h - 1) * strides[0] + fil_h - pad_h
64
+ out_w = (prev_w - 1) * strides[1] + fil_w - pad_w
65
+ [out_h, out_w]
66
+ end
67
+
68
+ def calc_padding_size(prev_h, prev_w, out_h, out_w, strides)
69
+ pad_h = (prev_h.to_f / strides[0]).ceil - out_h
70
+ pad_w = (prev_w.to_f / strides[1]).ceil - out_w
71
+ [pad_h, pad_w]
72
+ end
73
+
74
+ def calc_padding_size(prev_h, prev_w, out_h, out_w, strides)
75
+ pad_h = ((prev_h.to_f / strides[0]).ceil - out_h) * strides[0]
76
+ pad_w = ((prev_w.to_f / strides[1]).ceil - out_w) * strides[1]
77
+ [pad_h, pad_w]
78
+ end
79
+ end
80
+
81
+
82
+ class Conv2D < Connection
83
+ include Conv2D_Utils
84
+
85
+ # @return [Integer] number of filters.
86
+ attr_reader :num_filters
87
+ # @return [Array] Return filter size. filter size is of the form [height, width].
88
+ attr_reader :filter_size
89
+ # @return [Array] Return stride length. stride length is of the form [height, width].
90
+ attr_reader :strides
91
+ # @return [Array | Bool] Return padding size or whether to padding.
92
+ attr_reader :padding
93
+
94
+ def self.from_hash(hash)
95
+ Conv2D.new(hash[:num_filters], hash[:filter_size],
96
+ weight_initializer: Utils.from_hash(hash[:weight_initializer]),
97
+ bias_initializer: Utils.from_hash(hash[:bias_initializer]),
98
+ weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
99
+ bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
100
+ use_bias: hash[:use_bias],
101
+ strides: hash[:strides],
102
+ padding: hash[:padding])
103
+ end
104
+
105
+ # @param [Integer] num_filters Number of filters.
106
+ # @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
107
+ # @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
108
+ # @param [Array | Bool] padding Padding size or whether to padding. Padding size is of the form [height, width].
109
+ def initialize(num_filters, filter_size,
110
+ weight_initializer: Initializers::RandomNormal.new,
111
+ bias_initializer: Initializers::Zeros.new,
112
+ weight_regularizer: nil,
113
+ bias_regularizer: nil,
114
+ use_bias: true,
115
+ strides: 1,
116
+ padding: false)
117
+ super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
118
+ weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
119
+ @num_filters = num_filters
120
+ @filter_size = filter_size.is_a?(Integer) ? [filter_size, filter_size] : filter_size
121
+ @strides = strides.is_a?(Integer) ? [strides, strides] : strides
122
+ @padding = padding.is_a?(Integer) ? [padding, padding] : padding
123
+ end
124
+
125
+ def build(input_shape)
126
+ super
127
+ prev_h, prev_w, num_prev_filter = *input_shape
128
+ @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * num_prev_filter, @num_filters)
129
+ @bias.data = Xumo::SFloat.new(@num_filters) if @bias
130
+ init_weight_and_bias
131
+ if @padding == true
132
+ out_h, out_w = calc_conv2d_out_size(prev_h, prev_w, *@filter_size, 0, 0, @strides)
133
+ @pad_size = calc_padding_size(prev_h, prev_w, out_h, out_w, @strides)
134
+ elsif @padding.is_a?(Array)
135
+ @pad_size = @padding
136
+ else
137
+ @pad_size = [0, 0]
138
+ end
139
+ @out_size = calc_conv2d_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
140
+ end
141
+
142
+ def forward(x)
143
+ x = zero_padding(x, @pad_size) if @padding
144
+ @x_shape = x.shape
145
+ @col = im2col(x, *@out_size, *@filter_size, @strides)
146
+ y = @col.dot(@weight.data)
147
+ y += @bias.data if @bias
148
+ y.reshape(x.shape[0], *@out_size, y.shape[3])
149
+ end
150
+
151
+ def backward(dy)
152
+ dy = dy.reshape(dy.shape[0..2].reduce(:*), dy.shape[3])
153
+ if @trainable
154
+ @weight.grad += @col.transpose.dot(dy)
155
+ @bias.grad += dy.sum(0) if @bias
156
+ end
157
+ dcol = dy.dot(@weight.data.transpose)
158
+ dx = col2im(dcol, @x_shape, *@out_size, *@filter_size, @strides)
159
+ @padding ? zero_padding_bwd(dx, @pad_size) : dx
160
+ end
161
+
162
+ def output_shape
163
+ [*@out_size, @num_filters]
164
+ end
165
+
166
+ # @return [Numo::SFloat] Convert weight to filter and return.
167
+ def filters
168
+ num_prev_filter = @input_shape[2]
169
+ @weight.data.reshape(*@filter_size, num_prev_filter, @num_filters)
170
+ end
171
+
172
+ # @param [Numo::SFloat] filters Convert weight to filters and set.
173
+ def filters=(filters)
174
+ num_prev_filter = @input_shape[2]
175
+ @weight.data = filters.reshape(@filter_size.reduce(:*) * num_prev_filter, @num_filters)
176
+ end
177
+
178
+ def to_hash
179
+ super({num_filters: @num_filters,
180
+ filter_size: @filter_size,
181
+ strides: @strides,
182
+ padding: @padding})
183
+ end
184
+ end
185
+
186
+
187
+ class Conv2D_Transpose < Connection
188
+ include Conv2D_Utils
189
+
190
+ # @return [Integer] number of filters.
191
+ attr_reader :num_filters
192
+ # @return [Array] Return filter size. filter size is of the form [height, width].
193
+ attr_reader :filter_size
194
+ # @return [Array] Return stride length. stride length is of the form [height, width].
195
+ attr_reader :strides
196
+ # @return [Array] Return padding size.
197
+ attr_reader :padding
198
+
199
+ def self.from_hash(hash)
200
+ Conv2D_Transpose.new(hash[:num_filters], hash[:filter_size],
201
+ weight_initializer: Utils.from_hash(hash[:weight_initializer]),
202
+ bias_initializer: Utils.from_hash(hash[:bias_initializer]),
203
+ weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
204
+ bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
205
+ use_bias: hash[:use_bias],
206
+ strides: hash[:strides],
207
+ padding: hash[:padding])
208
+ end
209
+
210
+ # @param [Integer] num_filters Number of filters.
211
+ # @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
212
+ # @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
213
+ # @param [Array] padding Padding size. Padding size is of the form [height, width].
214
+ def initialize(num_filters, filter_size,
215
+ weight_initializer: Initializers::RandomNormal.new,
216
+ bias_initializer: Initializers::Zeros.new,
217
+ weight_regularizer: nil,
218
+ bias_regularizer: nil,
219
+ use_bias: true,
220
+ strides: 1,
221
+ padding: false)
222
+ super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
223
+ weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
224
+ @num_filters = num_filters
225
+ @filter_size = filter_size.is_a?(Integer) ? [filter_size, filter_size] : filter_size
226
+ @strides = strides.is_a?(Integer) ? [strides, strides] : strides
227
+ @padding = padding.is_a?(Integer) ? [padding, padding] : padding
228
+ end
229
+
230
# Allocate and initialize the parameters, then determine padding and output size.
# @param [Array] input_shape Input shape, destructured as [height, width, channels].
def build(input_shape)
  super
  prev_h, prev_w, num_prev_filter = *input_shape

  weight_rows = @filter_size.reduce(:*) * @num_filters
  @weight.data = Xumo::SFloat.new(weight_rows, num_prev_filter)
  @weight_initializer.init_param(self, @weight)
  @weight_regularizer.param = @weight if @weight_regularizer

  if @bias
    @bias.data = Xumo::SFloat.new(@num_filters)
    @bias_initializer.init_param(self, @bias)
    @bias_regularizer.param = @bias if @bias_regularizer
  end

  @pad_size = case @padding
              when true
                # Derive the padding from the unpadded output size.
                full_h, full_w = calc_deconv2d_out_size(prev_h, prev_w, *@filter_size, 0, 0, @strides)
                calc_padding_size(full_h, full_w, prev_h, prev_w, @strides)
              when Array
                @padding
              else
                [0, 0]
              end
  @out_size = calc_deconv2d_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
end
251
+
252
# Forward pass: project every input position onto a filter patch and
# scatter the patches into the output image with col2im.
# @param [Xumo::SFloat] x 4-D input; the first three axes are flattened into rows.
# @return [Xumo::SFloat] Output image, cropped by @pad_size when padding is enabled.
def forward(x)
  batch_size = x.shape[0]
  # Kept for the weight gradient in backward.
  @x = x.reshape(x.shape[0..2].reduce(:*), x.shape[3])
  col = @x.dot(@weight.data.transpose)
  # col2im writes into an image that still includes the padding border.
  padded_h = @out_size[0] + @pad_size[0]
  padded_w = @out_size[1] + @pad_size[1]
  img_shape = [batch_size, padded_h, padded_w, @num_filters]
  y = col2im(col, img_shape, *input_shape[0..1], *@filter_size, @strides)
  y += @bias.data if @bias
  return y unless @padding

  zero_padding_bwd(y, @pad_size)
end
262
+
263
# Backward pass: accumulate the parameter gradients and return the input gradient.
# @param [Xumo::SFloat] dy Gradient of the layer output; its last axis has @num_filters entries.
# @return [Xumo::SFloat] Gradient with respect to the input, shaped [batch, *input_shape].
def backward(dy)
  dy = zero_padding(dy, @pad_size) if @padding
  col = im2col(dy, *input_shape[0..1], *@filter_size, @strides)
  if @trainable
    @weight.grad += col.transpose.dot(@x)
    # forward adds the bias exactly once to every output pixel, so its gradient is the
    # plain sum of dy over every axis except the filter axis. Summing the im2col patches
    # instead would count a pixel once per patch that contains it, which is only
    # equivalent when the patches tile the image without overlap or gaps.
    @bias.grad += dy.reshape(dy.size / @num_filters, @num_filters).sum(0) if @bias
  end
  dx = col.dot(@weight.data)
  dx.reshape(dy.shape[0], *input_shape)
end
273
+
274
# @return [Array] Output shape: the entries of @out_size followed by the number of filters.
def output_shape
  [*@out_size].push(@num_filters)
end
277
+
278
# @return [Numo::SFloat] The weight reshaped to
#   [*filter_size, num_filters, number of input channels].
def filters
  in_channels = @input_shape[2]
  filter_shape = [*@filter_size, @num_filters, in_channels]
  @weight.data.reshape(*filter_shape)
end
283
+
284
# @param [Numo::SFloat] filters Filters to store; they are reshaped to the 2-D
#   weight layout [filter area * num_filters, number of input channels].
def filters=(filters)
  in_channels = @input_shape[2]
  weight_rows = @filter_size.reduce(:*) * @num_filters
  @weight.data = filters.reshape(weight_rows, in_channels)
end
289
+
290
# Serialize the layer; the layer-specific settings are handed to the superclass.
def to_hash
  layer_params = {
    num_filters: @num_filters,
    filter_size: @filter_size,
    strides: @strides,
    padding: @padding
  }
  super(layer_params)
end
296
+ end
297
+
298
+
299
# Base class shared by all 2D pooling layers.
class Pool2D < Layer
  include Conv2D_Utils

  # @return [Array] Pooling size in the form [height, width].
  attr_reader :pool_size
  # @return [Array] Stride length in the form [height, width].
  attr_reader :strides
  # @return [Array | Bool] Padding size, or whether to pad.
  attr_reader :padding

  # Rebuild a pooling layer of the given class from its serialized hash.
  # @param [Class] pool2d_class Concrete pooling class to instantiate.
  # @param [Hash] hash Hash holding :pool_size, :strides and :padding.
  def self.from_hash(pool2d_class, hash)
    options = { strides: hash[:strides], padding: hash[:padding] }
    pool2d_class.new(hash[:pool_size], **options)
  end

  # @param [Array | Integer] pool_size Pooling size in the form [height, width].
  # @param [Array | Integer | NilClass] strides Stride length in the form [height, width].
  #   When nil, a copy of the pooling size is used as the strides.
  # @param [Array | Integer | Bool] padding Padding size in the form [height, width],
  #   or whether to pad.
  def initialize(pool_size, strides: nil, padding: false)
    super()
    # A bare Integer means "same value for height and width".
    to_pair = ->(value) { value.is_a?(Integer) ? [value, value] : value }
    @pool_size = to_pair.call(pool_size)
    @strides = strides ? to_pair.call(strides) : @pool_size.clone
    @padding = to_pair.call(padding)
  end

  # Record the channel count and work out padding and output size.
  # @param [Array] input_shape Input shape, read as [height, width, channels].
  def build(input_shape)
    super
    prev_h, prev_w = input_shape[0..1]
    @num_channel = input_shape[2]
    @pad_size = case @padding
                when true
                  # Derive the padding from the unpadded output size.
                  raw_h, raw_w = calc_conv2d_out_size(prev_h, prev_w, *@pool_size, 0, 0, @strides)
                  calc_padding_size(prev_h, prev_w, raw_h, raw_w, @strides)
                when Array
                  @padding
                else
                  [0, 0]
                end
    @out_size = calc_conv2d_out_size(prev_h, prev_w, *@pool_size, *@pad_size, @strides)
  end

  # @return [Array] Output shape: the entries of @out_size followed by the channel count.
  def output_shape
    [*@out_size].push(@num_channel)
  end

  # Serialize the layer; the pooling settings are handed to the superclass.
  def to_hash
    layer_params = {
      pool_size: @pool_size,
      strides: @strides,
      padding: @padding
    }
    super(layer_params)
  end
end
354
+
355
+
356
# 2D max pooling layer.
class MaxPool2D < Pool2D
  # @param [Hash] hash Serialized layer, as accepted by Pool2D.from_hash.
  def self.from_hash(hash)
    Pool2D.from_hash(self, hash)
  end

  # Take the maximum of every pooling window, separately for each channel.
  # @param [Xumo::SFloat] x 4-D input whose last axis is the channel axis.
  # @return [Xumo::SFloat] Pooled output shaped [batch, *@out_size, channels].
  def forward(x)
    x = zero_padding(x, @pad_size) if @padding
    @x_shape = x.shape
    num_windows = x.shape[0] * @out_size.reduce(:*)
    pool_len = @pool_size.reduce(:*)
    col = im2col(x, *@out_size, *@pool_size, @strides)
    # Each im2col row is read as (pool position, channel); move the pool positions
    # to the last axis so that one row holds one window of one channel.
    col = col.reshape(num_windows, pool_len, x.shape[3]).transpose(0, 2, 1)
             .reshape(num_windows * x.shape[3], pool_len)
    @max_index = col.max_index(1)
    col.max(1).reshape(x.shape[0], *@out_size, x.shape[3])
  end

  # Route every output gradient back to the input element that produced the maximum.
  # @param [Xumo::SFloat] dy Gradient of the output, shaped like the result of forward.
  # @return [Xumo::SFloat] Gradient with respect to the (unpadded) input.
  def backward(dy)
    num_windows = dy.shape[0..2].reduce(:*)
    pool_len = @pool_size.reduce(:*)
    dmax = Xumo::SFloat.zeros(dy.size * pool_len)
    dmax[@max_index] = dy.flatten
    # @max_index addresses the (window, channel, pool position) layout built in forward.
    # Undo that transpose so the columns are back in the (pool position, channel) order
    # forward read from im2col; otherwise gradients land on the wrong position/channel
    # whenever there is more than one channel.
    dcol = dmax.reshape(num_windows, dy.shape[3], pool_len).transpose(0, 2, 1)
               .reshape(num_windows, pool_len * dy.shape[3])
    dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides)
    @padding ? zero_padding_bwd(dx, @pad_size) : dx
  end
end
379
+
380
+
381
# 2D average pooling layer.
class AvgPool2D < Pool2D
  # @param [Hash] hash Serialized layer, as accepted by Pool2D.from_hash.
  def self.from_hash(hash)
    Pool2D.from_hash(self, hash)
  end

  # Average every pooling window, separately for each channel.
  # @param [Xumo::SFloat] x 4-D input whose last axis is the channel axis.
  # @return [Xumo::SFloat] Pooled output shaped [batch, *@out_size, channels].
  def forward(x)
    x = zero_padding(x, @pad_size) if @padding
    @x_shape = x.shape
    num_windows = x.shape[0] * @out_size.reduce(:*)
    pool_len = @pool_size.reduce(:*)
    col = im2col(x, *@out_size, *@pool_size, @strides)
    # Each im2col row is read as (pool position, channel); move the pool positions
    # to the last axis so that one row holds one window of one channel.
    col = col.reshape(num_windows, pool_len, x.shape[3]).transpose(0, 2, 1)
             .reshape(num_windows * x.shape[3], pool_len)
    col.mean(1).reshape(x.shape[0], *@out_size, x.shape[3])
  end

  # Spread every output gradient evenly over the elements of its pooling window.
  # @param [Xumo::SFloat] dy Gradient of the output, shaped like the result of forward.
  # @return [Xumo::SFloat] Gradient with respect to the (unpadded) input.
  def backward(dy)
    row_length = @pool_size.reduce(:*)
    num_windows = dy.shape[0..2].reduce(:*)
    dy /= row_length
    davg = Xumo::SFloat.zeros(dy.size, row_length)
    row_length.times do |i|
      davg[true, i] = dy.flatten
    end
    # davg rows are (window, channel) pairs, i.e. the transposed layout used in forward.
    # Undo that transpose so the columns are back in the (pool position, channel) order
    # forward read from im2col; otherwise gradients are assigned to the wrong channel
    # whenever there is more than one channel.
    dcol = davg.reshape(num_windows, dy.shape[3], row_length).transpose(0, 2, 1)
               .reshape(num_windows, row_length * dy.shape[3])
    dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides)
    @padding ? zero_padding_bwd(dx, @pad_size) : dx
  end
end
407
+
408
+
409
# 2D unpooling layer: repeats every input pixel over an unpool_size block.
class UnPool2D < Layer
  include Conv2D_Utils

  # @return [Array] Unpooling size in the form [height, width].
  attr_reader :unpool_size

  # @param [Array | Integer] unpool_size Unpooling size in the form [height, width].
  #   An Integer is expanded to [unpool_size, unpool_size].
  def initialize(unpool_size)
    super()
    @unpool_size = if unpool_size.is_a?(Integer)
                     [unpool_size, unpool_size]
                   else
                     unpool_size
                   end
  end

  # @param [Hash] hash Serialized layer holding :unpool_size.
  def self.from_hash(hash)
    UnPool2D.new(hash[:unpool_size])
  end

  # Compute the output size by scaling the input height and width.
  # @param [Array] input_shape Input shape, read as [height, width, channels].
  def build(input_shape)
    super
    in_h, in_w = input_shape[0..1]
    scale_h, scale_w = @unpool_size
    @out_size = [in_h * scale_h, in_w * scale_w]
    @num_channel = input_shape[2]
  end

  # Copy the input into every (i, j) offset of the enlarged grid.
  # @param [Xumo::SFloat] x 4-D input.
  # @return [Xumo::SFloat] Output shaped [batch, *@out_size, channels].
  def forward(x)
    @x_shape = x.shape
    scale_h, scale_w = @unpool_size
    # Interleave a scale axis after the height axis and after the width axis,
    # so the final reshape merges them into the enlarged height / width.
    expanded = Xumo::SFloat.zeros(x.shape[0], x.shape[1], scale_h, x.shape[2], scale_w, @num_channel)
    scale_h.times.to_a.product(scale_w.times.to_a).each do |i, j|
      expanded[true, true, i, true, j, true] = x
    end
    expanded.reshape(x.shape[0], *@out_size, x.shape[3])
  end

  # Sum the gradients of every unpool_size block back onto its source pixel.
  # @param [Xumo::SFloat] dy Gradient of the output.
  # @return [Xumo::SFloat] Gradient shaped [batch, input height, input width, channels].
  def backward(dy)
    in_size = input_shape[0..1]
    num_cells = dy.shape[0] * in_size.reduce(:*)
    block_len = @unpool_size.reduce(:*)
    # Strides equal to the block size, so the blocks tile dy.
    col = im2col(dy, *in_size, *@unpool_size, @unpool_size)
    per_channel = col.reshape(num_cells, block_len, dy.shape[3]).transpose(0, 2, 1)
                     .reshape(num_cells * dy.shape[3], block_len)
    per_channel.sum(1).reshape(dy.shape[0], *in_size, dy.shape[3])
  end

  # @return [Array] Output shape: the entries of @out_size followed by the channel count.
  def output_shape
    [*@out_size].push(@num_channel)
  end

  # Serialize the layer; the unpooling size is handed to the superclass.
  def to_hash
    layer_params = { unpool_size: @unpool_size }
    super(layer_params)
  end
end
463
+ end
464
+ end