ruby-dnn 0.10.1 → 0.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,148 +1,148 @@
1
- module DNN
2
- module Activations
3
-
4
- class Sigmoid < Layers::Layer
5
- def forward(x)
6
- @y = 1 / (1 + NMath.exp(-x))
7
- end
8
-
9
- def backward(dy)
10
- dy * (1 - @y) * @y
11
- end
12
- end
13
-
14
-
15
- class Tanh < Layers::Layer
16
- def forward(x)
17
- @y = NMath.tanh(x)
18
- end
19
-
20
- def backward(dy)
21
- dy * (1 - @y**2)
22
- end
23
- end
24
-
25
-
26
- class Softsign < Layers::Layer
27
- def forward(x)
28
- @x = x
29
- x / (1 + x.abs)
30
- end
31
-
32
- def backward(dy)
33
- dy * (1 / (1 + @x.abs)**2)
34
- end
35
- end
36
-
37
-
38
- class Softplus < Layers::Layer
39
- def forward(x)
40
- @x = x
41
- NMath.log(1 + NMath.exp(x))
42
- end
43
-
44
- def backward(dy)
45
- dy * (1 / (1 + NMath.exp(-@x)))
46
- end
47
- end
48
-
49
-
50
- class Swish < Layers::Layer
51
- def forward(x)
52
- @x = x
53
- @y = x * (1 / (1 + NMath.exp(-x)))
54
- end
55
-
56
- def backward(dy)
57
- dy * (@y + (1 / (1 + NMath.exp(-@x))) * (1 - @y))
58
- end
59
- end
60
-
61
-
62
- class ReLU < Layers::Layer
63
- def forward(x)
64
- @x = x.clone
65
- x[x < 0] = 0
66
- x
67
- end
68
-
69
- def backward(dy)
70
- @x[@x > 0] = 1
71
- @x[@x <= 0] = 0
72
- dy * @x
73
- end
74
- end
75
-
76
-
77
- class LeakyReLU < Layers::Layer
78
- # @return [Float] Return the alpha value.
79
- attr_reader :alpha
80
-
81
- def self.from_hash(hash)
82
- self.new(hash[:alpha])
83
- end
84
-
85
- # @param [Float] alpha The slope when the output value is negative.
86
- def initialize(alpha = 0.3)
87
- @alpha = alpha
88
- end
89
-
90
- def forward(x)
91
- @x = x.clone
92
- a = Xumo::SFloat.ones(x.shape)
93
- a[x <= 0] = @alpha
94
- x * a
95
- end
96
-
97
- def backward(dy)
98
- @x[@x > 0] = 1
99
- @x[@x <= 0] = @alpha
100
- dy * @x
101
- end
102
-
103
- def to_hash
104
- {class: self.class.name, alpha: alpha}
105
- end
106
- end
107
-
108
-
109
- class ELU < Layers::Layer
110
- # @return [Float] Return the alpha value.
111
- attr_reader :alpha
112
-
113
- def self.from_hash(hash)
114
- self.new(hash[:alpha])
115
- end
116
-
117
- # @param [Float] alpha The slope when the output value is negative.
118
- def initialize(alpha = 1.0)
119
- @alpha = alpha
120
- end
121
-
122
- def forward(x)
123
- @x = x
124
- x1 = Xumo::SFloat.zeros(x.shape)
125
- x1[x >= 0] = 1
126
- x1 *= x
127
- x2 = Xumo::SFloat.zeros(x.shape)
128
- x2[x < 0] = 1
129
- x2 *= @alpha * NMath.exp(x) - @alpha
130
- x1 + x2
131
- end
132
-
133
- def backward(dy)
134
- dx = Xumo::SFloat.ones(@x.shape)
135
- dx[@x < 0] = 0
136
- dx2 = Xumo::SFloat.zeros(@x.shape)
137
- dx2[@x < 0] = 1
138
- dx2 *= @alpha * NMath.exp(@x)
139
- dy * (dx + dx2)
140
- end
141
-
142
- def to_hash
143
- {class: self.class.name, alpha: @alpha}
144
- end
145
- end
146
-
147
- end
148
- end
1
+ module DNN
2
+ module Activations
3
+
4
# Logistic sigmoid activation layer.
class Sigmoid < Layers::Layer
  # Applies 1 / (1 + exp(-x)) and caches the result in @y for #backward.
  # @param x Input passed to NMath.exp (presumably a Xumo::SFloat - confirm against callers).
  # @return The activated values (the same object that is stored in @y).
  def forward(x)
    exp_neg = NMath.exp(-x)
    @y = 1 / (1 + exp_neg)
  end

  # Multiplies the incoming gradient by the sigmoid derivative (1 - y) * y,
  # using the output cached by the last #forward call.
  # @param dy Gradient flowing back from the next layer.
  def backward(dy)
    scaled = dy * (1 - @y)
    scaled * @y
  end
end
13
+
14
+
15
# Hyperbolic tangent activation layer.
class Tanh < Layers::Layer
  # Applies NMath.tanh and caches the result in @y for #backward.
  # @param x Input passed to NMath.tanh.
  # @return The activated values (the same object that is stored in @y).
  def forward(x)
    activated = NMath.tanh(x)
    @y = activated
  end

  # Multiplies the incoming gradient by the tanh derivative 1 - y**2.
  # @param dy Gradient flowing back from the next layer.
  def backward(dy)
    slope = 1 - @y**2
    dy * slope
  end
end
24
+
25
+
26
# Softsign activation layer: x / (1 + |x|).
class Softsign < Layers::Layer
  # Keeps a reference to the input in @x and returns x / (1 + |x|).
  # @param x Input; must respond to #abs and support element-wise division.
  def forward(x)
    @x = x
    denominator = 1 + x.abs
    x / denominator
  end

  # Multiplies the incoming gradient by the derivative 1 / (1 + |x|)**2.
  # @param dy Gradient flowing back from the next layer.
  def backward(dy)
    denominator = 1 + @x.abs
    dy * (1 / denominator**2)
  end
end
36
+
37
+
38
# Softplus activation layer: log(1 + exp(x)).
class Softplus < Layers::Layer
  # Keeps a reference to the input in @x and returns log(1 + exp(x)).
  #
  # Evaluated as max(x, 0) + log(1 + exp(-|x|)), which is algebraically the
  # same value but never calls exp on a positive argument. The direct form
  # overflows to Infinity once exp(x) exceeds the float range, although the
  # true result there is approximately x.
  # @param x Input; must support #clone, #abs and mask assignment.
  def forward(x)
    @x = x
    positive_part = x.clone
    positive_part[x < 0] = 0
    positive_part + NMath.log(1 + NMath.exp(-(x.abs)))
  end

  # Multiplies the incoming gradient by the derivative 1 / (1 + exp(-x)).
  # When exp(-x) overflows to Infinity the quotient is 0, the correct limit.
  # @param dy Gradient flowing back from the next layer.
  def backward(dy)
    dy * (1 / (1 + NMath.exp(-@x)))
  end
end
48
+
49
+
50
# Swish activation layer: x * sigmoid(x).
class Swish < Layers::Layer
  # Keeps the input in @x, and caches and returns x * sigmoid(x) in @y.
  # @param x Input passed to NMath.exp.
  def forward(x)
    @x = x
    gate = 1 / (1 + NMath.exp(-x))
    @y = x * gate
  end

  # Multiplies the incoming gradient by y + sigmoid(x) * (1 - y), where x and
  # y are the values cached by the last #forward call.
  # @param dy Gradient flowing back from the next layer.
  def backward(dy)
    gate = 1 / (1 + NMath.exp(-@x))
    dy * (@y + gate * (1 - @y))
  end
end
60
+
61
+
62
# Rectified linear unit: negative inputs become 0, the rest pass through.
class ReLU < Layers::Layer
  # Returns a copy of x with every negative element replaced by 0.
  #
  # The caller's array is left untouched. Zeroing it in place would also
  # rewrite any reference the producer of x still holds (for example an
  # upstream activation that cached its own output for its backward pass).
  # @param x Input; must support #clone and mask assignment.
  def forward(x)
    # Private copy of the input; #backward turns it into the gradient mask.
    @x = x.clone
    y = x.clone
    y[y < 0] = 0
    y
  end

  # Multiplies the incoming gradient by 1 where the input was positive and by
  # 0 elsewhere. The cached copy of the input is overwritten with that mask.
  # @param dy Gradient flowing back from the next layer.
  def backward(dy)
    @x[@x > 0] = 1
    @x[@x <= 0] = 0
    dy * @x
  end
end
75
+
76
+
77
# Leaky rectified linear unit: positive inputs pass through, the rest are
# multiplied by alpha.
class LeakyReLU < Layers::Layer
  # @return [Float] Return the alpha value.
  attr_reader :alpha

  # Builds a layer from a hash carrying the :alpha key (see #to_hash).
  def self.from_hash(hash)
    self.new(hash[:alpha])
  end

  # @param [Float] alpha The slope when the output value is negative.
  def initialize(alpha = 0.3)
    @alpha = alpha
  end

  # Caches a copy of the input and returns x scaled by the per-element slope.
  # @param x Input; must respond to #shape, #clone and comparison with 0.
  def forward(x)
    @x = x.clone
    x * slope_mask(x)
  end

  # Multiplies the incoming gradient by the per-element slope of the cached
  # input. The cache is only read, so calling #backward more than once after a
  # single #forward returns the same gradient every time.
  # @param dy Gradient flowing back from the next layer.
  def backward(dy)
    dy * slope_mask(@x)
  end

  def to_hash
    {class: self.class.name, alpha: alpha}
  end

  private

  # Array shaped like x holding 1 where x > 0 and alpha where x <= 0.
  def slope_mask(x)
    mask = Xumo::SFloat.ones(x.shape)
    mask[x <= 0] = @alpha
    mask
  end
end
107
+
108
+
109
# Exponential linear unit: x for x >= 0, alpha * (exp(x) - 1) for x < 0.
class ELU < Layers::Layer
  # @return [Float] Return the alpha value.
  attr_reader :alpha

  # Builds a layer from a hash carrying the :alpha key (see #to_hash).
  def self.from_hash(hash)
    self.new(hash[:alpha])
  end

  # @param [Float] alpha Scale of the exponential branch used for negative inputs.
  def initialize(alpha = 1.0)
    @alpha = alpha
  end

  # Keeps a reference to the input in @x and returns the ELU of x.
  #
  # exp is only evaluated on values <= 0. Selecting the branch by multiplying a
  # 0/1 mask with alpha * exp(x) yields 0 * Infinity = NaN as soon as exp(x)
  # overflows for a large positive input.
  # @param x Input; must support #clone and mask assignment.
  def forward(x)
    @x = x
    positive_part = x.clone
    positive_part[x < 0] = 0
    # For x >= 0 the exponential term is alpha * exp(0) - alpha = 0.
    positive_part + (@alpha * NMath.exp(non_positive_part(x)) - @alpha)
  end

  # Multiplies the incoming gradient by 1 where x >= 0 and by alpha * exp(x)
  # where x < 0, again without evaluating exp on positive values.
  # @param dy Gradient flowing back from the next layer.
  def backward(dy)
    dx = @alpha * NMath.exp(non_positive_part(@x))
    dx[@x >= 0] = 1
    dy * dx
  end

  def to_hash
    {class: self.class.name, alpha: @alpha}
  end

  private

  # Copy of x with every element >= 0 replaced by 0, so exp cannot overflow.
  def non_positive_part(x)
    clipped = x.clone
    clipped[x >= 0] = 0
    clipped
  end
end
146
+
147
+ end
148
+ end
@@ -1,464 +1,464 @@
1
- module DNN
2
- module Layers
3
- # This module is used for convolution.
4
- module Conv2D_Utils
5
- private
6
-
7
- # img[bsize, out_h, out_w, ch] to col[bsize * out_h * out_w, fil_h * fil_w * ch]
8
- def im2col(img, out_h, out_w, fil_h, fil_w, strides)
9
- bsize = img.shape[0]
10
- ch = img.shape[3]
11
- col = Xumo::SFloat.zeros(bsize, out_h, out_w, fil_h, fil_w, ch)
12
- (0...fil_h).each do |i|
13
- i_range = (i...(i + strides[0] * out_h)).step(strides[0]).to_a
14
- (0...fil_w).each do |j|
15
- j_range = (j...(j + strides[1] * out_w)).step(strides[1]).to_a
16
- col[true, true, true, i, j, true] = img[true, i_range, j_range, true]
17
- end
18
- end
19
- col.reshape(bsize * out_h * out_w, fil_h * fil_w * ch)
20
- end
21
-
22
- # col[bsize * out_h * out_w, fil_h * fil_w * ch] to img[bsize, out_h, out_w, ch]
23
- def col2im(col, img_shape, out_h, out_w, fil_h, fil_w, strides)
24
- bsize, img_h, img_w, ch = img_shape
25
- col = col.reshape(bsize, out_h, out_w, fil_h, fil_w, ch)
26
- img = Xumo::SFloat.zeros(bsize, img_h, img_w, ch)
27
- (0...fil_h).each do |i|
28
- i_range = (i...(i + strides[0] * out_h)).step(strides[0]).to_a
29
- (0...fil_w).each do |j|
30
- j_range = (j...(j + strides[1] * out_w)).step(strides[1]).to_a
31
- img[true, i_range, j_range, true] += col[true, true, true, i, j, true]
32
- end
33
- end
34
- img
35
- end
36
-
37
- def zero_padding(img, pad)
38
- bsize, img_h, img_w, ch = img.shape
39
- img2 = Xumo::SFloat.zeros(bsize, img_h + pad[0], img_w + pad[1], ch)
40
- i_begin = pad[0] / 2
41
- i_end = i_begin + img_h
42
- j_begin = pad[1] / 2
43
- j_end = j_begin + img_w
44
- img2[true, i_begin...i_end, j_begin...j_end, true] = img
45
- img2
46
- end
47
-
48
- def zero_padding_bwd(img, pad)
49
- i_begin = pad[0] / 2
50
- i_end = img.shape[1] - (pad[0] / 2.0).round
51
- j_begin = pad[1] / 2
52
- j_end = img.shape[2] - (pad[1] / 2.0).round
53
- img[true, i_begin...i_end, j_begin...j_end, true]
54
- end
55
-
56
- def calc_conv2d_out_size(prev_h, prev_w, fil_h, fil_w, pad_h, pad_w, strides)
57
- out_h = (prev_h + pad_h - fil_h) / strides[0] + 1
58
- out_w = (prev_w + pad_w - fil_w) / strides[1] + 1
59
- [out_h, out_w]
60
- end
61
-
62
- def calc_deconv2d_out_size(prev_h, prev_w, fil_h, fil_w, pad_h, pad_w, strides)
63
- out_h = (prev_h - 1) * strides[0] + fil_h - pad_h
64
- out_w = (prev_w - 1) * strides[1] + fil_w - pad_w
65
- [out_h, out_w]
66
- end
67
-
68
- def calc_padding_size(prev_h, prev_w, out_h, out_w, strides)
69
- pad_h = (prev_h.to_f / strides[0]).ceil - out_h
70
- pad_w = (prev_w.to_f / strides[1]).ceil - out_w
71
- [pad_h, pad_w]
72
- end
73
-
74
- def calc_padding_size(prev_h, prev_w, out_h, out_w, strides)
75
- pad_h = ((prev_h.to_f / strides[0]).ceil - out_h) * strides[0]
76
- pad_w = ((prev_w.to_f / strides[1]).ceil - out_w) * strides[1]
77
- [pad_h, pad_w]
78
- end
79
- end
80
-
81
-
82
- class Conv2D < Connection
83
- include Conv2D_Utils
84
-
85
- # @return [Integer] number of filters.
86
- attr_reader :num_filters
87
- # @return [Array] Return filter size. filter size is of the form [height, width].
88
- attr_reader :filter_size
89
- # @return [Array] Return stride length. stride length is of the form [height, width].
90
- attr_reader :strides
91
- # @return [Array | Bool] Return padding size or whether to padding.
92
- attr_reader :padding
93
-
94
- def self.from_hash(hash)
95
- Conv2D.new(hash[:num_filters], hash[:filter_size],
96
- weight_initializer: Utils.from_hash(hash[:weight_initializer]),
97
- bias_initializer: Utils.from_hash(hash[:bias_initializer]),
98
- weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
99
- bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
100
- use_bias: hash[:use_bias],
101
- strides: hash[:strides],
102
- padding: hash[:padding])
103
- end
104
-
105
- # @param [Integer] num_filters Number of filters.
106
- # @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
107
- # @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
108
- # @param [Array | Bool] padding Padding size or whether to padding. Padding size is of the form [height, width].
109
- def initialize(num_filters, filter_size,
110
- weight_initializer: Initializers::RandomNormal.new,
111
- bias_initializer: Initializers::Zeros.new,
112
- weight_regularizer: nil,
113
- bias_regularizer: nil,
114
- use_bias: true,
115
- strides: 1,
116
- padding: false)
117
- super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
118
- weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
119
- @num_filters = num_filters
120
- @filter_size = filter_size.is_a?(Integer) ? [filter_size, filter_size] : filter_size
121
- @strides = strides.is_a?(Integer) ? [strides, strides] : strides
122
- @padding = padding.is_a?(Integer) ? [padding, padding] : padding
123
- end
124
-
125
- def build(input_shape)
126
- super
127
- prev_h, prev_w, num_prev_filter = *input_shape
128
- @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * num_prev_filter, @num_filters)
129
- @bias.data = Xumo::SFloat.new(@num_filters) if @bias
130
- init_weight_and_bias
131
- if @padding == true
132
- out_h, out_w = calc_conv2d_out_size(prev_h, prev_w, *@filter_size, 0, 0, @strides)
133
- @pad_size = calc_padding_size(prev_h, prev_w, out_h, out_w, @strides)
134
- elsif @padding.is_a?(Array)
135
- @pad_size = @padding
136
- else
137
- @pad_size = [0, 0]
138
- end
139
- @out_size = calc_conv2d_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
140
- end
141
-
142
- def forward(x)
143
- x = zero_padding(x, @pad_size) if @padding
144
- @x_shape = x.shape
145
- @col = im2col(x, *@out_size, *@filter_size, @strides)
146
- y = @col.dot(@weight.data)
147
- y += @bias.data if @bias
148
- y.reshape(x.shape[0], *@out_size, y.shape[3])
149
- end
150
-
151
- def backward(dy)
152
- dy = dy.reshape(dy.shape[0..2].reduce(:*), dy.shape[3])
153
- if @trainable
154
- @weight.grad += @col.transpose.dot(dy)
155
- @bias.grad += dy.sum(0) if @bias
156
- end
157
- dcol = dy.dot(@weight.data.transpose)
158
- dx = col2im(dcol, @x_shape, *@out_size, *@filter_size, @strides)
159
- @padding ? zero_padding_bwd(dx, @pad_size) : dx
160
- end
161
-
162
- def output_shape
163
- [*@out_size, @num_filters]
164
- end
165
-
166
- # @return [Numo::SFloat] Convert weight to filter and return.
167
- def filters
168
- num_prev_filter = @input_shape[2]
169
- @weight.data.reshape(*@filter_size, num_prev_filter, @num_filters)
170
- end
171
-
172
- # @param [Numo::SFloat] filters Convert weight to filters and set.
173
- def filters=(filters)
174
- num_prev_filter = @input_shape[2]
175
- @weight.data = filters.reshape(@filter_size.reduce(:*) * num_prev_filter, @num_filters)
176
- end
177
-
178
- def to_hash
179
- super({num_filters: @num_filters,
180
- filter_size: @filter_size,
181
- strides: @strides,
182
- padding: @padding})
183
- end
184
- end
185
-
186
-
187
- class Conv2D_Transpose < Connection
188
- include Conv2D_Utils
189
-
190
- # @return [Integer] number of filters.
191
- attr_reader :num_filters
192
- # @return [Array] Return filter size. filter size is of the form [height, width].
193
- attr_reader :filter_size
194
- # @return [Array] Return stride length. stride length is of the form [height, width].
195
- attr_reader :strides
196
- # @return [Array] Return padding size.
197
- attr_reader :padding
198
-
199
- def self.from_hash(hash)
200
- Conv2D_Transpose.new(hash[:num_filters], hash[:filter_size],
201
- weight_initializer: Utils.from_hash(hash[:weight_initializer]),
202
- bias_initializer: Utils.from_hash(hash[:bias_initializer]),
203
- weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
204
- bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
205
- use_bias: hash[:use_bias],
206
- strides: hash[:strides],
207
- padding: hash[:padding])
208
- end
209
-
210
- # @param [Integer] num_filters Number of filters.
211
- # @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
212
- # @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
213
- # @param [Array] padding Padding size. Padding size is of the form [height, width].
214
- def initialize(num_filters, filter_size,
215
- weight_initializer: Initializers::RandomNormal.new,
216
- bias_initializer: Initializers::Zeros.new,
217
- weight_regularizer: nil,
218
- bias_regularizer: nil,
219
- use_bias: true,
220
- strides: 1,
221
- padding: false)
222
- super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
223
- weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
224
- @num_filters = num_filters
225
- @filter_size = filter_size.is_a?(Integer) ? [filter_size, filter_size] : filter_size
226
- @strides = strides.is_a?(Integer) ? [strides, strides] : strides
227
- @padding = padding.is_a?(Integer) ? [padding, padding] : padding
228
- end
229
-
230
- def build(input_shape)
231
- super
232
- prev_h, prev_w, num_prev_filter = *input_shape
233
- @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * @num_filters, num_prev_filter)
234
- @weight_initializer.init_param(self, @weight)
235
- @weight_regularizer.param = @weight if @weight_regularizer
236
- if @bias
237
- @bias.data = Xumo::SFloat.new(@num_filters)
238
- @bias_initializer.init_param(self, @bias)
239
- @bias_regularizer.param = @bias if @bias_regularizer
240
- end
241
- if @padding == true
242
- out_h, out_w = calc_deconv2d_out_size(prev_h, prev_w, *@filter_size, 0, 0, @strides)
243
- @pad_size = calc_padding_size(out_h, out_w, prev_h, prev_w, @strides)
244
- elsif @padding.is_a?(Array)
245
- @pad_size = @padding
246
- else
247
- @pad_size = [0, 0]
248
- end
249
- @out_size = calc_deconv2d_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
250
- end
251
-
252
- def forward(x)
253
- bsize = x.shape[0]
254
- x = x.reshape(x.shape[0..2].reduce(:*), x.shape[3])
255
- @x = x
256
- col = x.dot(@weight.data.transpose)
257
- img_shape = [bsize, @out_size[0] + @pad_size[0], @out_size[1] + @pad_size[1], @num_filters]
258
- y = col2im(col, img_shape, *input_shape[0..1], *@filter_size, @strides)
259
- y += @bias.data if @bias
260
- @padding ? zero_padding_bwd(y, @pad_size) : y
261
- end
262
-
263
- def backward(dy)
264
- dy = zero_padding(dy, @pad_size) if @padding
265
- col = im2col(dy, *input_shape[0..1], *@filter_size, @strides)
266
- if @trainable
267
- @weight.grad += col.transpose.dot(@x)
268
- @bias.grad += col.reshape(col.shape[0] * @filter_size.reduce(:*), @num_filters).sum(0) if @bias
269
- end
270
- dx = col.dot(@weight.data)
271
- dx.reshape(dy.shape[0], *input_shape)
272
- end
273
-
274
- def output_shape
275
- [*@out_size, @num_filters]
276
- end
277
-
278
- # @return [Numo::SFloat] Convert weight to filter and return.
279
- def filters
280
- num_prev_filter = @input_shape[2]
281
- @weight.data.reshape(*@filter_size, @num_filters, num_prev_filter)
282
- end
283
-
284
- # @param [Numo::SFloat] filters Convert weight to filters and set.
285
- def filters=(filters)
286
- num_prev_filter = @input_shape[2]
287
- @weight.data = filters.reshape(@filter_size.reduce(:*) * @num_filters, num_prev_filter)
288
- end
289
-
290
- def to_hash
291
- super({num_filters: @num_filters,
292
- filter_size: @filter_size,
293
- strides: @strides,
294
- padding: @padding})
295
- end
296
- end
297
-
298
-
299
- # Super class of all pooling2D class.
300
- class Pool2D < Layer
301
- include Conv2D_Utils
302
-
303
- # @return [Array] Return pooling size. Pooling size is of the form [height, width].
304
- attr_reader :pool_size
305
- # @return [Array] Return stride length. Stride length is of the form [height, width].
306
- attr_reader :strides
307
- # @return [Array | Bool] Return padding size or whether to padding.
308
- attr_reader :padding
309
-
310
- def self.from_hash(pool2d_class, hash)
311
- pool2d_class.new(hash[:pool_size], strides: hash[:strides], padding: hash[:padding])
312
- end
313
-
314
- # @param [Array | Integer] pool_size Pooling size. Pooling size is of the form [height, width].
315
- # @param [Array | Integer | NilClass] strides stride length. Stride length is of the form [height, width].
316
- # If you set nil, treat pool_size as strides.
317
- # @param [Array | Bool] padding Padding size or whether to padding. Padding size is of the form [height, width].
318
- def initialize(pool_size, strides: nil, padding: false)
319
- super()
320
- @pool_size = pool_size.is_a?(Integer) ? [pool_size, pool_size] : pool_size
321
- @strides = if strides
322
- strides.is_a?(Integer) ? [strides, strides] : strides
323
- else
324
- @pool_size.clone
325
- end
326
- @padding = padding.is_a?(Integer) ? [padding, padding] : padding
327
- end
328
-
329
- def build(input_shape)
330
- super
331
- prev_h, prev_w = input_shape[0..1]
332
- @num_channel = input_shape[2]
333
- if @padding == true
334
- out_h, out_w = calc_conv2d_out_size(prev_h, prev_w, *@pool_size, 0, 0, @strides)
335
- @pad_size = calc_padding_size(prev_h, prev_w, out_h, out_w, @strides)
336
- elsif @padding.is_a?(Array)
337
- @pad_size = @padding
338
- else
339
- @pad_size = [0, 0]
340
- end
341
- @out_size = calc_conv2d_out_size(prev_h, prev_w, *@pool_size, *@pad_size, @strides)
342
- end
343
-
344
- def output_shape
345
- [*@out_size, @num_channel]
346
- end
347
-
348
- def to_hash
349
- super({pool_size: @pool_size,
350
- strides: @strides,
351
- padding: @padding})
352
- end
353
- end
354
-
355
-
356
- class MaxPool2D < Pool2D
357
- def self.from_hash(hash)
358
- Pool2D.from_hash(self, hash)
359
- end
360
-
361
- def forward(x)
362
- x = zero_padding(x, @pad_size) if @padding
363
- @x_shape = x.shape
364
- col = im2col(x, *@out_size, *@pool_size, @strides)
365
- col = col.reshape(x.shape[0] * @out_size.reduce(:*), @pool_size.reduce(:*), x.shape[3]).transpose(0, 2, 1)
366
- .reshape(x.shape[0] * @out_size.reduce(:*) * x.shape[3], @pool_size.reduce(:*))
367
- @max_index = col.max_index(1)
368
- col.max(1).reshape(x.shape[0], *@out_size, x.shape[3])
369
- end
370
-
371
- def backward(dy)
372
- dmax = Xumo::SFloat.zeros(dy.size * @pool_size.reduce(:*))
373
- dmax[@max_index] = dy.flatten
374
- dcol = dmax.reshape(dy.shape[0..2].reduce(:*), @pool_size.reduce(:*) * dy.shape[3])
375
- dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides)
376
- @padding ? zero_padding_bwd(dx, @pad_size) : dx
377
- end
378
- end
379
-
380
-
381
- class AvgPool2D < Pool2D
382
- def self.from_hash(hash)
383
- Pool2D.from_hash(self, hash)
384
- end
385
-
386
- def forward(x)
387
- x = zero_padding(x, @pad_size) if @padding
388
- @x_shape = x.shape
389
- col = im2col(x, *@out_size, *@pool_size, @strides)
390
- col = col.reshape(x.shape[0] * @out_size.reduce(:*), @pool_size.reduce(:*), x.shape[3]).transpose(0, 2, 1)
391
- .reshape(x.shape[0] * @out_size.reduce(:*) * x.shape[3], @pool_size.reduce(:*))
392
- col.mean(1).reshape(x.shape[0], *@out_size, x.shape[3])
393
- end
394
-
395
- def backward(dy)
396
- row_length = @pool_size.reduce(:*)
397
- dy /= row_length
398
- davg = Xumo::SFloat.zeros(dy.size, row_length)
399
- row_length.times do |i|
400
- davg[true, i] = dy.flatten
401
- end
402
- dcol = davg.reshape(dy.shape[0..2].reduce(:*), dy.shape[3] * @pool_size.reduce(:*))
403
- dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides)
404
- @padding ? zero_padding_bwd(dx, @pad_size) : dx
405
- end
406
- end
407
-
408
-
409
- class UnPool2D < Layer
410
- include Conv2D_Utils
411
-
412
- # @return [Array] Return unpooling size. unpooling size is of the form [height, width].
413
- attr_reader :unpool_size
414
-
415
- # @param [Array or Integer] unpool_size Unpooling size. unpooling size is of the form [height, width].
416
- def initialize(unpool_size)
417
- super()
418
- @unpool_size = unpool_size.is_a?(Integer) ? [unpool_size, unpool_size] : unpool_size
419
- end
420
-
421
- def self.from_hash(hash)
422
- UnPool2D.new(hash[:unpool_size])
423
- end
424
-
425
- def build(input_shape)
426
- super
427
- prev_h, prev_w = input_shape[0..1]
428
- unpool_h, unpool_w = @unpool_size
429
- out_h = prev_h * unpool_h
430
- out_w = prev_w * unpool_w
431
- @out_size = [out_h, out_w]
432
- @num_channel = input_shape[2]
433
- end
434
-
435
- def forward(x)
436
- @x_shape = x.shape
437
- unpool_h, unpool_w = @unpool_size
438
- x2 = Xumo::SFloat.zeros(x.shape[0], x.shape[1], unpool_h, x.shape[2], unpool_w, @num_channel)
439
- unpool_h.times do |i|
440
- unpool_w.times do |j|
441
- x2[true, true, i, true, j, true] = x
442
- end
443
- end
444
- x2.reshape(x.shape[0], *@out_size, x.shape[3])
445
- end
446
-
447
- def backward(dy)
448
- in_size = input_shape[0..1]
449
- col = im2col(dy, *input_shape[0..1], *@unpool_size, @unpool_size)
450
- col = col.reshape(dy.shape[0] * in_size.reduce(:*), @unpool_size.reduce(:*), dy.shape[3]).transpose(0, 2, 1)
451
- .reshape(dy.shape[0] * in_size.reduce(:*) * dy.shape[3], @unpool_size.reduce(:*))
452
- col.sum(1).reshape(dy.shape[0], *in_size, dy.shape[3])
453
- end
454
-
455
- def output_shape
456
- [*@out_size, @num_channel]
457
- end
458
-
459
- def to_hash
460
- super({unpool_size: @unpool_size})
461
- end
462
- end
463
- end
464
- end
1
+ module DNN
2
+ module Layers
3
# Helpers shared by the convolution and pooling layers: im2col / col2im
# conversion, zero padding, and output / padding size arithmetic.
module Conv2D_Utils
  private

  # Copies every filter-sized window of an image batch into one matrix row.
  # img[bsize, img_h, img_w, ch] to col[bsize * out_h * out_w, fil_h * fil_w * ch]
  # @param [Array] strides Stride length of the form [height, width].
  def im2col(img, out_h, out_w, fil_h, fil_w, strides)
    bsize = img.shape[0]
    ch = img.shape[3]
    col = Xumo::SFloat.zeros(bsize, out_h, out_w, fil_h, fil_w, ch)
    (0...fil_h).each do |i|
      # Image rows read by filter row i: i, i + stride, ... (out_h of them).
      i_range = (i...(i + strides[0] * out_h)).step(strides[0]).to_a
      (0...fil_w).each do |j|
        j_range = (j...(j + strides[1] * out_w)).step(strides[1]).to_a
        col[true, true, true, i, j, true] = img[true, i_range, j_range, true]
      end
    end
    col.reshape(bsize * out_h * out_w, fil_h * fil_w * ch)
  end

  # Inverse layout of #im2col: adds every matrix row back onto the window it
  # came from, so positions covered by several windows are accumulated.
  # col[bsize * out_h * out_w, fil_h * fil_w * ch] to img[bsize, img_h, img_w, ch]
  # @param [Array] img_shape Shape of the image to build, [bsize, img_h, img_w, ch].
  # @param [Array] strides Stride length of the form [height, width].
  def col2im(col, img_shape, out_h, out_w, fil_h, fil_w, strides)
    bsize, img_h, img_w, ch = img_shape
    col = col.reshape(bsize, out_h, out_w, fil_h, fil_w, ch)
    img = Xumo::SFloat.zeros(bsize, img_h, img_w, ch)
    (0...fil_h).each do |i|
      i_range = (i...(i + strides[0] * out_h)).step(strides[0]).to_a
      (0...fil_w).each do |j|
        j_range = (j...(j + strides[1] * out_w)).step(strides[1]).to_a
        img[true, i_range, j_range, true] += col[true, true, true, i, j, true]
      end
    end
    img
  end

  # Returns a zero-filled copy of img enlarged by pad[0] rows and pad[1] columns.
  # pad is the total padding per axis: pad / 2 (rounded down) goes before the
  # image and the remainder after it.
  def zero_padding(img, pad)
    bsize, img_h, img_w, ch = img.shape
    img2 = Xumo::SFloat.zeros(bsize, img_h + pad[0], img_w + pad[1], ch)
    i_begin = pad[0] / 2
    i_end = i_begin + img_h
    j_begin = pad[1] / 2
    j_end = j_begin + img_w
    img2[true, i_begin...i_end, j_begin...j_end, true] = img
    img2
  end

  # Crops the border that #zero_padding adds for the same pad: pad / 2 rounded
  # down at the start of each axis and rounded up at the end.
  def zero_padding_bwd(img, pad)
    i_begin = pad[0] / 2
    i_end = img.shape[1] - (pad[0] / 2.0).round
    j_begin = pad[1] / 2
    j_end = img.shape[2] - (pad[1] / 2.0).round
    img[true, i_begin...i_end, j_begin...j_end, true]
  end

  # Output size [height, width] of a convolution or pooling window.
  # pad_h / pad_w are the total padding per axis.
  def calc_conv2d_out_size(prev_h, prev_w, fil_h, fil_w, pad_h, pad_w, strides)
    out_h = (prev_h + pad_h - fil_h) / strides[0] + 1
    out_w = (prev_w + pad_w - fil_w) / strides[1] + 1
    [out_h, out_w]
  end

  # Output size [height, width] of a transposed convolution.
  # pad_h / pad_w are the total padding per axis.
  def calc_deconv2d_out_size(prev_h, prev_w, fil_h, fil_w, pad_h, pad_w, strides)
    out_h = (prev_h - 1) * strides[0] + fil_h - pad_h
    out_w = (prev_w - 1) * strides[1] + fil_w - pad_w
    [out_h, out_w]
  end

  # Total padding [height, width] used when a layer is built with padding: true.
  # It is the shortfall between ceil(prev / stride) and the unpadded output
  # size, scaled back up by the stride.
  def calc_padding_size(prev_h, prev_w, out_h, out_w, strides)
    pad_h = ((prev_h.to_f / strides[0]).ceil - out_h) * strides[0]
    pad_w = ((prev_w.to_f / strides[1]).ceil - out_w) * strides[1]
    [pad_h, pad_w]
  end
end
80
+
81
+
82
# 2-D convolution layer, computed as im2col followed by a single matrix
# product with the flattened filters.
class Conv2D < Connection
  include Conv2D_Utils

  # @return [Integer] number of filters.
  attr_reader :num_filters
  # @return [Array] Return filter size. filter size is of the form [height, width].
  attr_reader :filter_size
  # @return [Array] Return stride length. stride length is of the form [height, width].
  attr_reader :strides
  # @return [Array | Bool] Return padding size or whether to pad.
  attr_reader :padding

  # Builds a layer from a hash carrying the keys read below (see #to_hash).
  def self.from_hash(hash)
    Conv2D.new(hash[:num_filters], hash[:filter_size],
               weight_initializer: Utils.from_hash(hash[:weight_initializer]),
               bias_initializer: Utils.from_hash(hash[:bias_initializer]),
               weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
               bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
               use_bias: hash[:use_bias],
               strides: hash[:strides],
               padding: hash[:padding])
  end

  # @param [Integer] num_filters Number of filters.
  # @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
  # @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
  # @param [Array | Bool] padding Padding size or whether to pad. Padding size is of the form [height, width].
  def initialize(num_filters, filter_size,
                 weight_initializer: Initializers::RandomNormal.new,
                 bias_initializer: Initializers::Zeros.new,
                 weight_regularizer: nil,
                 bias_regularizer: nil,
                 use_bias: true,
                 strides: 1,
                 padding: false)
    super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
          weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
    @num_filters = num_filters
    # A single Integer is shorthand for the same value on both axes.
    @filter_size = filter_size.is_a?(Integer) ? [filter_size, filter_size] : filter_size
    @strides = strides.is_a?(Integer) ? [strides, strides] : strides
    @padding = padding.is_a?(Integer) ? [padding, padding] : padding
  end

  # Allocates the parameters and derives the padding and output sizes.
  # @param [Array] input_shape Shape of one sample, [height, width, channels].
  def build(input_shape)
    super
    prev_h, prev_w, num_prev_filter = *input_shape
    # Filters are stored flattened: one column per filter.
    @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * num_prev_filter, @num_filters)
    @bias.data = Xumo::SFloat.new(@num_filters) if @bias
    init_weight_and_bias
    if @padding == true
      # padding: true derives the padding size from the unpadded output size.
      out_h, out_w = calc_conv2d_out_size(prev_h, prev_w, *@filter_size, 0, 0, @strides)
      @pad_size = calc_padding_size(prev_h, prev_w, out_h, out_w, @strides)
    elsif @padding.is_a?(Array)
      @pad_size = @padding
    else
      @pad_size = [0, 0]
    end
    @out_size = calc_conv2d_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
  end

  # @param x Input batch indexed as [batch, height, width, channels].
  # @return Output batch reshaped to [batch, out_h, out_w, num_filters].
  def forward(x)
    x = zero_padding(x, @pad_size) if @padding
    @x_shape = x.shape
    @col = im2col(x, *@out_size, *@filter_size, @strides)
    y = @col.dot(@weight.data)
    y += @bias.data if @bias
    # y is 2-D here ([batch * out_h * out_w, num_filters]), so the channel
    # count is taken from the layer instead of a fourth axis y does not have.
    y.reshape(x.shape[0], *@out_size, @num_filters)
  end

  # Accumulates the parameter gradients (when @trainable) and returns the
  # gradient with respect to the unpadded input.
  # @param dy Gradient of the output, indexed as [batch, out_h, out_w, num_filters].
  def backward(dy)
    dy = dy.reshape(dy.shape[0..2].reduce(:*), dy.shape[3])
    if @trainable
      @weight.grad += @col.transpose.dot(dy)
      @bias.grad += dy.sum(0) if @bias
    end
    dcol = dy.dot(@weight.data.transpose)
    dx = col2im(dcol, @x_shape, *@out_size, *@filter_size, @strides)
    @padding ? zero_padding_bwd(dx, @pad_size) : dx
  end

  # @return [Array] Shape of one output sample, [out_h, out_w, num_filters].
  def output_shape
    [*@out_size, @num_filters]
  end

  # @return [Numo::SFloat] Convert weight to filter and return.
  def filters
    num_prev_filter = @input_shape[2]
    @weight.data.reshape(*@filter_size, num_prev_filter, @num_filters)
  end

  # @param [Numo::SFloat] filters Convert weight to filters and set.
  def filters=(filters)
    num_prev_filter = @input_shape[2]
    @weight.data = filters.reshape(@filter_size.reduce(:*) * num_prev_filter, @num_filters)
  end

  # Passes this layer's own settings to the superclass #to_hash.
  def to_hash
    super({num_filters: @num_filters,
           filter_size: @filter_size,
           strides: @strides,
           padding: @padding})
  end
end
185
+
186
+
187
+ class Conv2D_Transpose < Connection
188
+ include Conv2D_Utils
189
+
190
+ # @return [Integer] number of filters.
191
+ attr_reader :num_filters
192
+ # @return [Array] Return filter size. filter size is of the form [height, width].
193
+ attr_reader :filter_size
194
+ # @return [Array] Return stride length. stride length is of the form [height, width].
195
+ attr_reader :strides
196
+ # @return [Array] Return padding size.
197
+ attr_reader :padding
198
+
199
# Builds a layer from a hash carrying the keys read below. The initializer
# and regularizer entries are rebuilt through Utils.from_hash.
def self.from_hash(hash)
  num_filters = hash[:num_filters]
  filter_size = hash[:filter_size]
  options = {
    weight_initializer: Utils.from_hash(hash[:weight_initializer]),
    bias_initializer: Utils.from_hash(hash[:bias_initializer]),
    weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
    bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
    use_bias: hash[:use_bias],
    strides: hash[:strides],
    padding: hash[:padding]
  }
  Conv2D_Transpose.new(num_filters, filter_size, **options)
end
209
+
210
+ # @param [Integer] num_filters Number of filters.
211
+ # @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
212
+ # @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
213
+ # @param [Array] padding Padding size. Padding size is of the form [height, width].
214
def initialize(num_filters, filter_size,
               weight_initializer: Initializers::RandomNormal.new,
               bias_initializer: Initializers::Zeros.new,
               weight_regularizer: nil,
               bias_regularizer: nil,
               use_bias: true,
               strides: 1,
               padding: false)
  super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
        weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
  # A single Integer is shorthand for the same value on both axes; any other
  # value is stored unchanged.
  to_pair = ->(value) { value.is_a?(Integer) ? [value, value] : value }
  @num_filters = num_filters
  @filter_size = to_pair.call(filter_size)
  @strides = to_pair.call(strides)
  @padding = to_pair.call(padding)
end
229
+
230
+ def build(input_shape)
231
+ super
232
+ prev_h, prev_w, num_prev_filter = *input_shape
233
+ @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * @num_filters, num_prev_filter)
234
+ @weight_initializer.init_param(self, @weight)
235
+ @weight_regularizer.param = @weight if @weight_regularizer
236
+ if @bias
237
+ @bias.data = Xumo::SFloat.new(@num_filters)
238
+ @bias_initializer.init_param(self, @bias)
239
+ @bias_regularizer.param = @bias if @bias_regularizer
240
+ end
241
+ if @padding == true
242
+ out_h, out_w = calc_deconv2d_out_size(prev_h, prev_w, *@filter_size, 0, 0, @strides)
243
+ @pad_size = calc_padding_size(out_h, out_w, prev_h, prev_w, @strides)
244
+ elsif @padding.is_a?(Array)
245
+ @pad_size = @padding
246
+ else
247
+ @pad_size = [0, 0]
248
+ end
249
+ @out_size = calc_deconv2d_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
250
+ end
251
+
252
+ def forward(x)
253
+ bsize = x.shape[0]
254
+ x = x.reshape(x.shape[0..2].reduce(:*), x.shape[3])
255
+ @x = x
256
+ col = x.dot(@weight.data.transpose)
257
+ img_shape = [bsize, @out_size[0] + @pad_size[0], @out_size[1] + @pad_size[1], @num_filters]
258
+ y = col2im(col, img_shape, *input_shape[0..1], *@filter_size, @strides)
259
+ y += @bias.data if @bias
260
+ @padding ? zero_padding_bwd(y, @pad_size) : y
261
+ end
262
+
263
+ def backward(dy)
264
+ dy = zero_padding(dy, @pad_size) if @padding
265
+ col = im2col(dy, *input_shape[0..1], *@filter_size, @strides)
266
+ if @trainable
267
+ @weight.grad += col.transpose.dot(@x)
268
+ @bias.grad += col.reshape(col.shape[0] * @filter_size.reduce(:*), @num_filters).sum(0) if @bias
269
+ end
270
+ dx = col.dot(@weight.data)
271
+ dx.reshape(dy.shape[0], *input_shape)
272
+ end
273
+
274
+ def output_shape
275
+ [*@out_size, @num_filters]
276
+ end
277
+
278
+ # @return [Numo::SFloat] Convert weight to filter and return.
279
+ def filters
280
+ num_prev_filter = @input_shape[2]
281
+ @weight.data.reshape(*@filter_size, @num_filters, num_prev_filter)
282
+ end
283
+
284
+ # @param [Numo::SFloat] filters Convert weight to filters and set.
285
+ def filters=(filters)
286
+ num_prev_filter = @input_shape[2]
287
+ @weight.data = filters.reshape(@filter_size.reduce(:*) * @num_filters, num_prev_filter)
288
+ end
289
+
290
+ def to_hash
291
+ super({num_filters: @num_filters,
292
+ filter_size: @filter_size,
293
+ strides: @strides,
294
+ padding: @padding})
295
+ end
296
+ end
297
+
298
+
299
# Common base of the 2D pooling layers: keeps the pooling geometry
# (pool size, strides, padding) and derives the output size from it.
class Pool2D < Layer
  include Conv2D_Utils

  # @return [Array] Return pooling size. Pooling size is of the form [height, width].
  attr_reader :pool_size
  # @return [Array] Return stride length. Stride length is of the form [height, width].
  attr_reader :strides
  # @return [Array | Bool] Return padding size or whether to padding.
  attr_reader :padding

  # Instantiate the given pooling class from a hash produced by #to_hash.
  # @param [Class] pool2d_class Pooling layer class to instantiate.
  # @param [Hash] hash Hash with the :pool_size, :strides and :padding entries.
  def self.from_hash(pool2d_class, hash)
    options = {strides: hash[:strides], padding: hash[:padding]}
    pool2d_class.new(hash[:pool_size], **options)
  end

  # @param [Array | Integer] pool_size Pooling size. Pooling size is of the form [height, width].
  # @param [Array | Integer | NilClass] strides stride length. Stride length is of the form [height, width].
  #                                             If you set nil, treat pool_size as strides.
  # @param [Array | Bool] padding Padding size or whether to padding. Padding size is of the form [height, width].
  def initialize(pool_size, strides: nil, padding: false)
    super()
    # An Integer stands for the same value in both directions.
    to_pair = ->(value) { value.is_a?(Integer) ? [value, value] : value }
    @pool_size = to_pair.call(pool_size)
    @strides = strides ? to_pair.call(strides) : @pool_size.clone
    @padding = to_pair.call(padding)
  end

  # Work out the padding size and the output size for the given input shape.
  # @param [Array] input_shape Input shape of the form [height, width, channel].
  def build(input_shape)
    super
    in_h, in_w, @num_channel = input_shape[0..2]
    @pad_size =
      case @padding
      when true
        # Derive the pad size from the output size obtained without padding.
        plain_h, plain_w = calc_conv2d_out_size(in_h, in_w, *@pool_size, 0, 0, @strides)
        calc_padding_size(in_h, in_w, plain_h, plain_w, @strides)
      when Array
        @padding
      else
        [0, 0]
      end
    @out_size = calc_conv2d_out_size(in_h, in_w, *@pool_size, *@pad_size, @strides)
  end

  # @return [Array] Output shape of the form [height, width, channel].
  def output_shape
    @out_size.to_a + [@num_channel]
  end

  # @return [Hash] The hash from the parent class merged with the pooling settings.
  def to_hash
    settings = {pool_size: @pool_size,
                strides: @strides,
                padding: @padding}
    super(settings)
  end
end
354
+
355
+
356
# 2D max pooling layer.
class MaxPool2D < Pool2D
  # @param [Hash] hash Hash produced by #to_hash.
  # @return [MaxPool2D] The restored layer.
  def self.from_hash(hash)
    Pool2D.from_hash(self, hash)
  end

  # @param [Numo::SFloat] x Rank 4 input whose last axis is the channel.
  # @return [Numo::SFloat] The maximum of every pooling window, per channel.
  def forward(x)
    x = zero_padding(x, @pad_size) if @padding
    @x_shape = x.shape
    num_windows = x.shape[0] * @out_size.reduce(:*)
    pool_len = @pool_size.reduce(:*)
    col = im2col(x, *@out_size, *@pool_size, @strides)
    # Reorder the columns from [window, pool position, channel] to
    # [window * channel, pool position] so that axis 1 runs over one pooling window.
    col = col.reshape(num_windows, pool_len, x.shape[3]).transpose(0, 2, 1)
             .reshape(num_windows * x.shape[3], pool_len)
    # Remembered so that #backward can route the gradient to the winning positions.
    @max_index = col.max_index(1)
    col.max(1).reshape(x.shape[0], *@out_size, x.shape[3])
  end

  # @param [Numo::SFloat] dy Gradient with respect to the output of #forward.
  # @return [Numo::SFloat] Gradient with respect to the input of #forward.
  def backward(dy)
    num_windows = dy.shape[0..2].reduce(:*)
    pool_len = @pool_size.reduce(:*)
    dmax = Xumo::SFloat.zeros(dy.size * pool_len)
    dmax[@max_index] = dy.flatten
    # @max_index addresses the [window, channel, pool position] layout that #forward
    # built, so the transpose has to be undone before col2im, which is fed columns
    # in the [window, pool position, channel] layout that #forward read from im2col.
    dcol = dmax.reshape(num_windows, dy.shape[3], pool_len).transpose(0, 2, 1)
               .reshape(num_windows, pool_len * dy.shape[3])
    dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides)
    @padding ? zero_padding_bwd(dx, @pad_size) : dx
  end
end
379
+
380
+
381
# 2D average pooling layer.
class AvgPool2D < Pool2D
  # @param [Hash] hash Hash produced by #to_hash.
  # @return [AvgPool2D] The restored layer.
  def self.from_hash(hash)
    Pool2D.from_hash(self, hash)
  end

  # @param [Numo::SFloat] x Rank 4 input whose last axis is the channel.
  # @return [Numo::SFloat] The mean of every pooling window, per channel.
  def forward(x)
    x = zero_padding(x, @pad_size) if @padding
    @x_shape = x.shape
    num_windows = x.shape[0] * @out_size.reduce(:*)
    pool_len = @pool_size.reduce(:*)
    col = im2col(x, *@out_size, *@pool_size, @strides)
    # Reorder the columns from [window, pool position, channel] to
    # [window * channel, pool position] so that axis 1 runs over one pooling window.
    col = col.reshape(num_windows, pool_len, x.shape[3]).transpose(0, 2, 1)
             .reshape(num_windows * x.shape[3], pool_len)
    col.mean(1).reshape(x.shape[0], *@out_size, x.shape[3])
  end

  # @param [Numo::SFloat] dy Gradient with respect to the output of #forward.
  # @return [Numo::SFloat] Gradient with respect to the input of #forward.
  def backward(dy)
    row_length = @pool_size.reduce(:*)
    num_windows = dy.shape[0..2].reduce(:*)
    num_channel = dy.shape[3]
    # Every element of a window receives an equal share of that window's gradient.
    share = (dy / row_length).reshape(num_windows, num_channel)
    # Built as [window, pool position, channel], the layout #forward read from im2col.
    # Filling a [window * channel, pool position] buffer and reshaping it directly
    # would mix up the channels whenever there is more than one.
    davg = Xumo::SFloat.zeros(num_windows, row_length, num_channel)
    row_length.times do |i|
      davg[true, i, true] = share
    end
    dcol = davg.reshape(num_windows, row_length * num_channel)
    dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides)
    @padding ? zero_padding_bwd(dx, @pad_size) : dx
  end
end
407
+
408
+
409
# Unpooling layer: enlarges the two spatial axes by copying every input
# pixel into an unpool_size block of the output.
class UnPool2D < Layer
  include Conv2D_Utils

  # @return [Array] Return unpooling size. unpooling size is of the form [height, width].
  attr_reader :unpool_size

  # @param [Array | Integer] unpool_size Unpooling size. unpooling size is of the form [height, width].
  def initialize(unpool_size)
    super()
    @unpool_size = if unpool_size.is_a?(Integer)
                     [unpool_size, unpool_size]
                   else
                     unpool_size
                   end
  end

  # @param [Hash] hash Hash produced by #to_hash.
  # @return [UnPool2D] The restored layer.
  def self.from_hash(hash)
    size = hash[:unpool_size]
    UnPool2D.new(size)
  end

  # Derive the output size: each spatial extent is multiplied by its unpooling factor.
  # @param [Array] input_shape Input shape of the form [height, width, channel].
  def build(input_shape)
    super
    in_h, in_w = input_shape[0..1]
    scale_h, scale_w = @unpool_size
    @num_channel = input_shape[2]
    @out_size = [in_h * scale_h, in_w * scale_w]
  end

  # @param [Numo::SFloat] x Rank 4 input whose last axis is the channel.
  # @return [Numo::SFloat] The enlarged output.
  def forward(x)
    @x_shape = x.shape
    scale_h, scale_w = @unpool_size
    batch, rows, cols = x.shape
    # One extra axis after each spatial axis holds the copies of a pixel.
    expanded = Xumo::SFloat.zeros(batch, rows, scale_h, cols, scale_w, @num_channel)
    scale_h.times do |i|
      scale_w.times { |j| expanded[true, true, i, true, j, true] = x }
    end
    expanded.reshape(batch, *@out_size, x.shape[3])
  end

  # @param [Numo::SFloat] dy Gradient with respect to the output of #forward.
  # @return [Numo::SFloat] Gradient with respect to the input of #forward.
  def backward(dy)
    in_size = input_shape[0..1]
    batch = dy.shape[0]
    channels = dy.shape[3]
    num_pixels = batch * in_size.reduce(:*)
    block_len = @unpool_size.reduce(:*)
    # The unpooling size doubles as the stride, so the blocks do not overlap.
    col = im2col(dy, *input_shape[0..1], *@unpool_size, @unpool_size)
    per_block = col.reshape(num_pixels, block_len, channels)
                   .transpose(0, 2, 1)
                   .reshape(num_pixels * channels, block_len)
    # Each input pixel collects the gradients of all of its copies.
    per_block.sum(1).reshape(batch, *in_size, channels)
  end

  # @return [Array] Output shape of the form [height, width, channel].
  def output_shape
    [*@out_size, @num_channel]
  end

  # @return [Hash] The hash from the parent class merged with the unpooling size.
  def to_hash
    settings = {unpool_size: @unpool_size}
    super(settings)
  end
end
463
+ end
464
+ end