ruby-dnn 0.10.1 → 0.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/examples/cifar100_example.rb +71 -71
- data/examples/cifar10_example.rb +68 -68
- data/examples/iris_example.rb +34 -34
- data/examples/mnist_conv2d_example.rb +50 -50
- data/examples/mnist_example.rb +39 -39
- data/examples/mnist_lstm_example.rb +36 -36
- data/examples/xor_example.rb +24 -24
- data/lib/dnn.rb +27 -26
- data/lib/dnn/cifar10.rb +51 -51
- data/lib/dnn/cifar100.rb +49 -49
- data/lib/dnn/core/activations.rb +148 -148
- data/lib/dnn/core/cnn_layers.rb +464 -464
- data/lib/dnn/core/dataset.rb +34 -34
- data/lib/dnn/core/embedding.rb +56 -0
- data/lib/dnn/core/error.rb +5 -5
- data/lib/dnn/core/initializers.rb +126 -126
- data/lib/dnn/core/layers.rb +307 -307
- data/lib/dnn/core/losses.rb +175 -175
- data/lib/dnn/core/model.rb +461 -461
- data/lib/dnn/core/normalizations.rb +72 -72
- data/lib/dnn/core/optimizers.rb +283 -283
- data/lib/dnn/core/param.rb +9 -9
- data/lib/dnn/core/regularizers.rb +106 -106
- data/lib/dnn/core/rnn_layers.rb +464 -464
- data/lib/dnn/core/utils.rb +34 -34
- data/lib/dnn/downloader.rb +50 -50
- data/lib/dnn/image.rb +41 -41
- data/lib/dnn/iris.rb +60 -60
- data/lib/dnn/mnist.rb +84 -84
- data/lib/dnn/version.rb +3 -3
- metadata +2 -1
data/lib/dnn/core/activations.rb
CHANGED
@@ -1,148 +1,148 @@
|
|
1
|
-
module DNN
|
2
|
-
module Activations
|
3
|
-
|
4
|
-
class Sigmoid < Layers::Layer
|
5
|
-
def forward(x)
|
6
|
-
@y = 1 / (1 + NMath.exp(-x))
|
7
|
-
end
|
8
|
-
|
9
|
-
def backward(dy)
|
10
|
-
dy * (1 - @y) * @y
|
11
|
-
end
|
12
|
-
end
|
13
|
-
|
14
|
-
|
15
|
-
class Tanh < Layers::Layer
|
16
|
-
def forward(x)
|
17
|
-
@y = NMath.tanh(x)
|
18
|
-
end
|
19
|
-
|
20
|
-
def backward(dy)
|
21
|
-
dy * (1 - @y**2)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
|
26
|
-
class Softsign < Layers::Layer
|
27
|
-
def forward(x)
|
28
|
-
@x = x
|
29
|
-
x / (1 + x.abs)
|
30
|
-
end
|
31
|
-
|
32
|
-
def backward(dy)
|
33
|
-
dy * (1 / (1 + @x.abs)**2)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
|
38
|
-
class Softplus < Layers::Layer
|
39
|
-
def forward(x)
|
40
|
-
@x = x
|
41
|
-
NMath.log(1 + NMath.exp(x))
|
42
|
-
end
|
43
|
-
|
44
|
-
def backward(dy)
|
45
|
-
dy * (1 / (1 + NMath.exp(-@x)))
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
|
50
|
-
class Swish < Layers::Layer
|
51
|
-
def forward(x)
|
52
|
-
@x = x
|
53
|
-
@y = x * (1 / (1 + NMath.exp(-x)))
|
54
|
-
end
|
55
|
-
|
56
|
-
def backward(dy)
|
57
|
-
dy * (@y + (1 / (1 + NMath.exp(-@x))) * (1 - @y))
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
|
62
|
-
class ReLU < Layers::Layer
|
63
|
-
def forward(x)
|
64
|
-
@x = x.clone
|
65
|
-
x[x < 0] = 0
|
66
|
-
x
|
67
|
-
end
|
68
|
-
|
69
|
-
def backward(dy)
|
70
|
-
@x[@x > 0] = 1
|
71
|
-
@x[@x <= 0] = 0
|
72
|
-
dy * @x
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
|
77
|
-
class LeakyReLU < Layers::Layer
|
78
|
-
# @return [Float] Return the alpha value.
|
79
|
-
attr_reader :alpha
|
80
|
-
|
81
|
-
def self.from_hash(hash)
|
82
|
-
self.new(hash[:alpha])
|
83
|
-
end
|
84
|
-
|
85
|
-
# @param [Float] alpha The slope when the output value is negative.
|
86
|
-
def initialize(alpha = 0.3)
|
87
|
-
@alpha = alpha
|
88
|
-
end
|
89
|
-
|
90
|
-
def forward(x)
|
91
|
-
@x = x.clone
|
92
|
-
a = Xumo::SFloat.ones(x.shape)
|
93
|
-
a[x <= 0] = @alpha
|
94
|
-
x * a
|
95
|
-
end
|
96
|
-
|
97
|
-
def backward(dy)
|
98
|
-
@x[@x > 0] = 1
|
99
|
-
@x[@x <= 0] = @alpha
|
100
|
-
dy * @x
|
101
|
-
end
|
102
|
-
|
103
|
-
def to_hash
|
104
|
-
{class: self.class.name, alpha: alpha}
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
|
109
|
-
class ELU < Layers::Layer
|
110
|
-
# @return [Float] Return the alpha value.
|
111
|
-
attr_reader :alpha
|
112
|
-
|
113
|
-
def self.from_hash(hash)
|
114
|
-
self.new(hash[:alpha])
|
115
|
-
end
|
116
|
-
|
117
|
-
# @param [Float] alpha The slope when the output value is negative.
|
118
|
-
def initialize(alpha = 1.0)
|
119
|
-
@alpha = alpha
|
120
|
-
end
|
121
|
-
|
122
|
-
def forward(x)
|
123
|
-
@x = x
|
124
|
-
x1 = Xumo::SFloat.zeros(x.shape)
|
125
|
-
x1[x >= 0] = 1
|
126
|
-
x1 *= x
|
127
|
-
x2 = Xumo::SFloat.zeros(x.shape)
|
128
|
-
x2[x < 0] = 1
|
129
|
-
x2 *= @alpha * NMath.exp(x) - @alpha
|
130
|
-
x1 + x2
|
131
|
-
end
|
132
|
-
|
133
|
-
def backward(dy)
|
134
|
-
dx = Xumo::SFloat.ones(@x.shape)
|
135
|
-
dx[@x < 0] = 0
|
136
|
-
dx2 = Xumo::SFloat.zeros(@x.shape)
|
137
|
-
dx2[@x < 0] = 1
|
138
|
-
dx2 *= @alpha * NMath.exp(@x)
|
139
|
-
dy * (dx + dx2)
|
140
|
-
end
|
141
|
-
|
142
|
-
def to_hash
|
143
|
-
{class: self.class.name, alpha: @alpha}
|
144
|
-
end
|
145
|
-
end
|
146
|
-
|
147
|
-
end
|
148
|
-
end
|
1
|
+
module DNN
|
2
|
+
module Activations
|
3
|
+
|
4
|
+
class Sigmoid < Layers::Layer
|
5
|
+
def forward(x)
|
6
|
+
@y = 1 / (1 + NMath.exp(-x))
|
7
|
+
end
|
8
|
+
|
9
|
+
def backward(dy)
|
10
|
+
dy * (1 - @y) * @y
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
|
15
|
+
class Tanh < Layers::Layer
|
16
|
+
def forward(x)
|
17
|
+
@y = NMath.tanh(x)
|
18
|
+
end
|
19
|
+
|
20
|
+
def backward(dy)
|
21
|
+
dy * (1 - @y**2)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
|
26
|
+
class Softsign < Layers::Layer
|
27
|
+
def forward(x)
|
28
|
+
@x = x
|
29
|
+
x / (1 + x.abs)
|
30
|
+
end
|
31
|
+
|
32
|
+
def backward(dy)
|
33
|
+
dy * (1 / (1 + @x.abs)**2)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
|
38
|
+
class Softplus < Layers::Layer
|
39
|
+
def forward(x)
|
40
|
+
@x = x
|
41
|
+
NMath.log(1 + NMath.exp(x))
|
42
|
+
end
|
43
|
+
|
44
|
+
def backward(dy)
|
45
|
+
dy * (1 / (1 + NMath.exp(-@x)))
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
|
50
|
+
class Swish < Layers::Layer
|
51
|
+
def forward(x)
|
52
|
+
@x = x
|
53
|
+
@y = x * (1 / (1 + NMath.exp(-x)))
|
54
|
+
end
|
55
|
+
|
56
|
+
def backward(dy)
|
57
|
+
dy * (@y + (1 / (1 + NMath.exp(-@x))) * (1 - @y))
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
|
62
|
+
class ReLU < Layers::Layer
|
63
|
+
def forward(x)
|
64
|
+
@x = x.clone
|
65
|
+
x[x < 0] = 0
|
66
|
+
x
|
67
|
+
end
|
68
|
+
|
69
|
+
def backward(dy)
|
70
|
+
@x[@x > 0] = 1
|
71
|
+
@x[@x <= 0] = 0
|
72
|
+
dy * @x
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
|
77
|
+
class LeakyReLU < Layers::Layer
|
78
|
+
# @return [Float] Return the alpha value.
|
79
|
+
attr_reader :alpha
|
80
|
+
|
81
|
+
def self.from_hash(hash)
|
82
|
+
self.new(hash[:alpha])
|
83
|
+
end
|
84
|
+
|
85
|
+
# @param [Float] alpha The slope when the output value is negative.
|
86
|
+
def initialize(alpha = 0.3)
|
87
|
+
@alpha = alpha
|
88
|
+
end
|
89
|
+
|
90
|
+
def forward(x)
|
91
|
+
@x = x.clone
|
92
|
+
a = Xumo::SFloat.ones(x.shape)
|
93
|
+
a[x <= 0] = @alpha
|
94
|
+
x * a
|
95
|
+
end
|
96
|
+
|
97
|
+
def backward(dy)
|
98
|
+
@x[@x > 0] = 1
|
99
|
+
@x[@x <= 0] = @alpha
|
100
|
+
dy * @x
|
101
|
+
end
|
102
|
+
|
103
|
+
def to_hash
|
104
|
+
{class: self.class.name, alpha: alpha}
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
|
109
|
+
class ELU < Layers::Layer
|
110
|
+
# @return [Float] Return the alpha value.
|
111
|
+
attr_reader :alpha
|
112
|
+
|
113
|
+
def self.from_hash(hash)
|
114
|
+
self.new(hash[:alpha])
|
115
|
+
end
|
116
|
+
|
117
|
+
# @param [Float] alpha The slope when the output value is negative.
|
118
|
+
def initialize(alpha = 1.0)
|
119
|
+
@alpha = alpha
|
120
|
+
end
|
121
|
+
|
122
|
+
def forward(x)
|
123
|
+
@x = x
|
124
|
+
x1 = Xumo::SFloat.zeros(x.shape)
|
125
|
+
x1[x >= 0] = 1
|
126
|
+
x1 *= x
|
127
|
+
x2 = Xumo::SFloat.zeros(x.shape)
|
128
|
+
x2[x < 0] = 1
|
129
|
+
x2 *= @alpha * NMath.exp(x) - @alpha
|
130
|
+
x1 + x2
|
131
|
+
end
|
132
|
+
|
133
|
+
def backward(dy)
|
134
|
+
dx = Xumo::SFloat.ones(@x.shape)
|
135
|
+
dx[@x < 0] = 0
|
136
|
+
dx2 = Xumo::SFloat.zeros(@x.shape)
|
137
|
+
dx2[@x < 0] = 1
|
138
|
+
dx2 *= @alpha * NMath.exp(@x)
|
139
|
+
dy * (dx + dx2)
|
140
|
+
end
|
141
|
+
|
142
|
+
def to_hash
|
143
|
+
{class: self.class.name, alpha: @alpha}
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
end
|
148
|
+
end
|
data/lib/dnn/core/cnn_layers.rb
CHANGED
@@ -1,464 +1,464 @@
|
|
1
|
-
module DNN
|
2
|
-
module Layers
|
3
|
-
# This module is used for convolution.
|
4
|
-
module Conv2D_Utils
|
5
|
-
private
|
6
|
-
|
7
|
-
# img[bsize, out_h, out_w, ch] to col[bsize * out_h * out_w, fil_h * fil_w * ch]
|
8
|
-
def im2col(img, out_h, out_w, fil_h, fil_w, strides)
|
9
|
-
bsize = img.shape[0]
|
10
|
-
ch = img.shape[3]
|
11
|
-
col = Xumo::SFloat.zeros(bsize, out_h, out_w, fil_h, fil_w, ch)
|
12
|
-
(0...fil_h).each do |i|
|
13
|
-
i_range = (i...(i + strides[0] * out_h)).step(strides[0]).to_a
|
14
|
-
(0...fil_w).each do |j|
|
15
|
-
j_range = (j...(j + strides[1] * out_w)).step(strides[1]).to_a
|
16
|
-
col[true, true, true, i, j, true] = img[true, i_range, j_range, true]
|
17
|
-
end
|
18
|
-
end
|
19
|
-
col.reshape(bsize * out_h * out_w, fil_h * fil_w * ch)
|
20
|
-
end
|
21
|
-
|
22
|
-
# col[bsize * out_h * out_w, fil_h * fil_w * ch] to img[bsize, out_h, out_w, ch]
|
23
|
-
def col2im(col, img_shape, out_h, out_w, fil_h, fil_w, strides)
|
24
|
-
bsize, img_h, img_w, ch = img_shape
|
25
|
-
col = col.reshape(bsize, out_h, out_w, fil_h, fil_w, ch)
|
26
|
-
img = Xumo::SFloat.zeros(bsize, img_h, img_w, ch)
|
27
|
-
(0...fil_h).each do |i|
|
28
|
-
i_range = (i...(i + strides[0] * out_h)).step(strides[0]).to_a
|
29
|
-
(0...fil_w).each do |j|
|
30
|
-
j_range = (j...(j + strides[1] * out_w)).step(strides[1]).to_a
|
31
|
-
img[true, i_range, j_range, true] += col[true, true, true, i, j, true]
|
32
|
-
end
|
33
|
-
end
|
34
|
-
img
|
35
|
-
end
|
36
|
-
|
37
|
-
def zero_padding(img, pad)
|
38
|
-
bsize, img_h, img_w, ch = img.shape
|
39
|
-
img2 = Xumo::SFloat.zeros(bsize, img_h + pad[0], img_w + pad[1], ch)
|
40
|
-
i_begin = pad[0] / 2
|
41
|
-
i_end = i_begin + img_h
|
42
|
-
j_begin = pad[1] / 2
|
43
|
-
j_end = j_begin + img_w
|
44
|
-
img2[true, i_begin...i_end, j_begin...j_end, true] = img
|
45
|
-
img2
|
46
|
-
end
|
47
|
-
|
48
|
-
def zero_padding_bwd(img, pad)
|
49
|
-
i_begin = pad[0] / 2
|
50
|
-
i_end = img.shape[1] - (pad[0] / 2.0).round
|
51
|
-
j_begin = pad[1] / 2
|
52
|
-
j_end = img.shape[2] - (pad[1] / 2.0).round
|
53
|
-
img[true, i_begin...i_end, j_begin...j_end, true]
|
54
|
-
end
|
55
|
-
|
56
|
-
def calc_conv2d_out_size(prev_h, prev_w, fil_h, fil_w, pad_h, pad_w, strides)
|
57
|
-
out_h = (prev_h + pad_h - fil_h) / strides[0] + 1
|
58
|
-
out_w = (prev_w + pad_w - fil_w) / strides[1] + 1
|
59
|
-
[out_h, out_w]
|
60
|
-
end
|
61
|
-
|
62
|
-
def calc_deconv2d_out_size(prev_h, prev_w, fil_h, fil_w, pad_h, pad_w, strides)
|
63
|
-
out_h = (prev_h - 1) * strides[0] + fil_h - pad_h
|
64
|
-
out_w = (prev_w - 1) * strides[1] + fil_w - pad_w
|
65
|
-
[out_h, out_w]
|
66
|
-
end
|
67
|
-
|
68
|
-
def calc_padding_size(prev_h, prev_w, out_h, out_w, strides)
|
69
|
-
pad_h = (prev_h.to_f / strides[0]).ceil - out_h
|
70
|
-
pad_w = (prev_w.to_f / strides[1]).ceil - out_w
|
71
|
-
[pad_h, pad_w]
|
72
|
-
end
|
73
|
-
|
74
|
-
def calc_padding_size(prev_h, prev_w, out_h, out_w, strides)
|
75
|
-
pad_h = ((prev_h.to_f / strides[0]).ceil - out_h) * strides[0]
|
76
|
-
pad_w = ((prev_w.to_f / strides[1]).ceil - out_w) * strides[1]
|
77
|
-
[pad_h, pad_w]
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
|
82
|
-
class Conv2D < Connection
|
83
|
-
include Conv2D_Utils
|
84
|
-
|
85
|
-
# @return [Integer] number of filters.
|
86
|
-
attr_reader :num_filters
|
87
|
-
# @return [Array] Return filter size. filter size is of the form [height, width].
|
88
|
-
attr_reader :filter_size
|
89
|
-
# @return [Array] Return stride length. stride length is of the form [height, width].
|
90
|
-
attr_reader :strides
|
91
|
-
# @return [Array | Bool] Return padding size or whether to padding.
|
92
|
-
attr_reader :padding
|
93
|
-
|
94
|
-
def self.from_hash(hash)
|
95
|
-
Conv2D.new(hash[:num_filters], hash[:filter_size],
|
96
|
-
weight_initializer: Utils.from_hash(hash[:weight_initializer]),
|
97
|
-
bias_initializer: Utils.from_hash(hash[:bias_initializer]),
|
98
|
-
weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
|
99
|
-
bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
|
100
|
-
use_bias: hash[:use_bias],
|
101
|
-
strides: hash[:strides],
|
102
|
-
padding: hash[:padding])
|
103
|
-
end
|
104
|
-
|
105
|
-
# @param [Integer] num_filters Number of filters.
|
106
|
-
# @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
|
107
|
-
# @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
|
108
|
-
# @param [Array | Bool] padding Padding size or whether to padding. Padding size is of the form [height, width].
|
109
|
-
def initialize(num_filters, filter_size,
|
110
|
-
weight_initializer: Initializers::RandomNormal.new,
|
111
|
-
bias_initializer: Initializers::Zeros.new,
|
112
|
-
weight_regularizer: nil,
|
113
|
-
bias_regularizer: nil,
|
114
|
-
use_bias: true,
|
115
|
-
strides: 1,
|
116
|
-
padding: false)
|
117
|
-
super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
|
118
|
-
weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
|
119
|
-
@num_filters = num_filters
|
120
|
-
@filter_size = filter_size.is_a?(Integer) ? [filter_size, filter_size] : filter_size
|
121
|
-
@strides = strides.is_a?(Integer) ? [strides, strides] : strides
|
122
|
-
@padding = padding.is_a?(Integer) ? [padding, padding] : padding
|
123
|
-
end
|
124
|
-
|
125
|
-
def build(input_shape)
|
126
|
-
super
|
127
|
-
prev_h, prev_w, num_prev_filter = *input_shape
|
128
|
-
@weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * num_prev_filter, @num_filters)
|
129
|
-
@bias.data = Xumo::SFloat.new(@num_filters) if @bias
|
130
|
-
init_weight_and_bias
|
131
|
-
if @padding == true
|
132
|
-
out_h, out_w = calc_conv2d_out_size(prev_h, prev_w, *@filter_size, 0, 0, @strides)
|
133
|
-
@pad_size = calc_padding_size(prev_h, prev_w, out_h, out_w, @strides)
|
134
|
-
elsif @padding.is_a?(Array)
|
135
|
-
@pad_size = @padding
|
136
|
-
else
|
137
|
-
@pad_size = [0, 0]
|
138
|
-
end
|
139
|
-
@out_size = calc_conv2d_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
|
140
|
-
end
|
141
|
-
|
142
|
-
def forward(x)
|
143
|
-
x = zero_padding(x, @pad_size) if @padding
|
144
|
-
@x_shape = x.shape
|
145
|
-
@col = im2col(x, *@out_size, *@filter_size, @strides)
|
146
|
-
y = @col.dot(@weight.data)
|
147
|
-
y += @bias.data if @bias
|
148
|
-
y.reshape(x.shape[0], *@out_size, y.shape[3])
|
149
|
-
end
|
150
|
-
|
151
|
-
def backward(dy)
|
152
|
-
dy = dy.reshape(dy.shape[0..2].reduce(:*), dy.shape[3])
|
153
|
-
if @trainable
|
154
|
-
@weight.grad += @col.transpose.dot(dy)
|
155
|
-
@bias.grad += dy.sum(0) if @bias
|
156
|
-
end
|
157
|
-
dcol = dy.dot(@weight.data.transpose)
|
158
|
-
dx = col2im(dcol, @x_shape, *@out_size, *@filter_size, @strides)
|
159
|
-
@padding ? zero_padding_bwd(dx, @pad_size) : dx
|
160
|
-
end
|
161
|
-
|
162
|
-
def output_shape
|
163
|
-
[*@out_size, @num_filters]
|
164
|
-
end
|
165
|
-
|
166
|
-
# @return [Numo::SFloat] Convert weight to filter and return.
|
167
|
-
def filters
|
168
|
-
num_prev_filter = @input_shape[2]
|
169
|
-
@weight.data.reshape(*@filter_size, num_prev_filter, @num_filters)
|
170
|
-
end
|
171
|
-
|
172
|
-
# @param [Numo::SFloat] filters Convert weight to filters and set.
|
173
|
-
def filters=(filters)
|
174
|
-
num_prev_filter = @input_shape[2]
|
175
|
-
@weight.data = filters.reshape(@filter_size.reduce(:*) * num_prev_filter, @num_filters)
|
176
|
-
end
|
177
|
-
|
178
|
-
def to_hash
|
179
|
-
super({num_filters: @num_filters,
|
180
|
-
filter_size: @filter_size,
|
181
|
-
strides: @strides,
|
182
|
-
padding: @padding})
|
183
|
-
end
|
184
|
-
end
|
185
|
-
|
186
|
-
|
187
|
-
class Conv2D_Transpose < Connection
|
188
|
-
include Conv2D_Utils
|
189
|
-
|
190
|
-
# @return [Integer] number of filters.
|
191
|
-
attr_reader :num_filters
|
192
|
-
# @return [Array] Return filter size. filter size is of the form [height, width].
|
193
|
-
attr_reader :filter_size
|
194
|
-
# @return [Array] Return stride length. stride length is of the form [height, width].
|
195
|
-
attr_reader :strides
|
196
|
-
# @return [Array] Return padding size.
|
197
|
-
attr_reader :padding
|
198
|
-
|
199
|
-
def self.from_hash(hash)
|
200
|
-
Conv2D_Transpose.new(hash[:num_filters], hash[:filter_size],
|
201
|
-
weight_initializer: Utils.from_hash(hash[:weight_initializer]),
|
202
|
-
bias_initializer: Utils.from_hash(hash[:bias_initializer]),
|
203
|
-
weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
|
204
|
-
bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
|
205
|
-
use_bias: hash[:use_bias],
|
206
|
-
strides: hash[:strides],
|
207
|
-
padding: hash[:padding])
|
208
|
-
end
|
209
|
-
|
210
|
-
# @param [Integer] num_filters Number of filters.
|
211
|
-
# @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
|
212
|
-
# @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
|
213
|
-
# @param [Array] padding Padding size. Padding size is of the form [height, width].
|
214
|
-
def initialize(num_filters, filter_size,
|
215
|
-
weight_initializer: Initializers::RandomNormal.new,
|
216
|
-
bias_initializer: Initializers::Zeros.new,
|
217
|
-
weight_regularizer: nil,
|
218
|
-
bias_regularizer: nil,
|
219
|
-
use_bias: true,
|
220
|
-
strides: 1,
|
221
|
-
padding: false)
|
222
|
-
super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
|
223
|
-
weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
|
224
|
-
@num_filters = num_filters
|
225
|
-
@filter_size = filter_size.is_a?(Integer) ? [filter_size, filter_size] : filter_size
|
226
|
-
@strides = strides.is_a?(Integer) ? [strides, strides] : strides
|
227
|
-
@padding = padding.is_a?(Integer) ? [padding, padding] : padding
|
228
|
-
end
|
229
|
-
|
230
|
-
def build(input_shape)
|
231
|
-
super
|
232
|
-
prev_h, prev_w, num_prev_filter = *input_shape
|
233
|
-
@weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * @num_filters, num_prev_filter)
|
234
|
-
@weight_initializer.init_param(self, @weight)
|
235
|
-
@weight_regularizer.param = @weight if @weight_regularizer
|
236
|
-
if @bias
|
237
|
-
@bias.data = Xumo::SFloat.new(@num_filters)
|
238
|
-
@bias_initializer.init_param(self, @bias)
|
239
|
-
@bias_regularizer.param = @bias if @bias_regularizer
|
240
|
-
end
|
241
|
-
if @padding == true
|
242
|
-
out_h, out_w = calc_deconv2d_out_size(prev_h, prev_w, *@filter_size, 0, 0, @strides)
|
243
|
-
@pad_size = calc_padding_size(out_h, out_w, prev_h, prev_w, @strides)
|
244
|
-
elsif @padding.is_a?(Array)
|
245
|
-
@pad_size = @padding
|
246
|
-
else
|
247
|
-
@pad_size = [0, 0]
|
248
|
-
end
|
249
|
-
@out_size = calc_deconv2d_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
|
250
|
-
end
|
251
|
-
|
252
|
-
def forward(x)
|
253
|
-
bsize = x.shape[0]
|
254
|
-
x = x.reshape(x.shape[0..2].reduce(:*), x.shape[3])
|
255
|
-
@x = x
|
256
|
-
col = x.dot(@weight.data.transpose)
|
257
|
-
img_shape = [bsize, @out_size[0] + @pad_size[0], @out_size[1] + @pad_size[1], @num_filters]
|
258
|
-
y = col2im(col, img_shape, *input_shape[0..1], *@filter_size, @strides)
|
259
|
-
y += @bias.data if @bias
|
260
|
-
@padding ? zero_padding_bwd(y, @pad_size) : y
|
261
|
-
end
|
262
|
-
|
263
|
-
def backward(dy)
|
264
|
-
dy = zero_padding(dy, @pad_size) if @padding
|
265
|
-
col = im2col(dy, *input_shape[0..1], *@filter_size, @strides)
|
266
|
-
if @trainable
|
267
|
-
@weight.grad += col.transpose.dot(@x)
|
268
|
-
@bias.grad += col.reshape(col.shape[0] * @filter_size.reduce(:*), @num_filters).sum(0) if @bias
|
269
|
-
end
|
270
|
-
dx = col.dot(@weight.data)
|
271
|
-
dx.reshape(dy.shape[0], *input_shape)
|
272
|
-
end
|
273
|
-
|
274
|
-
def output_shape
|
275
|
-
[*@out_size, @num_filters]
|
276
|
-
end
|
277
|
-
|
278
|
-
# @return [Numo::SFloat] Convert weight to filter and return.
|
279
|
-
def filters
|
280
|
-
num_prev_filter = @input_shape[2]
|
281
|
-
@weight.data.reshape(*@filter_size, @num_filters, num_prev_filter)
|
282
|
-
end
|
283
|
-
|
284
|
-
# @param [Numo::SFloat] filters Convert weight to filters and set.
|
285
|
-
def filters=(filters)
|
286
|
-
num_prev_filter = @input_shape[2]
|
287
|
-
@weight.data = filters.reshape(@filter_size.reduce(:*) * @num_filters, num_prev_filter)
|
288
|
-
end
|
289
|
-
|
290
|
-
def to_hash
|
291
|
-
super({num_filters: @num_filters,
|
292
|
-
filter_size: @filter_size,
|
293
|
-
strides: @strides,
|
294
|
-
padding: @padding})
|
295
|
-
end
|
296
|
-
end
|
297
|
-
|
298
|
-
|
299
|
-
# Super class of all pooling2D class.
|
300
|
-
class Pool2D < Layer
|
301
|
-
include Conv2D_Utils
|
302
|
-
|
303
|
-
# @return [Array] Return pooling size. Pooling size is of the form [height, width].
|
304
|
-
attr_reader :pool_size
|
305
|
-
# @return [Array] Return stride length. Stride length is of the form [height, width].
|
306
|
-
attr_reader :strides
|
307
|
-
# @return [Array | Bool] Return padding size or whether to padding.
|
308
|
-
attr_reader :padding
|
309
|
-
|
310
|
-
def self.from_hash(pool2d_class, hash)
|
311
|
-
pool2d_class.new(hash[:pool_size], strides: hash[:strides], padding: hash[:padding])
|
312
|
-
end
|
313
|
-
|
314
|
-
# @param [Array | Integer] pool_size Pooling size. Pooling size is of the form [height, width].
|
315
|
-
# @param [Array | Integer | NilClass] strides stride length. Stride length is of the form [height, width].
|
316
|
-
# If you set nil, treat pool_size as strides.
|
317
|
-
# @param [Array | Bool] padding Padding size or whether to padding. Padding size is of the form [height, width].
|
318
|
-
def initialize(pool_size, strides: nil, padding: false)
|
319
|
-
super()
|
320
|
-
@pool_size = pool_size.is_a?(Integer) ? [pool_size, pool_size] : pool_size
|
321
|
-
@strides = if strides
|
322
|
-
strides.is_a?(Integer) ? [strides, strides] : strides
|
323
|
-
else
|
324
|
-
@pool_size.clone
|
325
|
-
end
|
326
|
-
@padding = padding.is_a?(Integer) ? [padding, padding] : padding
|
327
|
-
end
|
328
|
-
|
329
|
-
def build(input_shape)
|
330
|
-
super
|
331
|
-
prev_h, prev_w = input_shape[0..1]
|
332
|
-
@num_channel = input_shape[2]
|
333
|
-
if @padding == true
|
334
|
-
out_h, out_w = calc_conv2d_out_size(prev_h, prev_w, *@pool_size, 0, 0, @strides)
|
335
|
-
@pad_size = calc_padding_size(prev_h, prev_w, out_h, out_w, @strides)
|
336
|
-
elsif @padding.is_a?(Array)
|
337
|
-
@pad_size = @padding
|
338
|
-
else
|
339
|
-
@pad_size = [0, 0]
|
340
|
-
end
|
341
|
-
@out_size = calc_conv2d_out_size(prev_h, prev_w, *@pool_size, *@pad_size, @strides)
|
342
|
-
end
|
343
|
-
|
344
|
-
def output_shape
|
345
|
-
[*@out_size, @num_channel]
|
346
|
-
end
|
347
|
-
|
348
|
-
def to_hash
|
349
|
-
super({pool_size: @pool_size,
|
350
|
-
strides: @strides,
|
351
|
-
padding: @padding})
|
352
|
-
end
|
353
|
-
end
|
354
|
-
|
355
|
-
|
356
|
-
class MaxPool2D < Pool2D
|
357
|
-
def self.from_hash(hash)
|
358
|
-
Pool2D.from_hash(self, hash)
|
359
|
-
end
|
360
|
-
|
361
|
-
def forward(x)
|
362
|
-
x = zero_padding(x, @pad_size) if @padding
|
363
|
-
@x_shape = x.shape
|
364
|
-
col = im2col(x, *@out_size, *@pool_size, @strides)
|
365
|
-
col = col.reshape(x.shape[0] * @out_size.reduce(:*), @pool_size.reduce(:*), x.shape[3]).transpose(0, 2, 1)
|
366
|
-
.reshape(x.shape[0] * @out_size.reduce(:*) * x.shape[3], @pool_size.reduce(:*))
|
367
|
-
@max_index = col.max_index(1)
|
368
|
-
col.max(1).reshape(x.shape[0], *@out_size, x.shape[3])
|
369
|
-
end
|
370
|
-
|
371
|
-
def backward(dy)
|
372
|
-
dmax = Xumo::SFloat.zeros(dy.size * @pool_size.reduce(:*))
|
373
|
-
dmax[@max_index] = dy.flatten
|
374
|
-
dcol = dmax.reshape(dy.shape[0..2].reduce(:*), @pool_size.reduce(:*) * dy.shape[3])
|
375
|
-
dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides)
|
376
|
-
@padding ? zero_padding_bwd(dx, @pad_size) : dx
|
377
|
-
end
|
378
|
-
end
|
379
|
-
|
380
|
-
|
381
|
-
class AvgPool2D < Pool2D
|
382
|
-
def self.from_hash(hash)
|
383
|
-
Pool2D.from_hash(self, hash)
|
384
|
-
end
|
385
|
-
|
386
|
-
def forward(x)
|
387
|
-
x = zero_padding(x, @pad_size) if @padding
|
388
|
-
@x_shape = x.shape
|
389
|
-
col = im2col(x, *@out_size, *@pool_size, @strides)
|
390
|
-
col = col.reshape(x.shape[0] * @out_size.reduce(:*), @pool_size.reduce(:*), x.shape[3]).transpose(0, 2, 1)
|
391
|
-
.reshape(x.shape[0] * @out_size.reduce(:*) * x.shape[3], @pool_size.reduce(:*))
|
392
|
-
col.mean(1).reshape(x.shape[0], *@out_size, x.shape[3])
|
393
|
-
end
|
394
|
-
|
395
|
-
def backward(dy)
|
396
|
-
row_length = @pool_size.reduce(:*)
|
397
|
-
dy /= row_length
|
398
|
-
davg = Xumo::SFloat.zeros(dy.size, row_length)
|
399
|
-
row_length.times do |i|
|
400
|
-
davg[true, i] = dy.flatten
|
401
|
-
end
|
402
|
-
dcol = davg.reshape(dy.shape[0..2].reduce(:*), dy.shape[3] * @pool_size.reduce(:*))
|
403
|
-
dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides)
|
404
|
-
@padding ? zero_padding_bwd(dx, @pad_size) : dx
|
405
|
-
end
|
406
|
-
end
|
407
|
-
|
408
|
-
|
409
|
-
class UnPool2D < Layer
|
410
|
-
include Conv2D_Utils
|
411
|
-
|
412
|
-
# @return [Array] Return unpooling size. unpooling size is of the form [height, width].
|
413
|
-
attr_reader :unpool_size
|
414
|
-
|
415
|
-
# @param [Array or Integer] unpool_size Unpooling size. unpooling size is of the form [height, width].
|
416
|
-
def initialize(unpool_size)
|
417
|
-
super()
|
418
|
-
@unpool_size = unpool_size.is_a?(Integer) ? [unpool_size, unpool_size] : unpool_size
|
419
|
-
end
|
420
|
-
|
421
|
-
def self.from_hash(hash)
|
422
|
-
UnPool2D.new(hash[:unpool_size])
|
423
|
-
end
|
424
|
-
|
425
|
-
def build(input_shape)
|
426
|
-
super
|
427
|
-
prev_h, prev_w = input_shape[0..1]
|
428
|
-
unpool_h, unpool_w = @unpool_size
|
429
|
-
out_h = prev_h * unpool_h
|
430
|
-
out_w = prev_w * unpool_w
|
431
|
-
@out_size = [out_h, out_w]
|
432
|
-
@num_channel = input_shape[2]
|
433
|
-
end
|
434
|
-
|
435
|
-
def forward(x)
|
436
|
-
@x_shape = x.shape
|
437
|
-
unpool_h, unpool_w = @unpool_size
|
438
|
-
x2 = Xumo::SFloat.zeros(x.shape[0], x.shape[1], unpool_h, x.shape[2], unpool_w, @num_channel)
|
439
|
-
unpool_h.times do |i|
|
440
|
-
unpool_w.times do |j|
|
441
|
-
x2[true, true, i, true, j, true] = x
|
442
|
-
end
|
443
|
-
end
|
444
|
-
x2.reshape(x.shape[0], *@out_size, x.shape[3])
|
445
|
-
end
|
446
|
-
|
447
|
-
def backward(dy)
|
448
|
-
in_size = input_shape[0..1]
|
449
|
-
col = im2col(dy, *input_shape[0..1], *@unpool_size, @unpool_size)
|
450
|
-
col = col.reshape(dy.shape[0] * in_size.reduce(:*), @unpool_size.reduce(:*), dy.shape[3]).transpose(0, 2, 1)
|
451
|
-
.reshape(dy.shape[0] * in_size.reduce(:*) * dy.shape[3], @unpool_size.reduce(:*))
|
452
|
-
col.sum(1).reshape(dy.shape[0], *in_size, dy.shape[3])
|
453
|
-
end
|
454
|
-
|
455
|
-
def output_shape
|
456
|
-
[*@out_size, @num_channel]
|
457
|
-
end
|
458
|
-
|
459
|
-
def to_hash
|
460
|
-
super({unpool_size: @unpool_size})
|
461
|
-
end
|
462
|
-
end
|
463
|
-
end
|
464
|
-
end
|
1
|
+
module DNN
|
2
|
+
module Layers
|
3
|
+
# This module is used for convolution.
|
4
|
+
module Conv2D_Utils
|
5
|
+
private
|
6
|
+
|
7
|
+
# img[bsize, out_h, out_w, ch] to col[bsize * out_h * out_w, fil_h * fil_w * ch]
|
8
|
+
def im2col(img, out_h, out_w, fil_h, fil_w, strides)
|
9
|
+
bsize = img.shape[0]
|
10
|
+
ch = img.shape[3]
|
11
|
+
col = Xumo::SFloat.zeros(bsize, out_h, out_w, fil_h, fil_w, ch)
|
12
|
+
(0...fil_h).each do |i|
|
13
|
+
i_range = (i...(i + strides[0] * out_h)).step(strides[0]).to_a
|
14
|
+
(0...fil_w).each do |j|
|
15
|
+
j_range = (j...(j + strides[1] * out_w)).step(strides[1]).to_a
|
16
|
+
col[true, true, true, i, j, true] = img[true, i_range, j_range, true]
|
17
|
+
end
|
18
|
+
end
|
19
|
+
col.reshape(bsize * out_h * out_w, fil_h * fil_w * ch)
|
20
|
+
end
|
21
|
+
|
22
|
+
# col[bsize * out_h * out_w, fil_h * fil_w * ch] to img[bsize, out_h, out_w, ch]
|
23
|
+
def col2im(col, img_shape, out_h, out_w, fil_h, fil_w, strides)
|
24
|
+
bsize, img_h, img_w, ch = img_shape
|
25
|
+
col = col.reshape(bsize, out_h, out_w, fil_h, fil_w, ch)
|
26
|
+
img = Xumo::SFloat.zeros(bsize, img_h, img_w, ch)
|
27
|
+
(0...fil_h).each do |i|
|
28
|
+
i_range = (i...(i + strides[0] * out_h)).step(strides[0]).to_a
|
29
|
+
(0...fil_w).each do |j|
|
30
|
+
j_range = (j...(j + strides[1] * out_w)).step(strides[1]).to_a
|
31
|
+
img[true, i_range, j_range, true] += col[true, true, true, i, j, true]
|
32
|
+
end
|
33
|
+
end
|
34
|
+
img
|
35
|
+
end
|
36
|
+
|
37
|
+
def zero_padding(img, pad)
|
38
|
+
bsize, img_h, img_w, ch = img.shape
|
39
|
+
img2 = Xumo::SFloat.zeros(bsize, img_h + pad[0], img_w + pad[1], ch)
|
40
|
+
i_begin = pad[0] / 2
|
41
|
+
i_end = i_begin + img_h
|
42
|
+
j_begin = pad[1] / 2
|
43
|
+
j_end = j_begin + img_w
|
44
|
+
img2[true, i_begin...i_end, j_begin...j_end, true] = img
|
45
|
+
img2
|
46
|
+
end
|
47
|
+
|
48
|
+
def zero_padding_bwd(img, pad)
|
49
|
+
i_begin = pad[0] / 2
|
50
|
+
i_end = img.shape[1] - (pad[0] / 2.0).round
|
51
|
+
j_begin = pad[1] / 2
|
52
|
+
j_end = img.shape[2] - (pad[1] / 2.0).round
|
53
|
+
img[true, i_begin...i_end, j_begin...j_end, true]
|
54
|
+
end
|
55
|
+
|
56
|
+
def calc_conv2d_out_size(prev_h, prev_w, fil_h, fil_w, pad_h, pad_w, strides)
|
57
|
+
out_h = (prev_h + pad_h - fil_h) / strides[0] + 1
|
58
|
+
out_w = (prev_w + pad_w - fil_w) / strides[1] + 1
|
59
|
+
[out_h, out_w]
|
60
|
+
end
|
61
|
+
|
62
|
+
def calc_deconv2d_out_size(prev_h, prev_w, fil_h, fil_w, pad_h, pad_w, strides)
|
63
|
+
out_h = (prev_h - 1) * strides[0] + fil_h - pad_h
|
64
|
+
out_w = (prev_w - 1) * strides[1] + fil_w - pad_w
|
65
|
+
[out_h, out_w]
|
66
|
+
end
|
67
|
+
|
68
|
+
def calc_padding_size(prev_h, prev_w, out_h, out_w, strides)
|
69
|
+
pad_h = (prev_h.to_f / strides[0]).ceil - out_h
|
70
|
+
pad_w = (prev_w.to_f / strides[1]).ceil - out_w
|
71
|
+
[pad_h, pad_w]
|
72
|
+
end
|
73
|
+
|
74
|
+
def calc_padding_size(prev_h, prev_w, out_h, out_w, strides)
|
75
|
+
pad_h = ((prev_h.to_f / strides[0]).ceil - out_h) * strides[0]
|
76
|
+
pad_w = ((prev_w.to_f / strides[1]).ceil - out_w) * strides[1]
|
77
|
+
[pad_h, pad_w]
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
|
82
|
+
# 2D convolution layer.
#
# The input is unfolded with im2col and multiplied by a 2-D weight matrix of
# shape [filter_h * filter_w * input_channels, num_filters].
class Conv2D < Connection
  include Conv2D_Utils

  # @return [Integer] number of filters.
  attr_reader :num_filters
  # @return [Array] Return filter size. filter size is of the form [height, width].
  attr_reader :filter_size
  # @return [Array] Return stride length. stride length is of the form [height, width].
  attr_reader :strides
  # @return [Array | Bool] Return padding size or whether to padding.
  attr_reader :padding

  # Create a Conv2D layer from a settings hash.
  # @param [Hash] hash Settings read through the keys :num_filters, :filter_size,
  #   :weight_initializer, :bias_initializer, :weight_regularizer, :bias_regularizer,
  #   :use_bias, :strides and :padding.
  # @return [Conv2D]
  def self.from_hash(hash)
    Conv2D.new(hash[:num_filters], hash[:filter_size],
               weight_initializer: Utils.from_hash(hash[:weight_initializer]),
               bias_initializer: Utils.from_hash(hash[:bias_initializer]),
               weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
               bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
               use_bias: hash[:use_bias],
               strides: hash[:strides],
               padding: hash[:padding])
  end

  # @param [Integer] num_filters Number of filters.
  # @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
  # @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
  # @param [Array | Integer | Bool] padding Padding size or whether to pad.
  #   Padding size is of the form [height, width]; an Integer is used for both axes.
  def initialize(num_filters, filter_size,
                 weight_initializer: Initializers::RandomNormal.new,
                 bias_initializer: Initializers::Zeros.new,
                 weight_regularizer: nil,
                 bias_regularizer: nil,
                 use_bias: true,
                 strides: 1,
                 padding: false)
    super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
          weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
    @num_filters = num_filters
    # A scalar is expanded to the same value for height and width.
    @filter_size = filter_size.is_a?(Integer) ? [filter_size, filter_size] : filter_size
    @strides = strides.is_a?(Integer) ? [strides, strides] : strides
    @padding = padding.is_a?(Integer) ? [padding, padding] : padding
  end

  # Allocate the parameters and work out the padding and output sizes.
  # @param [Array] input_shape Input shape of the form [height, width, channels].
  def build(input_shape)
    super
    prev_h, prev_w, num_prev_filter = *input_shape
    @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * num_prev_filter, @num_filters)
    @bias.data = Xumo::SFloat.new(@num_filters) if @bias
    init_weight_and_bias
    if @padding == true
      # Automatic padding: derive it from the output size obtained without padding.
      out_h, out_w = calc_conv2d_out_size(prev_h, prev_w, *@filter_size, 0, 0, @strides)
      @pad_size = calc_padding_size(prev_h, prev_w, out_h, out_w, @strides)
    elsif @padding.is_a?(Array)
      @pad_size = @padding
    else
      @pad_size = [0, 0]
    end
    @out_size = calc_conv2d_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
  end

  # Forward pass. The padded input shape and the im2col matrix are kept for #backward.
  # @param x Input batch; axis 0 is used as the batch size.
  # @return Output reshaped to [batch, out_h, out_w, num_filters].
  def forward(x)
    x = zero_padding(x, @pad_size) if @padding
    @x_shape = x.shape
    @col = im2col(x, *@out_size, *@filter_size, @strides)
    y = @col.dot(@weight.data)
    y += @bias.data if @bias
    # y is the 2-D product of @col and the weight matrix, so it has no axis 3;
    # state the channel count explicitly instead of indexing a missing axis.
    y.reshape(x.shape[0], *@out_size, @num_filters)
  end

  # Backward pass. Accumulates the weight / bias gradients when the layer is trainable.
  # @param dy Gradient of the output, indexed as [batch, out_h, out_w, num_filters].
  # @return Gradient with respect to the (unpadded) input.
  def backward(dy)
    # Flatten every axis except the last so dy lines up row-for-row with @col.
    dy = dy.reshape(dy.shape[0..2].reduce(:*), dy.shape[3])
    if @trainable
      @weight.grad += @col.transpose.dot(dy)
      @bias.grad += dy.sum(0) if @bias
    end
    dcol = dy.dot(@weight.data.transpose)
    dx = col2im(dcol, @x_shape, *@out_size, *@filter_size, @strides)
    @padding ? zero_padding_bwd(dx, @pad_size) : dx
  end

  # @return [Array] Output shape of the form [height, width, num_filters].
  def output_shape
    [*@out_size, @num_filters]
  end

  # @return [Numo::SFloat] Weight reshaped to [filter_h, filter_w, input_channels, num_filters].
  def filters
    num_prev_filter = @input_shape[2]
    @weight.data.reshape(*@filter_size, num_prev_filter, @num_filters)
  end

  # @param [Numo::SFloat] filters Filters to flatten into the 2-D weight matrix and set.
  def filters=(filters)
    num_prev_filter = @input_shape[2]
    @weight.data = filters.reshape(@filter_size.reduce(:*) * num_prev_filter, @num_filters)
  end

  # @return The result of the superclass's to_hash, given this layer's own settings.
  def to_hash
    super({num_filters: @num_filters,
           filter_size: @filter_size,
           strides: @strides,
           padding: @padding})
  end
end
|
185
|
+
|
186
|
+
|
187
|
+
# 2D transposed convolution layer.
#
# The input is multiplied by the transposed weight matrix and folded back into
# an image with col2im. The weight matrix has shape
# [filter_h * filter_w * num_filters, input_channels].
class Conv2D_Transpose < Connection
  include Conv2D_Utils

  # @return [Integer] number of filters.
  attr_reader :num_filters
  # @return [Array] Return filter size. filter size is of the form [height, width].
  attr_reader :filter_size
  # @return [Array] Return stride length. stride length is of the form [height, width].
  attr_reader :strides
  # @return [Array | Bool] Return padding size or whether to padding.
  attr_reader :padding

  # Create a Conv2D_Transpose layer from a settings hash.
  # @param [Hash] hash Settings read through the keys :num_filters, :filter_size,
  #   :weight_initializer, :bias_initializer, :weight_regularizer, :bias_regularizer,
  #   :use_bias, :strides and :padding.
  # @return [Conv2D_Transpose]
  def self.from_hash(hash)
    Conv2D_Transpose.new(hash[:num_filters], hash[:filter_size],
                         weight_initializer: Utils.from_hash(hash[:weight_initializer]),
                         bias_initializer: Utils.from_hash(hash[:bias_initializer]),
                         weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
                         bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
                         use_bias: hash[:use_bias],
                         strides: hash[:strides],
                         padding: hash[:padding])
  end

  # @param [Integer] num_filters Number of filters.
  # @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
  # @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
  # @param [Array | Integer | Bool] padding Padding size or whether to pad.
  #   Padding size is of the form [height, width]; an Integer is used for both axes.
  def initialize(num_filters, filter_size,
                 weight_initializer: Initializers::RandomNormal.new,
                 bias_initializer: Initializers::Zeros.new,
                 weight_regularizer: nil,
                 bias_regularizer: nil,
                 use_bias: true,
                 strides: 1,
                 padding: false)
    super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
          weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
    @num_filters = num_filters
    # A scalar is expanded to the same value for height and width.
    @filter_size = filter_size.is_a?(Integer) ? [filter_size, filter_size] : filter_size
    @strides = strides.is_a?(Integer) ? [strides, strides] : strides
    @padding = padding.is_a?(Integer) ? [padding, padding] : padding
  end

  # Allocate and initialize the parameters, then work out the padding and output sizes.
  # @param [Array] input_shape Input shape of the form [height, width, channels].
  def build(input_shape)
    super
    prev_h, prev_w, num_prev_filter = *input_shape
    @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * @num_filters, num_prev_filter)
    @weight_initializer.init_param(self, @weight)
    @weight_regularizer.param = @weight if @weight_regularizer
    if @bias
      @bias.data = Xumo::SFloat.new(@num_filters)
      @bias_initializer.init_param(self, @bias)
      @bias_regularizer.param = @bias if @bias_regularizer
    end
    if @padding == true
      # Automatic padding: derive it from the output size obtained without padding.
      # Note the size pairs are passed in the opposite order to Conv2D#build.
      out_h, out_w = calc_deconv2d_out_size(prev_h, prev_w, *@filter_size, 0, 0, @strides)
      @pad_size = calc_padding_size(out_h, out_w, prev_h, prev_w, @strides)
    elsif @padding.is_a?(Array)
      @pad_size = @padding
    else
      @pad_size = [0, 0]
    end
    @out_size = calc_deconv2d_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
  end

  # Forward pass. The flattened input is kept for #backward.
  # @param x Input batch; axis 0 is used as the batch size and axis 3 as the channel axis.
  # @return Output image, cropped by the padding size when padding is enabled.
  def forward(x)
    bsize = x.shape[0]
    x = x.reshape(x.shape[0..2].reduce(:*), x.shape[3])
    @x = x
    col = x.dot(@weight.data.transpose)
    # Fold into an image enlarged by the padding size; the excess is removed below.
    img_shape = [bsize, @out_size[0] + @pad_size[0], @out_size[1] + @pad_size[1], @num_filters]
    y = col2im(col, img_shape, *input_shape[0..1], *@filter_size, @strides)
    y += @bias.data if @bias
    @padding ? zero_padding_bwd(y, @pad_size) : y
  end

  # Backward pass. Accumulates the weight / bias gradients when the layer is trainable.
  # @param dy Gradient of the output, indexed as [batch, out_h, out_w, num_filters].
  # @return Gradient with respect to the input, shaped [batch, *input_shape].
  def backward(dy)
    # forward adds the bias exactly once to every output pixel, so its gradient is
    # dy summed over every axis except the last. Summing the im2col patches instead
    # would count a pixel once for each filter window that covers it, which is wrong
    # whenever windows overlap (stride < filter size) or leave gaps.
    dbias = dy.reshape(dy.shape[0..2].reduce(:*), dy.shape[3]).sum(0) if @trainable && @bias
    dy = zero_padding(dy, @pad_size) if @padding
    col = im2col(dy, *input_shape[0..1], *@filter_size, @strides)
    if @trainable
      @weight.grad += col.transpose.dot(@x)
      @bias.grad += dbias if @bias
    end
    dx = col.dot(@weight.data)
    dx.reshape(dy.shape[0], *input_shape)
  end

  # @return [Array] Output shape of the form [height, width, num_filters].
  def output_shape
    [*@out_size, @num_filters]
  end

  # @return [Numo::SFloat] Weight reshaped to [filter_h, filter_w, num_filters, input_channels].
  def filters
    num_prev_filter = @input_shape[2]
    @weight.data.reshape(*@filter_size, @num_filters, num_prev_filter)
  end

  # @param [Numo::SFloat] filters Filters to flatten into the 2-D weight matrix and set.
  def filters=(filters)
    num_prev_filter = @input_shape[2]
    @weight.data = filters.reshape(@filter_size.reduce(:*) * @num_filters, num_prev_filter)
  end

  # @return The result of the superclass's to_hash, given this layer's own settings.
  def to_hash
    super({num_filters: @num_filters,
           filter_size: @filter_size,
           strides: @strides,
           padding: @padding})
  end
end
|
297
|
+
|
298
|
+
|
299
|
+
# Base class shared by the 2D pooling layers.
# It holds the pooling window, stride and padding settings and computes the
# output size; subclasses supply forward and backward.
class Pool2D < Layer
  include Conv2D_Utils

  # @return [Array] Pooling size of the form [height, width].
  attr_reader :pool_size
  # @return [Array] Stride length of the form [height, width].
  attr_reader :strides
  # @return [Array | Bool] Padding size, or whether padding is applied.
  attr_reader :padding

  # Instantiate the given pooling class from a settings hash.
  # @param [Class] pool2d_class Class to instantiate.
  # @param [Hash] hash Settings read through the keys :pool_size, :strides and :padding.
  def self.from_hash(pool2d_class, hash)
    size = hash[:pool_size]
    stride = hash[:strides]
    pad = hash[:padding]
    pool2d_class.new(size, strides: stride, padding: pad)
  end

  # @param [Array | Integer] pool_size Pooling size. Pooling size is of the form [height, width].
  # @param [Array | Integer | NilClass] strides Stride length of the form [height, width].
  #   When nil, a copy of pool_size is used as the stride.
  # @param [Array | Integer | Bool] padding Padding size or whether to pad.
  #   Padding size is of the form [height, width].
  def initialize(pool_size, strides: nil, padding: false)
    super()
    # Expand a scalar into a [height, width] pair; anything else passes through untouched.
    as_pair = ->(value) { value.is_a?(Integer) ? [value, value] : value }
    @pool_size = as_pair.call(pool_size)
    @strides = strides ? as_pair.call(strides) : @pool_size.clone
    @padding = as_pair.call(padding)
  end

  # Record the channel count and work out the padding and output sizes.
  # @param [Array] input_shape Input shape of the form [height, width, channels].
  def build(input_shape)
    super
    in_h, in_w = input_shape[0..1]
    @num_channel = input_shape[2]
    @pad_size =
      if @padding == true
        # Automatic padding: derive it from the output size obtained without padding.
        raw_h, raw_w = calc_conv2d_out_size(in_h, in_w, *@pool_size, 0, 0, @strides)
        calc_padding_size(in_h, in_w, raw_h, raw_w, @strides)
      elsif @padding.is_a?(Array)
        @padding
      else
        [0, 0]
      end
    @out_size = calc_conv2d_out_size(in_h, in_w, *@pool_size, *@pad_size, @strides)
  end

  # @return [Array] Output shape of the form [height, width, channels].
  def output_shape
    [*@out_size, @num_channel]
  end

  # @return The result of the superclass's to_hash, given this layer's own settings.
  def to_hash
    settings = {pool_size: @pool_size,
                strides: @strides,
                padding: @padding}
    super(settings)
  end
end
|
354
|
+
|
355
|
+
|
356
|
+
# 2D max pooling layer.
class MaxPool2D < Pool2D
  # Create a MaxPool2D layer from a settings hash (see Pool2D.from_hash).
  def self.from_hash(hash)
    Pool2D.from_hash(self, hash)
  end

  # Forward pass. The padded input shape and the positions of the maxima are
  # kept for #backward.
  # @param x Input batch; axis 0 is used as the batch size and axis 3 as the channel axis.
  # @return Pooled output reshaped to [batch, out_h, out_w, channels].
  def forward(x)
    x = zero_padding(x, @pad_size) if @padding
    @x_shape = x.shape
    col = im2col(x, *@out_size, *@pool_size, @strides)
    # Rearrange [window, pool, channel] into [window * channel, pool] so that each
    # row holds the candidates of one pooling window for one channel.
    col = col.reshape(x.shape[0] * @out_size.reduce(:*), @pool_size.reduce(:*), x.shape[3]).transpose(0, 2, 1)
             .reshape(x.shape[0] * @out_size.reduce(:*) * x.shape[3], @pool_size.reduce(:*))
    @max_index = col.max_index(1)
    col.max(1).reshape(x.shape[0], *@out_size, x.shape[3])
  end

  # Backward pass. Routes each output gradient to the position that held the maximum.
  # @param dy Gradient of the output, indexed as [batch, out_h, out_w, channels].
  # @return Gradient with respect to the (unpadded) input.
  def backward(dy)
    pool_len = @pool_size.reduce(:*)
    num_windows = dy.shape[0..2].reduce(:*)
    num_ch = dy.shape[3]
    dmax = Xumo::SFloat.zeros(dy.size * pool_len)
    dmax[@max_index] = dy.flatten
    # @max_index addresses the [window, channel, pool] layout that forward
    # transposed into, whereas forward reads the im2col columns as
    # [window, pool, channel]. Undo the transpose before folding back with col2im;
    # reshaping directly would mix up channels and pool positions whenever there
    # is more than one channel.
    dcol = dmax.reshape(num_windows, num_ch, pool_len).transpose(0, 2, 1)
               .reshape(num_windows, pool_len * num_ch)
    dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides)
    @padding ? zero_padding_bwd(dx, @pad_size) : dx
  end
end
|
379
|
+
|
380
|
+
|
381
|
+
# 2D average pooling layer.
class AvgPool2D < Pool2D
  # Create an AvgPool2D layer from a settings hash (see Pool2D.from_hash).
  def self.from_hash(hash)
    Pool2D.from_hash(self, hash)
  end

  # Forward pass. The padded input shape is kept for #backward.
  # @param x Input batch; axis 0 is used as the batch size and axis 3 as the channel axis.
  # @return Pooled output reshaped to [batch, out_h, out_w, channels].
  def forward(x)
    x = zero_padding(x, @pad_size) if @padding
    @x_shape = x.shape
    col = im2col(x, *@out_size, *@pool_size, @strides)
    # Rearrange [window, pool, channel] into [window * channel, pool] so that each
    # row holds the values of one pooling window for one channel.
    col = col.reshape(x.shape[0] * @out_size.reduce(:*), @pool_size.reduce(:*), x.shape[3]).transpose(0, 2, 1)
             .reshape(x.shape[0] * @out_size.reduce(:*) * x.shape[3], @pool_size.reduce(:*))
    col.mean(1).reshape(x.shape[0], *@out_size, x.shape[3])
  end

  # Backward pass. Spreads each output gradient evenly over its pooling window.
  # @param dy Gradient of the output, indexed as [batch, out_h, out_w, channels].
  # @return Gradient with respect to the (unpadded) input.
  def backward(dy)
    row_length = @pool_size.reduce(:*)
    num_windows = dy.shape[0..2].reduce(:*)
    num_ch = dy.shape[3]
    dy /= row_length
    davg = Xumo::SFloat.zeros(dy.size, row_length)
    row_length.times do |i|
      davg[true, i] = dy.flatten
    end
    # davg is laid out as [window, channel, pool], whereas forward reads the im2col
    # columns as [window, pool, channel]. Swap the last two axes back before folding
    # with col2im; reshaping directly would hand one channel's gradient to another
    # whenever there is more than one channel.
    dcol = davg.reshape(num_windows, num_ch, row_length).transpose(0, 2, 1)
               .reshape(num_windows, row_length * num_ch)
    dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides)
    @padding ? zero_padding_bwd(dx, @pad_size) : dx
  end
end
|
407
|
+
|
408
|
+
|
409
|
+
# 2D unpooling layer: every input pixel is repeated over an
# unpool_h x unpool_w block of the output.
class UnPool2D < Layer
  include Conv2D_Utils

  # @return [Array] Unpooling size of the form [height, width].
  attr_reader :unpool_size

  # @param [Array | Integer] unpool_size Unpooling size of the form [height, width].
  #   An Integer is used for both axes.
  def initialize(unpool_size)
    super()
    @unpool_size = if unpool_size.is_a?(Integer)
                     [unpool_size, unpool_size]
                   else
                     unpool_size
                   end
  end

  # Create an UnPool2D layer from a settings hash (key :unpool_size).
  def self.from_hash(hash)
    UnPool2D.new(hash[:unpool_size])
  end

  # Record the channel count and the enlarged output size.
  # @param [Array] input_shape Input shape of the form [height, width, channels].
  def build(input_shape)
    super
    in_h, in_w = input_shape[0..1]
    scale_h, scale_w = @unpool_size
    @out_size = [in_h * scale_h, in_w * scale_w]
    @num_channel = input_shape[2]
  end

  # Forward pass. The input shape is stored in @x_shape.
  # @param x Input batch, indexed as [batch, height, width, channels].
  # @return Output reshaped to [batch, out_h, out_w, channels].
  def forward(x)
    @x_shape = x.shape
    scale_h, scale_w = @unpool_size
    batch, in_h, in_w = x.shape
    # Insert a repeat axis after the height axis and after the width axis, fill
    # every repeat slot with the input, then merge the axes pairwise.
    expanded = Xumo::SFloat.zeros(batch, in_h, scale_h, in_w, scale_w, @num_channel)
    (0...scale_h).each do |row|
      (0...scale_w).each do |col|
        expanded[true, true, row, true, col, true] = x
      end
    end
    expanded.reshape(batch, *@out_size, x.shape[3])
  end

  # Backward pass. Sums the gradients of every block that one input pixel was copied to.
  # @param dy Gradient of the output, indexed as [batch, out_h, out_w, channels].
  # @return Gradient reshaped to [batch, height, width, channels] of the input.
  def backward(dy)
    in_size = input_shape[0..1]
    batch = dy.shape[0]
    num_ch = dy.shape[3]
    in_area = in_size.reduce(:*)
    block_len = @unpool_size.reduce(:*)
    # Non-overlapping blocks: the stride equals the unpooling size.
    blocks = im2col(dy, *in_size, *@unpool_size, @unpool_size)
    # Rearrange [pixel, block, channel] into [pixel * channel, block] rows.
    per_channel = blocks.reshape(batch * in_area, block_len, num_ch).transpose(0, 2, 1)
    rows = per_channel.reshape(batch * in_area * num_ch, block_len)
    rows.sum(1).reshape(batch, *in_size, num_ch)
  end

  # @return [Array] Output shape of the form [height, width, channels].
  def output_shape
    [*@out_size, @num_channel]
  end

  # @return The result of the superclass's to_hash, given this layer's own settings.
  def to_hash
    settings = {unpool_size: @unpool_size}
    super(settings)
  end
end
|
463
|
+
end
|
464
|
+
end
|