ruby-dnn 0.10.4 → 0.12.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -2
- data/README.md +33 -6
- data/examples/cifar100_example.rb +3 -3
- data/examples/cifar10_example.rb +3 -3
- data/examples/dcgan/dcgan.rb +112 -0
- data/examples/dcgan/imgen.rb +20 -0
- data/examples/dcgan/train.rb +41 -0
- data/examples/iris_example.rb +3 -6
- data/examples/mnist_conv2d_example.rb +5 -5
- data/examples/mnist_define_by_run.rb +52 -0
- data/examples/mnist_example.rb +3 -3
- data/examples/mnist_lstm_example.rb +3 -3
- data/examples/xor_example.rb +4 -5
- data/ext/rb_stb_image/rb_stb_image.c +103 -0
- data/lib/dnn.rb +10 -10
- data/lib/dnn/cifar10.rb +1 -1
- data/lib/dnn/cifar100.rb +1 -1
- data/lib/dnn/core/activations.rb +21 -22
- data/lib/dnn/core/cnn_layers.rb +94 -111
- data/lib/dnn/core/embedding.rb +30 -9
- data/lib/dnn/core/initializers.rb +31 -21
- data/lib/dnn/core/iterator.rb +52 -0
- data/lib/dnn/core/layers.rb +99 -66
- data/lib/dnn/core/link.rb +24 -0
- data/lib/dnn/core/losses.rb +69 -59
- data/lib/dnn/core/merge_layers.rb +71 -0
- data/lib/dnn/core/models.rb +393 -0
- data/lib/dnn/core/normalizations.rb +27 -14
- data/lib/dnn/core/optimizers.rb +212 -134
- data/lib/dnn/core/param.rb +8 -6
- data/lib/dnn/core/regularizers.rb +10 -7
- data/lib/dnn/core/rnn_layers.rb +78 -85
- data/lib/dnn/core/utils.rb +6 -3
- data/lib/dnn/downloader.rb +3 -3
- data/lib/dnn/fashion-mnist.rb +89 -0
- data/lib/dnn/image.rb +57 -18
- data/lib/dnn/iris.rb +1 -3
- data/lib/dnn/mnist.rb +38 -34
- data/lib/dnn/version.rb +1 -1
- data/third_party/stb_image.h +16 -4
- data/third_party/stb_image_resize.h +2630 -0
- data/third_party/stb_image_write.h +4 -7
- metadata +12 -4
- data/lib/dnn/core/dataset.rb +0 -34
- data/lib/dnn/core/model.rb +0 -440
@@ -7,7 +7,7 @@ include DNN::Layers
|
|
7
7
|
include DNN::Activations
|
8
8
|
include DNN::Optimizers
|
9
9
|
include DNN::Losses
|
10
|
-
|
10
|
+
include DNN::Models
|
11
11
|
MNIST = DNN::MNIST
|
12
12
|
|
13
13
|
x_train, y_train = MNIST.load_train
|
@@ -22,7 +22,7 @@ x_test /= 255
|
|
22
22
|
y_train = DNN::Utils.to_categorical(y_train, 10, Numo::SFloat)
|
23
23
|
y_test = DNN::Utils.to_categorical(y_test, 10, Numo::SFloat)
|
24
24
|
|
25
|
-
model =
|
25
|
+
model = Sequential.new
|
26
26
|
|
27
27
|
model << InputLayer.new([28, 28])
|
28
28
|
|
@@ -31,6 +31,6 @@ model << LSTM.new(200, return_sequences: false)
|
|
31
31
|
|
32
32
|
model << Dense.new(10)
|
33
33
|
|
34
|
-
model.
|
34
|
+
model.setup(Adam.new, SoftmaxCrossEntropy.new)
|
35
35
|
|
36
36
|
model.train(x_train, y_train, 10, batch_size: 100, test: [x_test, y_test])
|
data/examples/xor_example.rb
CHANGED
@@ -4,21 +4,20 @@ include DNN::Layers
|
|
4
4
|
include DNN::Activations
|
5
5
|
include DNN::Optimizers
|
6
6
|
include DNN::Losses
|
7
|
-
|
8
|
-
Utils = DNN::Utils
|
7
|
+
include DNN::Models
|
9
8
|
|
10
9
|
x = Numo::SFloat[[0, 0], [1, 0], [0, 1], [1, 1]]
|
11
10
|
y = Numo::SFloat[[0], [1], [1], [0]]
|
12
11
|
|
13
|
-
model =
|
12
|
+
model = Sequential.new
|
14
13
|
|
15
14
|
model << InputLayer.new(2)
|
16
15
|
model << Dense.new(16)
|
17
16
|
model << ReLU.new
|
18
17
|
model << Dense.new(1)
|
19
18
|
|
20
|
-
model.
|
19
|
+
model.setup(SGD.new, SigmoidCrossEntropy.new)
|
21
20
|
|
22
21
|
model.train(x, y, 20000, batch_size: 4, verbose: false)
|
23
22
|
|
24
|
-
p Utils.sigmoid(model.predict(x))
|
23
|
+
p DNN::Utils.sigmoid(model.predict(x))
|
@@ -3,9 +3,11 @@
|
|
3
3
|
|
4
4
|
#define STB_IMAGE_IMPLEMENTATION
|
5
5
|
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
6
|
+
#define STB_IMAGE_RESIZE_IMPLEMENTATION
|
6
7
|
|
7
8
|
#include "../../third_party/stb_image.h"
|
8
9
|
#include "../../third_party/stb_image_write.h"
|
10
|
+
#include "../../third_party/stb_image_resize.h"
|
9
11
|
|
10
12
|
// STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp);
|
11
13
|
static VALUE rb_stbi_load(VALUE self, VALUE rb_filename, VALUE rb_req_comp) {
|
@@ -94,14 +96,115 @@ static VALUE rb_stbi_write_jpg(VALUE self, VALUE rb_filename, VALUE rb_w, VALUE
|
|
94
96
|
return INT2FIX(result);
|
95
97
|
}
|
96
98
|
|
99
|
+
// STBIRDEF int stbir_resize_uint8( const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
|
100
|
+
// unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
|
101
|
+
// int num_channels);
|
102
|
+
static VALUE rb_stbir_resize_uint8(VALUE self, VALUE rb_input_pixels, VALUE rb_input_w, VALUE rb_input_h, VALUE rb_input_stride_in_bytes,
|
103
|
+
VALUE rb_output_w, VALUE rb_output_h, VALUE rb_output_stride_in_bytes, VALUE rb_num_channels) {
|
104
|
+
uint8_t* input_pixels = (uint8_t*)StringValuePtr(rb_input_pixels);
|
105
|
+
int32_t input_w = FIX2INT(rb_input_w);
|
106
|
+
int32_t input_h = FIX2INT(rb_input_h);
|
107
|
+
int32_t input_stride_in_bytes = FIX2INT(rb_input_stride_in_bytes);
|
108
|
+
int32_t output_w = FIX2INT(rb_output_w);
|
109
|
+
int32_t output_h = FIX2INT(rb_output_h);
|
110
|
+
int32_t output_stride_in_bytes = FIX2INT(rb_output_stride_in_bytes);
|
111
|
+
int32_t num_channels = FIX2INT(rb_num_channels);
|
112
|
+
uint8_t* output_pixels;
|
113
|
+
VALUE rb_output_pixels;
|
114
|
+
int32_t result;
|
115
|
+
const int32_t output_size = output_h * output_w * num_channels;
|
116
|
+
|
117
|
+
output_pixels = (uint8_t*)malloc(output_size);
|
118
|
+
result = stbir_resize_uint8(input_pixels, input_w, input_h, input_stride_in_bytes,
|
119
|
+
output_pixels, output_w, output_h, output_stride_in_bytes, num_channels);
|
120
|
+
rb_output_pixels = rb_str_new((char*)output_pixels, output_size);
|
121
|
+
free(output_pixels);
|
122
|
+
return rb_ary_new3(2, rb_output_pixels, INT2FIX(result));
|
123
|
+
}
|
124
|
+
|
125
|
+
// STBIRDEF int stbir_resize_uint8_srgb(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
|
126
|
+
// unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
|
127
|
+
// int num_channels, int alpha_channel, int flags);
|
128
|
+
static VALUE rb_stbir_resize_uint8_srgb(VALUE self, VALUE rb_input_pixels, VALUE rb_input_w, VALUE rb_input_h, VALUE rb_input_stride_in_bytes,
|
129
|
+
VALUE rb_output_w, VALUE rb_output_h, VALUE rb_output_stride_in_bytes, VALUE rb_num_channels,
|
130
|
+
VALUE rb_alpha_channel, VALUE rb_flags) {
|
131
|
+
uint8_t* input_pixels = (uint8_t*)StringValuePtr(rb_input_pixels);
|
132
|
+
int32_t input_w = FIX2INT(rb_input_w);
|
133
|
+
int32_t input_h = FIX2INT(rb_input_h);
|
134
|
+
int32_t input_stride_in_bytes = FIX2INT(rb_input_stride_in_bytes);
|
135
|
+
int32_t output_w = FIX2INT(rb_output_w);
|
136
|
+
int32_t output_h = FIX2INT(rb_output_h);
|
137
|
+
int32_t output_stride_in_bytes = FIX2INT(rb_output_stride_in_bytes);
|
138
|
+
int32_t num_channels = FIX2INT(rb_num_channels);
|
139
|
+
int32_t alpha_channel = FIX2INT(rb_alpha_channel);
|
140
|
+
int32_t flags = FIX2INT(rb_flags);
|
141
|
+
uint8_t* output_pixels;
|
142
|
+
VALUE rb_output_pixels;
|
143
|
+
int32_t result;
|
144
|
+
const int32_t output_size = output_h * output_w * num_channels;
|
145
|
+
|
146
|
+
output_pixels = (uint8_t*)malloc(output_size);
|
147
|
+
result = stbir_resize_uint8_srgb(input_pixels, input_w, input_h, input_stride_in_bytes,
|
148
|
+
output_pixels, output_w, output_h, output_stride_in_bytes,
|
149
|
+
num_channels, alpha_channel, flags);
|
150
|
+
rb_output_pixels = rb_str_new((char*)output_pixels, output_size);
|
151
|
+
free(output_pixels);
|
152
|
+
return rb_ary_new3(2, rb_output_pixels, INT2FIX(result));
|
153
|
+
}
|
154
|
+
|
155
|
+
// STBIRDEF int stbir_resize_uint8_srgb_edgemode(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
|
156
|
+
// unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
|
157
|
+
// int num_channels, int alpha_channel, int flags,
|
158
|
+
// stbir_edge edge_wrap_mode);
|
159
|
+
static VALUE rb_stbir_resize_uint8_srgb_edgemode(VALUE self, VALUE rb_input_pixels, VALUE rb_input_w, VALUE rb_input_h, VALUE rb_input_stride_in_bytes,
|
160
|
+
VALUE rb_output_w, VALUE rb_output_h, VALUE rb_output_stride_in_bytes, VALUE rb_num_channels,
|
161
|
+
VALUE rb_alpha_channel, VALUE rb_flags, VALUE rb_edge_wrap_mode) {
|
162
|
+
uint8_t* input_pixels = (uint8_t*)StringValuePtr(rb_input_pixels);
|
163
|
+
int32_t input_w = FIX2INT(rb_input_w);
|
164
|
+
int32_t input_h = FIX2INT(rb_input_h);
|
165
|
+
int32_t input_stride_in_bytes = FIX2INT(rb_input_stride_in_bytes);
|
166
|
+
int32_t output_w = FIX2INT(rb_output_w);
|
167
|
+
int32_t output_h = FIX2INT(rb_output_h);
|
168
|
+
int32_t output_stride_in_bytes = FIX2INT(rb_output_stride_in_bytes);
|
169
|
+
int32_t num_channels = FIX2INT(rb_num_channels);
|
170
|
+
int32_t alpha_channel = FIX2INT(rb_alpha_channel);
|
171
|
+
int32_t flags = FIX2INT(rb_flags);
|
172
|
+
stbir_edge edge_wrap_mode = (stbir_edge)FIX2INT(rb_edge_wrap_mode);
|
173
|
+
uint8_t* output_pixels;
|
174
|
+
VALUE rb_output_pixels;
|
175
|
+
int32_t result;
|
176
|
+
const int32_t output_size = output_h * output_w * num_channels;
|
177
|
+
|
178
|
+
output_pixels = (uint8_t*)malloc(output_size);
|
179
|
+
result = stbir_resize_uint8_srgb_edgemode(input_pixels, input_w, input_h, input_stride_in_bytes,
|
180
|
+
output_pixels, output_w, output_h, output_stride_in_bytes,
|
181
|
+
num_channels, alpha_channel, flags, edge_wrap_mode);
|
182
|
+
rb_output_pixels = rb_str_new((char*)output_pixels, output_size);
|
183
|
+
free(output_pixels);
|
184
|
+
return rb_ary_new3(2, rb_output_pixels, INT2FIX(result));
|
185
|
+
}
|
186
|
+
|
97
187
|
void Init_rb_stb_image() {
|
98
188
|
VALUE rb_dnn = rb_define_module("DNN");
|
99
189
|
VALUE rb_stb = rb_define_module_under(rb_dnn, "Stb");
|
100
190
|
|
191
|
+
rb_define_const(rb_stb, "STBIR_ALPHA_CHANNEL_NONE", INT2FIX(STBIR_ALPHA_CHANNEL_NONE));
|
192
|
+
|
193
|
+
rb_define_const(rb_stb, "STBIR_FLAG_ALPHA_PREMULTIPLIED", INT2FIX(STBIR_FLAG_ALPHA_PREMULTIPLIED));
|
194
|
+
rb_define_const(rb_stb, "STBIR_FLAG_ALPHA_USES_COLORSPACE", INT2FIX(STBIR_FLAG_ALPHA_USES_COLORSPACE));
|
195
|
+
|
196
|
+
rb_define_const(rb_stb, "STBIR_EDGE_CLAMP", INT2FIX(STBIR_EDGE_CLAMP));
|
197
|
+
rb_define_const(rb_stb, "STBIR_EDGE_REFLECT", INT2FIX(STBIR_EDGE_REFLECT));
|
198
|
+
rb_define_const(rb_stb, "STBIR_EDGE_WRAP", INT2FIX(STBIR_EDGE_WRAP));
|
199
|
+
rb_define_const(rb_stb, "STBIR_EDGE_ZERO", INT2FIX(STBIR_EDGE_ZERO));
|
200
|
+
|
101
201
|
rb_define_module_function(rb_stb, "stbi_load", rb_stbi_load, 2);
|
102
202
|
rb_define_module_function(rb_stb, "stbi_write_png", rb_stbi_write_png, 6);
|
103
203
|
rb_define_module_function(rb_stb, "stbi_write_bmp", rb_stbi_write_bmp, 5);
|
104
204
|
rb_define_module_function(rb_stb, "stbi_write_tga", rb_stbi_write_tga, 5);
|
105
205
|
rb_define_module_function(rb_stb, "stbi_write_hdr", rb_stbi_write_hdr, 5);
|
106
206
|
rb_define_module_function(rb_stb, "stbi_write_jpg", rb_stbi_write_jpg, 6);
|
207
|
+
rb_define_module_function(rb_stb, "stbir_resize_uint8", rb_stbir_resize_uint8, 8);
|
208
|
+
rb_define_module_function(rb_stb, "stbir_resize_uint8_srgb", rb_stbir_resize_uint8_srgb, 10);
|
209
|
+
rb_define_module_function(rb_stb, "stbir_resize_uint8_srgb_edgemode", rb_stbir_resize_uint8_srgb_edgemode, 11);
|
107
210
|
}
|
data/lib/dnn.rb
CHANGED
@@ -1,23 +1,23 @@
|
|
1
|
-
if defined? Cumo
|
2
|
-
Xumo = Cumo
|
3
|
-
else
|
4
|
-
require "numo/narray"
|
5
|
-
Xumo = Numo
|
6
|
-
end
|
7
|
-
|
8
1
|
module DNN
|
9
|
-
|
2
|
+
if defined? ::Cumo
|
3
|
+
Xumo = ::Cumo
|
4
|
+
else
|
5
|
+
require "numo/narray"
|
6
|
+
Xumo = ::Numo
|
7
|
+
end
|
10
8
|
end
|
11
9
|
|
12
10
|
require_relative "dnn/version"
|
13
11
|
require_relative "dnn/core/error"
|
14
|
-
require_relative "dnn/core/
|
12
|
+
require_relative "dnn/core/models"
|
15
13
|
require_relative "dnn/core/param"
|
16
|
-
require_relative "dnn/core/
|
14
|
+
require_relative "dnn/core/link"
|
15
|
+
require_relative "dnn/core/iterator"
|
17
16
|
require_relative "dnn/core/initializers"
|
18
17
|
require_relative "dnn/core/layers"
|
19
18
|
require_relative "dnn/core/normalizations"
|
20
19
|
require_relative "dnn/core/activations"
|
20
|
+
require_relative "dnn/core/merge_layers"
|
21
21
|
require_relative "dnn/core/losses"
|
22
22
|
require_relative "dnn/core/regularizers"
|
23
23
|
require_relative "dnn/core/cnn_layers"
|
data/lib/dnn/cifar10.rb
CHANGED
@@ -16,7 +16,7 @@ module DNN
|
|
16
16
|
cifar10_binary_file_name = __dir__ + "/downloads/" + URL_CIFAR10.match(%r`.+/(.+)`)[1]
|
17
17
|
begin
|
18
18
|
Zlib::GzipReader.open(cifar10_binary_file_name) do |gz|
|
19
|
-
Archive::Tar::Minitar
|
19
|
+
Archive::Tar::Minitar.unpack(gz, __dir__ + "/downloads")
|
20
20
|
end
|
21
21
|
ensure
|
22
22
|
File.unlink(cifar10_binary_file_name)
|
data/lib/dnn/cifar100.rb
CHANGED
@@ -16,7 +16,7 @@ module DNN
|
|
16
16
|
cifar100_binary_file_name = __dir__ + "/downloads/" + URL_CIFAR100.match(%r`.+/(.+)`)[1]
|
17
17
|
begin
|
18
18
|
Zlib::GzipReader.open(cifar100_binary_file_name) do |gz|
|
19
|
-
Archive::Tar::Minitar
|
19
|
+
Archive::Tar::Minitar.unpack(gz, __dir__ + "/downloads")
|
20
20
|
end
|
21
21
|
ensure
|
22
22
|
File.unlink(cifar100_binary_file_name)
|
data/lib/dnn/core/activations.rb
CHANGED
@@ -3,9 +3,9 @@ module DNN
|
|
3
3
|
|
4
4
|
class Sigmoid < Layers::Layer
|
5
5
|
def forward(x)
|
6
|
-
@y = 1 / (1 + NMath.exp(-x))
|
6
|
+
@y = 1 / (1 + Xumo::NMath.exp(-x))
|
7
7
|
end
|
8
|
-
|
8
|
+
|
9
9
|
def backward(dy)
|
10
10
|
dy * (1 - @y) * @y
|
11
11
|
end
|
@@ -14,11 +14,11 @@ module DNN
|
|
14
14
|
|
15
15
|
class Tanh < Layers::Layer
|
16
16
|
def forward(x)
|
17
|
-
@y = NMath.tanh(x)
|
17
|
+
@y = Xumo::NMath.tanh(x)
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
def backward(dy)
|
21
|
-
dy * (1 - @y**2)
|
21
|
+
dy * (1 - @y ** 2)
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
@@ -30,7 +30,7 @@ module DNN
|
|
30
30
|
end
|
31
31
|
|
32
32
|
def backward(dy)
|
33
|
-
dy * (1 / (1 + @x.abs)**2)
|
33
|
+
dy * (1 / (1 + @x.abs) ** 2)
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
@@ -38,11 +38,11 @@ module DNN
|
|
38
38
|
class Softplus < Layers::Layer
|
39
39
|
def forward(x)
|
40
40
|
@x = x
|
41
|
-
NMath.log(1 + NMath.exp(x))
|
41
|
+
Xumo::NMath.log(1 + Xumo::NMath.exp(x))
|
42
42
|
end
|
43
43
|
|
44
44
|
def backward(dy)
|
45
|
-
dy * (1 / (1 + NMath.exp(-@x)))
|
45
|
+
dy * (1 / (1 + Xumo::NMath.exp(-@x)))
|
46
46
|
end
|
47
47
|
end
|
48
48
|
|
@@ -50,22 +50,21 @@ module DNN
|
|
50
50
|
class Swish < Layers::Layer
|
51
51
|
def forward(x)
|
52
52
|
@x = x
|
53
|
-
@y = x * (1 / (1 + NMath.exp(-x)))
|
53
|
+
@y = x * (1 / (1 + Xumo::NMath.exp(-x)))
|
54
54
|
end
|
55
|
-
|
55
|
+
|
56
56
|
def backward(dy)
|
57
|
-
dy * (@y + (1 / (1 + NMath.exp(-@x))) * (1 - @y))
|
57
|
+
dy * (@y + (1 / (1 + Xumo::NMath.exp(-@x))) * (1 - @y))
|
58
58
|
end
|
59
59
|
end
|
60
|
-
|
61
|
-
|
60
|
+
|
61
|
+
|
62
62
|
class ReLU < Layers::Layer
|
63
63
|
def forward(x)
|
64
64
|
@x = x
|
65
|
-
|
66
|
-
x
|
65
|
+
Xumo::SFloat.maximum(0, x)
|
67
66
|
end
|
68
|
-
|
67
|
+
|
69
68
|
def backward(dy)
|
70
69
|
dx = Xumo::SFloat.ones(@x.shape)
|
71
70
|
dx[@x <= 0] = 0
|
@@ -75,7 +74,6 @@ module DNN
|
|
75
74
|
|
76
75
|
|
77
76
|
class LeakyReLU < Layers::Layer
|
78
|
-
# @return [Float] Return the alpha value.
|
79
77
|
attr_reader :alpha
|
80
78
|
|
81
79
|
def self.from_hash(hash)
|
@@ -84,6 +82,7 @@ module DNN
|
|
84
82
|
|
85
83
|
# @param [Float] alpha The slope when the output value is negative.
|
86
84
|
def initialize(alpha = 0.3)
|
85
|
+
super()
|
87
86
|
@alpha = alpha
|
88
87
|
end
|
89
88
|
|
@@ -101,13 +100,12 @@ module DNN
|
|
101
100
|
end
|
102
101
|
|
103
102
|
def to_hash
|
104
|
-
|
103
|
+
super(alpha: @alpha)
|
105
104
|
end
|
106
105
|
end
|
107
106
|
|
108
107
|
|
109
108
|
class ELU < Layers::Layer
|
110
|
-
# @return [Float] Return the alpha value.
|
111
109
|
attr_reader :alpha
|
112
110
|
|
113
111
|
def self.from_hash(hash)
|
@@ -116,6 +114,7 @@ module DNN
|
|
116
114
|
|
117
115
|
# @param [Float] alpha The slope when the output value is negative.
|
118
116
|
def initialize(alpha = 1.0)
|
117
|
+
super()
|
119
118
|
@alpha = alpha
|
120
119
|
end
|
121
120
|
|
@@ -126,7 +125,7 @@ module DNN
|
|
126
125
|
x1 *= x
|
127
126
|
x2 = Xumo::SFloat.zeros(x.shape)
|
128
127
|
x2[x < 0] = 1
|
129
|
-
x2 *= @alpha * NMath.exp(x) - @alpha
|
128
|
+
x2 *= @alpha * Xumo::NMath.exp(x) - @alpha
|
130
129
|
x1 + x2
|
131
130
|
end
|
132
131
|
|
@@ -135,12 +134,12 @@ module DNN
|
|
135
134
|
dx[@x < 0] = 0
|
136
135
|
dx2 = Xumo::SFloat.zeros(@x.shape)
|
137
136
|
dx2[@x < 0] = 1
|
138
|
-
dx2 *= @alpha * NMath.exp(@x)
|
137
|
+
dx2 *= @alpha * Xumo::NMath.exp(@x)
|
139
138
|
dy * (dx + dx2)
|
140
139
|
end
|
141
140
|
|
142
141
|
def to_hash
|
143
|
-
|
142
|
+
super(alpha: @alpha)
|
144
143
|
end
|
145
144
|
end
|
146
145
|
|
data/lib/dnn/core/cnn_layers.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
module DNN
|
2
2
|
module Layers
|
3
|
+
|
3
4
|
# This module is used for convolution.
|
4
|
-
module
|
5
|
+
module Conv2DUtils
|
5
6
|
private
|
6
7
|
|
7
8
|
# img[bsize, out_h, out_w, ch] to col[bsize * out_h * out_w, fil_h * fil_w * ch]
|
@@ -59,53 +60,53 @@ module DNN
|
|
59
60
|
[out_h, out_w]
|
60
61
|
end
|
61
62
|
|
62
|
-
def
|
63
|
+
def calc_conv2d_transpose_out_size(prev_h, prev_w, fil_h, fil_w, pad_h, pad_w, strides)
|
63
64
|
out_h = (prev_h - 1) * strides[0] + fil_h - pad_h
|
64
65
|
out_w = (prev_w - 1) * strides[1] + fil_w - pad_w
|
65
66
|
[out_h, out_w]
|
66
67
|
end
|
67
68
|
|
68
|
-
def
|
69
|
-
|
70
|
-
|
69
|
+
def calc_conv2d_padding_size(prev_h, prev_w, fil_h, fil_w, strides)
|
70
|
+
out_h = prev_h / strides[0]
|
71
|
+
out_w = prev_w / strides[1]
|
72
|
+
pad_h = out_h * strides[0] - prev_h + fil_h - strides[0]
|
73
|
+
pad_w = out_w * strides[1] - prev_w + fil_w - strides[1]
|
71
74
|
[pad_h, pad_w]
|
72
75
|
end
|
73
76
|
|
74
|
-
def
|
75
|
-
|
76
|
-
|
77
|
+
def calc_conv2d_transpose_padding_size(prev_h, prev_w, fil_h, fil_w, strides)
|
78
|
+
out_h = prev_h * strides[0]
|
79
|
+
out_w = prev_w * strides[1]
|
80
|
+
pad_h = (prev_h - 1) * strides[0] + fil_h - out_h
|
81
|
+
pad_w = (prev_w - 1) * strides[1] + fil_w - out_w
|
77
82
|
[pad_h, pad_w]
|
78
83
|
end
|
79
84
|
end
|
80
|
-
|
81
|
-
|
85
|
+
|
86
|
+
|
82
87
|
class Conv2D < Connection
|
83
|
-
include
|
88
|
+
include Conv2DUtils
|
84
89
|
|
85
|
-
# @return [Integer] number of filters.
|
86
90
|
attr_reader :num_filters
|
87
|
-
# @return [Array] Return filter size. filter size is of the form [height, width].
|
88
91
|
attr_reader :filter_size
|
89
|
-
# @return [Array] Return stride length. stride length is of the form [height, width].
|
90
92
|
attr_reader :strides
|
91
|
-
# @return [Array | Bool] Return padding size or whether to padding.
|
92
93
|
attr_reader :padding
|
93
94
|
|
94
95
|
def self.from_hash(hash)
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
end
|
104
|
-
|
96
|
+
self.new(hash[:num_filters], hash[:filter_size],
|
97
|
+
weight_initializer: Utils.hash_to_obj(hash[:weight_initializer]),
|
98
|
+
bias_initializer: Utils.hash_to_obj(hash[:bias_initializer]),
|
99
|
+
weight_regularizer: Utils.hash_to_obj(hash[:weight_regularizer]),
|
100
|
+
bias_regularizer: Utils.hash_to_obj(hash[:bias_regularizer]),
|
101
|
+
use_bias: hash[:use_bias],
|
102
|
+
strides: hash[:strides],
|
103
|
+
padding: hash[:padding])
|
104
|
+
end
|
105
|
+
|
105
106
|
# @param [Integer] num_filters Number of filters.
|
106
107
|
# @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
|
107
108
|
# @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
|
108
|
-
# @param [Array |
|
109
|
+
# @param [Array | Boolean] padding Padding size or whether to padding. Padding size is of the form [height, width].
|
109
110
|
def initialize(num_filters, filter_size,
|
110
111
|
weight_initializer: Initializers::RandomNormal.new,
|
111
112
|
bias_initializer: Initializers::Zeros.new,
|
@@ -127,17 +128,16 @@ module DNN
|
|
127
128
|
raise DNN_ShapeError.new("Input shape is #{input_shape}. But input shape must be 3 dimensional.")
|
128
129
|
end
|
129
130
|
super
|
130
|
-
prev_h, prev_w,
|
131
|
-
@weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) *
|
131
|
+
prev_h, prev_w, num_prev_filters = *input_shape
|
132
|
+
@weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * num_prev_filters, @num_filters)
|
132
133
|
@bias.data = Xumo::SFloat.new(@num_filters) if @bias
|
133
134
|
init_weight_and_bias
|
134
|
-
if @padding == true
|
135
|
-
|
136
|
-
@pad_size = calc_padding_size(prev_h, prev_w, out_h, out_w, @strides)
|
135
|
+
@pad_size = if @padding == true
|
136
|
+
calc_conv2d_padding_size(prev_h, prev_w, *@filter_size, @strides)
|
137
137
|
elsif @padding.is_a?(Array)
|
138
|
-
@
|
138
|
+
@padding
|
139
139
|
else
|
140
|
-
|
140
|
+
[0, 0]
|
141
141
|
end
|
142
142
|
@out_size = calc_conv2d_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
|
143
143
|
end
|
@@ -168,48 +168,44 @@ module DNN
|
|
168
168
|
|
169
169
|
# @return [Numo::SFloat] Convert weight to filter and return.
|
170
170
|
def filters
|
171
|
-
|
172
|
-
@weight.data.reshape(*@filter_size,
|
171
|
+
num_prev_filters = @input_shape[2]
|
172
|
+
@weight.data.reshape(*@filter_size, num_prev_filters, @num_filters)
|
173
173
|
end
|
174
174
|
|
175
175
|
# @param [Numo::SFloat] filters Convert weight to filters and set.
|
176
176
|
def filters=(filters)
|
177
|
-
|
178
|
-
@weight.data = filters.reshape(@filter_size.reduce(:*) *
|
177
|
+
num_prev_filters = @input_shape[2]
|
178
|
+
@weight.data = filters.reshape(@filter_size.reduce(:*) * num_prev_filters, @num_filters)
|
179
179
|
end
|
180
180
|
|
181
181
|
def to_hash
|
182
|
-
super(
|
183
|
-
|
184
|
-
|
185
|
-
|
182
|
+
super(num_filters: @num_filters,
|
183
|
+
filter_size: @filter_size,
|
184
|
+
strides: @strides,
|
185
|
+
padding: @padding)
|
186
186
|
end
|
187
187
|
end
|
188
188
|
|
189
189
|
|
190
|
-
class
|
191
|
-
include
|
190
|
+
class Conv2DTranspose < Connection
|
191
|
+
include Conv2DUtils
|
192
192
|
|
193
|
-
# @return [Integer] number of filters.
|
194
193
|
attr_reader :num_filters
|
195
|
-
# @return [Array] Return filter size. filter size is of the form [height, width].
|
196
194
|
attr_reader :filter_size
|
197
|
-
# @return [Array] Return stride length. stride length is of the form [height, width].
|
198
195
|
attr_reader :strides
|
199
|
-
# @return [Array] Return padding size.
|
200
196
|
attr_reader :padding
|
201
197
|
|
202
198
|
def self.from_hash(hash)
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
end
|
212
|
-
|
199
|
+
self.new(hash[:num_filters], hash[:filter_size],
|
200
|
+
weight_initializer: Utils.hash_to_obj(hash[:weight_initializer]),
|
201
|
+
bias_initializer: Utils.hash_to_obj(hash[:bias_initializer]),
|
202
|
+
weight_regularizer: Utils.hash_to_obj(hash[:weight_regularizer]),
|
203
|
+
bias_regularizer: Utils.hash_to_obj(hash[:bias_regularizer]),
|
204
|
+
use_bias: hash[:use_bias],
|
205
|
+
strides: hash[:strides],
|
206
|
+
padding: hash[:padding])
|
207
|
+
end
|
208
|
+
|
213
209
|
# @param [Integer] num_filters Number of filters.
|
214
210
|
# @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
|
215
211
|
# @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
|
@@ -235,19 +231,18 @@ module DNN
|
|
235
231
|
raise DNN_ShapeError.new("Input shape is #{input_shape}. But input shape must be 3 dimensional.")
|
236
232
|
end
|
237
233
|
super
|
238
|
-
prev_h, prev_w,
|
239
|
-
@weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * @num_filters,
|
234
|
+
prev_h, prev_w, num_prev_filters = *input_shape
|
235
|
+
@weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * @num_filters, num_prev_filters)
|
240
236
|
@bias.data = Xumo::SFloat.new(@num_filters) if @bias
|
241
237
|
init_weight_and_bias
|
242
|
-
if @padding == true
|
243
|
-
|
244
|
-
@pad_size = calc_padding_size(out_h, out_w, prev_h, prev_w, @strides)
|
238
|
+
@pad_size = if @padding == true
|
239
|
+
calc_conv2d_transpose_padding_size(prev_h, prev_w, *@filter_size, @strides)
|
245
240
|
elsif @padding.is_a?(Array)
|
246
|
-
@
|
241
|
+
@padding
|
247
242
|
else
|
248
|
-
|
243
|
+
[0, 0]
|
249
244
|
end
|
250
|
-
@out_size =
|
245
|
+
@out_size = calc_conv2d_transpose_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
|
251
246
|
end
|
252
247
|
|
253
248
|
def forward(x)
|
@@ -278,44 +273,41 @@ module DNN
|
|
278
273
|
|
279
274
|
# @return [Numo::SFloat] Convert weight to filter and return.
|
280
275
|
def filters
|
281
|
-
|
282
|
-
@weight.data.reshape(*@filter_size, @num_filters,
|
276
|
+
num_prev_filters = @input_shape[2]
|
277
|
+
@weight.data.reshape(*@filter_size, @num_filters, num_prev_filters)
|
283
278
|
end
|
284
279
|
|
285
280
|
# @param [Numo::SFloat] filters Convert weight to filters and set.
|
286
281
|
def filters=(filters)
|
287
|
-
|
288
|
-
@weight.data = filters.reshape(@filter_size.reduce(:*) * @num_filters,
|
282
|
+
num_prev_filters = @input_shape[2]
|
283
|
+
@weight.data = filters.reshape(@filter_size.reduce(:*) * @num_filters, num_prev_filters)
|
289
284
|
end
|
290
285
|
|
291
286
|
def to_hash
|
292
|
-
super(
|
293
|
-
|
294
|
-
|
295
|
-
|
287
|
+
super(num_filters: @num_filters,
|
288
|
+
filter_size: @filter_size,
|
289
|
+
strides: @strides,
|
290
|
+
padding: @padding)
|
296
291
|
end
|
297
292
|
end
|
298
293
|
|
299
|
-
|
294
|
+
|
300
295
|
# Super class of all pooling2D class.
|
301
296
|
class Pool2D < Layer
|
302
|
-
include
|
297
|
+
include Conv2DUtils
|
303
298
|
|
304
|
-
# @return [Array] Return pooling size. Pooling size is of the form [height, width].
|
305
299
|
attr_reader :pool_size
|
306
|
-
# @return [Array] Return stride length. Stride length is of the form [height, width].
|
307
300
|
attr_reader :strides
|
308
|
-
# @return [Array | Bool] Return padding size or whether to padding.
|
309
301
|
attr_reader :padding
|
310
302
|
|
311
|
-
def self.from_hash(
|
312
|
-
|
303
|
+
def self.from_hash(hash)
|
304
|
+
self.new(hash[:pool_size], strides: hash[:strides], padding: hash[:padding])
|
313
305
|
end
|
314
306
|
|
315
307
|
# @param [Array | Integer] pool_size Pooling size. Pooling size is of the form [height, width].
|
316
|
-
# @param [Array | Integer | NilClass] strides
|
317
|
-
#
|
318
|
-
# @param [Array |
|
308
|
+
# @param [Array | Integer | NilClass] strides Stride length. Stride length is of the form [height, width].
|
309
|
+
# If you set nil, treat pool_size as strides.
|
310
|
+
# @param [Array | Boolean] padding Padding size or whether to padding. Padding size is of the form [height, width].
|
319
311
|
def initialize(pool_size, strides: nil, padding: false)
|
320
312
|
super()
|
321
313
|
@pool_size = pool_size.is_a?(Integer) ? [pool_size, pool_size] : pool_size
|
@@ -334,13 +326,12 @@ module DNN
|
|
334
326
|
super
|
335
327
|
prev_h, prev_w = input_shape[0..1]
|
336
328
|
@num_channel = input_shape[2]
|
337
|
-
if @padding == true
|
338
|
-
|
339
|
-
@pad_size = calc_padding_size(prev_h, prev_w, out_h, out_w, @strides)
|
329
|
+
@pad_size = if @padding == true
|
330
|
+
calc_conv2d_padding_size(prev_h, prev_w, *@pool_size, @strides)
|
340
331
|
elsif @padding.is_a?(Array)
|
341
|
-
@
|
332
|
+
@padding
|
342
333
|
else
|
343
|
-
|
334
|
+
[0, 0]
|
344
335
|
end
|
345
336
|
@out_size = calc_conv2d_out_size(prev_h, prev_w, *@pool_size, *@pad_size, @strides)
|
346
337
|
end
|
@@ -350,18 +341,14 @@ module DNN
|
|
350
341
|
end
|
351
342
|
|
352
343
|
def to_hash
|
353
|
-
super(
|
354
|
-
|
355
|
-
|
344
|
+
super(pool_size: @pool_size,
|
345
|
+
strides: @strides,
|
346
|
+
padding: @padding)
|
356
347
|
end
|
357
348
|
end
|
358
|
-
|
359
|
-
|
360
|
-
class MaxPool2D < Pool2D
|
361
|
-
def self.from_hash(hash)
|
362
|
-
Pool2D.from_hash(self, hash)
|
363
|
-
end
|
364
349
|
|
350
|
+
|
351
|
+
class MaxPool2D < Pool2D
|
365
352
|
def forward(x)
|
366
353
|
x = zero_padding(x, @pad_size) if @padding
|
367
354
|
@x_shape = x.shape
|
@@ -383,10 +370,6 @@ module DNN
|
|
383
370
|
|
384
371
|
|
385
372
|
class AvgPool2D < Pool2D
|
386
|
-
def self.from_hash(hash)
|
387
|
-
Pool2D.from_hash(self, hash)
|
388
|
-
end
|
389
|
-
|
390
373
|
def forward(x)
|
391
374
|
x = zero_padding(x, @pad_size) if @padding
|
392
375
|
@x_shape = x.shape
|
@@ -411,21 +394,20 @@ module DNN
|
|
411
394
|
|
412
395
|
|
413
396
|
class UnPool2D < Layer
|
414
|
-
include
|
415
|
-
|
416
|
-
# @return [Array] Return unpooling size. unpooling size is of the form [height, width].
|
397
|
+
include Conv2DUtils
|
398
|
+
|
417
399
|
attr_reader :unpool_size
|
418
400
|
|
419
|
-
|
401
|
+
def self.from_hash(hash)
|
402
|
+
self.new(hash[:unpool_size])
|
403
|
+
end
|
404
|
+
|
405
|
+
# @param [Array | Integer] unpool_size Unpooling size. unpooling size is of the form [height, width].
|
420
406
|
def initialize(unpool_size)
|
421
407
|
super()
|
422
408
|
@unpool_size = unpool_size.is_a?(Integer) ? [unpool_size, unpool_size] : unpool_size
|
423
409
|
end
|
424
410
|
|
425
|
-
def self.from_hash(hash)
|
426
|
-
UnPool2D.new(hash[:unpool_size])
|
427
|
-
end
|
428
|
-
|
429
411
|
def build(input_shape)
|
430
412
|
unless input_shape.length == 3
|
431
413
|
raise DNN_ShapeError.new("Input shape is #{input_shape}. But input shape must be 3 dimensional.")
|
@@ -453,7 +435,7 @@ module DNN
|
|
453
435
|
|
454
436
|
def backward(dy)
|
455
437
|
in_size = input_shape[0..1]
|
456
|
-
col = im2col(dy, *
|
438
|
+
col = im2col(dy, *in_size, *@unpool_size, @unpool_size)
|
457
439
|
col = col.reshape(dy.shape[0] * in_size.reduce(:*), @unpool_size.reduce(:*), dy.shape[3]).transpose(0, 2, 1)
|
458
440
|
.reshape(dy.shape[0] * in_size.reduce(:*) * dy.shape[3], @unpool_size.reduce(:*))
|
459
441
|
col.sum(1).reshape(dy.shape[0], *in_size, dy.shape[3])
|
@@ -464,8 +446,9 @@ module DNN
|
|
464
446
|
end
|
465
447
|
|
466
448
|
def to_hash
|
467
|
-
super(
|
449
|
+
super(unpool_size: @unpool_size)
|
468
450
|
end
|
469
451
|
end
|
452
|
+
|
470
453
|
end
|
471
454
|
end
|