ruby-dnn 0.10.4 → 0.12.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +1 -2
  3. data/README.md +33 -6
  4. data/examples/cifar100_example.rb +3 -3
  5. data/examples/cifar10_example.rb +3 -3
  6. data/examples/dcgan/dcgan.rb +112 -0
  7. data/examples/dcgan/imgen.rb +20 -0
  8. data/examples/dcgan/train.rb +41 -0
  9. data/examples/iris_example.rb +3 -6
  10. data/examples/mnist_conv2d_example.rb +5 -5
  11. data/examples/mnist_define_by_run.rb +52 -0
  12. data/examples/mnist_example.rb +3 -3
  13. data/examples/mnist_lstm_example.rb +3 -3
  14. data/examples/xor_example.rb +4 -5
  15. data/ext/rb_stb_image/rb_stb_image.c +103 -0
  16. data/lib/dnn.rb +10 -10
  17. data/lib/dnn/cifar10.rb +1 -1
  18. data/lib/dnn/cifar100.rb +1 -1
  19. data/lib/dnn/core/activations.rb +21 -22
  20. data/lib/dnn/core/cnn_layers.rb +94 -111
  21. data/lib/dnn/core/embedding.rb +30 -9
  22. data/lib/dnn/core/initializers.rb +31 -21
  23. data/lib/dnn/core/iterator.rb +52 -0
  24. data/lib/dnn/core/layers.rb +99 -66
  25. data/lib/dnn/core/link.rb +24 -0
  26. data/lib/dnn/core/losses.rb +69 -59
  27. data/lib/dnn/core/merge_layers.rb +71 -0
  28. data/lib/dnn/core/models.rb +393 -0
  29. data/lib/dnn/core/normalizations.rb +27 -14
  30. data/lib/dnn/core/optimizers.rb +212 -134
  31. data/lib/dnn/core/param.rb +8 -6
  32. data/lib/dnn/core/regularizers.rb +10 -7
  33. data/lib/dnn/core/rnn_layers.rb +78 -85
  34. data/lib/dnn/core/utils.rb +6 -3
  35. data/lib/dnn/downloader.rb +3 -3
  36. data/lib/dnn/fashion-mnist.rb +89 -0
  37. data/lib/dnn/image.rb +57 -18
  38. data/lib/dnn/iris.rb +1 -3
  39. data/lib/dnn/mnist.rb +38 -34
  40. data/lib/dnn/version.rb +1 -1
  41. data/third_party/stb_image.h +16 -4
  42. data/third_party/stb_image_resize.h +2630 -0
  43. data/third_party/stb_image_write.h +4 -7
  44. metadata +12 -4
  45. data/lib/dnn/core/dataset.rb +0 -34
  46. data/lib/dnn/core/model.rb +0 -440
data/examples/mnist_lstm_example.rb CHANGED
@@ -7,7 +7,7 @@ include DNN::Layers
 include DNN::Activations
 include DNN::Optimizers
 include DNN::Losses
-Model = DNN::Model
+include DNN::Models
 MNIST = DNN::MNIST
 
 x_train, y_train = MNIST.load_train
@@ -22,7 +22,7 @@ x_test /= 255
 y_train = DNN::Utils.to_categorical(y_train, 10, Numo::SFloat)
 y_test = DNN::Utils.to_categorical(y_test, 10, Numo::SFloat)
 
-model = Model.new
+model = Sequential.new
 
 model << InputLayer.new([28, 28])
 
@@ -31,6 +31,6 @@ model << LSTM.new(200, return_sequences: false)
 
 model << Dense.new(10)
 
-model.compile(Adam.new, SoftmaxCrossEntropy.new)
+model.setup(Adam.new, SoftmaxCrossEntropy.new)
 
 model.train(x_train, y_train, 10, batch_size: 100, test: [x_test, y_test])
data/examples/xor_example.rb CHANGED
@@ -4,21 +4,20 @@ include DNN::Layers
 include DNN::Activations
 include DNN::Optimizers
 include DNN::Losses
-Model = DNN::Model
-Utils = DNN::Utils
+include DNN::Models
 
 x = Numo::SFloat[[0, 0], [1, 0], [0, 1], [1, 1]]
 y = Numo::SFloat[[0], [1], [1], [0]]
 
-model = Model.new
+model = Sequential.new
 
 model << InputLayer.new(2)
 model << Dense.new(16)
 model << ReLU.new
 model << Dense.new(1)
 
-model.compile(SGD.new, SigmoidCrossEntropy.new)
+model.setup(SGD.new, SigmoidCrossEntropy.new)
 
 model.train(x, y, 20000, batch_size: 4, verbose: false)
 
-p Utils.sigmoid(model.predict(x))
+p DNN::Utils.sigmoid(model.predict(x))
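
Taken together, the two example diffs above show the whole 0.10 → 0.12 migration for user code: DNN::Model is replaced by the DNN::Models module and its Sequential class, compile is renamed to setup, and the top-level Utils alias is dropped in favor of the fully qualified DNN::Utils. A minimal before/after sketch (the layer stack is illustrative):

    # ruby-dnn 0.10.x
    model = DNN::Model.new
    model << DNN::Layers::InputLayer.new(2)
    model << DNN::Layers::Dense.new(1)
    model.compile(DNN::Optimizers::SGD.new, DNN::Losses::SigmoidCrossEntropy.new)

    # ruby-dnn 0.12.x
    model = DNN::Models::Sequential.new
    model << DNN::Layers::InputLayer.new(2)
    model << DNN::Layers::Dense.new(1)
    model.setup(DNN::Optimizers::SGD.new, DNN::Losses::SigmoidCrossEntropy.new)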
data/ext/rb_stb_image/rb_stb_image.c CHANGED
@@ -3,9 +3,11 @@
 
 #define STB_IMAGE_IMPLEMENTATION
 #define STB_IMAGE_WRITE_IMPLEMENTATION
+#define STB_IMAGE_RESIZE_IMPLEMENTATION
 
 #include "../../third_party/stb_image.h"
 #include "../../third_party/stb_image_write.h"
+#include "../../third_party/stb_image_resize.h"
 
 // STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp);
 static VALUE rb_stbi_load(VALUE self, VALUE rb_filename, VALUE rb_req_comp) {
@@ -94,14 +96,115 @@ static VALUE rb_stbi_write_jpg(VALUE self, VALUE rb_filename, VALUE rb_w, VALUE
   return INT2FIX(result);
 }
 
+// STBIRDEF int stbir_resize_uint8( const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
+//                                  unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+//                                  int num_channels);
+static VALUE rb_stbir_resize_uint8(VALUE self, VALUE rb_input_pixels, VALUE rb_input_w, VALUE rb_input_h, VALUE rb_input_stride_in_bytes,
+                                   VALUE rb_output_w, VALUE rb_output_h, VALUE rb_output_stride_in_bytes, VALUE rb_num_channels) {
+  uint8_t* input_pixels = (uint8_t*)StringValuePtr(rb_input_pixels);
+  int32_t input_w = FIX2INT(rb_input_w);
+  int32_t input_h = FIX2INT(rb_input_h);
+  int32_t input_stride_in_bytes = FIX2INT(rb_input_stride_in_bytes);
+  int32_t output_w = FIX2INT(rb_output_w);
+  int32_t output_h = FIX2INT(rb_output_h);
+  int32_t output_stride_in_bytes = FIX2INT(rb_output_stride_in_bytes);
+  int32_t num_channels = FIX2INT(rb_num_channels);
+  uint8_t* output_pixels;
+  VALUE rb_output_pixels;
+  int32_t result;
+  const int32_t output_size = output_h * output_w * num_channels;
+
+  output_pixels = (uint8_t*)malloc(output_size);
+  result = stbir_resize_uint8(input_pixels, input_w, input_h, input_stride_in_bytes,
+                              output_pixels, output_w, output_h, output_stride_in_bytes, num_channels);
+  rb_output_pixels = rb_str_new((char*)output_pixels, output_size);
+  free(output_pixels);
+  return rb_ary_new3(2, rb_output_pixels, INT2FIX(result));
+}
+
+// STBIRDEF int stbir_resize_uint8_srgb(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
+//                                      unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+//                                      int num_channels, int alpha_channel, int flags);
+static VALUE rb_stbir_resize_uint8_srgb(VALUE self, VALUE rb_input_pixels, VALUE rb_input_w, VALUE rb_input_h, VALUE rb_input_stride_in_bytes,
+                                        VALUE rb_output_w, VALUE rb_output_h, VALUE rb_output_stride_in_bytes, VALUE rb_num_channels,
+                                        VALUE rb_alpha_channel, VALUE rb_flags) {
+  uint8_t* input_pixels = (uint8_t*)StringValuePtr(rb_input_pixels);
+  int32_t input_w = FIX2INT(rb_input_w);
+  int32_t input_h = FIX2INT(rb_input_h);
+  int32_t input_stride_in_bytes = FIX2INT(rb_input_stride_in_bytes);
+  int32_t output_w = FIX2INT(rb_output_w);
+  int32_t output_h = FIX2INT(rb_output_h);
+  int32_t output_stride_in_bytes = FIX2INT(rb_output_stride_in_bytes);
+  int32_t num_channels = FIX2INT(rb_num_channels);
+  int32_t alpha_channel = FIX2INT(rb_alpha_channel);
+  int32_t flags = FIX2INT(rb_flags);
+  uint8_t* output_pixels;
+  VALUE rb_output_pixels;
+  int32_t result;
+  const int32_t output_size = output_h * output_w * num_channels;
+
+  output_pixels = (uint8_t*)malloc(output_size);
+  result = stbir_resize_uint8_srgb(input_pixels, input_w, input_h, input_stride_in_bytes,
+                                   output_pixels, output_w, output_h, output_stride_in_bytes,
+                                   num_channels, alpha_channel, flags);
+  rb_output_pixels = rb_str_new((char*)output_pixels, output_size);
+  free(output_pixels);
+  return rb_ary_new3(2, rb_output_pixels, INT2FIX(result));
+}
+
+// STBIRDEF int stbir_resize_uint8_srgb_edgemode(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
+//                                               unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+//                                               int num_channels, int alpha_channel, int flags,
+//                                               stbir_edge edge_wrap_mode);
+static VALUE rb_stbir_resize_uint8_srgb_edgemode(VALUE self, VALUE rb_input_pixels, VALUE rb_input_w, VALUE rb_input_h, VALUE rb_input_stride_in_bytes,
+                                                 VALUE rb_output_w, VALUE rb_output_h, VALUE rb_output_stride_in_bytes, VALUE rb_num_channels,
+                                                 VALUE rb_alpha_channel, VALUE rb_flags, VALUE rb_edge_wrap_mode) {
+  uint8_t* input_pixels = (uint8_t*)StringValuePtr(rb_input_pixels);
+  int32_t input_w = FIX2INT(rb_input_w);
+  int32_t input_h = FIX2INT(rb_input_h);
+  int32_t input_stride_in_bytes = FIX2INT(rb_input_stride_in_bytes);
+  int32_t output_w = FIX2INT(rb_output_w);
+  int32_t output_h = FIX2INT(rb_output_h);
+  int32_t output_stride_in_bytes = FIX2INT(rb_output_stride_in_bytes);
+  int32_t num_channels = FIX2INT(rb_num_channels);
+  int32_t alpha_channel = FIX2INT(rb_alpha_channel);
+  int32_t flags = FIX2INT(rb_flags);
+  stbir_edge edge_wrap_mode = (stbir_edge)FIX2INT(rb_edge_wrap_mode);
+  uint8_t* output_pixels;
+  VALUE rb_output_pixels;
+  int32_t result;
+  const int32_t output_size = output_h * output_w * num_channels;
+
+  output_pixels = (uint8_t*)malloc(output_size);
+  result = stbir_resize_uint8_srgb_edgemode(input_pixels, input_w, input_h, input_stride_in_bytes,
+                                            output_pixels, output_w, output_h, output_stride_in_bytes,
+                                            num_channels, alpha_channel, flags, edge_wrap_mode);
+  rb_output_pixels = rb_str_new((char*)output_pixels, output_size);
+  free(output_pixels);
+  return rb_ary_new3(2, rb_output_pixels, INT2FIX(result));
+}
+
 void Init_rb_stb_image() {
   VALUE rb_dnn = rb_define_module("DNN");
   VALUE rb_stb = rb_define_module_under(rb_dnn, "Stb");
 
+  rb_define_const(rb_stb, "STBIR_ALPHA_CHANNEL_NONE", INT2FIX(STBIR_ALPHA_CHANNEL_NONE));
+
+  rb_define_const(rb_stb, "STBIR_FLAG_ALPHA_PREMULTIPLIED", INT2FIX(STBIR_FLAG_ALPHA_PREMULTIPLIED));
+  rb_define_const(rb_stb, "STBIR_FLAG_ALPHA_USES_COLORSPACE", INT2FIX(STBIR_FLAG_ALPHA_USES_COLORSPACE));
+
+  rb_define_const(rb_stb, "STBIR_EDGE_CLAMP", INT2FIX(STBIR_EDGE_CLAMP));
+  rb_define_const(rb_stb, "STBIR_EDGE_REFLECT", INT2FIX(STBIR_EDGE_REFLECT));
+  rb_define_const(rb_stb, "STBIR_EDGE_WRAP", INT2FIX(STBIR_EDGE_WRAP));
+  rb_define_const(rb_stb, "STBIR_EDGE_ZERO", INT2FIX(STBIR_EDGE_ZERO));
+
   rb_define_module_function(rb_stb, "stbi_load", rb_stbi_load, 2);
   rb_define_module_function(rb_stb, "stbi_write_png", rb_stbi_write_png, 6);
   rb_define_module_function(rb_stb, "stbi_write_bmp", rb_stbi_write_bmp, 5);
   rb_define_module_function(rb_stb, "stbi_write_tga", rb_stbi_write_tga, 5);
   rb_define_module_function(rb_stb, "stbi_write_hdr", rb_stbi_write_hdr, 5);
   rb_define_module_function(rb_stb, "stbi_write_jpg", rb_stbi_write_jpg, 6);
+  rb_define_module_function(rb_stb, "stbir_resize_uint8", rb_stbir_resize_uint8, 8);
+  rb_define_module_function(rb_stb, "stbir_resize_uint8_srgb", rb_stbir_resize_uint8_srgb, 10);
+  rb_define_module_function(rb_stb, "stbir_resize_uint8_srgb_edgemode", rb_stbir_resize_uint8_srgb_edgemode, 11);
 }
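
The new resize bindings can be exercised directly from Ruby. A minimal sketch based only on the signatures registered above; the buffer contents and 64 → 32 sizes are illustrative, and the stride arguments of 0 rely on stb_image_resize treating a zero stride as tightly packed rows:

    w, h, ch = 64, 64, 3
    src = "\x80" * (w * h * ch)  # dummy packed RGB buffer
    dst, ok = DNN::Stb.stbir_resize_uint8(src, w, h, 0, 32, 32, 0, ch)
    # dst is a packed 32 * 32 * 3 byte string; ok is nonzero on success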
data/lib/dnn.rb CHANGED
@@ -1,23 +1,23 @@
-if defined? Cumo
-  Xumo = Cumo
-else
-  require "numo/narray"
-  Xumo = Numo
-end
-
 module DNN
-  NMath = Xumo::NMath
+  if defined? ::Cumo
+    Xumo = ::Cumo
+  else
+    require "numo/narray"
+    Xumo = ::Numo
+  end
 end
 
 require_relative "dnn/version"
 require_relative "dnn/core/error"
-require_relative "dnn/core/model"
+require_relative "dnn/core/models"
 require_relative "dnn/core/param"
-require_relative "dnn/core/dataset"
+require_relative "dnn/core/link"
+require_relative "dnn/core/iterator"
 require_relative "dnn/core/initializers"
 require_relative "dnn/core/layers"
 require_relative "dnn/core/normalizations"
 require_relative "dnn/core/activations"
+require_relative "dnn/core/merge_layers"
 require_relative "dnn/core/losses"
 require_relative "dnn/core/regularizers"
 require_relative "dnn/core/cnn_layers"
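
The practical effect of this change is that the Xumo alias no longer leaks into the top-level namespace (and the old NMath shorthand is gone entirely); both are now reached through DNN, which is why the activation layers further down switch to Xumo::NMath. A small sketch, assuming numo-narray is installed and Cumo is not loaded:

    require "dnn"

    p DNN::Xumo                           # => Numo (Cumo when Cumo is defined first)
    x = DNN::Xumo::SFloat[-1.0, 0.0, 1.0]
    p DNN::Xumo::NMath.exp(x)             # NMath is now always reached through Xumo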
data/lib/dnn/cifar10.rb CHANGED
@@ -16,7 +16,7 @@ module DNN
       cifar10_binary_file_name = __dir__ + "/downloads/" + URL_CIFAR10.match(%r`.+/(.+)`)[1]
       begin
         Zlib::GzipReader.open(cifar10_binary_file_name) do |gz|
-          Archive::Tar::Minitar::unpack(gz, __dir__ + "/downloads")
+          Archive::Tar::Minitar.unpack(gz, __dir__ + "/downloads")
         end
       ensure
         File.unlink(cifar10_binary_file_name)
data/lib/dnn/cifar100.rb CHANGED
@@ -16,7 +16,7 @@ module DNN
       cifar100_binary_file_name = __dir__ + "/downloads/" + URL_CIFAR100.match(%r`.+/(.+)`)[1]
       begin
         Zlib::GzipReader.open(cifar100_binary_file_name) do |gz|
-          Archive::Tar::Minitar::unpack(gz, __dir__ + "/downloads")
+          Archive::Tar::Minitar.unpack(gz, __dir__ + "/downloads")
         end
       ensure
         File.unlink(cifar100_binary_file_name)
data/lib/dnn/core/activations.rb CHANGED
@@ -3,9 +3,9 @@ module DNN
 
     class Sigmoid < Layers::Layer
       def forward(x)
-        @y = 1 / (1 + NMath.exp(-x))
+        @y = 1 / (1 + Xumo::NMath.exp(-x))
       end
-
+
       def backward(dy)
         dy * (1 - @y) * @y
       end
@@ -14,11 +14,11 @@ module DNN
 
     class Tanh < Layers::Layer
       def forward(x)
-        @y = NMath.tanh(x)
+        @y = Xumo::NMath.tanh(x)
       end
-
+
       def backward(dy)
-        dy * (1 - @y**2)
+        dy * (1 - @y ** 2)
       end
     end
 
@@ -30,7 +30,7 @@ module DNN
       end
 
       def backward(dy)
-        dy * (1 / (1 + @x.abs)**2)
+        dy * (1 / (1 + @x.abs) ** 2)
       end
     end
 
@@ -38,11 +38,11 @@ module DNN
     class Softplus < Layers::Layer
       def forward(x)
         @x = x
-        NMath.log(1 + NMath.exp(x))
+        Xumo::NMath.log(1 + Xumo::NMath.exp(x))
       end
 
       def backward(dy)
-        dy * (1 / (1 + NMath.exp(-@x)))
+        dy * (1 / (1 + Xumo::NMath.exp(-@x)))
       end
     end
 
@@ -50,22 +50,21 @@ module DNN
     class Swish < Layers::Layer
       def forward(x)
         @x = x
-        @y = x * (1 / (1 + NMath.exp(-x)))
+        @y = x * (1 / (1 + Xumo::NMath.exp(-x)))
       end
-
+
       def backward(dy)
-        dy * (@y + (1 / (1 + NMath.exp(-@x))) * (1 - @y))
+        dy * (@y + (1 / (1 + Xumo::NMath.exp(-@x))) * (1 - @y))
       end
     end
-
-
+
+
     class ReLU < Layers::Layer
       def forward(x)
         @x = x
-        x[x < 0] = 0
-        x
+        Xumo::SFloat.maximum(0, x)
       end
-
+
       def backward(dy)
         dx = Xumo::SFloat.ones(@x.shape)
         dx[@x <= 0] = 0
@@ -75,7 +74,6 @@ module DNN
 
 
     class LeakyReLU < Layers::Layer
-      # @return [Float] Return the alpha value.
       attr_reader :alpha
 
       def self.from_hash(hash)
@@ -84,6 +82,7 @@ module DNN
       end
       # @param [Float] alpha The slope when the output value is negative.
       def initialize(alpha = 0.3)
+        super()
         @alpha = alpha
       end
       def forward(x)
@@ -101,13 +100,12 @@ module DNN
       end
 
       def to_hash
-        {class: self.class.name, alpha: alpha}
+        super(alpha: @alpha)
       end
     end
 
 
     class ELU < Layers::Layer
-      # @return [Float] Return the alpha value.
       attr_reader :alpha
 
       def self.from_hash(hash)
@@ -116,6 +114,7 @@ module DNN
       end
       # @param [Float] alpha The slope when the output value is negative.
       def initialize(alpha = 1.0)
+        super()
         @alpha = alpha
       end
 
@@ -126,7 +125,7 @@ module DNN
         x1 *= x
         x2 = Xumo::SFloat.zeros(x.shape)
         x2[x < 0] = 1
-        x2 *= @alpha * NMath.exp(x) - @alpha
+        x2 *= @alpha * Xumo::NMath.exp(x) - @alpha
         x1 + x2
       end
 
@@ -135,12 +134,12 @@ module DNN
         dx[@x < 0] = 0
         dx2 = Xumo::SFloat.zeros(@x.shape)
         dx2[@x < 0] = 1
-        dx2 *= @alpha * NMath.exp(@x)
+        dx2 *= @alpha * Xumo::NMath.exp(@x)
         dy * (dx + dx2)
       end
 
       def to_hash
-        {class: self.class.name, alpha: @alpha}
+        super(alpha: @alpha)
       end
     end
 
data/lib/dnn/core/cnn_layers.rb CHANGED
@@ -1,7 +1,8 @@
 module DNN
   module Layers
+
     # This module is used for convolution.
-    module Conv2D_Utils
+    module Conv2DUtils
       private
 
       # img[bsize, out_h, out_w, ch] to col[bsize * out_h * out_w, fil_h * fil_w * ch]
@@ -59,53 +60,53 @@ module DNN
         [out_h, out_w]
       end
 
-      def calc_deconv2d_out_size(prev_h, prev_w, fil_h, fil_w, pad_h, pad_w, strides)
+      def calc_conv2d_transpose_out_size(prev_h, prev_w, fil_h, fil_w, pad_h, pad_w, strides)
         out_h = (prev_h - 1) * strides[0] + fil_h - pad_h
         out_w = (prev_w - 1) * strides[1] + fil_w - pad_w
         [out_h, out_w]
       end
 
-      def calc_padding_size(prev_h, prev_w, out_h, out_w, strides)
-        pad_h = (prev_h.to_f / strides[0]).ceil - out_h
-        pad_w = (prev_w.to_f / strides[1]).ceil - out_w
+      def calc_conv2d_padding_size(prev_h, prev_w, fil_h, fil_w, strides)
+        out_h = prev_h / strides[0]
+        out_w = prev_w / strides[1]
+        pad_h = out_h * strides[0] - prev_h + fil_h - strides[0]
+        pad_w = out_w * strides[1] - prev_w + fil_w - strides[1]
         [pad_h, pad_w]
       end
 
-      def calc_padding_size(prev_h, prev_w, out_h, out_w, strides)
-        pad_h = ((prev_h.to_f / strides[0]).ceil - out_h) * strides[0]
-        pad_w = ((prev_w.to_f / strides[1]).ceil - out_w) * strides[1]
+      def calc_conv2d_transpose_padding_size(prev_h, prev_w, fil_h, fil_w, strides)
+        out_h = prev_h * strides[0]
+        out_w = prev_w * strides[1]
+        pad_h = (prev_h - 1) * strides[0] + fil_h - out_h
+        pad_w = (prev_w - 1) * strides[1] + fil_w - out_w
         [pad_h, pad_w]
       end
     end
-
-
+
+
     class Conv2D < Connection
-      include Conv2D_Utils
+      include Conv2DUtils
 
-      # @return [Integer] number of filters.
       attr_reader :num_filters
-      # @return [Array] Return filter size. filter size is of the form [height, width].
       attr_reader :filter_size
-      # @return [Array] Return stride length. stride length is of the form [height, width].
       attr_reader :strides
-      # @return [Array | Bool] Return padding size or whether to padding.
       attr_reader :padding
 
       def self.from_hash(hash)
-        Conv2D.new(hash[:num_filters], hash[:filter_size],
-                   weight_initializer: Utils.from_hash(hash[:weight_initializer]),
-                   bias_initializer: Utils.from_hash(hash[:bias_initializer]),
-                   weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
-                   bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
-                   use_bias: hash[:use_bias],
-                   strides: hash[:strides],
-                   padding: hash[:padding])
-      end
-
+        self.new(hash[:num_filters], hash[:filter_size],
+                 weight_initializer: Utils.hash_to_obj(hash[:weight_initializer]),
+                 bias_initializer: Utils.hash_to_obj(hash[:bias_initializer]),
+                 weight_regularizer: Utils.hash_to_obj(hash[:weight_regularizer]),
+                 bias_regularizer: Utils.hash_to_obj(hash[:bias_regularizer]),
+                 use_bias: hash[:use_bias],
+                 strides: hash[:strides],
+                 padding: hash[:padding])
+      end
+
       # @param [Integer] num_filters Number of filters.
       # @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
       # @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
-      # @param [Array | Bool] padding Padding size or whether to padding. Padding size is of the form [height, width].
+      # @param [Array | Boolean] padding Padding size or whether to padding. Padding size is of the form [height, width].
       def initialize(num_filters, filter_size,
                      weight_initializer: Initializers::RandomNormal.new,
                      bias_initializer: Initializers::Zeros.new,
@@ -127,17 +128,16 @@ module DNN
           raise DNN_ShapeError.new("Input shape is #{input_shape}. But input shape must be 3 dimensional.")
         end
         super
-        prev_h, prev_w, num_prev_filter = *input_shape
-        @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * num_prev_filter, @num_filters)
+        prev_h, prev_w, num_prev_filters = *input_shape
+        @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * num_prev_filters, @num_filters)
         @bias.data = Xumo::SFloat.new(@num_filters) if @bias
         init_weight_and_bias
-        if @padding == true
-          out_h, out_w = calc_conv2d_out_size(prev_h, prev_w, *@filter_size, 0, 0, @strides)
-          @pad_size = calc_padding_size(prev_h, prev_w, out_h, out_w, @strides)
+        @pad_size = if @padding == true
+          calc_conv2d_padding_size(prev_h, prev_w, *@filter_size, @strides)
         elsif @padding.is_a?(Array)
-          @pad_size = @padding
+          @padding
         else
-          @pad_size = [0, 0]
+          [0, 0]
         end
         @out_size = calc_conv2d_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
       end
@@ -168,48 +168,44 @@ module DNN
 
       # @return [Numo::SFloat] Convert weight to filter and return.
       def filters
-        num_prev_filter = @input_shape[2]
-        @weight.data.reshape(*@filter_size, num_prev_filter, @num_filters)
+        num_prev_filters = @input_shape[2]
+        @weight.data.reshape(*@filter_size, num_prev_filters, @num_filters)
       end
 
       # @param [Numo::SFloat] filters Convert weight to filters and set.
       def filters=(filters)
-        num_prev_filter = @input_shape[2]
-        @weight.data = filters.reshape(@filter_size.reduce(:*) * num_prev_filter, @num_filters)
+        num_prev_filters = @input_shape[2]
+        @weight.data = filters.reshape(@filter_size.reduce(:*) * num_prev_filters, @num_filters)
       end
 
       def to_hash
-        super({num_filters: @num_filters,
-               filter_size: @filter_size,
-               strides: @strides,
-               padding: @padding})
+        super(num_filters: @num_filters,
+              filter_size: @filter_size,
+              strides: @strides,
+              padding: @padding)
       end
     end
 
 
-    class Conv2D_Transpose < Connection
-      include Conv2D_Utils
+    class Conv2DTranspose < Connection
+      include Conv2DUtils
 
-      # @return [Integer] number of filters.
       attr_reader :num_filters
-      # @return [Array] Return filter size. filter size is of the form [height, width].
       attr_reader :filter_size
-      # @return [Array] Return stride length. stride length is of the form [height, width].
       attr_reader :strides
-      # @return [Array] Return padding size.
       attr_reader :padding
 
       def self.from_hash(hash)
-        Conv2D_Transpose.new(hash[:num_filters], hash[:filter_size],
-                             weight_initializer: Utils.from_hash(hash[:weight_initializer]),
-                             bias_initializer: Utils.from_hash(hash[:bias_initializer]),
-                             weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
-                             bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
-                             use_bias: hash[:use_bias],
-                             strides: hash[:strides],
-                             padding: hash[:padding])
-      end
-
+        self.new(hash[:num_filters], hash[:filter_size],
+                 weight_initializer: Utils.hash_to_obj(hash[:weight_initializer]),
+                 bias_initializer: Utils.hash_to_obj(hash[:bias_initializer]),
+                 weight_regularizer: Utils.hash_to_obj(hash[:weight_regularizer]),
+                 bias_regularizer: Utils.hash_to_obj(hash[:bias_regularizer]),
+                 use_bias: hash[:use_bias],
+                 strides: hash[:strides],
+                 padding: hash[:padding])
+      end
+
       # @param [Integer] num_filters Number of filters.
       # @param [Array | Integer] filter_size Filter size. Filter size is of the form [height, width].
       # @param [Array | Integer] strides Stride length. Stride length is of the form [height, width].
@@ -235,19 +231,18 @@ module DNN
           raise DNN_ShapeError.new("Input shape is #{input_shape}. But input shape must be 3 dimensional.")
         end
         super
-        prev_h, prev_w, num_prev_filter = *input_shape
-        @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * @num_filters, num_prev_filter)
+        prev_h, prev_w, num_prev_filters = *input_shape
+        @weight.data = Xumo::SFloat.new(@filter_size.reduce(:*) * @num_filters, num_prev_filters)
        @bias.data = Xumo::SFloat.new(@num_filters) if @bias
         init_weight_and_bias
-        if @padding == true
-          out_h, out_w = calc_deconv2d_out_size(prev_h, prev_w, *@filter_size, 0, 0, @strides)
-          @pad_size = calc_padding_size(out_h, out_w, prev_h, prev_w, @strides)
+        @pad_size = if @padding == true
+          calc_conv2d_transpose_padding_size(prev_h, prev_w, *@filter_size, @strides)
         elsif @padding.is_a?(Array)
-          @pad_size = @padding
+          @padding
         else
-          @pad_size = [0, 0]
+          [0, 0]
         end
-        @out_size = calc_deconv2d_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
+        @out_size = calc_conv2d_transpose_out_size(prev_h, prev_w, *@filter_size, *@pad_size, @strides)
       end
 
       def forward(x)
@@ -278,44 +273,41 @@ module DNN
 
       # @return [Numo::SFloat] Convert weight to filter and return.
       def filters
-        num_prev_filter = @input_shape[2]
-        @weight.data.reshape(*@filter_size, @num_filters, num_prev_filter)
+        num_prev_filters = @input_shape[2]
+        @weight.data.reshape(*@filter_size, @num_filters, num_prev_filters)
       end
 
       # @param [Numo::SFloat] filters Convert weight to filters and set.
       def filters=(filters)
-        num_prev_filter = @input_shape[2]
-        @weight.data = filters.reshape(@filter_size.reduce(:*) * @num_filters, num_prev_filter)
+        num_prev_filters = @input_shape[2]
+        @weight.data = filters.reshape(@filter_size.reduce(:*) * @num_filters, num_prev_filters)
       end
 
       def to_hash
-        super({num_filters: @num_filters,
-               filter_size: @filter_size,
-               strides: @strides,
-               padding: @padding})
+        super(num_filters: @num_filters,
+              filter_size: @filter_size,
+              strides: @strides,
+              padding: @padding)
       end
     end
 
-
+
     # Super class of all pooling2D class.
     class Pool2D < Layer
-      include Conv2D_Utils
+      include Conv2DUtils
 
-      # @return [Array] Return pooling size. Pooling size is of the form [height, width].
       attr_reader :pool_size
-      # @return [Array] Return stride length. Stride length is of the form [height, width].
       attr_reader :strides
-      # @return [Array | Bool] Return padding size or whether to padding.
       attr_reader :padding
 
-      def self.from_hash(pool2d_class, hash)
-        pool2d_class.new(hash[:pool_size], strides: hash[:strides], padding: hash[:padding])
+      def self.from_hash(hash)
+        self.new(hash[:pool_size], strides: hash[:strides], padding: hash[:padding])
       end
 
       # @param [Array | Integer] pool_size Pooling size. Pooling size is of the form [height, width].
-      # @param [Array | Integer | NilClass] strides stride length. Stride length is of the form [height, width].
-      #                                     If you set nil, treat pool_size as strides.
-      # @param [Array | Bool] padding Padding size or whether to padding. Padding size is of the form [height, width].
+      # @param [Array | Integer | NilClass] strides Stride length. Stride length is of the form [height, width].
+      #                                     If you set nil, treat pool_size as strides.
+      # @param [Array | Boolean] padding Padding size or whether to padding. Padding size is of the form [height, width].
       def initialize(pool_size, strides: nil, padding: false)
         super()
         @pool_size = pool_size.is_a?(Integer) ? [pool_size, pool_size] : pool_size
@@ -334,13 +326,12 @@ module DNN
         super
         prev_h, prev_w = input_shape[0..1]
         @num_channel = input_shape[2]
-        if @padding == true
-          out_h, out_w = calc_conv2d_out_size(prev_h, prev_w, *@pool_size, 0, 0, @strides)
-          @pad_size = calc_padding_size(prev_h, prev_w, out_h, out_w, @strides)
+        @pad_size = if @padding == true
+          calc_conv2d_padding_size(prev_h, prev_w, *@pool_size, @strides)
         elsif @padding.is_a?(Array)
-          @pad_size = @padding
+          @padding
        else
-          @pad_size = [0, 0]
+          [0, 0]
         end
         @out_size = calc_conv2d_out_size(prev_h, prev_w, *@pool_size, *@pad_size, @strides)
       end
@@ -350,18 +341,14 @@ module DNN
       end
 
       def to_hash
-        super({pool_size: @pool_size,
-               strides: @strides,
-               padding: @padding})
+        super(pool_size: @pool_size,
+              strides: @strides,
+              padding: @padding)
       end
     end
-
-
-    class MaxPool2D < Pool2D
-      def self.from_hash(hash)
-        Pool2D.from_hash(self, hash)
-      end
 
+
+    class MaxPool2D < Pool2D
       def forward(x)
         x = zero_padding(x, @pad_size) if @padding
         @x_shape = x.shape
@@ -383,10 +370,6 @@ module DNN
 
 
     class AvgPool2D < Pool2D
-      def self.from_hash(hash)
-        Pool2D.from_hash(self, hash)
-      end
-
       def forward(x)
         x = zero_padding(x, @pad_size) if @padding
         @x_shape = x.shape
@@ -411,21 +394,20 @@ module DNN
 
 
     class UnPool2D < Layer
-      include Conv2D_Utils
-
-      # @return [Array] Return unpooling size. unpooling size is of the form [height, width].
+      include Conv2DUtils
+
       attr_reader :unpool_size
 
-      # @param [Array or Integer] unpool_size Unpooling size. unpooling size is of the form [height, width].
+      def self.from_hash(hash)
+        self.new(hash[:unpool_size])
+      end
+
+      # @param [Array | Integer] unpool_size Unpooling size. unpooling size is of the form [height, width].
       def initialize(unpool_size)
         super()
         @unpool_size = unpool_size.is_a?(Integer) ? [unpool_size, unpool_size] : unpool_size
       end
 
-      def self.from_hash(hash)
-        UnPool2D.new(hash[:unpool_size])
-      end
-
       def build(input_shape)
         unless input_shape.length == 3
           raise DNN_ShapeError.new("Input shape is #{input_shape}. But input shape must be 3 dimensional.")
@@ -453,7 +435,7 @@ module DNN
 
       def backward(dy)
         in_size = input_shape[0..1]
-        col = im2col(dy, *input_shape[0..1], *@unpool_size, @unpool_size)
+        col = im2col(dy, *in_size, *@unpool_size, @unpool_size)
         col = col.reshape(dy.shape[0] * in_size.reduce(:*), @unpool_size.reduce(:*), dy.shape[3]).transpose(0, 2, 1)
                  .reshape(dy.shape[0] * in_size.reduce(:*) * dy.shape[3], @unpool_size.reduce(:*))
         col.sum(1).reshape(dy.shape[0], *in_size, dy.shape[3])
@@ -464,8 +446,9 @@ module DNN
       end
 
       def to_hash
-        super({unpool_size: @unpool_size})
+        super(unpool_size: @unpool_size)
       end
     end
+
   end
 end
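
As a sanity check on the rewritten "same"-padding arithmetic (calc_conv2d_padding_size above), restated here as a standalone helper rather than the library API:

    # pad = out * stride - prev + fil - stride, with out = prev / stride (integer division)
    def same_pad(prev, fil, stride)
      out = prev / stride
      out * stride - prev + fil - stride
    end

    # Combined with the usual output rule out = (prev + pad - fil) / stride + 1:
    p same_pad(28, 3, 1)  # => 2, so out = (28 + 2 - 3) / 1 + 1 = 28 (size preserved)
    p same_pad(28, 4, 2)  # => 2, so out = (28 + 2 - 4) / 2 + 1 = 14 (halved)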