ruby-dnn 0.7.3 → 0.8.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5c2ef751fa76ea2918c5f706e1828eba514385b831dc042fa561a08d38da977a
4
- data.tar.gz: b906f6a9c63620378b284a2be3ca3bb8e007534b945493b9f8323f77c012cd0f
3
+ metadata.gz: 308a84624d71cb5b27d4b72f1ea69880dbfebf226ac9a5b44cf5775cc7e22703
4
+ data.tar.gz: baa0b39dcca002f79eb660129cf0042dfda7d4768e9b2067d7135ce877047770
5
5
  SHA512:
6
- metadata.gz: 94e7af50f4cdc556b252e3afed0703aa7d9e9d519f2e1e24253a1a27b388fe9d00444edfc271343365dc39e8a02d326df27e923216834509cb639986576664de
7
- data.tar.gz: 5fc8acfeed67fff5b41247ab6c3f03b68f96f48bdda877951e23d1fb12f2cc6c113a6102b8b531cd41d1c596d2ab6fa96a243607d38182d804ab1f5396e5439c
6
+ metadata.gz: a98278bb5cfd211bcf1231a4f5b0f13fa9d7ee4684d0334a6d2052d77b405d1873a65bd11d1f2df2f1f29926de484d4d0a7cbd8ef8d88cb4735923904eeb91fd
7
+ data.tar.gz: 06d52f9698bf600e0bb63c7cc703697d3bb1ec9827871e6dd595a7fdd17cf3e63980d8abe376c9418db9412309b5aed39187c8b988388fd2bbaf09afc4702aa7
@@ -2,7 +2,7 @@
2
2
  This is the API reference for ruby-dnn. It covers only the classes and methods required to use the API.
3
3
  If you need further program details, please refer to the source code.
4
4
 
5
- Last updated for version: 0.7.0
5
+ Last updated for version: 0.8.0
6
6
 
7
7
  # module DNN
8
8
  The module that forms the ruby-dnn namespace.
@@ -299,7 +299,7 @@ Hash
299
299
  ## def initialize(dim_or_shape)
300
300
  Constructor
301
301
  ### arguments
302
- * Integer|Array dim_or_shape
302
+ * Integer | Array dim_or_shape
303
303
  Specifies the dimension or shape of the input layer. An Integer argument is treated as a dimension, and an Array argument is treated as a shape.
304
304
 
305
305
 
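As a usage illustration of the Integer/Array distinction documented above, here is a minimal sketch, assuming the documented constructor belongs to DNN::Layers::InputLayer as elsewhere in the library:

```ruby
require "dnn"

# Integer argument: interpreted as a dimension (a flat input of 784 values).
flat_input  = DNN::Layers::InputLayer.new(784)

# Array argument: interpreted as a shape (for example a 28x28 single-channel image).
image_input = DNN::Layers::InputLayer.new([28, 28, 1])
```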
@@ -600,15 +600,11 @@ Numo::SFloat y
600
600
 
601
601
  ## 【Instance methods】
602
602
 
603
- ## def initialize(momentum: 0.9, running_mean: nil, running_var: nil)
603
+ ## def initialize(momentum: 0.9)
604
604
  Constructor.
605
605
  ### arguments
606
606
  * Float momentum: 0.9
607
607
  The exponential moving average coefficient used to compute the mean and variance used at inference time.
608
- * Float running_mean: nil
609
- The mean used at inference time.
610
- * Float running_var: nil
611
- The variance used at inference time.
612
608
 
613
609
 
614
610
  # module Activations
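The BatchNormalization constructor narrowing above means the running statistics can no longer be injected; they are created when the layer is built. A minimal construction sketch, assuming the documented layer is DNN::Layers::BatchNormalization:

```ruby
require "dnn"

# 0.7.x also accepted running_mean: / running_var: keywords; 0.8.0 accepts only momentum.
bn = DNN::Layers::BatchNormalization.new(momentum: 0.99)
```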
@@ -745,11 +741,11 @@ Float max
745
741
  The maximum value of the uniform distribution.
746
742
 
747
743
  ## 【Instance methods】
748
- ## def initialize(min = -0.25, max = 0.25)
744
+ ## def initialize(min = -0.05, max = 0.05)
749
745
  ### arguments
750
- * Float min = -0.25
746
+ * Float min = -0.05
751
747
  The minimum value of the uniform distribution.
752
- * Float max = 0.25
748
+ * Float max = 0.05
753
749
  The maximum value of the uniform distribution.
754
750
 
755
751
 
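For the narrower defaults above, a minimal sketch, assuming the documented initializer is DNN::Initializers::RandomUniform:

```ruby
require "dnn"

narrow = DNN::Initializers::RandomUniform.new              # 0.8.0 defaults: min = -0.05, max = 0.05
wide   = DNN::Initializers::RandomUniform.new(-0.25, 0.25) # the old 0.7.x defaults, passed explicitly
```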
@@ -782,8 +778,8 @@ Float learning_rate
782
778
  * Float learning_rate
783
779
  The learning rate of the Optimizer.
784
780
 
785
- ## abstract def update(layer)
786
- Updates the layer's params based on the layer's grads. Every class that inherits from Optimizer must implement this method.
781
+ ## abstract def update(params)
782
+ Updates the data of every learning parameter held in params, based on each parameter's grad. Every class that inherits from Optimizer must implement this method.
787
783
  ### arguments
788
784
  * Layer layer
789
785
  The layer whose params are updated.
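To make the new contract concrete, here is a minimal sketch of a custom optimizer against the 0.8.0 interface; MySGD is a hypothetical class, and the select-by-LearningParam pattern mirrors the optimizers.rb changes further down in this diff:

```ruby
require "dnn"

module DNN
  module Optimizers
    # Hypothetical example class; not part of the library.
    class MySGD < Optimizer
      def initialize(learning_rate = 0.01)
        super(learning_rate)
      end

      # params is a layer's params hash; only LearningParam entries carry gradients.
      def update(params)
        params.select { |_key, param| param.is_a?(LearningParam) }.each_value do |param|
          param.data -= @learning_rate * param.grad
        end
      end
    end
  end
end
```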
data/lib/dnn.rb CHANGED
@@ -12,6 +12,7 @@ module DNN; end
12
12
  require_relative "dnn/version"
13
13
  require_relative "dnn/core/error"
14
14
  require_relative "dnn/core/model"
15
+ require_relative "dnn/core/learning_param"
15
16
  require_relative "dnn/core/initializers"
16
17
  require_relative "dnn/core/layers"
17
18
  require_relative "dnn/core/activations"
@@ -1,6 +1,6 @@
1
1
  module DNN
2
2
  module Layers
3
- #This module is used for convolution.
3
+ # This module is used for convolution.
4
4
  module Conv2DModule
5
5
  private
6
6
 
@@ -107,20 +107,20 @@ module DNN
107
107
  x = padding(x, @pad) if @padding
108
108
  @x_shape = x.shape
109
109
  @col = im2col(x, *@out_size, *@filter_size, @strides)
110
- out = @col.dot(@params[:weight]) + @params[:bias]
110
+ out = @col.dot(@weight.data) + @bias.data
111
111
  out.reshape(x.shape[0], *@out_size, out.shape[3])
112
112
  end
113
113
 
114
114
  def backward(dout)
115
115
  dout = dout.reshape(dout.shape[0..2].reduce(:*), dout.shape[3])
116
- @grads[:weight] = @col.transpose.dot(dout)
116
+ @weight.grad = @col.transpose.dot(dout)
117
117
  if @l1_lambda > 0
118
- @grads[:weight] += dlasso
118
+ @weight.grad += dlasso
119
119
  elsif @l2_lambda > 0
120
- @grads[:weight] += dridge
120
+ @weight.grad += dridge
121
121
  end
122
- @grads[:bias] = dout.sum(0)
123
- dcol = dout.dot(@params[:weight].transpose)
122
+ @bias.grad = dout.sum(0)
123
+ dcol = dout.dot(@weight.data.transpose)
124
124
  dx = col2im(dcol, @x_shape, *@out_size, *@filter_size, @strides)
125
125
  @padding ? back_padding(dx, @pad) : dx
126
126
  end
@@ -140,13 +140,14 @@ module DNN
140
140
 
141
141
  def init_params
142
142
  num_prev_filter = prev_layer.shape[2]
143
- @params[:weight] = Xumo::SFloat.new(num_prev_filter * @filter_size.reduce(:*), @num_filters)
144
- @params[:bias] = Xumo::SFloat.new(@num_filters)
143
+ @weight.data = Xumo::SFloat.new(num_prev_filter * @filter_size.reduce(:*), @num_filters)
144
+ @bias.data = Xumo::SFloat.new(@num_filters)
145
145
  super()
146
146
  end
147
147
  end
148
148
 
149
- #Super class of all pooling2D class.
149
+
150
+ # Super class of all pooling2D class.
150
151
  class Pool2D < Layer
151
152
  include Conv2DModule
152
153
 
@@ -185,8 +186,7 @@ module DNN
185
186
  end
186
187
 
187
188
  def to_hash
188
- super({pool_width: @pool_width,
189
- pool_height: @pool_height,
189
+ super({pool_size: @pool_size,
190
190
  strides: @strides,
191
191
  padding: @padding})
192
192
  end
@@ -2,9 +2,8 @@ module DNN
2
2
  module Initializers
3
3
 
4
4
  class Initializer
5
- def init_param(layer, param_key, param)
6
- layer.params[param_key] = param
7
- end
5
+ # Classes that inherit from this class must implement this method.
6
+ # def init_param(param) end
8
7
 
9
8
  def to_hash(merge_hash = nil)
10
9
  hash = {class: self.class.name}
@@ -15,8 +14,8 @@ module DNN
15
14
 
16
15
 
17
16
  class Zeros < Initializer
18
- def init_param(layer, param_key)
19
- super(layer, param_key, layer.params[param_key].fill(0))
17
+ def init_param(param)
18
+ param.data = param.data.fill(0)
20
19
  end
21
20
  end
22
21
 
@@ -34,8 +33,8 @@ module DNN
34
33
  @std = std
35
34
  end
36
35
 
37
- def init_param(layer, param_key)
38
- super(layer, param_key, layer.params[param_key].rand_norm(@mean, @std))
36
+ def init_param(param)
37
+ param.data = param.data.rand_norm(@mean, @std)
39
38
  end
40
39
 
41
40
  def to_hash
@@ -57,8 +56,8 @@ module DNN
57
56
  @max = max
58
57
  end
59
58
 
60
- def init_param(layer, param_key)
61
- super(layer, param_key, layer.params[param_key].rand(@min, @max))
59
+ def init_param(param)
60
+ param.data = param.data.rand(@min, @max)
62
61
  end
63
62
 
64
63
  def to_hash
@@ -68,17 +67,17 @@ module DNN
68
67
 
69
68
 
70
69
  class Xavier < Initializer
71
- def init_param(layer, param_key)
72
- num_prev_nodes = layer.prev_layer.shape.reduce(:*)
73
- super(layer, param_key, layer.params[param_key].rand_norm / Math.sqrt(num_prev_nodes))
70
+ def init_param(param)
71
+ num_prev_nodes = param.layer.prev_layer.shape.reduce(:*)
72
+ param.data = param.data.rand_norm / Math.sqrt(num_prev_nodes)
74
73
  end
75
74
  end
76
75
 
77
76
 
78
77
  class He < Initializer
79
- def init_param(layer, param_key)
80
- num_prev_nodes = layer.prev_layer.shape.reduce(:*)
81
- super(layer, param_key, layer.params[param_key].rand_norm / Math.sqrt(num_prev_nodes) * Math.sqrt(2))
78
+ def init_param(param)
79
+ num_prev_nodes = param.layer.prev_layer.shape.reduce(:*)
80
+ param.data = param.data.rand_norm / Math.sqrt(num_prev_nodes) * Math.sqrt(2)
82
81
  end
83
82
  end
84
83
 
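Under the reworked interface above, init_param now receives the LearningParam itself and assigns its data. A minimal sketch of a custom initializer following that pattern (Constant is a hypothetical example class, shaped like Zeros and RandomNormal above):

```ruby
require "dnn"

module DNN
  module Initializers
    # Hypothetical example class; not part of the library.
    class Constant < Initializer
      def initialize(value)
        @value = value
      end

      # param is a DNN::LearningParam whose data array was already allocated by the layer.
      def init_param(param)
        param.data = param.data.fill(@value)
      end

      def to_hash
        super({value: @value})
      end
    end
  end
end
```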
@@ -49,12 +49,10 @@ module DNN
49
49
  class HasParamLayer < Layer
50
50
  attr_accessor :trainable # Setting false prevents learning of parameters.
51
51
  attr_reader :params # The parameters of the layer.
52
- attr_reader :grads # Differential value of parameter of layer.
53
52
 
54
53
  def initialize
55
- super
54
+ super()
56
55
  @params = {}
57
- @grads = {}
58
56
  @trainable = true
59
57
  end
60
58
 
@@ -68,12 +66,13 @@ module DNN
68
66
 
69
67
  # Update the parameters.
70
68
  def update
71
- @model.optimizer.update(self) if @trainable
69
+ @model.optimizer.update(@params) if @trainable
72
70
  end
73
71
 
74
72
  private
75
73
 
76
74
  # Initialize of the parameters.
75
+ # Classes that inherit from this class must implement this method.
77
76
  def init_params() end
78
77
  end
79
78
 
@@ -120,11 +119,13 @@ module DNN
120
119
  @bias_initializer = (bias_initializer || Zeros.new)
121
120
  @l1_lambda = l1_lambda
122
121
  @l2_lambda = l2_lambda
122
+ @params[:weight] = @weight = LearningParam.new(self)
123
+ @params[:bias] = @bias = LearningParam.new(self)
123
124
  end
124
125
 
125
126
  def lasso
126
127
  if @l1_lambda > 0
127
- @l1_lambda * @params[:weight].abs.sum
128
+ @l1_lambda * @weight.data.abs.sum
128
129
  else
129
130
  0
130
131
  end
@@ -132,20 +133,20 @@ module DNN
132
133
 
133
134
  def ridge
134
135
  if @l2_lambda > 0
135
- 0.5 * @l2_lambda * (@params[:weight]**2).sum
136
+ 0.5 * @l2_lambda * (@weight.data**2).sum
136
137
  else
137
138
  0
138
139
  end
139
140
  end
140
141
 
141
142
  def dlasso
142
- dlasso = Xumo::SFloat.ones(*@params[:weight].shape)
143
- dlasso[@params[:weight] < 0] = -1
143
+ dlasso = Xumo::SFloat.ones(*@weight.data.shape)
144
+ dlasso[@weight.data < 0] = -1
144
145
  @l1_lambda * dlasso
145
146
  end
146
147
 
147
148
  def dridge
148
- @l2_lambda * @params[:weight]
149
+ @l2_lambda * @weight.data
149
150
  end
150
151
 
151
152
  def to_hash(merge_hash)
@@ -158,8 +159,8 @@ module DNN
158
159
  private
159
160
 
160
161
  def init_params
161
- @weight_initializer.init_param(self, :weight)
162
- @bias_initializer.init_param(self, :bias)
162
+ @weight_initializer.init_param(@weight)
163
+ @bias_initializer.init_param(@bias)
163
164
  end
164
165
  end
165
166
 
@@ -187,18 +188,18 @@ module DNN
187
188
 
188
189
  def forward(x)
189
190
  @x = x
190
- @x.dot(@params[:weight]) + @params[:bias]
191
+ @x.dot(@weight.data) + @bias.data
191
192
  end
192
193
 
193
194
  def backward(dout)
194
- @grads[:weight] = @x.transpose.dot(dout)
195
+ @weight.grad = @x.transpose.dot(dout)
195
196
  if @l1_lambda > 0
196
- @grads[:weight] += dlasso
197
+ @weight.grad += dlasso
197
198
  elsif @l2_lambda > 0
198
- @grads[:weight] += dridge
199
+ @weight.grad += dridge
199
200
  end
200
- @grads[:bias] = dout.sum(0)
201
- dout.dot(@params[:weight].transpose)
201
+ @bias.grad = dout.sum(0)
202
+ dout.dot(@weight.data.transpose)
202
203
  end
203
204
 
204
205
  def shape
@@ -213,8 +214,8 @@ module DNN
213
214
 
214
215
  def init_params
215
216
  num_prev_nodes = prev_layer.shape[0]
216
- @params[:weight] = Xumo::SFloat.new(num_prev_nodes, @num_nodes)
217
- @params[:bias] = Xumo::SFloat.new(@num_nodes)
217
+ @weight.data = Xumo::SFloat.new(num_prev_nodes, @num_nodes)
218
+ @bias.data = Xumo::SFloat.new(@num_nodes)
218
219
  super()
219
220
  end
220
221
  end
@@ -317,22 +318,22 @@ module DNN
317
318
  attr_reader :momentum
318
319
 
319
320
  def self.load_hash(hash)
320
- running_mean = Xumo::SFloat.cast(hash[:running_mean])
321
- running_var = Xumo::SFloat.cast(hash[:running_var])
322
- self.new(momentum: hash[:momentum], running_mean: running_mean, running_var: running_var)
321
+ self.new(momentum: hash[:momentum])
323
322
  end
324
323
 
325
- def initialize(momentum: 0.9, running_mean: nil, running_var: nil)
324
+ def initialize(momentum: 0.9)
326
325
  super()
327
326
  @momentum = momentum
328
- @running_mean = running_mean
329
- @running_var = running_var
327
+ @params[:gamma] = @gamma = LearningParam.new(self)
328
+ @params[:beta] = @beta = LearningParam.new(self)
329
+ @params[:running_mean] = nil
330
+ @params[:running_var] = nil
330
331
  end
331
332
 
332
333
  def build(model)
333
334
  super
334
- @running_mean ||= Xumo::SFloat.zeros(*shape)
335
- @running_var ||= Xumo::SFloat.zeros(*shape)
335
+ @params[:running_mean] ||= Xumo::SFloat.zeros(*shape)
336
+ @params[:running_var] ||= Xumo::SFloat.zeros(*shape)
336
337
  end
337
338
 
338
339
  def forward(x)
@@ -343,20 +344,20 @@ module DNN
343
344
  @std = Xumo::NMath.sqrt(var + 1e-7)
344
345
  xn = @xc / @std
345
346
  @xn = xn
346
- @running_mean = @momentum * @running_mean + (1 - @momentum) * mean
347
- @running_var = @momentum * @running_var + (1 - @momentum) * var
347
+ @params[:running_mean] = @momentum * @params[:running_mean] + (1 - @momentum) * mean
348
+ @params[:running_var] = @momentum * @params[:running_var] + (1 - @momentum) * var
348
349
  else
349
- xc = x - @running_mean
350
- xn = xc / Xumo::NMath.sqrt(@running_var + 1e-7)
350
+ xc = x - @params[:running_mean]
351
+ xn = xc / Xumo::NMath.sqrt(@params[:running_var] + 1e-7)
351
352
  end
352
- @params[:gamma] * xn + @params[:beta]
353
+ @gamma.data * xn + @beta.data
353
354
  end
354
355
 
355
356
  def backward(dout)
356
357
  batch_size = dout.shape[0]
357
- @grads[:beta] = dout.sum(0)
358
- @grads[:gamma] = (@xn * dout).sum(0)
359
- dxn = @params[:gamma] * dout
358
+ @beta.grad = dout.sum(0)
359
+ @gamma.grad = (@xn * dout).sum(0)
360
+ dxn = @gamma.data * dout
360
361
  dxc = dxn / @std
361
362
  dstd = -((dxn * @xc) / (@std**2)).sum(0)
362
363
  dvar = 0.5 * dstd / @std
@@ -366,16 +367,14 @@ module DNN
366
367
  end
367
368
 
368
369
  def to_hash
369
- super({momentum: @momentum,
370
- running_mean: @running_mean.to_a,
371
- running_var: @running_var.to_a})
370
+ super({momentum: @momentum})
372
371
  end
373
372
 
374
373
  private
375
374
 
376
375
  def init_params
377
- @params[:gamma] = Xumo::SFloat.ones(*shape)
378
- @params[:beta] = Xumo::SFloat.zeros(*shape)
376
+ @gamma.data = Xumo::SFloat.ones(*shape)
377
+ @beta.data = Xumo::SFloat.zeros(*shape)
379
378
  end
380
379
  end
381
380
  end
@@ -0,0 +1,9 @@
1
+ class DNN::LearningParam
2
+ attr_accessor :data
3
+ attr_accessor :grad
4
+ attr_reader :layer
5
+
6
+ def initialize(layer)
7
+ @layer = layer
8
+ end
9
+ end
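The new DNN::LearningParam is a small container tying a parameter array, its gradient, and its owning layer together: layers register them in @params, initializers fill data, backward passes set grad, and optimizers consume both. A minimal sketch of that flow, using Numo directly (in the library Xumo aliases Numo or Cumo) and a Dense layer purely as an example owner:

```ruby
require "dnn"
require "numo/narray"

owner  = DNN::Layers::Dense.new(10)        # any HasParamLayer works as the owning layer
weight = DNN::LearningParam.new(owner)     # layers do: @params[:weight] = @weight = LearningParam.new(self)

weight.data = Numo::SFloat.new(784, 10).rand_norm  # an Initializer assigns param.data like this
weight.grad = Numo::SFloat.zeros(784, 10)          # a backward pass assigns param.grad like this

# An Optimizer then reads grad and writes data:
weight.data -= 0.01 * weight.grad
```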
@@ -35,7 +35,12 @@ module DNN
35
35
  hash_params = has_param_layers_params[has_param_layers_index]
36
36
  hash_params.each do |key, (shape, base64_param)|
37
37
  bin = Base64.decode64(base64_param)
38
- layer.params[key] = Xumo::SFloat.from_binary(bin).reshape(*shape)
38
+ data = Xumo::SFloat.from_binary(bin).reshape(*shape)
39
+ if layer.params[key].is_a?(LearningParam)
40
+ layer.params[key].data = data
41
+ else
42
+ layer.params[key] = data
43
+ end
39
44
  end
40
45
  has_param_layers_index += 1
41
46
  end
@@ -55,13 +60,14 @@ module DNN
55
60
  def to_json
56
61
  hash_layers = @layers.map { |layer| layer.to_hash }
57
62
  hash = {version: VERSION, layers: hash_layers, optimizer: @optimizer.to_hash}
58
- JSON.dump(hash)
63
+ JSON.pretty_generate(hash)
59
64
  end
60
65
 
61
66
  def params_to_json
62
- has_param_layers = @layers.select { |layer| layer.is_a?(HasParamLayer) }
67
+ has_param_layers = @layers.select { |layer| layer.is_a?(Layers::HasParamLayer) }
63
68
  has_param_layers_params = has_param_layers.map do |layer|
64
69
  layer.params.map { |key, param|
70
+ param = param.data if param.is_a?(LearningParam)
65
71
  base64_param = Base64.encode64(param.to_binary)
66
72
  [key, [param.shape, base64_param]]
67
73
  }.to_h
@@ -160,7 +166,7 @@ module DNN
160
166
  loss_value
161
167
  end
162
168
 
163
- def accurate(x, y, batch_size = 1, &batch_proc)
169
+ def accurate(x, y, batch_size = 100, &batch_proc)
164
170
  input_data_shape_check(x, y)
165
171
  batch_size = batch_size >= x.shape[0] ? x.shape[0] : batch_size
166
172
  correct = 0
@@ -9,8 +9,9 @@ module DNN
9
9
  @learning_rate = learning_rate
10
10
  end
11
11
 
12
- # Update layer has params.
13
- def update(layer) end
12
+ # Update params.
13
+ # Classes that inherit from this class must implement this method.
14
+ # def update(params) end
14
15
 
15
16
  def to_hash(merge_hash = nil)
16
17
  hash = {class: self.class.name, learning_rate: @learning_rate}
@@ -33,16 +34,15 @@ module DNN
33
34
  @v = {}
34
35
  end
35
36
 
36
- def update(layer)
37
- @v[layer] ||= {}
38
- layer.params.each_key do |key|
39
- amount = layer.grads[key] * @learning_rate
37
+ def update(params)
38
+ params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param|
39
+ amount = param.grad * @learning_rate
40
40
  if @momentum > 0
41
- @v[layer][key] ||= 0
42
- amount += @momentum * @v[layer][key]
43
- @v[layer][key] = amount
41
+ @v[param] ||= 0
42
+ amount += @momentum * @v[param]
43
+ @v[param] = amount
44
44
  end
45
- layer.params[key] -= amount
45
+ param.data -= amount
46
46
  end
47
47
  end
48
48
 
@@ -61,13 +61,12 @@ module DNN
61
61
  super(learning_rate, momentum: momentum)
62
62
  end
63
63
 
64
- def update(layer)
65
- @v[layer] ||= {}
66
- layer.params.each_key do |key|
67
- @v[layer][key] ||= 0
68
- amount = layer.grads[key] * @learning_rate
69
- @v[layer][key] = @v[layer][key] * @momentum - amount
70
- layer.params[key] = (layer.params[key] + @momentum**2 * @v[layer][key]) - (1 + @momentum) * amount
64
+ def update(params)
65
+ params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param|
66
+ @v[param] ||= 0
67
+ amount = param.grad * @learning_rate
68
+ @v[param] = @v[param] * @momentum - amount
69
+ param.data = (param.data + @momentum**2 * @v[param]) - (1 + @momentum) * amount
71
70
  end
72
71
  end
73
72
  end
@@ -83,12 +82,11 @@ module DNN
83
82
  self.new(hash[:learning_rate])
84
83
  end
85
84
 
86
- def update(layer)
87
- @g[layer] ||= {}
88
- layer.params.each_key do |key|
89
- @g[layer][key] ||= 0
90
- @g[layer][key] += layer.grads[key]**2
91
- layer.params[key] -= (@learning_rate / Xumo::NMath.sqrt(@g[layer][key] + 1e-7)) * layer.grads[key]
85
+ def update(params)
86
+ params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param|
87
+ @g[param] ||= 0
88
+ @g[param] += param.grad**2
89
+ param.data -= (@learning_rate / Xumo::NMath.sqrt(@g[param] + 1e-7)) * param.grad
92
90
  end
93
91
  end
94
92
  end
@@ -107,12 +105,11 @@ module DNN
107
105
  @g = {}
108
106
  end
109
107
 
110
- def update(layer)
111
- @g[layer] ||= {}
112
- layer.params.each_key do |key|
113
- @g[layer][key] ||= 0
114
- @g[layer][key] = @alpha * @g[layer][key] + (1 - @alpha) * layer.grads[key]**2
115
- layer.params[key] -= (@learning_rate / Xumo::NMath.sqrt(@g[layer][key] + 1e-7)) * layer.grads[key]
108
+ def update(params)
109
+ params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param|
110
+ @g[param] ||= 0
111
+ @g[param] = @alpha * @g[param] + (1 - @alpha) * param.grad**2
112
+ param.data -= (@learning_rate / Xumo::NMath.sqrt(@g[param] + 1e-7)) * param.grad
116
113
  end
117
114
  end
118
115
 
@@ -136,16 +133,14 @@ module DNN
136
133
  @s = {}
137
134
  end
138
135
 
139
- def update(layer)
140
- @h[layer] ||= {}
141
- @s[layer] ||= {}
142
- layer.params.each_key do |key|
143
- @h[layer][key] ||= Xumo::SFloat.zeros(*layer.params[key].shape)
144
- @s[layer][key] ||= Xumo::SFloat.zeros(*layer.params[key].shape)
145
- @h[layer][key] = @rho * @h[layer][key] + (1 - @rho) * layer.grads[key]**2
146
- v = (Xumo::NMath.sqrt(@s[layer][key] + 1e-6) / Xumo::NMath.sqrt(@h[layer][key] + 1e-6)) * layer.grads[key]
147
- @s[layer][key] = @rho * @s[layer][key] + (1 - @rho) * v**2
148
- layer.params[key] -= v
136
+ def update(params)
137
+ params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param|
138
+ @h[param] ||= Xumo::SFloat.zeros(*param.data.shape)
139
+ @s[param] ||= Xumo::SFloat.zeros(*param.data.shape)
140
+ @h[param] = @rho * @h[param] + (1 - @rho) * param.grad**2
141
+ v = (Xumo::NMath.sqrt(@s[param] + 1e-6) / Xumo::NMath.sqrt(@h[param] + 1e-6)) * param.grad
142
+ @s[param] = @rho * @s[param] + (1 - @rho) * v**2
143
+ param.data -= v
149
144
  end
150
145
  end
151
146
 
@@ -172,17 +167,15 @@ module DNN
172
167
  @v = {}
173
168
  end
174
169
 
175
- def update(layer)
170
+ def update(params)
176
171
  @iter += 1
177
- @m[layer] ||= {}
178
- @v[layer] ||= {}
179
172
  lr = @learning_rate * Math.sqrt(1 - @beta2**@iter) / (1 - @beta1**@iter)
180
- layer.params.each_key do |key|
181
- @m[layer][key] ||= 0
182
- @v[layer][key] ||= 0
183
- @m[layer][key] += (1 - @beta1) * (layer.grads[key] - @m[layer][key])
184
- @v[layer][key] += (1 - @beta2) * (layer.grads[key]**2 - @v[layer][key])
185
- layer.params[key] -= lr * @m[layer][key] / Xumo::NMath.sqrt(@v[layer][key] + 1e-7)
173
+ params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param|
174
+ @m[param] ||= 0
175
+ @v[param] ||= 0
176
+ @m[param] += (1 - @beta1) * (param.grad - @m[param])
177
+ @v[param] += (1 - @beta2) * (param.grad**2 - @v[param])
178
+ param.data -= lr * @m[param] / Xumo::NMath.sqrt(@v[param] + 1e-7)
186
179
  end
187
180
  end
188
181
 
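A side effect of the optimizer rewrite in this file: internal state (@v, @g, @m, @s) is now keyed by the LearningParam object itself rather than by a layer-and-key pair, so each parameter keeps its own accumulator. A minimal illustration of why object-keyed hashes are sufficient here:

```ruby
require "dnn"

a = DNN::LearningParam.new(nil)   # two distinct parameter objects
b = DNN::LearningParam.new(nil)

state = {}
state[a] ||= 0
state[b] ||= 0
state[a] += 1

p state.size  # => 2; distinct objects get distinct accumulator entries
p state[a]    # => 1
p state[b]    # => 0
```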
@@ -5,9 +5,11 @@ module DNN
5
5
  class RNN < Connection
6
6
  include Activations
7
7
 
8
- attr_accessor :h
9
8
  attr_reader :num_nodes
10
9
  attr_reader :stateful
10
+ attr_reader :weight
11
+ attr_reader :weight2
12
+ attr_reader :bias
11
13
 
12
14
  def initialize(num_nodes,
13
15
  stateful: false,
@@ -22,26 +24,27 @@ module DNN
22
24
  @stateful = stateful
23
25
  @return_sequences = return_sequences
24
26
  @layers = []
25
- @h = nil
27
+ @params[:h] = nil
28
+ @params[:weight2] = @weight2 = LearningParam.new(self)
26
29
  end
27
30
 
28
31
  def forward(xs)
29
32
  @xs_shape = xs.shape
30
33
  hs = Xumo::SFloat.zeros(xs.shape[0], @time_length, @num_nodes)
31
- h = (@stateful && @h) ? @h : Xumo::SFloat.zeros(xs.shape[0], @num_nodes)
34
+ h = (@stateful && @params[:h]) ? @params[:h] : Xumo::SFloat.zeros(xs.shape[0], @num_nodes)
32
35
  xs.shape[1].times do |t|
33
36
  x = xs[true, t, false]
34
37
  h = @layers[t].forward(x, h)
35
38
  hs[true, t, false] = h
36
39
  end
37
- @h = h
40
+ @params[:h] = h
38
41
  @return_sequences ? hs : h
39
42
  end
40
43
 
41
44
  def backward(dh2s)
42
- @grads[:weight] = Xumo::SFloat.zeros(*@params[:weight].shape)
43
- @grads[:weight2] = Xumo::SFloat.zeros(*@params[:weight2].shape)
44
- @grads[:bias] = Xumo::SFloat.zeros(*@params[:bias].shape)
45
+ @weight.grad = Xumo::SFloat.zeros(*@weight.data.shape)
46
+ @weight2.grad = Xumo::SFloat.zeros(*@weight2.data.shape)
47
+ @bias.grad = Xumo::SFloat.zeros(*@bias.data.shape)
45
48
  unless @return_sequences
46
49
  dh = dh2s
47
50
  dh2s = Xumo::SFloat.zeros(dh.shape[0], @time_length, dh.shape[1])
@@ -61,8 +64,7 @@ module DNN
61
64
  hash = {
62
65
  num_nodes: @num_nodes,
63
66
  stateful: @stateful,
64
- return_sequences: @return_sequences,
65
- h: @h.to_a
67
+ return_sequences: @return_sequences
66
68
  }
67
69
  hash.merge!(merge_hash) if merge_hash
68
70
  super(hash)
@@ -73,12 +75,12 @@ module DNN
73
75
  end
74
76
 
75
77
  def reset_state
76
- @h = @h.fill(0) if @h
78
+ @params[:h] = @params[:h].fill(0) if @params[:h]
77
79
  end
78
80
 
79
81
  def lasso
80
82
  if @l1_lambda > 0
81
- @l1_lambda * (@params[:weight].abs.sum + @params[:weight2].abs.sum)
83
+ @l1_lambda * (@weight.data.abs.sum + @weight2.data.abs.sum)
82
84
  else
83
85
  0
84
86
  end
@@ -86,30 +88,30 @@ module DNN
86
88
 
87
89
  def ridge
88
90
  if @l2_lambda > 0
89
- 0.5 * (@l2_lambda * ((@params[:weight]**2).sum + (@params[:weight2]**2).sum))
91
+ 0.5 * (@l2_lambda * ((@weight.data**2).sum + (@weight2.data**2).sum))
90
92
  else
91
93
  0
92
94
  end
93
95
  end
94
96
 
95
97
  def dlasso
96
- dlasso = Xumo::SFloat.ones(*@params[:weight].shape)
97
- dlasso[@params[:weight] < 0] = -1
98
+ dlasso = Xumo::SFloat.ones(*@weight.data.shape)
99
+ dlasso[@weight.data < 0] = -1
98
100
  @l1_lambda * dlasso
99
101
  end
100
102
 
101
103
  def dridge
102
- @l2_lambda * @params[:weight]
104
+ @l2_lambda * @weight.data
103
105
  end
104
106
 
105
107
  def dlasso2
106
- dlasso = Xumo::SFloat.ones(*@params[:weight2].shape)
107
- dlasso[@params[:weight2] < 0] = -1
108
+ dlasso = Xumo::SFloat.ones(*@weight2.data.shape)
109
+ dlasso[@weight2.data < 0] = -1
108
110
  @l1_lambda * dlasso
109
111
  end
110
112
 
111
113
  def dridge2
112
- @l2_lambda * @params[:weight2]
114
+ @l2_lambda * @weight2.data
113
115
  end
114
116
 
115
117
  private
@@ -129,30 +131,32 @@ module DNN
129
131
  def forward(x, h)
130
132
  @x = x
131
133
  @h = h
132
- h2 = x.dot(@rnn.params[:weight]) + h.dot(@rnn.params[:weight2]) + @rnn.params[:bias]
134
+ h2 = x.dot(@rnn.weight.data) + h.dot(@rnn.weight2.data) + @rnn.bias.data
133
135
  @activation.forward(h2)
134
136
  end
135
137
 
136
138
  def backward(dh2)
137
139
  dh2 = @activation.backward(dh2)
138
- @rnn.grads[:weight] += @x.transpose.dot(dh2)
139
- @rnn.grads[:weight2] += @h.transpose.dot(dh2)
140
+ @rnn.weight.grad += @x.transpose.dot(dh2)
141
+ @rnn.weight2.grad += @h.transpose.dot(dh2)
140
142
  if @rnn.l1_lambda > 0
141
- @rnn.grads[:weight] += dlasso
142
- @rnn.grads[:weight2] += dlasso2
143
+ @rnn.weight.grad += dlasso
144
+ @rnn.weight2.grad += dlasso2
143
145
  elsif @rnn.l2_lambda > 0
144
- @rnn.grads[:weight] += dridge
145
- @grads[:weight2] += dridge2
146
+ @rnn.weight.grad += dridge
147
+ @rnn.weight2.grad += dridge2
146
148
  end
147
- @rnn.grads[:bias] += dh2.sum(0)
148
- dx = dh2.dot(@rnn.params[:weight].transpose)
149
- dh = dh2.dot(@rnn.params[:weight2].transpose)
149
+ @rnn.bias.grad += dh2.sum(0)
150
+ dx = dh2.dot(@rnn.weight.data.transpose)
151
+ dh = dh2.dot(@rnn.weight2.data.transpose)
150
152
  [dx, dh]
151
153
  end
152
154
  end
153
155
 
154
156
 
155
157
  class SimpleRNN < RNN
158
+ attr_reader :activation
159
+
156
160
  def self.load_hash(hash)
157
161
  simple_rnn = self.new(hash[:num_nodes],
158
162
  stateful: hash[:stateful],
@@ -162,7 +166,6 @@ module DNN
162
166
  bias_initializer: Util.load_hash(hash[:bias_initializer]),
163
167
  l1_lambda: hash[:l1_lambda],
164
168
  l2_lambda: hash[:l2_lambda])
165
- simple_rnn.h = Xumo::SFloat.cast(hash[:h])
166
169
  simple_rnn
167
170
  end
168
171
 
@@ -193,12 +196,12 @@ module DNN
193
196
  def init_params
194
197
  super()
195
198
  num_prev_nodes = prev_layer.shape[1]
196
- @params[:weight] = Xumo::SFloat.new(num_prev_nodes, @num_nodes)
197
- @params[:weight2] = Xumo::SFloat.new(@num_nodes, @num_nodes)
198
- @params[:bias] = Xumo::SFloat.new(@num_nodes)
199
- @weight_initializer.init_param(self, :weight)
200
- @weight_initializer.init_param(self, :weight2)
201
- @bias_initializer.init_param(self, :bias)
199
+ @weight.data = Xumo::SFloat.new(num_prev_nodes, @num_nodes)
200
+ @weight2.data = Xumo::SFloat.new(@num_nodes, @num_nodes)
201
+ @bias.data = Xumo::SFloat.new(@num_nodes)
202
+ @weight_initializer.init_param(@weight)
203
+ @weight_initializer.init_param(@weight2)
204
+ @bias_initializer.init_param(@bias)
202
205
  @time_length.times do |t|
203
206
  @layers << SimpleRNN_Dense.new(self)
204
207
  end
@@ -221,7 +224,7 @@ module DNN
221
224
  @h = h
222
225
  @c = c
223
226
  num_nodes = h.shape[1]
224
- a = x.dot(@rnn.params[:weight]) + h.dot(@rnn.params[:weight2]) + @rnn.params[:bias]
227
+ a = x.dot(@rnn.weight.data) + h.dot(@rnn.weight2.data) + @rnn.bias.data
225
228
 
226
229
  @forget = @forget_sigmoid.forward(a[true, 0...num_nodes])
227
230
  @g = @g_tanh.forward(a[true, num_nodes...(num_nodes * 2)])
@@ -245,18 +248,18 @@ module DNN
245
248
 
246
249
  da = Xumo::SFloat.hstack([dforget, dg, din, dout])
247
250
 
248
- @rnn.grads[:weight] += @x.transpose.dot(da)
249
- @rnn.grads[:weight2] += @h.transpose.dot(da)
251
+ @rnn.weight.grad += @x.transpose.dot(da)
252
+ @rnn.weight2.grad += @h.transpose.dot(da)
250
253
  if @rnn.l1_lambda > 0
251
- @rnn.grads[:weight] += dlasso
252
- @rnn.grads[:weight2] += dlasso2
254
+ @rnn.weight.grad += dlasso
255
+ @rnn.weight2.grad += dlasso2
253
256
  elsif @rnn.l2_lambda > 0
254
- @rnn.grads[:weight] += dridge
255
- @rnn.grads[:weight2] += dridge2
257
+ @rnn.weight.grad += dridge
258
+ @rnn.weight2.grad += dridge2
256
259
  end
257
- @rnn.grads[:bias] += da.sum(0)
258
- dx = da.dot(@rnn.params[:weight].transpose)
259
- dh = da.dot(@rnn.params[:weight2].transpose)
260
+ @rnn.bias.grad += da.sum(0)
261
+ dx = da.dot(@rnn.weight.data.transpose)
262
+ dh = da.dot(@rnn.weight2.data.transpose)
260
263
  dc = dc2_tmp * @forget
261
264
  [dx, dh, dc]
262
265
  end
@@ -264,8 +267,6 @@ module DNN
264
267
 
265
268
 
266
269
  class LSTM < RNN
267
- attr_accessor :c
268
-
269
270
  def self.load_hash(hash)
270
271
  lstm = self.new(hash[:num_nodes],
271
272
  stateful: hash[:stateful],
@@ -274,8 +275,6 @@ module DNN
274
275
  bias_initializer: Util.load_hash(hash[:bias_initializer]),
275
276
  l1_lambda: hash[:l1_lambda],
276
277
  l2_lambda: hash[:l2_lambda])
277
- lstm.h = Xumo::SFloat.cast(hash[:h])
278
- lstm.c = Xumo::SFloat.cast(hash[:c])
279
278
  lstm
280
279
  end
281
280
 
@@ -287,7 +286,7 @@ module DNN
287
286
  l1_lambda: 0,
288
287
  l2_lambda: 0)
289
288
  super
290
- @c = nil
289
+ @params[:c] = nil
291
290
  end
292
291
 
293
292
  def forward(xs)
@@ -296,8 +295,8 @@ module DNN
296
295
  h = nil
297
296
  c = nil
298
297
  if @stateful
299
- h = @h if @h
300
- c = @c if @c
298
+ h = @params[:h] if @params[:h]
299
+ c = @params[:c] if @params[:c]
301
300
  end
302
301
  h ||= Xumo::SFloat.zeros(xs.shape[0], @num_nodes)
303
302
  c ||= Xumo::SFloat.zeros(xs.shape[0], @num_nodes)
@@ -306,15 +305,15 @@ module DNN
306
305
  h, c = @layers[t].forward(x, h, c)
307
306
  hs[true, t, false] = h
308
307
  end
309
- @h = h
310
- @c = c
308
+ @params[:h] = h
309
+ @params[:c] = c
311
310
  @return_sequences ? hs : h
312
311
  end
313
312
 
314
313
  def backward(dh2s)
315
- @grads[:weight] = Xumo::SFloat.zeros(*@params[:weight].shape)
316
- @grads[:weight2] = Xumo::SFloat.zeros(*@params[:weight2].shape)
317
- @grads[:bias] = Xumo::SFloat.zeros(*@params[:bias].shape)
314
+ @weight.grad = Xumo::SFloat.zeros(*@weight.data.shape)
315
+ @weight2.grad = Xumo::SFloat.zeros(*@weight2.data.shape)
316
+ @bias.grad = Xumo::SFloat.zeros(*@bias.data.shape)
318
317
  unless @return_sequences
319
318
  dh = dh2s
320
319
  dh2s = Xumo::SFloat.zeros(dh.shape[0], @time_length, dh.shape[1])
@@ -333,11 +332,7 @@ module DNN
333
332
 
334
333
  def reset_state
335
334
  super()
336
- @c = @c.fill(0) if @c
337
- end
338
-
339
- def to_hash
340
- super({c: @c.to_a})
335
+ @params[:c] = @params[:c].fill(0) if @params[:c]
341
336
  end
342
337
 
343
338
  private
@@ -345,12 +340,12 @@ module DNN
345
340
  def init_params
346
341
  super()
347
342
  num_prev_nodes = prev_layer.shape[1]
348
- @params[:weight] = Xumo::SFloat.new(num_prev_nodes, @num_nodes * 4)
349
- @params[:weight2] = Xumo::SFloat.new(@num_nodes, @num_nodes * 4)
350
- @params[:bias] = Xumo::SFloat.new(@num_nodes * 4)
351
- @weight_initializer.init_param(self, :weight)
352
- @weight_initializer.init_param(self, :weight2)
353
- @bias_initializer.init_param(self, :bias)
343
+ @weight.data = Xumo::SFloat.new(num_prev_nodes, @num_nodes * 4)
344
+ @weight2.data = Xumo::SFloat.new(@num_nodes, @num_nodes * 4)
345
+ @bias.data = Xumo::SFloat.new(@num_nodes * 4)
346
+ @weight_initializer.init_param(@weight)
347
+ @weight_initializer.init_param(@weight2)
348
+ @bias_initializer.init_param(@bias)
354
349
  @time_length.times do |t|
355
350
  @layers << LSTM_Dense.new(self)
356
351
  end
@@ -370,16 +365,16 @@ module DNN
370
365
  @x = x
371
366
  @h = h
372
367
  num_nodes = h.shape[1]
373
- @weight_a = @rnn.params[:weight][true, 0...(num_nodes * 2)]
374
- @weight2_a = @rnn.params[:weight2][true, 0...(num_nodes * 2)]
375
- bias_a = @rnn.params[:bias][0...(num_nodes * 2)]
368
+ @weight_a = @rnn.weight.data[true, 0...(num_nodes * 2)]
369
+ @weight2_a = @rnn.weight2.data[true, 0...(num_nodes * 2)]
370
+ bias_a = @rnn.bias.data[0...(num_nodes * 2)]
376
371
  a = x.dot(@weight_a) + h.dot(@weight2_a) + bias_a
377
372
  @update = @update_sigmoid.forward(a[true, 0...num_nodes])
378
373
  @reset = @reset_sigmoid.forward(a[true, num_nodes..-1])
379
374
 
380
- @weight_h = @rnn.params[:weight][true, (num_nodes * 2)..-1]
381
- @weight2_h = @rnn.params[:weight2][true, (num_nodes * 2)..-1]
382
- bias_h = @rnn.params[:bias][(num_nodes * 2)..-1]
375
+ @weight_h = @rnn.weight.data[true, (num_nodes * 2)..-1]
376
+ @weight2_h = @rnn.weight2.data[true, (num_nodes * 2)..-1]
377
+ bias_h = @rnn.bias.data[(num_nodes * 2)..-1]
383
378
  @tanh_h = @tanh.forward(x.dot(@weight_h) + (h * @reset).dot(@weight2_h) + bias_h)
384
379
  h2 = (1 - @update) * h + @update * @tanh_h
385
380
  h2
@@ -404,16 +399,16 @@ module DNN
404
399
  dh += da.dot(@weight2_a.transpose)
405
400
  dbias_a = da.sum(0)
406
401
 
407
- @rnn.grads[:weight] += Xumo::SFloat.hstack([dweight_a, dweight_h])
408
- @rnn.grads[:weight2] += Xumo::SFloat.hstack([dweight2_a, dweight2_h])
402
+ @rnn.weight.grad += Xumo::SFloat.hstack([dweight_a, dweight_h])
403
+ @rnn.weight2.grad += Xumo::SFloat.hstack([dweight2_a, dweight2_h])
409
404
  if @rnn.l1_lambda > 0
410
- @rnn.grads[:weight] += dlasso
411
- @rnn.grads[:weight2] += dlasso2
405
+ @rnn.weight.grad += dlasso
406
+ @rnn.weight2.grad += dlasso2
412
407
  elsif @rnn.l2_lambda > 0
413
- @rnn.grads[:weight] += dridge
414
- @rnn.grads[:weight2] += dridge2
408
+ @rnn.weight.grad += dridge
409
+ @rnn.weight2.grad += dridge2
415
410
  end
416
- @rnn.grads[:bias] += Xumo::SFloat.hstack([dbias_a, dbias_h])
411
+ @rnn.bias.grad += Xumo::SFloat.hstack([dbias_a, dbias_h])
417
412
  [dx, dh]
418
413
  end
419
414
  end
@@ -428,7 +423,6 @@ module DNN
428
423
  bias_initializer: Util.load_hash(hash[:bias_initializer]),
429
424
  l1_lambda: hash[:l1_lambda],
430
425
  l2_lambda: hash[:l2_lambda])
431
- gru.h = Xumo::SFloat.cast(hash[:h])
432
426
  gru
433
427
  end
434
428
 
@@ -447,12 +441,12 @@ module DNN
447
441
  def init_params
448
442
  super()
449
443
  num_prev_nodes = prev_layer.shape[1]
450
- @params[:weight] = Xumo::SFloat.new(num_prev_nodes, @num_nodes * 3)
451
- @params[:weight2] = Xumo::SFloat.new(@num_nodes, @num_nodes * 3)
452
- @params[:bias] = Xumo::SFloat.new(@num_nodes * 3)
453
- @weight_initializer.init_param(self, :weight)
454
- @weight_initializer.init_param(self, :weight2)
455
- @bias_initializer.init_param(self, :bias)
444
+ @weight.data = Xumo::SFloat.new(num_prev_nodes, @num_nodes * 3)
445
+ @weight2.data = Xumo::SFloat.new(@num_nodes, @num_nodes * 3)
446
+ @bias.data = Xumo::SFloat.new(@num_nodes * 3)
447
+ @weight_initializer.init_param(@weight)
448
+ @weight_initializer.init_param(@weight2)
449
+ @bias_initializer.init_param(@bias)
456
450
  @time_length.times do |t|
457
451
  @layers << GRU_Dense.new(self)
458
452
  end
@@ -1,3 +1,3 @@
1
1
  module DNN
2
- VERSION = "0.7.3"
2
+ VERSION = "0.8.0"
3
3
  end
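The bumped constant can be verified at runtime:

```ruby
require "dnn"
puts DNN::VERSION  # => "0.8.0"
```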
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-dnn
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.3
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - unagiootoro
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-12-17 00:00:00.000000000 Z
11
+ date: 2019-01-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -111,6 +111,7 @@ files:
111
111
  - lib/dnn/core/error.rb
112
112
  - lib/dnn/core/initializers.rb
113
113
  - lib/dnn/core/layers.rb
114
+ - lib/dnn/core/learning_param.rb
114
115
  - lib/dnn/core/model.rb
115
116
  - lib/dnn/core/optimizers.rb
116
117
  - lib/dnn/core/rnn_layers.rb
@@ -145,8 +146,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
145
146
  - !ruby/object:Gem::Version
146
147
  version: '0'
147
148
  requirements: []
148
- rubyforge_project:
149
- rubygems_version: 2.7.6
149
+ rubygems_version: 3.0.1
150
150
  signing_key:
151
151
  specification_version: 4
152
152
  summary: ruby deep learning library.