ruby-dnn 0.7.3 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/API-Reference.ja.md +8 -12
- data/lib/dnn.rb +1 -0
- data/lib/dnn/core/cnn_layers.rb +12 -12
- data/lib/dnn/core/initializers.rb +14 -15
- data/lib/dnn/core/layers.rb +39 -40
- data/lib/dnn/core/learning_param.rb +9 -0
- data/lib/dnn/core/model.rb +10 -4
- data/lib/dnn/core/optimizers.rb +41 -48
- data/lib/dnn/core/rnn_layers.rb +82 -88
- data/lib/dnn/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 308a84624d71cb5b27d4b72f1ea69880dbfebf226ac9a5b44cf5775cc7e22703
|
4
|
+
data.tar.gz: baa0b39dcca002f79eb660129cf0042dfda7d4768e9b2067d7135ce877047770
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a98278bb5cfd211bcf1231a4f5b0f13fa9d7ee4684d0334a6d2052d77b405d1873a65bd11d1f2df2f1f29926de484d4d0a7cbd8ef8d88cb4735923904eeb91fd
|
7
|
+
data.tar.gz: 06d52f9698bf600e0bb63c7cc703697d3bb1ec9827871e6dd595a7fdd17cf3e63980d8abe376c9418db9412309b5aed39187c8b988388fd2bbaf09afc4702aa7
|
data/API-Reference.ja.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
ruby-dnnのAPIリファレンスです。このリファレンスでは、APIを利用するうえで必要となるクラスとメソッドしか記載していません。
|
3
3
|
そのため、プログラムの詳細が必要な場合は、ソースコードを参照してください。
|
4
4
|
|
5
|
-
最終更新バージョン:0.
|
5
|
+
最終更新バージョン:0.8.0
|
6
6
|
|
7
7
|
# module DNN
|
8
8
|
ruby-dnnの名前空間をなすモジュールです。
|
@@ -299,7 +299,7 @@ Hash
|
|
299
299
|
## def initialize(dim_or_shape)
|
300
300
|
コンストラクタ
|
301
301
|
### arguments
|
302
|
-
* Integer|Array dim_or_shape
|
302
|
+
* Integer | Array dim_or_shape
|
303
303
|
入力層のdimensionまたはshapeを指定します。引数がIntegerだとdimensionとみなし、Arrayだとshapeとみなします。
|
304
304
|
|
305
305
|
|
@@ -600,15 +600,11 @@ Numo::SFloat y
|
|
600
600
|
|
601
601
|
## 【Instance methods】
|
602
602
|
|
603
|
-
## def initialize(momentum: 0.9
|
603
|
+
## def initialize(momentum: 0.9)
|
604
604
|
コンストラクタ。
|
605
605
|
### arguments
|
606
606
|
* Float momentum: 0.9
|
607
607
|
推論時に使用する平均と分散を求めるための指数移動平均の係数。
|
608
|
-
* Float running_mean: nil
|
609
|
-
推論時に使用する平均。
|
610
|
-
* Float running_var: nil
|
611
|
-
推論時に使用する分散。
|
612
608
|
|
613
609
|
|
614
610
|
# module Activations
|
@@ -745,11 +741,11 @@ Float max
|
|
745
741
|
一様分布の最大値。
|
746
742
|
|
747
743
|
## 【Instance methods】
|
748
|
-
## def initialize(min = -0.
|
744
|
+
## def initialize(min = -0.05, max = 0.05)
|
749
745
|
### arguments
|
750
|
-
* Float min = -0.
|
746
|
+
* Float min = -0.05
|
751
747
|
一様分布の最小値。
|
752
|
-
* Float max = 0.
|
748
|
+
* Float max = 0.05
|
753
749
|
一様分布の最大値。
|
754
750
|
|
755
751
|
|
@@ -782,8 +778,8 @@ Float learning_rate
|
|
782
778
|
* Float learning_rate
|
783
779
|
Optimizerの学習率。
|
784
780
|
|
785
|
-
## abstruct def update(
|
786
|
-
|
781
|
+
## abstract def update(params)
|
782
|
+
paramsが持つ全ての学習パラメータにおいて、gradを元に、dataを更新します。全てのOptimizerを継承するクラスは、このメソッドを実装する必要があります。
|
787
783
|
### arguments
|
788
784
|
* Hash params
|
789
785
|
更新対象の学習パラメータ(LearningParam)を保持するHash。
|
data/lib/dnn.rb
CHANGED
@@ -12,6 +12,7 @@ module DNN; end
|
|
12
12
|
require_relative "dnn/version"
|
13
13
|
require_relative "dnn/core/error"
|
14
14
|
require_relative "dnn/core/model"
|
15
|
+
require_relative "dnn/core/learning_param"
|
15
16
|
require_relative "dnn/core/initializers"
|
16
17
|
require_relative "dnn/core/layers"
|
17
18
|
require_relative "dnn/core/activations"
|
data/lib/dnn/core/cnn_layers.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module DNN
|
2
2
|
module Layers
|
3
|
-
#This module is used for convolution.
|
3
|
+
# This module is used for convolution.
|
4
4
|
module Conv2DModule
|
5
5
|
private
|
6
6
|
|
@@ -107,20 +107,20 @@ module DNN
|
|
107
107
|
x = padding(x, @pad) if @padding
|
108
108
|
@x_shape = x.shape
|
109
109
|
@col = im2col(x, *@out_size, *@filter_size, @strides)
|
110
|
-
out = @col.dot(@
|
110
|
+
out = @col.dot(@weight.data) + @bias.data
|
111
111
|
out.reshape(x.shape[0], *@out_size, out.shape[3])
|
112
112
|
end
|
113
113
|
|
114
114
|
def backward(dout)
|
115
115
|
dout = dout.reshape(dout.shape[0..2].reduce(:*), dout.shape[3])
|
116
|
-
@
|
116
|
+
@weight.grad = @col.transpose.dot(dout)
|
117
117
|
if @l1_lambda > 0
|
118
|
-
@
|
118
|
+
@weight.grad += dlasso
|
119
119
|
elsif @l2_lambda > 0
|
120
|
-
@
|
120
|
+
@weight.grad += dridge
|
121
121
|
end
|
122
|
-
@
|
123
|
-
dcol = dout.dot(@
|
122
|
+
@bias.grad = dout.sum(0)
|
123
|
+
dcol = dout.dot(@weight.data.transpose)
|
124
124
|
dx = col2im(dcol, @x_shape, *@out_size, *@filter_size, @strides)
|
125
125
|
@padding ? back_padding(dx, @pad) : dx
|
126
126
|
end
|
@@ -140,13 +140,14 @@ module DNN
|
|
140
140
|
|
141
141
|
def init_params
|
142
142
|
num_prev_filter = prev_layer.shape[2]
|
143
|
-
@
|
144
|
-
@
|
143
|
+
@weight.data = Xumo::SFloat.new(num_prev_filter * @filter_size.reduce(:*), @num_filters)
|
144
|
+
@bias.data = Xumo::SFloat.new(@num_filters)
|
145
145
|
super()
|
146
146
|
end
|
147
147
|
end
|
148
148
|
|
149
|
-
|
149
|
+
|
150
|
+
# Superclass of all 2D pooling layer classes.
|
150
151
|
class Pool2D < Layer
|
151
152
|
include Conv2DModule
|
152
153
|
|
@@ -185,8 +186,7 @@ module DNN
|
|
185
186
|
end
|
186
187
|
|
187
188
|
def to_hash
|
188
|
-
super({
|
189
|
-
pool_height: @pool_height,
|
189
|
+
super({pool_size: @pool_size,
|
190
190
|
strides: @strides,
|
191
191
|
padding: @padding})
|
192
192
|
end
|
@@ -2,9 +2,8 @@ module DNN
|
|
2
2
|
module Initializers
|
3
3
|
|
4
4
|
class Initializer
|
5
|
-
|
6
|
-
|
7
|
-
end
|
5
|
+
# Classes that inherit from this class must implement this method.
|
6
|
+
# def init_param(param) end
|
8
7
|
|
9
8
|
def to_hash(merge_hash = nil)
|
10
9
|
hash = {class: self.class.name}
|
@@ -15,8 +14,8 @@ module DNN
|
|
15
14
|
|
16
15
|
|
17
16
|
class Zeros < Initializer
|
18
|
-
def init_param(
|
19
|
-
|
17
|
+
def init_param(param)
|
18
|
+
param.data = param.data.fill(0)
|
20
19
|
end
|
21
20
|
end
|
22
21
|
|
@@ -34,8 +33,8 @@ module DNN
|
|
34
33
|
@std = std
|
35
34
|
end
|
36
35
|
|
37
|
-
def init_param(
|
38
|
-
|
36
|
+
def init_param(param)
|
37
|
+
param.data = param.data.rand_norm(@mean, @std)
|
39
38
|
end
|
40
39
|
|
41
40
|
def to_hash
|
@@ -57,8 +56,8 @@ module DNN
|
|
57
56
|
@max = max
|
58
57
|
end
|
59
58
|
|
60
|
-
def init_param(
|
61
|
-
|
59
|
+
def init_param(param)
|
60
|
+
param.data = param.data.rand(@min, @max)
|
62
61
|
end
|
63
62
|
|
64
63
|
def to_hash
|
@@ -68,17 +67,17 @@ module DNN
|
|
68
67
|
|
69
68
|
|
70
69
|
class Xavier < Initializer
|
71
|
-
def init_param(
|
72
|
-
num_prev_nodes = layer.prev_layer.shape.reduce(:*)
|
73
|
-
|
70
|
+
def init_param(param)
|
71
|
+
num_prev_nodes = param.layer.prev_layer.shape.reduce(:*)
|
72
|
+
param.data = param.data.rand_norm / Math.sqrt(num_prev_nodes)
|
74
73
|
end
|
75
74
|
end
|
76
75
|
|
77
76
|
|
78
77
|
class He < Initializer
|
79
|
-
def init_param(
|
80
|
-
num_prev_nodes = layer.prev_layer.shape.reduce(:*)
|
81
|
-
|
78
|
+
def init_param(param)
|
79
|
+
num_prev_nodes = param.layer.prev_layer.shape.reduce(:*)
|
80
|
+
param.data = param.data.rand_norm / Math.sqrt(num_prev_nodes) * Math.sqrt(2)
|
82
81
|
end
|
83
82
|
end
|
84
83
|
|
data/lib/dnn/core/layers.rb
CHANGED
@@ -49,12 +49,10 @@ module DNN
|
|
49
49
|
class HasParamLayer < Layer
|
50
50
|
attr_accessor :trainable # Setting false prevents learning of parameters.
|
51
51
|
attr_reader :params # The parameters of the layer.
|
52
|
-
attr_reader :grads # Differential value of parameter of layer.
|
53
52
|
|
54
53
|
def initialize
|
55
|
-
super
|
54
|
+
super()
|
56
55
|
@params = {}
|
57
|
-
@grads = {}
|
58
56
|
@trainable = true
|
59
57
|
end
|
60
58
|
|
@@ -68,12 +66,13 @@ module DNN
|
|
68
66
|
|
69
67
|
# Update the parameters.
|
70
68
|
def update
|
71
|
-
@model.optimizer.update(
|
69
|
+
@model.optimizer.update(@params) if @trainable
|
72
70
|
end
|
73
71
|
|
74
72
|
private
|
75
73
|
|
76
74
|
# Initialize the parameters.
|
75
|
+
# Classes that inherit from this class must implement this method.
|
77
76
|
def init_params() end
|
78
77
|
end
|
79
78
|
|
@@ -120,11 +119,13 @@ module DNN
|
|
120
119
|
@bias_initializer = (bias_initializer || Zeros.new)
|
121
120
|
@l1_lambda = l1_lambda
|
122
121
|
@l2_lambda = l2_lambda
|
122
|
+
@params[:weight] = @weight = LearningParam.new(self)
|
123
|
+
@params[:bias] = @bias = LearningParam.new(self)
|
123
124
|
end
|
124
125
|
|
125
126
|
def lasso
|
126
127
|
if @l1_lambda > 0
|
127
|
-
@l1_lambda * @
|
128
|
+
@l1_lambda * @weight.data.abs.sum
|
128
129
|
else
|
129
130
|
0
|
130
131
|
end
|
@@ -132,20 +133,20 @@ module DNN
|
|
132
133
|
|
133
134
|
def ridge
|
134
135
|
if @l2_lambda > 0
|
135
|
-
0.5 * @l2_lambda * (@
|
136
|
+
0.5 * @l2_lambda * (@weight.data**2).sum
|
136
137
|
else
|
137
138
|
0
|
138
139
|
end
|
139
140
|
end
|
140
141
|
|
141
142
|
def dlasso
|
142
|
-
dlasso = Xumo::SFloat.ones(*@
|
143
|
-
dlasso[@
|
143
|
+
dlasso = Xumo::SFloat.ones(*@weight.data.shape)
|
144
|
+
dlasso[@weight.data < 0] = -1
|
144
145
|
@l1_lambda * dlasso
|
145
146
|
end
|
146
147
|
|
147
148
|
def dridge
|
148
|
-
@l2_lambda * @
|
149
|
+
@l2_lambda * @weight.data
|
149
150
|
end
|
150
151
|
|
151
152
|
def to_hash(merge_hash)
|
@@ -158,8 +159,8 @@ module DNN
|
|
158
159
|
private
|
159
160
|
|
160
161
|
def init_params
|
161
|
-
@weight_initializer.init_param(
|
162
|
-
@bias_initializer.init_param(
|
162
|
+
@weight_initializer.init_param(@weight)
|
163
|
+
@bias_initializer.init_param(@bias)
|
163
164
|
end
|
164
165
|
end
|
165
166
|
|
@@ -187,18 +188,18 @@ module DNN
|
|
187
188
|
|
188
189
|
def forward(x)
|
189
190
|
@x = x
|
190
|
-
@x.dot(@
|
191
|
+
@x.dot(@weight.data) + @bias.data
|
191
192
|
end
|
192
193
|
|
193
194
|
def backward(dout)
|
194
|
-
@
|
195
|
+
@weight.grad = @x.transpose.dot(dout)
|
195
196
|
if @l1_lambda > 0
|
196
|
-
@
|
197
|
+
@weight.grad += dlasso
|
197
198
|
elsif @l2_lambda > 0
|
198
|
-
@
|
199
|
+
@weight.grad += dridge
|
199
200
|
end
|
200
|
-
@
|
201
|
-
dout.dot(@
|
201
|
+
@bias.grad = dout.sum(0)
|
202
|
+
dout.dot(@weight.data.transpose)
|
202
203
|
end
|
203
204
|
|
204
205
|
def shape
|
@@ -213,8 +214,8 @@ module DNN
|
|
213
214
|
|
214
215
|
def init_params
|
215
216
|
num_prev_nodes = prev_layer.shape[0]
|
216
|
-
@
|
217
|
-
@
|
217
|
+
@weight.data = Xumo::SFloat.new(num_prev_nodes, @num_nodes)
|
218
|
+
@bias.data = Xumo::SFloat.new(@num_nodes)
|
218
219
|
super()
|
219
220
|
end
|
220
221
|
end
|
@@ -317,22 +318,22 @@ module DNN
|
|
317
318
|
attr_reader :momentum
|
318
319
|
|
319
320
|
def self.load_hash(hash)
|
320
|
-
|
321
|
-
running_var = Xumo::SFloat.cast(hash[:running_var])
|
322
|
-
self.new(momentum: hash[:momentum], running_mean: running_mean, running_var: running_var)
|
321
|
+
self.new(momentum: hash[:momentum])
|
323
322
|
end
|
324
323
|
|
325
|
-
def initialize(momentum: 0.9
|
324
|
+
def initialize(momentum: 0.9)
|
326
325
|
super()
|
327
326
|
@momentum = momentum
|
328
|
-
@
|
329
|
-
@
|
327
|
+
@params[:gamma] = @gamma = LearningParam.new(self)
|
328
|
+
@params[:beta] = @beta = LearningParam.new(self)
|
329
|
+
@params[:running_mean] = nil
|
330
|
+
@params[:running_var] = nil
|
330
331
|
end
|
331
332
|
|
332
333
|
def build(model)
|
333
334
|
super
|
334
|
-
@running_mean ||= Xumo::SFloat.zeros(*shape)
|
335
|
-
@running_var ||= Xumo::SFloat.zeros(*shape)
|
335
|
+
@params[:running_mean] ||= Xumo::SFloat.zeros(*shape)
|
336
|
+
@params[:running_var] ||= Xumo::SFloat.zeros(*shape)
|
336
337
|
end
|
337
338
|
|
338
339
|
def forward(x)
|
@@ -343,20 +344,20 @@ module DNN
|
|
343
344
|
@std = Xumo::NMath.sqrt(var + 1e-7)
|
344
345
|
xn = @xc / @std
|
345
346
|
@xn = xn
|
346
|
-
@running_mean = @momentum * @running_mean + (1 - @momentum) * mean
|
347
|
-
@running_var = @momentum * @running_var + (1 - @momentum) * var
|
347
|
+
@params[:running_mean] = @momentum * @params[:running_mean] + (1 - @momentum) * mean
|
348
|
+
@params[:running_var] = @momentum * @params[:running_var] + (1 - @momentum) * var
|
348
349
|
else
|
349
|
-
xc = x - @running_mean
|
350
|
-
xn = xc / Xumo::NMath.sqrt(@running_var + 1e-7)
|
350
|
+
xc = x - @params[:running_mean]
|
351
|
+
xn = xc / Xumo::NMath.sqrt(@params[:running_var] + 1e-7)
|
351
352
|
end
|
352
|
-
@
|
353
|
+
@gamma.data * xn + @beta.data
|
353
354
|
end
|
354
355
|
|
355
356
|
def backward(dout)
|
356
357
|
batch_size = dout.shape[0]
|
357
|
-
@
|
358
|
-
@
|
359
|
-
dxn = @
|
358
|
+
@beta.grad = dout.sum(0)
|
359
|
+
@gamma.grad = (@xn * dout).sum(0)
|
360
|
+
dxn = @gamma.data * dout
|
360
361
|
dxc = dxn / @std
|
361
362
|
dstd = -((dxn * @xc) / (@std**2)).sum(0)
|
362
363
|
dvar = 0.5 * dstd / @std
|
@@ -366,16 +367,14 @@ module DNN
|
|
366
367
|
end
|
367
368
|
|
368
369
|
def to_hash
|
369
|
-
super({momentum: @momentum
|
370
|
-
running_mean: @running_mean.to_a,
|
371
|
-
running_var: @running_var.to_a})
|
370
|
+
super({momentum: @momentum})
|
372
371
|
end
|
373
372
|
|
374
373
|
private
|
375
374
|
|
376
375
|
def init_params
|
377
|
-
@
|
378
|
-
@
|
376
|
+
@gamma.data = Xumo::SFloat.ones(*shape)
|
377
|
+
@beta.data = Xumo::SFloat.zeros(*shape)
|
379
378
|
end
|
380
379
|
end
|
381
380
|
end
|
data/lib/dnn/core/model.rb
CHANGED
@@ -35,7 +35,12 @@ module DNN
|
|
35
35
|
hash_params = has_param_layers_params[has_param_layers_index]
|
36
36
|
hash_params.each do |key, (shape, base64_param)|
|
37
37
|
bin = Base64.decode64(base64_param)
|
38
|
-
|
38
|
+
data = Xumo::SFloat.from_binary(bin).reshape(*shape)
|
39
|
+
if layer.params[key].is_a?(LearningParam)
|
40
|
+
layer.params[key].data = data
|
41
|
+
else
|
42
|
+
layer.params[key] = data
|
43
|
+
end
|
39
44
|
end
|
40
45
|
has_param_layers_index += 1
|
41
46
|
end
|
@@ -55,13 +60,14 @@ module DNN
|
|
55
60
|
def to_json
|
56
61
|
hash_layers = @layers.map { |layer| layer.to_hash }
|
57
62
|
hash = {version: VERSION, layers: hash_layers, optimizer: @optimizer.to_hash}
|
58
|
-
JSON.
|
63
|
+
JSON.pretty_generate(hash)
|
59
64
|
end
|
60
65
|
|
61
66
|
def params_to_json
|
62
|
-
has_param_layers = @layers.select { |layer| layer.is_a?(HasParamLayer) }
|
67
|
+
has_param_layers = @layers.select { |layer| layer.is_a?(Layers::HasParamLayer) }
|
63
68
|
has_param_layers_params = has_param_layers.map do |layer|
|
64
69
|
layer.params.map { |key, param|
|
70
|
+
param = param.data if param.is_a?(LearningParam)
|
65
71
|
base64_param = Base64.encode64(param.to_binary)
|
66
72
|
[key, [param.shape, base64_param]]
|
67
73
|
}.to_h
|
@@ -160,7 +166,7 @@ module DNN
|
|
160
166
|
loss_value
|
161
167
|
end
|
162
168
|
|
163
|
-
def accurate(x, y, batch_size =
|
169
|
+
def accurate(x, y, batch_size = 100, &batch_proc)
|
164
170
|
input_data_shape_check(x, y)
|
165
171
|
batch_size = batch_size >= x.shape[0] ? x.shape[0] : batch_size
|
166
172
|
correct = 0
|
data/lib/dnn/core/optimizers.rb
CHANGED
@@ -9,8 +9,9 @@ module DNN
|
|
9
9
|
@learning_rate = learning_rate
|
10
10
|
end
|
11
11
|
|
12
|
-
# Update
|
13
|
-
|
12
|
+
# Update params.
|
13
|
+
# Classes that inherit from this class must implement this method.
|
14
|
+
# def update(params) end
|
14
15
|
|
15
16
|
def to_hash(merge_hash = nil)
|
16
17
|
hash = {class: self.class.name, learning_rate: @learning_rate}
|
@@ -33,16 +34,15 @@ module DNN
|
|
33
34
|
@v = {}
|
34
35
|
end
|
35
36
|
|
36
|
-
def update(
|
37
|
-
|
38
|
-
|
39
|
-
amount = layer.grads[key] * @learning_rate
|
37
|
+
def update(params)
|
38
|
+
params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param|
|
39
|
+
amount = param.grad * @learning_rate
|
40
40
|
if @momentum > 0
|
41
|
-
@v[
|
42
|
-
amount += @momentum * @v[
|
43
|
-
@v[
|
41
|
+
@v[param] ||= 0
|
42
|
+
amount += @momentum * @v[param]
|
43
|
+
@v[param] = amount
|
44
44
|
end
|
45
|
-
|
45
|
+
param.data -= amount
|
46
46
|
end
|
47
47
|
end
|
48
48
|
|
@@ -61,13 +61,12 @@ module DNN
|
|
61
61
|
super(learning_rate, momentum: momentum)
|
62
62
|
end
|
63
63
|
|
64
|
-
def update(
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
layer.params[key] = (layer.params[key] + @momentum**2 * @v[layer][key]) - (1 + @momentum) * amount
|
64
|
+
def update(params)
|
65
|
+
params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param|
|
66
|
+
@v[param] ||= 0
|
67
|
+
amount = param.grad * @learning_rate
|
68
|
+
@v[param] = @v[param] * @momentum - amount
|
69
|
+
param.data = (param.data + @momentum**2 * @v[param]) - (1 + @momentum) * amount
|
71
70
|
end
|
72
71
|
end
|
73
72
|
end
|
@@ -83,12 +82,11 @@ module DNN
|
|
83
82
|
self.new(hash[:learning_rate])
|
84
83
|
end
|
85
84
|
|
86
|
-
def update(
|
87
|
-
|
88
|
-
|
89
|
-
@g[
|
90
|
-
@g[
|
91
|
-
layer.params[key] -= (@learning_rate / Xumo::NMath.sqrt(@g[layer][key] + 1e-7)) * layer.grads[key]
|
85
|
+
def update(params)
|
86
|
+
params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param|
|
87
|
+
@g[param] ||= 0
|
88
|
+
@g[param] += param.grad**2
|
89
|
+
param.data -= (@learning_rate / Xumo::NMath.sqrt(@g[param] + 1e-7)) * param.grad
|
92
90
|
end
|
93
91
|
end
|
94
92
|
end
|
@@ -107,12 +105,11 @@ module DNN
|
|
107
105
|
@g = {}
|
108
106
|
end
|
109
107
|
|
110
|
-
def update(
|
111
|
-
|
112
|
-
|
113
|
-
@g[
|
114
|
-
|
115
|
-
layer.params[key] -= (@learning_rate / Xumo::NMath.sqrt(@g[layer][key] + 1e-7)) * layer.grads[key]
|
108
|
+
def update(params)
|
109
|
+
params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param|
|
110
|
+
@g[param] ||= 0
|
111
|
+
@g[param] = @alpha * @g[param] + (1 - @alpha) * param.grad**2
|
112
|
+
param.data -= (@learning_rate / Xumo::NMath.sqrt(@g[param] + 1e-7)) * param.grad
|
116
113
|
end
|
117
114
|
end
|
118
115
|
|
@@ -136,16 +133,14 @@ module DNN
|
|
136
133
|
@s = {}
|
137
134
|
end
|
138
135
|
|
139
|
-
def update(
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
@h[
|
144
|
-
@s[
|
145
|
-
@
|
146
|
-
|
147
|
-
@s[layer][key] = @rho * @s[layer][key] + (1 - @rho) * v**2
|
148
|
-
layer.params[key] -= v
|
136
|
+
def update(params)
|
137
|
+
params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param|
|
138
|
+
@h[param] ||= Xumo::SFloat.zeros(*param.data.shape)
|
139
|
+
@s[param] ||= Xumo::SFloat.zeros(*param.data.shape)
|
140
|
+
@h[param] = @rho * @h[param] + (1 - @rho) * param.grad**2
|
141
|
+
v = (Xumo::NMath.sqrt(@s[param] + 1e-6) / Xumo::NMath.sqrt(@h[param] + 1e-6)) * param.grad
|
142
|
+
@s[param] = @rho * @s[param] + (1 - @rho) * v**2
|
143
|
+
param.data -= v
|
149
144
|
end
|
150
145
|
end
|
151
146
|
|
@@ -172,17 +167,15 @@ module DNN
|
|
172
167
|
@v = {}
|
173
168
|
end
|
174
169
|
|
175
|
-
def update(
|
170
|
+
def update(params)
|
176
171
|
@iter += 1
|
177
|
-
@m[layer] ||= {}
|
178
|
-
@v[layer] ||= {}
|
179
172
|
lr = @learning_rate * Math.sqrt(1 - @beta2**@iter) / (1 - @beta1**@iter)
|
180
|
-
|
181
|
-
@m[
|
182
|
-
@v[
|
183
|
-
@m[
|
184
|
-
@v[
|
185
|
-
|
173
|
+
params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param|
|
174
|
+
@m[param] ||= 0
|
175
|
+
@v[param] ||= 0
|
176
|
+
@m[param] += (1 - @beta1) * (param.grad - @m[param])
|
177
|
+
@v[param] += (1 - @beta2) * (param.grad**2 - @v[param])
|
178
|
+
param.data -= lr * @m[param] / Xumo::NMath.sqrt(@v[param] + 1e-7)
|
186
179
|
end
|
187
180
|
end
|
188
181
|
|
data/lib/dnn/core/rnn_layers.rb
CHANGED
@@ -5,9 +5,11 @@ module DNN
|
|
5
5
|
class RNN < Connection
|
6
6
|
include Activations
|
7
7
|
|
8
|
-
attr_accessor :h
|
9
8
|
attr_reader :num_nodes
|
10
9
|
attr_reader :stateful
|
10
|
+
attr_reader :weight
|
11
|
+
attr_reader :weight2
|
12
|
+
attr_reader :bias
|
11
13
|
|
12
14
|
def initialize(num_nodes,
|
13
15
|
stateful: false,
|
@@ -22,26 +24,27 @@ module DNN
|
|
22
24
|
@stateful = stateful
|
23
25
|
@return_sequences = return_sequences
|
24
26
|
@layers = []
|
25
|
-
@h = nil
|
27
|
+
@params[:h] = nil
|
28
|
+
@params[:weight2] = @weight2 = LearningParam.new(self)
|
26
29
|
end
|
27
30
|
|
28
31
|
def forward(xs)
|
29
32
|
@xs_shape = xs.shape
|
30
33
|
hs = Xumo::SFloat.zeros(xs.shape[0], @time_length, @num_nodes)
|
31
|
-
h = (@stateful && @h) ? @h : Xumo::SFloat.zeros(xs.shape[0], @num_nodes)
|
34
|
+
h = (@stateful && @params[:h]) ? @params[:h] : Xumo::SFloat.zeros(xs.shape[0], @num_nodes)
|
32
35
|
xs.shape[1].times do |t|
|
33
36
|
x = xs[true, t, false]
|
34
37
|
h = @layers[t].forward(x, h)
|
35
38
|
hs[true, t, false] = h
|
36
39
|
end
|
37
|
-
@h = h
|
40
|
+
@params[:h] = h
|
38
41
|
@return_sequences ? hs : h
|
39
42
|
end
|
40
43
|
|
41
44
|
def backward(dh2s)
|
42
|
-
@
|
43
|
-
@
|
44
|
-
@
|
45
|
+
@weight.grad = Xumo::SFloat.zeros(*@weight.data.shape)
|
46
|
+
@weight2.grad = Xumo::SFloat.zeros(*@weight2.data.shape)
|
47
|
+
@bias.grad = Xumo::SFloat.zeros(*@bias.data.shape)
|
45
48
|
unless @return_sequences
|
46
49
|
dh = dh2s
|
47
50
|
dh2s = Xumo::SFloat.zeros(dh.shape[0], @time_length, dh.shape[1])
|
@@ -61,8 +64,7 @@ module DNN
|
|
61
64
|
hash = {
|
62
65
|
num_nodes: @num_nodes,
|
63
66
|
stateful: @stateful,
|
64
|
-
return_sequences: @return_sequences
|
65
|
-
h: @h.to_a
|
67
|
+
return_sequences: @return_sequences
|
66
68
|
}
|
67
69
|
hash.merge!(merge_hash) if merge_hash
|
68
70
|
super(hash)
|
@@ -73,12 +75,12 @@ module DNN
|
|
73
75
|
end
|
74
76
|
|
75
77
|
def reset_state
|
76
|
-
@h = @h.fill(0) if @h
|
78
|
+
@params[:h] = @params[:h].fill(0) if @params[:h]
|
77
79
|
end
|
78
80
|
|
79
81
|
def lasso
|
80
82
|
if @l1_lambda > 0
|
81
|
-
@l1_lambda * (@
|
83
|
+
@l1_lambda * (@weight.data.abs.sum + @weight2.data.abs.sum)
|
82
84
|
else
|
83
85
|
0
|
84
86
|
end
|
@@ -86,30 +88,30 @@ module DNN
|
|
86
88
|
|
87
89
|
def ridge
|
88
90
|
if @l2_lambda > 0
|
89
|
-
0.5 * (@l2_lambda * ((@
|
91
|
+
0.5 * (@l2_lambda * ((@weight.data**2).sum + (@weight2.data**2).sum))
|
90
92
|
else
|
91
93
|
0
|
92
94
|
end
|
93
95
|
end
|
94
96
|
|
95
97
|
def dlasso
|
96
|
-
dlasso = Xumo::SFloat.ones(*@
|
97
|
-
dlasso[@
|
98
|
+
dlasso = Xumo::SFloat.ones(*@weight.data.shape)
|
99
|
+
dlasso[@weight.data < 0] = -1
|
98
100
|
@l1_lambda * dlasso
|
99
101
|
end
|
100
102
|
|
101
103
|
def dridge
|
102
|
-
@l2_lambda * @
|
104
|
+
@l2_lambda * @weight.data
|
103
105
|
end
|
104
106
|
|
105
107
|
def dlasso2
|
106
|
-
dlasso = Xumo::SFloat.ones(*@
|
107
|
-
dlasso[@
|
108
|
+
dlasso = Xumo::SFloat.ones(*@weight2.data.shape)
|
109
|
+
dlasso[@weight2.data < 0] = -1
|
108
110
|
@l1_lambda * dlasso
|
109
111
|
end
|
110
112
|
|
111
113
|
def dridge2
|
112
|
-
@l2_lambda * @
|
114
|
+
@l2_lambda * @weight2.data
|
113
115
|
end
|
114
116
|
|
115
117
|
private
|
@@ -129,30 +131,32 @@ module DNN
|
|
129
131
|
def forward(x, h)
|
130
132
|
@x = x
|
131
133
|
@h = h
|
132
|
-
h2 = x.dot(@rnn.
|
134
|
+
h2 = x.dot(@rnn.weight.data) + h.dot(@rnn.weight2.data) + @rnn.bias.data
|
133
135
|
@activation.forward(h2)
|
134
136
|
end
|
135
137
|
|
136
138
|
def backward(dh2)
|
137
139
|
dh2 = @activation.backward(dh2)
|
138
|
-
@rnn.
|
139
|
-
@rnn.
|
140
|
+
@rnn.weight.grad += @x.transpose.dot(dh2)
|
141
|
+
@rnn.weight2.grad += @h.transpose.dot(dh2)
|
140
142
|
if @rnn.l1_lambda > 0
|
141
|
-
@rnn.
|
142
|
-
@rnn.
|
143
|
+
@rnn.weight.grad += dlasso
|
144
|
+
@rnn.weight2.grad += dlasso2
|
143
145
|
elsif @rnn.l2_lambda > 0
|
144
|
-
@rnn.
|
145
|
-
@
|
146
|
+
@rnn.weight.grad += dridge
|
147
|
+
@rnn.weight2.grad += dridge2
|
146
148
|
end
|
147
|
-
@rnn.
|
148
|
-
dx = dh2.dot(@rnn.
|
149
|
-
dh = dh2.dot(@rnn.
|
149
|
+
@rnn.bias.grad += dh2.sum(0)
|
150
|
+
dx = dh2.dot(@rnn.weight.data.transpose)
|
151
|
+
dh = dh2.dot(@rnn.weight2.data.transpose)
|
150
152
|
[dx, dh]
|
151
153
|
end
|
152
154
|
end
|
153
155
|
|
154
156
|
|
155
157
|
class SimpleRNN < RNN
|
158
|
+
attr_reader :activation
|
159
|
+
|
156
160
|
def self.load_hash(hash)
|
157
161
|
simple_rnn = self.new(hash[:num_nodes],
|
158
162
|
stateful: hash[:stateful],
|
@@ -162,7 +166,6 @@ module DNN
|
|
162
166
|
bias_initializer: Util.load_hash(hash[:bias_initializer]),
|
163
167
|
l1_lambda: hash[:l1_lambda],
|
164
168
|
l2_lambda: hash[:l2_lambda])
|
165
|
-
simple_rnn.h = Xumo::SFloat.cast(hash[:h])
|
166
169
|
simple_rnn
|
167
170
|
end
|
168
171
|
|
@@ -193,12 +196,12 @@ module DNN
|
|
193
196
|
def init_params
|
194
197
|
super()
|
195
198
|
num_prev_nodes = prev_layer.shape[1]
|
196
|
-
@
|
197
|
-
@
|
198
|
-
@
|
199
|
-
@weight_initializer.init_param(
|
200
|
-
@weight_initializer.init_param(
|
201
|
-
@bias_initializer.init_param(
|
199
|
+
@weight.data = Xumo::SFloat.new(num_prev_nodes, @num_nodes)
|
200
|
+
@weight2.data = Xumo::SFloat.new(@num_nodes, @num_nodes)
|
201
|
+
@bias.data = Xumo::SFloat.new(@num_nodes)
|
202
|
+
@weight_initializer.init_param(@weight)
|
203
|
+
@weight_initializer.init_param(@weight2)
|
204
|
+
@bias_initializer.init_param(@bias)
|
202
205
|
@time_length.times do |t|
|
203
206
|
@layers << SimpleRNN_Dense.new(self)
|
204
207
|
end
|
@@ -221,7 +224,7 @@ module DNN
|
|
221
224
|
@h = h
|
222
225
|
@c = c
|
223
226
|
num_nodes = h.shape[1]
|
224
|
-
a = x.dot(@rnn.
|
227
|
+
a = x.dot(@rnn.weight.data) + h.dot(@rnn.weight2.data) + @rnn.bias.data
|
225
228
|
|
226
229
|
@forget = @forget_sigmoid.forward(a[true, 0...num_nodes])
|
227
230
|
@g = @g_tanh.forward(a[true, num_nodes...(num_nodes * 2)])
|
@@ -245,18 +248,18 @@ module DNN
|
|
245
248
|
|
246
249
|
da = Xumo::SFloat.hstack([dforget, dg, din, dout])
|
247
250
|
|
248
|
-
@rnn.
|
249
|
-
@rnn.
|
251
|
+
@rnn.weight.grad += @x.transpose.dot(da)
|
252
|
+
@rnn.weight2.grad += @h.transpose.dot(da)
|
250
253
|
if @rnn.l1_lambda > 0
|
251
|
-
@rnn.
|
252
|
-
@rnn.
|
254
|
+
@rnn.weight.grad += dlasso
|
255
|
+
@rnn.weight2.grad += dlasso2
|
253
256
|
elsif @rnn.l2_lambda > 0
|
254
|
-
@rnn.
|
255
|
-
@rnn.
|
257
|
+
@rnn.weight.grad += dridge
|
258
|
+
@rnn.weight2.grad += dridge2
|
256
259
|
end
|
257
|
-
@rnn.
|
258
|
-
dx = da.dot(@rnn.
|
259
|
-
dh = da.dot(@rnn.
|
260
|
+
@rnn.bias.grad += da.sum(0)
|
261
|
+
dx = da.dot(@rnn.weight.data.transpose)
|
262
|
+
dh = da.dot(@rnn.weight2.data.transpose)
|
260
263
|
dc = dc2_tmp * @forget
|
261
264
|
[dx, dh, dc]
|
262
265
|
end
|
@@ -264,8 +267,6 @@ module DNN
|
|
264
267
|
|
265
268
|
|
266
269
|
class LSTM < RNN
|
267
|
-
attr_accessor :c
|
268
|
-
|
269
270
|
def self.load_hash(hash)
|
270
271
|
lstm = self.new(hash[:num_nodes],
|
271
272
|
stateful: hash[:stateful],
|
@@ -274,8 +275,6 @@ module DNN
|
|
274
275
|
bias_initializer: Util.load_hash(hash[:bias_initializer]),
|
275
276
|
l1_lambda: hash[:l1_lambda],
|
276
277
|
l2_lambda: hash[:l2_lambda])
|
277
|
-
lstm.h = Xumo::SFloat.cast(hash[:h])
|
278
|
-
lstm.c = Xumo::SFloat.cast(hash[:c])
|
279
278
|
lstm
|
280
279
|
end
|
281
280
|
|
@@ -287,7 +286,7 @@ module DNN
|
|
287
286
|
l1_lambda: 0,
|
288
287
|
l2_lambda: 0)
|
289
288
|
super
|
290
|
-
@c = nil
|
289
|
+
@params[:c] = nil
|
291
290
|
end
|
292
291
|
|
293
292
|
def forward(xs)
|
@@ -296,8 +295,8 @@ module DNN
|
|
296
295
|
h = nil
|
297
296
|
c = nil
|
298
297
|
if @stateful
|
299
|
-
h = @h if @h
|
300
|
-
c = @c if @c
|
298
|
+
h = @params[:h] if @params[:h]
|
299
|
+
c = @params[:c] if @params[:c]
|
301
300
|
end
|
302
301
|
h ||= Xumo::SFloat.zeros(xs.shape[0], @num_nodes)
|
303
302
|
c ||= Xumo::SFloat.zeros(xs.shape[0], @num_nodes)
|
@@ -306,15 +305,15 @@ module DNN
|
|
306
305
|
h, c = @layers[t].forward(x, h, c)
|
307
306
|
hs[true, t, false] = h
|
308
307
|
end
|
309
|
-
@h = h
|
310
|
-
@c = c
|
308
|
+
@params[:h] = h
|
309
|
+
@params[:c] = c
|
311
310
|
@return_sequences ? hs : h
|
312
311
|
end
|
313
312
|
|
314
313
|
def backward(dh2s)
|
315
|
-
@
|
316
|
-
@
|
317
|
-
@
|
314
|
+
@weight.grad = Xumo::SFloat.zeros(*@weight.data.shape)
|
315
|
+
@weight2.grad = Xumo::SFloat.zeros(*@weight2.data.shape)
|
316
|
+
@bias.grad = Xumo::SFloat.zeros(*@bias.data.shape)
|
318
317
|
unless @return_sequences
|
319
318
|
dh = dh2s
|
320
319
|
dh2s = Xumo::SFloat.zeros(dh.shape[0], @time_length, dh.shape[1])
|
@@ -333,11 +332,7 @@ module DNN
|
|
333
332
|
|
334
333
|
def reset_state
|
335
334
|
super()
|
336
|
-
@c = @c.fill(0) if @c
|
337
|
-
end
|
338
|
-
|
339
|
-
def to_hash
|
340
|
-
super({c: @c.to_a})
|
335
|
+
@params[:c] = @params[:c].fill(0) if @params[:c]
|
341
336
|
end
|
342
337
|
|
343
338
|
private
|
@@ -345,12 +340,12 @@ module DNN
|
|
345
340
|
def init_params
|
346
341
|
super()
|
347
342
|
num_prev_nodes = prev_layer.shape[1]
|
348
|
-
@
|
349
|
-
@
|
350
|
-
@
|
351
|
-
@weight_initializer.init_param(
|
352
|
-
@weight_initializer.init_param(
|
353
|
-
@bias_initializer.init_param(
|
343
|
+
@weight.data = Xumo::SFloat.new(num_prev_nodes, @num_nodes * 4)
|
344
|
+
@weight2.data = Xumo::SFloat.new(@num_nodes, @num_nodes * 4)
|
345
|
+
@bias.data = Xumo::SFloat.new(@num_nodes * 4)
|
346
|
+
@weight_initializer.init_param(@weight)
|
347
|
+
@weight_initializer.init_param(@weight2)
|
348
|
+
@bias_initializer.init_param(@bias)
|
354
349
|
@time_length.times do |t|
|
355
350
|
@layers << LSTM_Dense.new(self)
|
356
351
|
end
|
@@ -370,16 +365,16 @@ module DNN
|
|
370
365
|
@x = x
|
371
366
|
@h = h
|
372
367
|
num_nodes = h.shape[1]
|
373
|
-
@weight_a = @rnn.
|
374
|
-
@weight2_a = @rnn.
|
375
|
-
bias_a = @rnn.
|
368
|
+
@weight_a = @rnn.weight.data[true, 0...(num_nodes * 2)]
|
369
|
+
@weight2_a = @rnn.weight2.data[true, 0...(num_nodes * 2)]
|
370
|
+
bias_a = @rnn.bias.data[0...(num_nodes * 2)]
|
376
371
|
a = x.dot(@weight_a) + h.dot(@weight2_a) + bias_a
|
377
372
|
@update = @update_sigmoid.forward(a[true, 0...num_nodes])
|
378
373
|
@reset = @reset_sigmoid.forward(a[true, num_nodes..-1])
|
379
374
|
|
380
|
-
@weight_h = @rnn.
|
381
|
-
@weight2_h = @rnn.
|
382
|
-
bias_h = @rnn.
|
375
|
+
@weight_h = @rnn.weight.data[true, (num_nodes * 2)..-1]
|
376
|
+
@weight2_h = @rnn.weight2.data[true, (num_nodes * 2)..-1]
|
377
|
+
bias_h = @rnn.bias.data[(num_nodes * 2)..-1]
|
383
378
|
@tanh_h = @tanh.forward(x.dot(@weight_h) + (h * @reset).dot(@weight2_h) + bias_h)
|
384
379
|
h2 = (1 - @update) * h + @update * @tanh_h
|
385
380
|
h2
|
@@ -404,16 +399,16 @@ module DNN
|
|
404
399
|
dh += da.dot(@weight2_a.transpose)
|
405
400
|
dbias_a = da.sum(0)
|
406
401
|
|
407
|
-
@rnn.
|
408
|
-
@rnn.
|
402
|
+
@rnn.weight.grad += Xumo::SFloat.hstack([dweight_a, dweight_h])
|
403
|
+
@rnn.weight2.grad += Xumo::SFloat.hstack([dweight2_a, dweight2_h])
|
409
404
|
if @rnn.l1_lambda > 0
|
410
|
-
@rnn.
|
411
|
-
@rnn.
|
405
|
+
@rnn.weight.grad += dlasso
|
406
|
+
@rnn.weight2.grad += dlasso2
|
412
407
|
elsif @rnn.l2_lambda > 0
|
413
|
-
@rnn.
|
414
|
-
@rnn.
|
408
|
+
@rnn.weight.grad += dridge
|
409
|
+
@rnn.weight2.grad += dridge2
|
415
410
|
end
|
416
|
-
@rnn.
|
411
|
+
@rnn.bias.grad += Xumo::SFloat.hstack([dbias_a, dbias_h])
|
417
412
|
[dx, dh]
|
418
413
|
end
|
419
414
|
end
|
@@ -428,7 +423,6 @@ module DNN
|
|
428
423
|
bias_initializer: Util.load_hash(hash[:bias_initializer]),
|
429
424
|
l1_lambda: hash[:l1_lambda],
|
430
425
|
l2_lambda: hash[:l2_lambda])
|
431
|
-
gru.h = Xumo::SFloat.cast(hash[:h])
|
432
426
|
gru
|
433
427
|
end
|
434
428
|
|
@@ -447,12 +441,12 @@ module DNN
|
|
447
441
|
def init_params
|
448
442
|
super()
|
449
443
|
num_prev_nodes = prev_layer.shape[1]
|
450
|
-
@
|
451
|
-
@
|
452
|
-
@
|
453
|
-
@weight_initializer.init_param(
|
454
|
-
@weight_initializer.init_param(
|
455
|
-
@bias_initializer.init_param(
|
444
|
+
@weight.data = Xumo::SFloat.new(num_prev_nodes, @num_nodes * 3)
|
445
|
+
@weight2.data = Xumo::SFloat.new(@num_nodes, @num_nodes * 3)
|
446
|
+
@bias.data = Xumo::SFloat.new(@num_nodes * 3)
|
447
|
+
@weight_initializer.init_param(@weight)
|
448
|
+
@weight_initializer.init_param(@weight2)
|
449
|
+
@bias_initializer.init_param(@bias)
|
456
450
|
@time_length.times do |t|
|
457
451
|
@layers << GRU_Dense.new(self)
|
458
452
|
end
|
data/lib/dnn/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-dnn
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.3
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- unagiootoro
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-01-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -111,6 +111,7 @@ files:
|
|
111
111
|
- lib/dnn/core/error.rb
|
112
112
|
- lib/dnn/core/initializers.rb
|
113
113
|
- lib/dnn/core/layers.rb
|
114
|
+
- lib/dnn/core/learning_param.rb
|
114
115
|
- lib/dnn/core/model.rb
|
115
116
|
- lib/dnn/core/optimizers.rb
|
116
117
|
- lib/dnn/core/rnn_layers.rb
|
@@ -145,8 +146,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
145
146
|
- !ruby/object:Gem::Version
|
146
147
|
version: '0'
|
147
148
|
requirements: []
|
148
|
-
|
149
|
-
rubygems_version: 2.7.6
|
149
|
+
rubygems_version: 3.0.1
|
150
150
|
signing_key:
|
151
151
|
specification_version: 4
|
152
152
|
summary: ruby deep learning library.
|