ruby-dnn 0.5.8 → 0.5.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/API-Reference.ja.md +15 -1
- data/lib/dnn/core/model.rb +11 -1
- data/lib/dnn/core/optimizers.rb +21 -0
- data/lib/dnn/core/rnn_layers.rb +160 -0
- data/lib/dnn/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c6e9ccaa1dd5279ca0c9b41ac4ae9573e6ca0ca35c0ce9450bb6e3a9024eae31
|
4
|
+
data.tar.gz: 2c1247ccf2b6a906f39fb5cbbdda9b837e54453c429ee041e8e1736513ff6ed5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 04d4050badafcd44004c92e34fec1a57c8318da220a8372eccc381bed67741b60ae01a95b57eb56a4c70c0591a10ba8ca643bf86399f471dd606acb25503bd99
|
7
|
+
data.tar.gz: 67528cd4ca207086c6e54d8e5bd232b072638a189e57b2fb45433ab7322ce35424b2d5657bb52f31188d867b9163dd4280f8505dbe61c9e11b26e3b8f0b7ffd0
|
data/API-Reference.ja.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
ruby-dnnのAPIリファレンスです。このリファレンスでは、APIを利用するうえで必要となるクラスとメソッドしか記載していません。
|
3
3
|
そのため、プログラムの詳細が必要な場合は、ソースコードを参照してください。
|
4
4
|
|
5
|
-
最終更新バージョン:0.5.8
|
5
|
+
最終更新バージョン:0.5.9
|
6
6
|
|
7
7
|
# module DNN
|
8
8
|
ruby-dnnの名前空間をなすモジュールです。
|
@@ -661,6 +661,20 @@ Float momentum
|
|
661
661
|
モーメンタム係数。
|
662
662
|
|
663
663
|
|
664
|
+
# class Nesterov < SGD
|
665
|
+
Nesterovによるオプティマイザです。
|
666
|
+
|
667
|
+
## 【Instance methods】
|
668
|
+
|
669
|
+
## def initialize(learning_rate = 0.01, momentum: 0.9)
|
670
|
+
コンストラクタ。
|
671
|
+
### arguments
|
672
|
+
* Float learning_rate
|
673
|
+
学習率。
|
674
|
+
* Float momentum
|
675
|
+
モーメンタム係数。
|
676
|
+
|
677
|
+
|
664
678
|
# class AdaGrad < Optimizer
|
665
679
|
AdaGradによるオプティマイザです。
|
666
680
|
|
data/lib/dnn/core/model.rb
CHANGED
@@ -102,6 +102,9 @@ module DNN
|
|
102
102
|
verbose: true,
|
103
103
|
batch_proc: nil,
|
104
104
|
&epoch_proc)
|
105
|
+
unless compiled?
|
106
|
+
raise DNN_Error.new("The model is not compiled.")
|
107
|
+
end
|
105
108
|
@batch_size = batch_size
|
106
109
|
num_train_data = x.shape[0]
|
107
110
|
(1..epochs).each do |epoch|
|
@@ -168,7 +171,11 @@ module DNN
|
|
168
171
|
x_batch, y_batch = batch_proc.call(x_batch, y_batch) if batch_proc
|
169
172
|
out = forward(x_batch, false)
|
170
173
|
@batch_size.times do |j|
|
171
|
-
|
174
|
+
if @layers[-1].shape == [1]
|
175
|
+
correct += 1 if out[j, 0].round == y_batch[j, 0].round
|
176
|
+
else
|
177
|
+
correct += 1 if out[j, true].max_index == y_batch[j, true].max_index
|
178
|
+
end
|
172
179
|
end
|
173
180
|
end
|
174
181
|
correct.to_f / x.shape[0]
|
@@ -183,6 +190,9 @@ module DNN
|
|
183
190
|
end
|
184
191
|
|
185
192
|
def forward(x, training)
|
193
|
+
unless compiled?
|
194
|
+
raise DNN_Error.new("The model is not compiled.")
|
195
|
+
end
|
186
196
|
@training = training
|
187
197
|
@layers.each do |layer|
|
188
198
|
x = layer.forward(x)
|
data/lib/dnn/core/optimizers.rb
CHANGED
@@ -50,6 +50,27 @@ module DNN
|
|
50
50
|
super({momentum: @momentum})
|
51
51
|
end
|
52
52
|
end
|
53
|
+
|
54
|
+
|
55
|
+
# Optimizer applying a Nesterov-style momentum update on top of SGD.
#
# Relies on @learning_rate, @momentum and the per-layer velocity table @v,
# which are presumably set up by the SGD superclass -- confirm against SGD.
class Nesterov < SGD
  # Rebuilds an optimizer from a hash holding :learning_rate and :momentum.
  def self.load_hash(hash)
    new(hash[:learning_rate], momentum: hash[:momentum])
  end

  # @param learning_rate [Float] factor applied to every gradient.
  # @param momentum [Float] momentum coefficient.
  def initialize(learning_rate = 0.01, momentum: 0.9)
    super(learning_rate, momentum: momentum)
  end

  # Replaces every entry of layer.params with its updated value and stores
  # the new velocity for that parameter in @v[layer].
  #
  # @param layer [Object] must respond to #params and #grads, both keyed by
  #   the same parameter names.
  def update(layer)
    velocities = (@v[layer] ||= {})
    layer.params.each_key do |name|
      step = layer.grads[name] * @learning_rate
      # A parameter seen for the first time starts with a velocity of 0.
      velocity = (velocities[name] || 0) * @momentum - step
      velocities[name] = velocity
      lookahead = layer.params[name] + @momentum**2 * velocity
      layer.params[name] = lookahead - (1 + @momentum) * step
    end
  end
end
|
53
74
|
|
54
75
|
|
55
76
|
class AdaGrad < Optimizer
|
data/lib/dnn/core/rnn_layers.rb
CHANGED
@@ -126,5 +126,165 @@ module DNN
|
|
126
126
|
end
|
127
127
|
end
|
128
128
|
|
129
|
+
|
130
|
+
# A single time step of an LSTM: computes the four gates from the input and
# the previous hidden state, and back-propagates through them.
#
# The pre-activation matrix `a` is split along axis 1 into four blocks of
# num_nodes columns each, in the order [forget, g, in, out]. The same order
# is used when the gate gradients are stacked in #backward, so each gradient
# lands on the weight columns that produced its gate.
class LSTM_Dense
  include Xumo

  # @param params [Hash] parameter table read via :weight, :weight2 and :bias.
  # @param grads [Hash] gradient table; #backward accumulates into the same
  #   three keys with `+=`, so the entries must be initialised by the caller.
  def initialize(params, grads)
    @params = params
    @grads = grads
    @tanh = Tanh.new
    @g_tanh = Tanh.new
    @forget_sigmoid = Sigmoid.new
    @in_sigmoid = Sigmoid.new
    @out_sigmoid = Sigmoid.new
  end

  # Runs one step forward and caches the intermediates needed by #backward.
  #
  # @param x [Object] input for this step (assumed 2-D, samples on axis 0 --
  #   TODO confirm against callers).
  # @param h [Object] previous hidden state; h.shape[1] is taken as num_nodes.
  # @param cell [Object] previous cell state.
  # @return [Array] the pair [new hidden state, new cell state].
  def forward(x, h, cell)
    @x = x
    @h = h
    @cell = cell
    num_nodes = h.shape[1]
    a = x.dot(@params[:weight]) + h.dot(@params[:weight2]) + @params[:bias]

    # Every gate takes all rows (`true`) and its own block of columns.
    # The g slice previously omitted `true` and the g/in blocks were swapped
    # relative to the hstack order in #backward.
    @forget = @forget_sigmoid.forward(a[true, 0...num_nodes])
    @g = @g_tanh.forward(a[true, num_nodes...(num_nodes * 2)])
    @in = @in_sigmoid.forward(a[true, (num_nodes * 2)...(num_nodes * 3)])
    @out = @out_sigmoid.forward(a[true, (num_nodes * 3)..-1])

    @cell2 = @forget * cell + @g * @in
    @tanh_cell2 = @tanh.forward(@cell2)
    @h2 = @out * @tanh_cell2
    [@h2, @cell2]
  end

  # Back-propagates through the step computed by the last #forward call and
  # adds this step's contribution to @grads[:weight], @grads[:weight2] and
  # @grads[:bias].
  #
  # @param dh2 [Object] gradient with respect to the new hidden state.
  # @param dcell2 [Object] gradient with respect to the new cell state.
  # @return [Array] the triple [dx, dh, dcell] for the input, the previous
  #   hidden state and the previous cell state.
  def backward(dh2, dcell2)
    dh2_tmp = @tanh_cell2 * dh2
    dcell2_tmp = @tanh.backward(@out * dh2) + dcell2

    dout = @out_sigmoid.backward(dh2_tmp)
    din = @in_sigmoid.backward(dcell2_tmp * @g)
    dg = @g_tanh.backward(dcell2_tmp * @in)
    dforget = @forget_sigmoid.backward(dcell2_tmp * @cell)

    # Column order must mirror the gate slicing in #forward.
    da = SFloat.hstack([dforget, dg, din, dout])

    @grads[:weight] += @x.transpose.dot(da)
    @grads[:weight2] += @h.transpose.dot(da)
    @grads[:bias] += da.sum(0)
    dx = da.dot(@params[:weight].transpose)
    dh = da.dot(@params[:weight2].transpose)
    dcell = dcell2_tmp * @forget
    [dx, dh, dcell]
  end
end
|
181
|
+
|
182
|
+
|
183
|
+
# In development
|
184
|
+
# LSTM layer that unrolls one LSTM_Dense step per time index. All steps
# share this layer's @params and @grads tables.
class LSTM < HasParamLayer
  include Initializers
  include Activations

  attr_reader :num_nodes
  attr_reader :stateful
  attr_reader :weight_decay

  # Rebuilds a layer from the hash produced by #to_hash.
  def self.load_hash(hash)
    self.new(hash[:num_nodes],
             stateful: hash[:stateful],
             weight_initializer: Util.load_hash(hash[:weight_initializer]),
             bias_initializer: Util.load_hash(hash[:bias_initializer]),
             weight_decay: hash[:weight_decay])
  end

  # @param num_nodes [Integer] number of hidden units per time step.
  # @param stateful [Boolean] when true, the hidden and cell state left by
  #   the previous #forward call seed the next one.
  # @param weight_initializer [Object, nil] defaults to RandomNormal.new.
  # @param bias_initializer [Object, nil] defaults to Zeros.new.
  # @param weight_decay [Numeric] L2 coefficient used by #ridge.
  def initialize(num_nodes,
                 stateful: false,
                 weight_initializer: nil,
                 bias_initializer: nil,
                 weight_decay: 0)
    super()
    @num_nodes = num_nodes
    @stateful = stateful
    @weight_initializer = (weight_initializer || RandomNormal.new)
    @bias_initializer = (bias_initializer || Zeros.new)
    @weight_decay = weight_decay
    @layers = []
    @h = nil
    @cell = nil
  end

  # Runs every time step in order and collects the hidden states.
  #
  # @param xs [Object] input indexed as xs[true, t, false], i.e. axis 0 is
  #   the sample axis and axis 1 the time axis.
  # @return [Object] hidden states, shaped [xs.shape[0], *shape].
  def forward(xs)
    @xs_shape = xs.shape
    hs = SFloat.zeros(xs.shape[0], *shape)
    # Carry state over from the previous call only in stateful mode;
    # otherwise (or on the first call) start from zeros.
    h = @stateful ? @h : nil
    cell = @stateful ? @cell : nil
    h ||= SFloat.zeros(xs.shape[0], @num_nodes)
    cell ||= SFloat.zeros(xs.shape[0], @num_nodes)
    xs.shape[1].times do |t|
      x = xs[true, t, false]
      h, cell = @layers[t].forward(x, h, cell)
      hs[true, t, false] = h
    end
    @h = h
    @cell = cell
    hs
  end

  # Back-propagates through time, newest step first. Gradients are reset to
  # zero here and then accumulated by each LSTM_Dense step.
  #
  # @param dh2s [Object] gradient of the loss with respect to #forward's output.
  # @return [Object] gradient with respect to the input, shaped like the
  #   input of the last #forward call.
  def backward(dh2s)
    @grads[:weight] = SFloat.zeros(*@params[:weight].shape)
    @grads[:weight2] = SFloat.zeros(*@params[:weight2].shape)
    @grads[:bias] = SFloat.zeros(*@params[:bias].shape)
    dxs = SFloat.zeros(@xs_shape)
    dh = 0
    dcell = 0
    (0...dh2s.shape[1]).reverse_each do |t|
      dh2 = dh2s[true, t, false]
      # The hidden-state gradient is the sum of the gradient from the output
      # at step t and the one flowing back from step t + 1.
      dx, dh, dcell = @layers[t].backward(dh2 + dh, dcell)
      dxs[true, t, false] = dx
    end
    dxs
  end

  # @return [Array] per-sample output shape, [time length, number of nodes].
  def shape
    [@time_length, @num_nodes]
  end

  # L2 penalty over both weight matrices: the input weights (:weight) and the
  # recurrent weights (:weight2). Returns 0 when weight decay is disabled.
  def ridge
    if @weight_decay > 0
      0.5 * (@weight_decay * (@params[:weight]**2).sum + @weight_decay * (@params[:weight2]**2).sum)
    else
      0
    end
  end

  # Serialises the constructor arguments; merged with the superclass hash.
  def to_hash
    super({num_nodes: @num_nodes,
           stateful: @stateful,
           weight_initializer: @weight_initializer.to_hash,
           bias_initializer: @bias_initializer.to_hash,
           weight_decay: @weight_decay})
  end

  private

  # Allocates and initialises the parameters from the previous layer's shape
  # and creates one LSTM_Dense step per time index.
  def init_params
    @time_length = prev_layer.shape[0]
    num_prev_nodes = prev_layer.shape[1]
    # Four gates share one matrix, hence the factor of 4 on the column count.
    @params[:weight] = SFloat.new(num_prev_nodes, @num_nodes * 4)
    @params[:weight2] = SFloat.new(@num_nodes, @num_nodes * 4)
    @params[:bias] = SFloat.new(@num_nodes * 4)
    @weight_initializer.init_param(self, :weight)
    @weight_initializer.init_param(self, :weight2)
    @bias_initializer.init_param(self, :bias)
    @time_length.times do |t|
      @layers << LSTM_Dense.new(@params, @grads)
    end
  end
end
|
288
|
+
|
129
289
|
end
|
130
290
|
end
|
data/lib/dnn/version.rb
CHANGED