ruby-dnn 0.6.10 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/API-Reference.ja.md +79 -43
- data/lib/dnn.rb +10 -10
- data/lib/dnn/core/activations.rb +29 -14
- data/lib/dnn/core/cnn_layers.rb +24 -41
- data/lib/dnn/core/layers.rb +77 -27
- data/lib/dnn/core/model.rb +18 -3
- data/lib/dnn/core/rnn_layers.rb +164 -95
- data/lib/dnn/core/util.rb +0 -5
- data/lib/dnn/lib/cifar10.rb +1 -1
- data/lib/dnn/lib/image_io.rb +1 -1
- data/lib/dnn/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-metadata.gz:
-data.tar.gz:
+metadata.gz: 16d5b6027014914e3e3356599dbf5ba9735e04b6a9824da1615a3e0b5c3e0a75
+data.tar.gz: ad81eb7df3f442d0ab54b6df97f6dadfd085c8571335593c84eb6599fa4b3ea8
 SHA512:
-metadata.gz:
-data.tar.gz:
+metadata.gz: f8f53054e425bd8cba7a13ee99ca8c4a0ec356fd8750bca8f8ba84542b14d3f413ae385bf1aa710e289a991618cdb73d71f38ffe4f8cb61fa8d5662166360e9d
+data.tar.gz: 92425a72d2cc0e9072d36f10187571eeb710869e6d7246f5a611e7f992b763aaafe541a853f1133f50c45cdda6cda2e52ee11f80b6a876f6afe1993332bb40a3
data/API-Reference.ja.md
CHANGED
@@ -2,7 +2,7 @@
 This is the API reference for ruby-dnn. It covers only the classes and methods needed to use the API.
 For further details of the implementation, please refer to the source code.
 
-Last updated for version: 0.
+Last updated for version: 0.7.0
 
 # module DNN
 The module that forms the ruby-dnn namespace.
@@ -187,6 +187,35 @@ Unlike predict, it returns a single output for a single input.
 Numo::SFloat
 Returns the inference result.
 
+## def copy
+Creates a new model that is a copy of the current model.
+### arguments
+None.
+### return
+Model
+The copied model.
+
+## def get_layer(index)
+Gets the layer at the specified index.
+### arguments
+* Integer index
+The index of the layer to get.
+### return
+Layer
+The instance of the target layer.
+
+## def get_layer(layer_class, index)
+Gets the layer of the class specified by layer_class, selected by index.
+### arguments
+* Layer layer_class
+The class of the layer to get.
+* Integer index
+The index of the layer. For example, when layers is [InputLayer, Dense, Dense, SoftmaxWithLoss],
+the index used to get the first Dense is 0.
+### return
+Layer
+The instance of the target layer.
+
 
 # module Layers
 The module that forms the namespace for layers.
@@ -274,7 +303,29 @@ Hash
 Specifies the dimension or shape of the input layer. An Integer argument is treated as a dimension, and an Array as a shape.
 
 
-# class
+# class Connection < HasParamLayer
+The superclass of all layers that connect neurons.
+
+## 【Properties】
+
+## attr_reader :weight_initializer
+Initializer
+Gets the initializer used to initialize the weights.
+
+## attr_reader :bias_initializer
+Initializer
+Gets the initializer used to initialize the bias.
+
+## attr_reader :l1_lambda
+Float
+Gets the L1 regularization coefficient for the weights.
+
+## attr_reader :l2_lambda
+Float
+Gets the L2 regularization coefficient for the weights.
+
+
+# class Dense < Connection
 The class for fully connected layers.
 
 ## 【Properties】
@@ -283,28 +334,26 @@ Hash
 Integer
 Gets the number of nodes in the layer.
 
-## attr_reader :weight_decay
-Float
-Gets the weight decay coefficient.
-
 ## 【Instance methods】
 
-## def initialize(num_nodes, weight_initializer: nil, bias_initializer: nil,
+## def initialize(num_nodes, weight_initializer: nil, bias_initializer: nil, l1_lambda: 0, l2_lambda: 0)
 Constructor.
 ### arguments
 * Integer num_nodes
 Sets the number of nodes in the layer.
 * Initializer weight_initializer: nil
-
+Sets the initializer used to initialize the weights.
 If nil is specified, the RandomNormal initializer is used.
 * Initializer bias_initializer: nil
 Sets the initializer used to initialize the bias.
 If nil is specified, the Zeros initializer is used.
-* Float
-
+* Float l1_lambda: 0
+Sets the L1 regularization coefficient for the weights.
+* Float l2_lambda: 0
+Sets the L2 regularization coefficient for the weights.
 
 
-# class Conv2D <
+# class Conv2D < Connection
 The class for convolution layers.
 
 ## 【Properties】
@@ -323,13 +372,9 @@ Array
 The stride unit used when performing the convolution.
 Retrieved in the form [Integer height, Integer width].
 
-## attr_reader :weight_decay
-Float
-Gets the strength of the L2 regularization term used for weight decay.
-
 ## 【Instance methods】
 
-## def initialize(num_filters, filter_size, weight_initializer: nil, bias_initializer: nil, strides: 1, padding false,
+## def initialize(num_filters, filter_size, weight_initializer: nil, bias_initializer: nil, strides: 1, padding false, l1_lambda: 0, l2_lambda: 0)
 Constructor.
 ### arguments
 * Integer num_filters
@@ -348,8 +393,10 @@ When specified as an Array, use the form [Integer height, Integer width].
 * bool padding: true
 Sets whether to apply zero padding to the image. If true, zero padding is applied so that the size of the output image
 matches that of the input image.
-* Float
-
+* Float l1_lambda: 0
+Sets the L1 regularization coefficient for the weights.
+* Float l2_lambda: 0
+Sets the L2 regularization coefficient for the weights.
 
 
 # class Pool2D < Layer
@@ -412,16 +459,9 @@ Array
 When specified as an Array, use the form [Integer height, Integer width].
 
 
-# class RNN <
+# class RNN < Connection
 The superclass of all recurrent neural network layers.
 
-## 【Properties】
-
-## attr_accessor :h
-Numo::SFloat
-Gets the current state of the hidden layer.
-The state of the hidden layer can be reset by setting nil.
-
 ## attr_reader :num_nodes
 Integer
 Gets the number of nodes in the layer.
@@ -430,13 +470,9 @@ Integer
 bool
 Returns whether the layer is stateful.
 
-## attr_reader :weight_decay
-Float
-Gets the weight decay coefficient.
-
 ## 【Instance methods】
 
-## def initialize(num_nodes, stateful: false, return_sequences: true, weight_initializer: nil, bias_initializer: nil,
+## def initialize(num_nodes, stateful: false, return_sequences: true, weight_initializer: nil, bias_initializer: nil, l1_lamda: 0, l2_lambda: 0)
 Constructor.
 ### arguments
 * Integer num_nodes
@@ -452,8 +488,13 @@ If nil is specified, the RandomNormal initializer is used.
 * Initializer bias_initializer: nil
 Sets the initializer used to initialize the bias.
 If nil is specified, the Zeros initializer is used.
-* Float
-
+* Float l1_lambda: 0
+Sets the L1 regularization coefficient for the weights.
+* Float l2_lambda: 0
+Sets the L2 regularization coefficient for the weights.
+
+## def reset_state
+Resets the state of the hidden layer.
 
 
 # class SimpleRNN < RNN
@@ -461,7 +502,7 @@ If nil is specified, the Zeros initializer is used.
 
 ## 【Instance methods】
 
-## def initialize(num_nodes, stateful: false, return_sequences: true, activation: nil, weight_initializer: nil, bias_initializer: nil,
+## def initialize(num_nodes, stateful: false, return_sequences: true, activation: nil, weight_initializer: nil, bias_initializer: nil, l1_lamda: 0, l2_lambda: 0)
 Constructor.
 ### arguments
 * Integer num_nodes
@@ -480,20 +521,15 @@ If nil is specified, the RandomNormal initializer is used.
 * Initializer bias_initializer: nil
 Sets the initializer used to initialize the bias.
 If nil is specified, the Zeros initializer is used.
-* Float
-
+* Float l1_lambda: 0
+Sets the L1 regularization coefficient for the weights.
+* Float l2_lambda: 0
+Sets the L2 regularization coefficient for the weights.
 
 
 # class LSTM < RNN
 The class for LSTM layers.
 
-## 【Properties】
-
-## attr_accessor :cell
-Numo::SFloat
-Gets the current cell state of the hidden layer.
-The cell state of the hidden layer can be reset by setting nil.
-
 
 # class GRU < RNN
 The class for GRU layers.
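The documentation changes above replace the single weight_decay option with separate l1_lambda and l2_lambda coefficients on every Connection subclass and add Model#copy and Model#get_layer. The sketch below shows how the documented signatures fit together; the model topology, optimizer choice, and the << / compile calls follow common ruby-dnn usage and are assumptions for illustration, not content of this diff.

```ruby
require "dnn"
include DNN::Layers
include DNN::Activations

model = DNN::Model.new
model << InputLayer.new(784)
# New keyword arguments: per-layer L1/L2 regularization coefficients.
model << Dense.new(256, l1_lambda: 0.001)
model << Sigmoid.new
model << Dense.new(10, l2_lambda: 0.01)
model << SoftmaxWithLoss.new
model.compile(DNN::Optimizers::SGD.new)

# New Model API documented above.
first_dense = model.get_layer(Dense, 0)  # first Dense layer
third_layer = model.get_layer(2)         # layer at index 2
backup      = model.copy                 # deep copy of the whole model
```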
data/lib/dnn.rb
CHANGED
@@ -9,13 +9,13 @@ Xumo::SFloat.srand(rand(2**64))
 
 module DNN; end
 
-
-
-
-
-
-
-
-
-
-
+require_relative "dnn/version"
+require_relative "dnn/core/error"
+require_relative "dnn/core/model"
+require_relative "dnn/core/initializers"
+require_relative "dnn/core/layers"
+require_relative "dnn/core/activations"
+require_relative "dnn/core/cnn_layers"
+require_relative "dnn/core/rnn_layers"
+require_relative "dnn/core/optimizers"
+require_relative "dnn/core/util"
data/lib/dnn/core/activations.rb
CHANGED
@@ -2,8 +2,10 @@ module DNN
 module Activations
 
 class Sigmoid < Layers::Layer
+NMath = Xumo::NMath
+
 def forward(x)
-@out = 1 / (1 +
+@out = 1 / (1 + NMath.exp(-x))
 end
 
 def backward(dout)
@@ -13,8 +15,10 @@ module DNN
 
 
 class Tanh < Layers::Layer
+NMath = Xumo::NMath
+
 def forward(x)
-@out =
+@out = NMath.tanh(x)
 end
 
 def backward(dout)
@@ -36,25 +40,29 @@ module DNN
 
 
 class Softplus < Layers::Layer
+NMath = Xumo::NMath
+
 def forward(x)
 @x = x
-
+NMath.log(1 + NMath.exp(x))
 end
 
 def backward(dout)
-dout * (1 / (1 +
+dout * (1 / (1 + NMath.exp(-@x)))
 end
 end
 
 
 class Swish < Layers::Layer
+NMath = Xumo::NMath
+
 def forward(x)
 @x = x
-@out = x * (1 / (1 +
+@out = x * (1 / (1 + NMath.exp(-x)))
 end
 
 def backward(dout)
-dout * (@out + (1 / (1 +
+dout * (@out + (1 / (1 + NMath.exp(-@x))) * (1 - @out))
 end
 end
 
@@ -105,6 +113,8 @@ module DNN
 
 
 class ELU < Layers::Layer
+NMath = Xumo::NMath
+
 attr_reader :alpha
 
 def self.load_hash(hash)
@@ -122,7 +132,7 @@ module DNN
 x1 *= x
 x2 = Xumo::SFloat.zeros(x.shape)
 x2[x < 0] = 1
-x2 *= @alpha *
+x2 *= @alpha * NMath.exp(x) - @alpha
 x1 + x2
 end
 
@@ -131,7 +141,7 @@ module DNN
 dx[@x < 0] = 0
 dx2 = Xumo::SFloat.zeros(@x.shape)
 dx2[@x < 0] = 1
-dx2 *= @alpha *
+dx2 *= @alpha * NMath.exp(@x)
 dout * (dx + dx2)
 end
 
@@ -152,7 +162,7 @@ module DNN
 
 def loss(y)
 batch_size = y.shape[0]
-0.5 * ((@out - y)**2).sum / batch_size + ridge
+0.5 * ((@out - y)**2).sum / batch_size + lasso + ridge
 end
 end
 
@@ -171,7 +181,7 @@ module DNN
 
 def loss(y)
 batch_size = y.shape[0]
-(@out - y).abs.sum / batch_size + ridge
+(@out - y).abs.sum / batch_size + lasso + ridge
 end
 end
 
@@ -183,7 +193,8 @@ module DNN
 
 def loss(y)
 loss = loss_l1(y)
-
+loss = loss > 1 ? loss : loss_l2(y)
+@loss = loss + lasso + ridge
 end
 
 def backward(y)
@@ -210,8 +221,10 @@ module DNN
 
 
 class SoftmaxWithLoss < Layers::OutputLayer
+NMath = Xumo::NMath
+
 def forward(x)
-@out =
+@out = NMath.exp(x) / NMath.exp(x).sum(1).reshape(x.shape[0], 1)
 end
 
 def backward(y)
@@ -220,12 +233,14 @@ module DNN
 
 def loss(y)
 batch_size = y.shape[0]
--(y *
+-(y * NMath.log(@out + 1e-7)).sum / batch_size + lasso + ridge
 end
 end
 
 
 class SigmoidWithLoss < Layers::OutputLayer
+NMath = Xumo::NMath
+
 def initialize
 @sigmoid = Sigmoid.new
 end
@@ -240,7 +255,7 @@ module DNN
 
 def loss(y)
 batch_size = y.shape[0]
--(y *
+-(y * NMath.log(@out + 1e-7) + (1 - y) * NMath.log(1 - @out + 1e-7)).sum / batch_size + lasso + ridge
 end
 end
 
data/lib/dnn/core/cnn_layers.rb
CHANGED
@@ -60,29 +60,26 @@ module DNN
 end
 
 
-class Conv2D <
-include Initializers
+class Conv2D < Connection
 include Conv2DModule
 
 attr_reader :num_filters
 attr_reader :filter_size
 attr_reader :strides
-attr_reader :weight_decay
 
 def initialize(num_filters, filter_size,
 weight_initializer: nil,
 bias_initializer: nil,
 strides: 1,
 padding: false,
-
-
+l1_lambda: 0,
+l2_lambda: 0)
+super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
+l1_lambda: l1_lambda, l2_lambda: l1_lambda)
 @num_filters = num_filters
 @filter_size = filter_size.is_a?(Integer) ? [filter_size, filter_size] : filter_size
-@weight_initializer = (weight_initializer || RandomNormal.new)
-@bias_initializer = (bias_initializer || Zeros.new)
 @strides = strides.is_a?(Integer) ? [strides, strides] : strides
 @padding = padding
-@weight_decay = weight_decay
 end
 
 def self.load_hash(hash)
@@ -91,7 +88,8 @@ module DNN
 bias_initializer: Util.load_hash(hash[:bias_initializer]),
 strides: hash[:strides],
 padding: hash[:padding],
-
+l1_lambda: hash[:l1_lambda],
+l2_lambda: hash[:l2_lambda])
 end
 
 def build(model)
@@ -116,8 +114,9 @@ module DNN
 def backward(dout)
 dout = dout.reshape(dout.shape[0..2].reduce(:*), dout.shape[3])
 @grads[:weight] = @col.transpose.dot(dout)
-if @
-
+if @l1_lambda > 0
+@grads[:weight] += dlasso
+elsif @l2_lambda > 0
 @grads[:weight] += dridge
 end
 @grads[:bias] = dout.sum(0)
@@ -130,22 +129,11 @@ module DNN
 [*@out_size, @num_filters]
 end
 
-def ridge
-if @weight_decay > 0
-0.5 * @weight_decay * (@params[:weight]**2).sum
-else
-0
-end
-end
-
 def to_hash
 super({num_filters: @num_filters,
 filter_size: @filter_size,
-weight_initializer: @weight_initializer.to_hash,
-bias_initializer: @bias_initializer.to_hash,
 strides: @strides,
-padding: @padding
-weight_decay: @weight_decay})
+padding: @padding})
 end
 
 private
@@ -154,8 +142,7 @@ module DNN
 num_prev_filter = prev_layer.shape[2]
 @params[:weight] = Xumo::SFloat.new(num_prev_filter * @filter_size.reduce(:*), @num_filters)
 @params[:bias] = Xumo::SFloat.new(@num_filters)
-
-@bias_initializer.init_param(self, :bias)
+super()
 end
 end
 
@@ -193,18 +180,6 @@ module DNN
 end
 end
 
-def forward(x)
-x = padding(x, @pad) if @padding
-@x_shape = x.shape
-col = im2col(x, *@out_size, *@pool_size, @strides)
-col.reshape(x.shape[0] * @out_size.reduce(:*) * x.shape[3], @pool_size.reduce(:*))
-end
-
-def backward(dcol)
-dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides)
-@padding ? back_padding(dx, @pad) : dx
-end
-
 def shape
 [*@out_size, @num_channel]
 end
@@ -224,7 +199,10 @@ module DNN
 end
 
 def forward(x)
-
+x = padding(x, @pad) if @padding
+@x_shape = x.shape
+col = im2col(x, *@out_size, *@pool_size, @strides)
+col = col.reshape(x.shape[0] * @out_size.reduce(:*) * x.shape[3], @pool_size.reduce(:*))
 @max_index = col.max_index(1)
 col.max(1).reshape(x.shape[0], *@out_size, x.shape[3])
 end
@@ -233,7 +211,8 @@ module DNN
 dmax = Xumo::SFloat.zeros(dout.size * @pool_size.reduce(:*))
 dmax[@max_index] = dout.flatten
 dcol = dmax.reshape(dout.shape[0..2].reduce(:*), dout.shape[3] * @pool_size.reduce(:*))
-
+dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides)
+@padding ? back_padding(dx, @pad) : dx
 end
 end
 
@@ -244,7 +223,10 @@ module DNN
 end
 
 def forward(x)
-
+x = padding(x, @pad) if @padding
+@x_shape = x.shape
+col = im2col(x, *@out_size, *@pool_size, @strides)
+col = col.reshape(x.shape[0] * @out_size.reduce(:*) * x.shape[3], @pool_size.reduce(:*))
 col.mean(1).reshape(x.shape[0], *@out_size, x.shape[3])
 end
 
@@ -256,7 +238,8 @@ module DNN
 davg[true, i] = dout.flatten
 end
 dcol = davg.reshape(dout.shape[0..2].reduce(:*), dout.shape[3] * @pool_size.reduce(:*))
-
+dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides)
+@padding ? back_padding(dx, @pad) : dx
 end
 end
 
data/lib/dnn/core/layers.rb
CHANGED
@@ -100,30 +100,86 @@ module DNN
 super({shape: @shape})
 end
 end
-
-
-class
+
+
+class Connection < HasParamLayer
 include Initializers
 
+attr_reader :l1_lambda
+attr_reader :l2_lambda
+
+def initialize(weight_initializer: nil,
+bias_initializer: nil,
+l1_lambda: 0,
+l2_lambda: 0)
+super()
+@weight_initializer = (weight_initializer || RandomNormal.new)
+@bias_initializer = (bias_initializer || Zeros.new)
+@l1_lambda = l1_lambda
+@l2_lambda = l2_lambda
+end
+
+def lasso
+if @l1_lambda > 0
+@l1_lambda * @params[:weight].abs.sum
+else
+0
+end
+end
+
+def ridge
+if @l2_lambda > 0
+0.5 * @l2_lambda * (@params[:weight]**2).sum
+else
+0
+end
+end
+
+def dlasso
+dlasso = Xumo::SFloat.ones(*@params[:weight].shape)
+dlasso[@params[:weight] < 0] = -1
+@l1_lambda * dlasso
+end
+
+def dridge
+@l2_lambda * @params[:weight]
+end
+
+def to_hash(merge_hash)
+super({weight_initializer: @weight_initializer.to_hash,
+bias_initializer: @bias_initializer.to_hash,
+l1_lambda: @l1_lambda,
+l2_lambda: @l2_lambda}.merge(merge_hash))
+end
+
+private
+
+def init_params
+@weight_initializer.init_param(self, :weight)
+@bias_initializer.init_param(self, :bias)
+end
+end
+
+
+class Dense < Connection
 attr_reader :num_nodes
-attr_reader :weight_decay
 
 def self.load_hash(hash)
 self.new(hash[:num_nodes],
 weight_initializer: Util.load_hash(hash[:weight_initializer]),
 bias_initializer: Util.load_hash(hash[:bias_initializer]),
-
+l1_lambda: hash[:l1_lambda],
+l2_lambda: hash[:l2_lambda])
 end
 
 def initialize(num_nodes,
 weight_initializer: nil,
 bias_initializer: nil,
-
-
+l1_lambda: 0,
+l2_lambda: 0)
+super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
+l1_lambda: l1_lambda, l2_lambda: l2_lambda)
 @num_nodes = num_nodes
-@weight_initializer = (weight_initializer || RandomNormal.new)
-@bias_initializer = (bias_initializer || Zeros.new)
-@weight_decay = weight_decay
 end
 
 def forward(x)
@@ -133,8 +189,9 @@ module DNN
 
 def backward(dout)
 @grads[:weight] = @x.transpose.dot(dout)
-if @
-
+if @l1_lambda > 0
+@grads[:weight] += dlasso
+elsif @l2_lambda > 0
 @grads[:weight] += dridge
 end
 @grads[:bias] = dout.sum(0)
@@ -145,19 +202,8 @@ module DNN
 [@num_nodes]
 end
 
-def ridge
-if @weight_decay > 0
-0.5 * @weight_decay * (@params[:weight]**2).sum
-else
-0
-end
-end
-
 def to_hash
-super({num_nodes: @num_nodes
-weight_initializer: @weight_initializer.to_hash,
-bias_initializer: @bias_initializer.to_hash,
-weight_decay: @weight_decay})
+super({num_nodes: @num_nodes})
 end
 
 private
@@ -166,8 +212,7 @@ module DNN
 num_prev_nodes = prev_layer.shape[0]
 @params[:weight] = Xumo::SFloat.new(num_prev_nodes, @num_nodes)
 @params[:bias] = Xumo::SFloat.new(@num_nodes)
-
-@bias_initializer.init_param(self, :bias)
+super()
 end
 end
 
@@ -218,9 +263,14 @@ module DNN
 
 class OutputLayer < Layer
 private
+
+def lasso
+@model.layers.select { |layer| layer.is_a?(Connection) }
+.reduce(0) { |sum, layer| sum + layer.lasso }
+end
 
 def ridge
-@model.layers.select { |layer| layer.
+@model.layers.select { |layer| layer.is_a?(Connection) }
 .reduce(0) { |sum, layer| sum + layer.ridge }
 end
 end
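The Connection class introduced above centralizes the regularization math: lasso and ridge are the penalty terms added to the loss, dlasso and dridge the corresponding terms added to the weight gradient. A standalone sketch of the same arithmetic using Numo directly (variable names and sizes are illustrative, not taken from the package):

```ruby
require "numo/narray"

l1_lambda = 0.01
l2_lambda = 0.01
weight = Numo::SFloat.new(3, 3).rand_norm

# Penalty terms added to the loss (as in Connection#lasso / #ridge):
#   L1 = lambda1 * sum(|w|),  L2 = 0.5 * lambda2 * sum(w^2)
lasso = l1_lambda * weight.abs.sum
ridge = 0.5 * l2_lambda * (weight**2).sum

# Gradient terms added to the weight gradient (as in Connection#dlasso / #dridge):
#   dL1/dw = lambda1 * sign(w),  dL2/dw = lambda2 * w
dlasso = Numo::SFloat.ones(*weight.shape)
dlasso[weight < 0] = -1
dlasso *= l1_lambda
dridge = l2_lambda * weight
```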
data/lib/dnn/core/model.rb
CHANGED
@@ -1,4 +1,5 @@
 require "json"
+require "base64"
 
 module DNN
 # This class deals with the model of the network.
@@ -32,8 +33,9 @@ module DNN
 @layers.each do |layer|
 next unless layer.is_a?(HasParamLayer)
 hash_params = has_param_layers_params[has_param_layers_index]
-hash_params.each do |key,
-
+hash_params.each do |key, (shape, base64_param)|
+bin = Base64.decode64(base64_param)
+layer.params[key] = Xumo::SFloat.from_binary(bin).reshape(*shape)
 end
 has_param_layers_index += 1
 end
@@ -59,7 +61,10 @@ module DNN
 def params_to_json
 has_param_layers = @layers.select { |layer| layer.is_a?(HasParamLayer) }
 has_param_layers_params = has_param_layers.map do |layer|
-layer.params.map { |key, param|
+layer.params.map { |key, param|
+base64_param = Base64.encode64(param.to_binary)
+[key, [param.shape, base64_param]]
+}.to_h
 end
 JSON.dump(has_param_layers_params)
 end
@@ -190,6 +195,16 @@ module DNN
 def copy
 Marshal.load(Marshal.dump(self))
 end
+
+def get_layer(*args)
+if args.length == 1
+index = args[0]
+@layers[index]
+else
+layer_class, index = args
+@layers.select { |layer| layer.is_a?(layer_class) }[index]
+end
+end
 
 def forward(x, training)
 @training = training
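The Model changes above switch parameter serialization to a [shape, Base64-encoded binary] pair per parameter instead of a plain nested array. A round-trip sketch of that encoding using Numo directly (assuming Xumo resolves to Numo on the CPU backend; the weight key and array contents are illustrative):

```ruby
require "json"
require "base64"
require "numo/narray"

param = Numo::SFloat.new(2, 3).seq

# Encode: shape plus Base64-encoded raw binary, as in Model#params_to_json.
encoded = [param.shape, Base64.encode64(param.to_binary)]
json = JSON.dump(weight: encoded)

# Decode: rebuild the array from the binary blob, as in Model#load.
shape, base64_param = JSON.parse(json)["weight"]
restored = Numo::SFloat.from_binary(Base64.decode64(base64_param)).reshape(*shape)
```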
data/lib/dnn/core/rnn_layers.rb
CHANGED
@@ -2,28 +2,25 @@ module DNN
 module Layers
 
 # Super class of all RNN classes.
-class RNN <
-include Initializers
+class RNN < Connection
 include Activations
 
 attr_accessor :h
 attr_reader :num_nodes
 attr_reader :stateful
-attr_reader :weight_decay
 
 def initialize(num_nodes,
 stateful: false,
 return_sequences: true,
 weight_initializer: nil,
 bias_initializer: nil,
-
-
+l1_lambda: 0,
+l2_lambda: 0)
+super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
+l1_lambda: l1_lambda, l2_lambda: l2_lambda)
 @num_nodes = num_nodes
 @stateful = stateful
 @return_sequences = return_sequences
-@weight_initializer = (weight_initializer || RandomNormal.new)
-@bias_initializer = (bias_initializer || Zeros.new)
-@weight_decay = weight_decay
 @layers = []
 @h = nil
 end
@@ -62,30 +59,61 @@ module DNN
 
 def to_hash(merge_hash = nil)
 hash = {
-class: self.class.name,
 num_nodes: @num_nodes,
 stateful: @stateful,
 return_sequences: @return_sequences,
-
-bias_initializer: @bias_initializer.to_hash,
-weight_decay: @weight_decay,
+h: @h.to_a
 }
 hash.merge!(merge_hash) if merge_hash
-hash
+super(hash)
 end
 
 def shape
 @return_sequences ? [@time_length, @num_nodes] : [@num_nodes]
 end
 
+def reset_state
+@h = @h.fill(0) if @h
+end
+
+def lasso
+if @l1_lambda > 0
+@l1_lambda * (@params[:weight].abs.sum + @params[:weight2].abs.sum)
+else
+0
+end
+end
+
 def ridge
-if @
-0.5 * (@
+if @l2_lambda > 0
+0.5 * (@l2_lambda * ((@params[:weight]**2).sum + (@params[:weight2]**2).sum))
 else
 0
 end
 end
 
+def dlasso
+dlasso = Xumo::SFloat.ones(*@params[:weight].shape)
+dlasso[@params[:weight] < 0] = -1
+@l1_lambda * dlasso
+end
+
+def dridge
+@l2_lambda * @params[:weight]
+end
+
+def dlasso2
+dlasso = Xumo::SFloat.ones(*@params[:weight2].shape)
+dlasso[@params[:weight2] < 0] = -1
+@l1_lambda * dlasso
+end
+
+def dridge2
+@l2_lambda * @params[:weight2]
+end
+
+private
+
 def init_params
 @time_length = prev_layer.shape[0]
 end
@@ -93,26 +121,32 @@ module DNN
 
 
 class SimpleRNN_Dense
-def initialize(
-@
-@
-@activation = activation
+def initialize(rnn)
+@rnn = rnn
+@activation = rnn.activation.clone
 end
 
 def forward(x, h)
 @x = x
 @h = h
-h2 = x.dot(@params[:weight]) + h.dot(@params[:weight2]) + @params[:bias]
+h2 = x.dot(@rnn.params[:weight]) + h.dot(@rnn.params[:weight2]) + @rnn.params[:bias]
 @activation.forward(h2)
 end
 
 def backward(dh2)
 dh2 = @activation.backward(dh2)
-@grads[:weight] += @x.transpose.dot(dh2)
-@grads[:weight2] += @h.transpose.dot(dh2)
-@
-
-
+@rnn.grads[:weight] += @x.transpose.dot(dh2)
+@rnn.grads[:weight2] += @h.transpose.dot(dh2)
+if @rnn.l1_lambda > 0
+@rnn.grads[:weight] += dlasso
+@rnn.grads[:weight2] += dlasso2
+elsif @rnn.l2_lambda > 0
+@rnn.grads[:weight] += dridge
+@grads[:weight2] += dridge2
+end
+@rnn.grads[:bias] += dh2.sum(0)
+dx = dh2.dot(@rnn.params[:weight].transpose)
+dh = dh2.dot(@rnn.params[:weight2].transpose)
 [dx, dh]
 end
 end
@@ -120,13 +154,16 @@ module DNN
 
 class SimpleRNN < RNN
 def self.load_hash(hash)
-self.new(hash[:num_nodes],
-
-
-
-
-
-
+simple_rnn = self.new(hash[:num_nodes],
+stateful: hash[:stateful],
+return_sequences: hash[:return_sequences],
+activation: Util.load_hash(hash[:activation]),
+weight_initializer: Util.load_hash(hash[:weight_initializer]),
+bias_initializer: Util.load_hash(hash[:bias_initializer]),
+l1_lambda: hash[:l1_lambda],
+l2_lambda: hash[:l2_lambda])
+simple_rnn.h = Xumo::SFloat.cast(hash[:h])
+simple_rnn
 end
 
 def initialize(num_nodes,
@@ -135,13 +172,15 @@ module DNN
 activation: nil,
 weight_initializer: nil,
 bias_initializer: nil,
-
+l1_lambda: 0,
+l2_lambda: 0)
 super(num_nodes,
 stateful: stateful,
 return_sequences: return_sequences,
 weight_initializer: weight_initializer,
 bias_initializer: bias_initializer,
-
+l1_lambda: 0,
+l2_lambda: 0)
 @activation = (activation || Tanh.new)
 end
 
@@ -161,16 +200,15 @@ module DNN
 @weight_initializer.init_param(self, :weight2)
 @bias_initializer.init_param(self, :bias)
 @time_length.times do |t|
-@layers << SimpleRNN_Dense.new(
+@layers << SimpleRNN_Dense.new(self)
 end
 end
 end
 
 
 class LSTM_Dense
-def initialize(
-@
-@grads = grads
+def initialize(rnn)
+@rnn = rnn
 @tanh = Tanh.new
 @g_tanh = Tanh.new
 @forget_sigmoid = Sigmoid.new
@@ -178,56 +216,67 @@ module DNN
 @out_sigmoid = Sigmoid.new
 end
 
-def forward(x, h,
+def forward(x, h, c)
 @x = x
 @h = h
-@
+@c = c
 num_nodes = h.shape[1]
-a = x.dot(@params[:weight]) + h.dot(@params[:weight2]) + @params[:bias]
+a = x.dot(@rnn.params[:weight]) + h.dot(@rnn.params[:weight2]) + @rnn.params[:bias]
 
 @forget = @forget_sigmoid.forward(a[true, 0...num_nodes])
 @g = @g_tanh.forward(a[true, num_nodes...(num_nodes * 2)])
 @in = @in_sigmoid.forward(a[true, (num_nodes * 2)...(num_nodes * 3)])
 @out = @out_sigmoid.forward(a[true, (num_nodes * 3)..-1])
 
-
-@
-h2 = @out * @
-[h2,
+c2 = @forget * c + @g * @in
+@tanh_c2 = @tanh.forward(c2)
+h2 = @out * @tanh_c2
+[h2, c2]
 end
 
-def backward(dh2,
-dh2_tmp = @
-
+def backward(dh2, dc2)
+dh2_tmp = @tanh_c2 * dh2
+dc2_tmp = @tanh.backward(@out * dh2) + dc2
 
 dout = @out_sigmoid.backward(dh2_tmp)
-din = @in_sigmoid.backward(
-dg = @g_tanh.backward(
-dforget = @forget_sigmoid.backward(
+din = @in_sigmoid.backward(dc2_tmp * @g)
+dg = @g_tanh.backward(dc2_tmp * @in)
+dforget = @forget_sigmoid.backward(dc2_tmp * @c)
 
 da = Xumo::SFloat.hstack([dforget, dg, din, dout])
 
-@grads[:weight] += @x.transpose.dot(da)
-@grads[:weight2] += @h.transpose.dot(da)
-@
-
-
-
-
+@rnn.grads[:weight] += @x.transpose.dot(da)
+@rnn.grads[:weight2] += @h.transpose.dot(da)
+if @rnn.l1_lambda > 0
+@rnn.grads[:weight] += dlasso
+@rnn.grads[:weight2] += dlasso2
+elsif @rnn.l2_lambda > 0
+@rnn.grads[:weight] += dridge
+@rnn.grads[:weight2] += dridge2
+end
+@rnn.grads[:bias] += da.sum(0)
+dx = da.dot(@rnn.params[:weight].transpose)
+dh = da.dot(@rnn.params[:weight2].transpose)
+dc = dc2_tmp * @forget
+[dx, dh, dc]
 end
 end
 
 
 class LSTM < RNN
-attr_accessor :
+attr_accessor :c
 
 def self.load_hash(hash)
-self.new(hash[:num_nodes],
-
-
-
-
-
+lstm = self.new(hash[:num_nodes],
+stateful: hash[:stateful],
+return_sequences: hash[:return_sequences],
+weight_initializer: Util.load_hash(hash[:weight_initializer]),
+bias_initializer: Util.load_hash(hash[:bias_initializer]),
+l1_lambda: hash[:l1_lambda],
+l2_lambda: hash[:l2_lambda])
+lstm.h = Xumo::SFloat.cast(hash[:h])
+lstm.c = Xumo::SFloat.cast(hash[:c])
+lstm
 end
 
 def initialize(num_nodes,
@@ -235,29 +284,30 @@ module DNN
 return_sequences: true,
 weight_initializer: nil,
 bias_initializer: nil,
-
+l1_lambda: 0,
+l2_lambda: 0)
 super
-@
+@c = nil
 end
 
 def forward(xs)
 @xs_shape = xs.shape
 hs = Xumo::SFloat.zeros(xs.shape[0], @time_length, @num_nodes)
 h = nil
-
+c = nil
 if @stateful
 h = @h if @h
-
+c = @c if @c
 end
 h ||= Xumo::SFloat.zeros(xs.shape[0], @num_nodes)
-
+c ||= Xumo::SFloat.zeros(xs.shape[0], @num_nodes)
 xs.shape[1].times do |t|
 x = xs[true, t, false]
-h,
+h, c = @layers[t].forward(x, h, c)
 hs[true, t, false] = h
 end
 @h = h
-@
+@c = c
 @return_sequences ? hs : h
 end
 
@@ -272,15 +322,24 @@ module DNN
 end
 dxs = Xumo::SFloat.zeros(@xs_shape)
 dh = 0
-
+dc = 0
 (0...dh2s.shape[1]).to_a.reverse.each do |t|
 dh2 = dh2s[true, t, false]
-dx, dh,
+dx, dh, dc = @layers[t].backward(dh2 + dh, dc)
 dxs[true, t, false] = dx
 end
 dxs
 end
 
+def reset_state
+super()
+@c = @c.fill(0) if @c
+end
+
+def to_hash
+super({c: @c.to_a})
+end
+
 private
 
 def init_params
@@ -293,16 +352,15 @@ module DNN
 @weight_initializer.init_param(self, :weight2)
 @bias_initializer.init_param(self, :bias)
 @time_length.times do |t|
-@layers << LSTM_Dense.new(
+@layers << LSTM_Dense.new(self)
 end
 end
 end
 
 
 class GRU_Dense
-def initialize(
-@
-@grads = grads
+def initialize(rnn)
+@rnn = rnn
 @update_sigmoid = Sigmoid.new
 @reset_sigmoid = Sigmoid.new
 @tanh = Tanh.new
@@ -312,16 +370,16 @@ module DNN
 @x = x
 @h = h
 num_nodes = h.shape[1]
-@weight_a = @params[:weight][true, 0...(num_nodes * 2)]
-@weight2_a = @params[:weight2][true, 0...(num_nodes * 2)]
-bias_a = @params[:bias][0...(num_nodes * 2)]
+@weight_a = @rnn.params[:weight][true, 0...(num_nodes * 2)]
+@weight2_a = @rnn.params[:weight2][true, 0...(num_nodes * 2)]
+bias_a = @rnn.params[:bias][0...(num_nodes * 2)]
 a = x.dot(@weight_a) + h.dot(@weight2_a) + bias_a
 @update = @update_sigmoid.forward(a[true, 0...num_nodes])
 @reset = @reset_sigmoid.forward(a[true, num_nodes..-1])
 
-@weight_h = @params[:weight][true, (num_nodes * 2)..-1]
-@weight2_h = @params[:weight2][true, (num_nodes * 2)..-1]
-bias_h = @params[:bias][(num_nodes * 2)..-1]
+@weight_h = @rnn.params[:weight][true, (num_nodes * 2)..-1]
+@weight2_h = @rnn.params[:weight2][true, (num_nodes * 2)..-1]
+bias_h = @rnn.params[:bias][(num_nodes * 2)..-1]
 @tanh_h = @tanh.forward(x.dot(@weight_h) + (h * @reset).dot(@weight2_h) + bias_h)
 h2 = (1 - @update) * h + @update * @tanh_h
 h2
@@ -346,9 +404,16 @@ module DNN
 dh += da.dot(@weight2_a.transpose)
 dbias_a = da.sum(0)
 
-@grads[:weight] += Xumo::SFloat.hstack([dweight_a, dweight_h])
-@grads[:weight2] += Xumo::SFloat.hstack([dweight2_a, dweight2_h])
-@
+@rnn.grads[:weight] += Xumo::SFloat.hstack([dweight_a, dweight_h])
+@rnn.grads[:weight2] += Xumo::SFloat.hstack([dweight2_a, dweight2_h])
+if @rnn.l1_lambda > 0
+@rnn.grads[:weight] += dlasso
+@rnn.grads[:weight2] += dlasso2
+elsif @rnn.l2_lambda > 0
+@rnn.grads[:weight] += dridge
+@rnn.grads[:weight2] += dridge2
+end
+@rnn.grads[:bias] += Xumo::SFloat.hstack([dbias_a, dbias_h])
 [dx, dh]
 end
 end
@@ -356,12 +421,15 @@ module DNN
 
 class GRU < RNN
 def self.load_hash(hash)
-self.new(hash[:num_nodes],
-
-
-
-
-
+gru = self.new(hash[:num_nodes],
+stateful: hash[:stateful],
+return_sequences: hash[:return_sequences],
+weight_initializer: Util.load_hash(hash[:weight_initializer]),
+bias_initializer: Util.load_hash(hash[:bias_initializer]),
+l1_lambda: hash[:l1_lambda],
+l2_lambda: hash[:l2_lambda])
+gru.h = Xumo::SFloat.cast(hash[:h])
+gru
 end
 
 def initialize(num_nodes,
@@ -369,7 +437,8 @@ module DNN
 return_sequences: true,
 weight_initializer: nil,
 bias_initializer: nil,
-
+l1_lambda: 0,
+l2_lambda: 0)
 super
 end
 
@@ -385,7 +454,7 @@ module DNN
 @weight_initializer.init_param(self, :weight2)
 @bias_initializer.init_param(self, :bias)
 @time_length.times do |t|
-@layers << GRU_Dense.new(
+@layers << GRU_Dense.new(self)
 end
 end
 end
data/lib/dnn/core/util.rb
CHANGED
data/lib/dnn/lib/cifar10.rb
CHANGED
@@ -1,8 +1,8 @@
 require "dnn"
-require "dnn/ext/cifar10_loader/cifar10_loader"
 require "open-uri"
 require "zlib"
 require "archive/tar/minitar"
+require_relative "dnn/ext/cifar10_loader/cifar10_loader"
 
 URL_CIFAR10 = "https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz"
 CIFAR10_DIR = "cifar-10-batches-bin"
data/lib/dnn/lib/image_io.rb
CHANGED
data/lib/dnn/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-dnn
 version: !ruby/object:Gem::Version
-version: 0.
+version: 0.7.0
 platform: ruby
 authors:
 - unagiootoro
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2018-09-
+date: 2018-09-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
 name: numo-narray
|