ruby-dnn 0.15.3 → 0.16.0
This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/Rakefile +1 -9
- data/examples/api-examples/early_stopping_example.rb +1 -1
- data/examples/api-examples/initializer_example.rb +1 -1
- data/examples/api-examples/regularizer_example.rb +1 -1
- data/examples/api-examples/save_example.rb +1 -1
- data/examples/dcgan/dcgan.rb +3 -3
- data/examples/iris_example.rb +41 -17
- data/examples/mnist_define_by_run.rb +1 -1
- data/examples/pix2pix/dcgan.rb +157 -0
- data/examples/pix2pix/imgen.rb +27 -0
- data/examples/pix2pix/train.rb +52 -0
- data/lib/dnn.rb +2 -0
- data/lib/dnn/core/layers/activations.rb +37 -19
- data/lib/dnn/core/layers/basic_layers.rb +110 -25
- data/lib/dnn/core/layers/cnn_layers.rb +19 -21
- data/lib/dnn/core/layers/embedding.rb +3 -3
- data/lib/dnn/core/layers/math_layers.rb +169 -0
- data/lib/dnn/core/layers/merge_layers.rb +29 -24
- data/lib/dnn/core/layers/normalizations.rb +4 -2
- data/lib/dnn/core/layers/rnn_layers.rb +44 -36
- data/lib/dnn/core/link.rb +7 -2
- data/lib/dnn/core/losses.rb +54 -30
- data/lib/dnn/core/models.rb +47 -47
- data/lib/dnn/core/monkey_patch.rb +75 -0
- data/lib/dnn/core/optimizers.rb +10 -6
- data/lib/dnn/core/param.rb +17 -0
- data/lib/dnn/core/regularizers.rb +35 -33
- data/lib/dnn/core/tensor.rb +40 -0
- data/lib/dnn/core/utils.rb +1 -1
- data/lib/dnn/datasets/cifar10.rb +10 -9
- data/lib/dnn/datasets/cifar100.rb +10 -9
- data/lib/dnn/datasets/downloader.rb +1 -5
- data/lib/dnn/datasets/fashion-mnist.rb +4 -12
- data/lib/dnn/datasets/iris.rb +9 -9
- data/lib/dnn/datasets/mnist.rb +4 -12
- data/lib/dnn/datasets/stl-10.rb +6 -8
- data/lib/dnn/version.rb +1 -1
- data/ruby-dnn.gemspec +1 -1
- metadata +7 -5
- data/ext/cifar_loader/cifar_loader.c +0 -77
- data/ext/cifar_loader/extconf.rb +0 -3
data/lib/dnn/core/layers/merge_layers.rb
CHANGED
@@ -1,41 +1,46 @@
 module DNN
   module Layers

-
-      def
-        new(*args).call(x1, x2)
-      end
-
-      def call(input_tensor1, input_tensor2)
+    module MergeLayerNode
+      def forward(input_tensor1, input_tensor2)
         x1 = input_tensor1.data
         x2 = input_tensor2.data
-        prev_link1 = input_tensor1.link
-        prev_link2 = input_tensor2.link
-
-        y = forward(x1, x2)
+        prev_link1 = (input_tensor1.is_a?(Tensor) ? input_tensor1.link : input_tensor1)
+        prev_link2 = (input_tensor2.is_a?(Tensor) ? input_tensor2.link : input_tensor2)
+        y = forward_node(x1, x2)
         link = TwoInputLink.new(prev_link1, prev_link2, self)
         Tensor.new(y, link)
       end
-    end

-
-
-        x1 + x2
+      def backward(dy)
+        backward_node(dy)
       end

-      def
-
+      def forward_node(x1, x2)
+        raise NotImplementedError, "Class '#{self.class.name}' has implement method 'forward_node'"
+      end
+
+      def backward_node(dy)
+        raise NotImplementedError, "Class '#{self.class.name}' has implement method 'backward_node'"
       end
     end

-    class
-
-
-
+    class MergeLayer < Layers::Layer
+      include MergeLayerNode
+
+      def self.call(x1, x2, *args)
+        new(*args).call(x1, x2)
       end

-      def
-
+      def call(input_tensor1, input_tensor2)
+        input_tensor1 = Tensor.new(input_tensor1) if !input_tensor1.is_a?(Tensor) && !input_tensor1.is_a?(Param)
+        input_tensor2 = Tensor.new(input_tensor2) if !input_tensor2.is_a?(Tensor) && !input_tensor2.is_a?(Param)
+        if input_tensor1.data.is_a?(Numo::NArray)
+          build(input_tensor1.data.shape[1..-1]) unless built?
+        else
+          build([1]) unless built?
+        end
+        forward(input_tensor1, input_tensor2)
       end
     end

@@ -47,13 +52,13 @@ module DNN
         @axis = axis
       end

-      def
+      def forward_node(x1, x2)
        @x1_dim = x1.shape[@axis]
        @x2_dim = x2.shape[@axis]
        x1.concatenate(x2, axis: @axis)
       end

-      def
+      def backward_node(dy)
        dy.split([@x1_dim, @x1_dim + @x2_dim], axis: @axis)
       end

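Read as a whole, the new MergeLayerNode module factors the Tensor/link bookkeeping out of MergeLayer: a two-input layer now only supplies forward_node and backward_node over raw Numo arrays, as Concatenate does above. Below is a minimal, hypothetical sketch of a custom merge layer written against this API; the Subtract class name and the usage comment are illustrative and not part of the release.

  # Hypothetical example (not shipped with ruby-dnn): an element-wise
  # subtraction merge layer using the new forward_node/backward_node API.
  class Subtract < DNN::Layers::MergeLayer
    def forward_node(x1, x2)
      x1 - x2          # plain Numo arrays in, array out
    end

    def backward_node(dy)
      [dy, -dy]        # one gradient per input, consumed by TwoInputLink
    end
  end

  # Subtract.(a, b) would wrap a and b in Tensors if needed, build the layer,
  # and return a Tensor whose link records both inputs for backpropagation.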
data/lib/dnn/core/layers/normalizations.rb
CHANGED
@@ -2,6 +2,8 @@ module DNN
   module Layers

     class BatchNormalization < TrainableLayer
+      include LayerNode
+
       attr_reader :gamma
       attr_reader :beta
       attr_reader :running_mean
@@ -32,7 +34,7 @@ module DNN
        @running_var.data = Xumo::SFloat.zeros(*output_shape)
      end

-      def
+      def forward_node(x)
        if DNN.learning_phase
          mean = x.mean(axis: @axis, keepdims: true)
          @xc = x - mean
@@ -49,7 +51,7 @@ module DNN
        @gamma.data * xn + @beta.data
      end

-      def
+      def backward_node(dy)
        batch_size = dy.shape[@axis]
        if @trainable
          @beta.grad = dy.sum(axis: @axis, keepdims: true)
data/lib/dnn/core/layers/rnn_layers.rb
CHANGED
@@ -3,6 +3,8 @@ module DNN

     # Super class of all RNN classes.
     class RNN < Connection
+      include LayerNode
+
       attr_reader :num_nodes
       attr_reader :recurrent_weight
       attr_reader :hidden
@@ -50,7 +52,7 @@ module DNN
        raise NotImplementedError, "Class '#{self.class.name}' has implement method 'create_hidden_layer'"
      end

-      def
+      def forward_node(xs)
        create_hidden_layer
        @xs_shape = xs.shape
        hs = Xumo::SFloat.zeros(xs.shape[0], @time_length, @num_nodes)
@@ -58,14 +60,14 @@ module DNN
        xs.shape[1].times do |t|
          x = xs[true, t, false]
          @hidden_layers[t].trainable = @trainable
-          h = @hidden_layers[t].
+          h = @hidden_layers[t].forward_node(x, h)
          hs[true, t, false] = h
        end
        @hidden.data = h
        @return_sequences ? hs : h
      end

-      def
+      def backward_node(dh2s)
        unless @return_sequences
          dh = dh2s
          dh2s = Xumo::SFloat.zeros(dh.shape[0], @time_length, dh.shape[1])
@@ -75,7 +77,7 @@ module DNN
        dh = 0
        (dh2s.shape[1] - 1).downto(0) do |t|
          dh2 = dh2s[true, t, false]
-          dx, dh = @hidden_layers[t].
+          dx, dh = @hidden_layers[t].backward_node(dh2 + dh)
          dxs[true, t, false] = dx
        end
        dxs
@@ -134,7 +136,9 @@ module DNN
      end
    end

-    class SimpleRNNDense
+    class SimpleRNNDense < Layer
+      include LayerNode
+
      attr_accessor :trainable

      def initialize(weight, recurrent_weight, bias, activation)
@@ -145,16 +149,16 @@ module DNN
        @trainable = true
      end

-      def
+      def forward_node(x, h)
        @x = x
        @h = h
        h2 = x.dot(@weight.data) + h.dot(@recurrent_weight.data)
        h2 += @bias.data if @bias
-        @activation.
+        @activation.forward_node(h2)
      end

-      def
-        dh2 = @activation.
+      def backward_node(dh2)
+        dh2 = @activation.backward_node(dh2)
        if @trainable
          @weight.grad += @x.transpose.dot(dh2)
          @recurrent_weight.grad += @h.transpose.dot(dh2)
@@ -226,7 +230,9 @@ module DNN
      end
    end

-    class LSTMDense
+    class LSTMDense < Layer
+      include LayerNode
+
      attr_accessor :trainable

      def initialize(weight, recurrent_weight, bias)
@@ -241,7 +247,7 @@ module DNN
        @trainable = true
      end

-      def
+      def forward_node(x, h, c)
        @x = x
        @h = h
        @c = c
@@ -249,25 +255,25 @@ module DNN
        a = x.dot(@weight.data) + h.dot(@recurrent_weight.data)
        a += @bias.data if @bias

-        @forget = @forget_sigmoid.
-        @g = @g_tanh.
-        @in = @in_sigmoid.
-        @out = @out_sigmoid.
+        @forget = @forget_sigmoid.forward_node(a[true, 0...num_nodes])
+        @g = @g_tanh.forward_node(a[true, num_nodes...(num_nodes * 2)])
+        @in = @in_sigmoid.forward_node(a[true, (num_nodes * 2)...(num_nodes * 3)])
+        @out = @out_sigmoid.forward_node(a[true, (num_nodes * 3)..-1])

        c2 = @forget * c + @g * @in
-        @tanh_c2 = @tanh.
+        @tanh_c2 = @tanh.forward_node(c2)
        h2 = @out * @tanh_c2
        [h2, c2]
      end

-      def
+      def backward_node(dh2, dc2)
        dh2_tmp = @tanh_c2 * dh2
-        dc2_tmp = @tanh.
+        dc2_tmp = @tanh.backward_node(@out * dh2) + dc2

-        dout = @out_sigmoid.
-        din = @in_sigmoid.
-        dg = @g_tanh.
-        dforget = @forget_sigmoid.
+        dout = @out_sigmoid.backward_node(dh2_tmp)
+        din = @in_sigmoid.backward_node(dc2_tmp * @g)
+        dg = @g_tanh.backward_node(dc2_tmp * @in)
+        dforget = @forget_sigmoid.backward_node(dc2_tmp * @c)

        da = Xumo::SFloat.hstack([dforget, dg, din, dout])

@@ -313,7 +319,7 @@ module DNN
        @hidden_layers = Array.new(@time_length) { LSTMDense.new(@weight, @recurrent_weight, @bias) }
      end

-      def
+      def forward_node(xs)
        create_hidden_layer
        @xs_shape = xs.shape
        hs = Xumo::SFloat.zeros(xs.shape[0], @time_length, @num_nodes)
@@ -328,7 +334,7 @@ module DNN
        xs.shape[1].times do |t|
          x = xs[true, t, false]
          @hidden_layers[t].trainable = @trainable
-          h, c = @hidden_layers[t].
+          h, c = @hidden_layers[t].forward_node(x, h, c)
          hs[true, t, false] = h
        end
        @hidden.data = h
@@ -336,7 +342,7 @@ module DNN
        @return_sequences ? hs : h
      end

-      def
+      def backward_node(dh2s)
        unless @return_sequences
          dh = dh2s
          dh2s = Xumo::SFloat.zeros(dh.shape[0], @time_length, dh.shape[1])
@@ -347,7 +353,7 @@ module DNN
        dc = 0
        (dh2s.shape[1] - 1).downto(0) do |t|
          dh2 = dh2s[true, t, false]
-          dx, dh, dc = @hidden_layers[t].
+          dx, dh, dc = @hidden_layers[t].backward_node(dh2 + dh, dc)
          dxs[true, t, false] = dx
        end
        dxs
@@ -363,7 +369,9 @@ module DNN
      end
    end

-    class GRUDense
+    class GRUDense < Layer
+      include LayerNode
+
      attr_accessor :trainable

      def initialize(weight, recurrent_weight, bias)
@@ -376,7 +384,7 @@ module DNN
        @trainable = true
      end

-      def
+      def forward_node(x, h)
        @x = x
        @h = h
        num_nodes = h.shape[1]
@@ -384,23 +392,23 @@ module DNN
        @weight2_a = @recurrent_weight.data[true, 0...(num_nodes * 2)]
        a = x.dot(@weight_a) + h.dot(@weight2_a)
        a += @bias.data[0...(num_nodes * 2)] if @bias
-        @update = @update_sigmoid.
-        @reset = @reset_sigmoid.
+        @update = @update_sigmoid.forward_node(a[true, 0...num_nodes])
+        @reset = @reset_sigmoid.forward_node(a[true, num_nodes..-1])

        @weight_h = @weight.data[true, (num_nodes * 2)..-1]
        @weight2_h = @recurrent_weight.data[true, (num_nodes * 2)..-1]
        @tanh_h = if @bias
          bias_h = @bias.data[(num_nodes * 2)..-1]
-          @tanh.
+          @tanh.forward_node(x.dot(@weight_h) + (h * @reset).dot(@weight2_h) + bias_h)
        else
-          @tanh.
+          @tanh.forward_node(x.dot(@weight_h) + (h * @reset).dot(@weight2_h))
        end
        h2 = (1 - @update) * @tanh_h + @update * h
        h2
      end

-      def
-        dtanh_h = @tanh.
+      def backward_node(dh2)
+        dtanh_h = @tanh.backward_node(dh2 * (1 - @update))
        dh = dh2 * @update

        if @trainable
@@ -411,8 +419,8 @@ module DNN
        dx = dtanh_h.dot(@weight_h.transpose)
        dh += dtanh_h.dot(@weight2_h.transpose) * @reset

-        dreset = @reset_sigmoid.
-        dupdate = @update_sigmoid.
+        dreset = @reset_sigmoid.backward_node(dtanh_h.dot(@weight2_h.transpose) * @h)
+        dupdate = @update_sigmoid.backward_node(dh2 * @h - dh2 * @tanh_h)
        da = Xumo::SFloat.hstack([dupdate, dreset])
        if @trainable
          dweight_a = @x.transpose.dot(da)
data/lib/dnn/core/link.rb
CHANGED
@@ -26,9 +26,14 @@ module DNN
    end

    def backward(dy)
-
+      dys = @layer.backward(dy)
+      if dys.is_a?(Array)
+        dy1, dy2 = *dys
+      else
+        dy1 = dys
+      end
      @prev1&.backward(dy1)
-      @prev2&.backward(dy2)
+      @prev2&.backward(dy2) if dy2
    end
  end
end
data/lib/dnn/core/losses.rb
CHANGED
@@ -2,6 +2,10 @@ module DNN
  module Losses

    class Loss
+      def self.call(y, t, *args)
+        new(*args).(y, t)
+      end
+
      def self.from_hash(hash)
        return nil unless hash
        loss_class = DNN.const_get(hash[:class])
@@ -11,31 +15,30 @@ module DNN
        loss
      end

+      def call(y, t)
+        forward(y, t)
+      end
+
      def loss(y, t, layers = nil)
        unless y.shape == t.shape
          raise DNN_ShapeError, "The shape of y does not match the t shape. y shape is #{y.shape}, but t shape is #{t.shape}."
        end
-
-
-
+        loss = call(y, t)
+        loss = regularizers_forward(loss, layers) if layers
+        loss
      end

      def forward(y, t)
        raise NotImplementedError, "Class '#{self.class.name}' has implement method 'forward'"
      end

-      def
-        raise NotImplementedError, "Class '#{self.class.name}' has implement method 'backward'"
-      end
-
-      def regularizers_forward(layers)
-        loss_value = 0
+      def regularizers_forward(loss, layers)
        regularizers = layers.select { |layer| layer.respond_to?(:regularizers) }
                             .map(&:regularizers).flatten
        regularizers.each do |regularizer|
-
+          loss = regularizer.forward(loss)
        end
-
+        loss
      end

      def regularizers_backward(layers)
@@ -64,22 +67,30 @@ module DNN
    end

    class MeanSquaredError < Loss
-
+      include Layers::MergeLayerNode
+
+      def forward_node(y, t)
+        @y = y
+        @t = t
        0.5 * ((y - t)**2).mean(0).sum
      end

-      def
-        y - t
+      def backward_node(dy)
+        @y - @t
      end
    end

    class MeanAbsoluteError < Loss
-
+      include Layers::MergeLayerNode
+
+      def forward_node(y, t)
+        @y = y
+        @t = t
        (y - t).abs.mean(0).sum
      end

-      def
-        dy = y - t
+      def backward_node(d)
+        dy = @y - @t
        dy[dy >= 0] = 1
        dy[dy < 0] = -1
        dy
@@ -87,26 +98,33 @@ module DNN
      end
    end

    class Hinge < Loss
-
+      include Layers::MergeLayerNode
+
+      def forward_node(y, t)
+        @t = t
        @a = 1 - y * t
        Xumo::SFloat.maximum(0, @a).mean(0).sum
      end

-      def
+      def backward_node(d)
        a = Xumo::SFloat.ones(*@a.shape)
        a[@a <= 0] = 0
-        a *
+        a * -@t
      end
    end

    class HuberLoss < Loss
-
+      include Layers::MergeLayerNode
+
+      def forward_node(y, t)
+        @y = y
+        @t = t
        loss_l1_value = loss_l1(y, t)
        @loss_value = loss_l1_value > 1 ? loss_l1_value : loss_l2(y, t)
      end

-      def
-        dy = y - t
+      def backward_node(d)
+        dy = @y - @t
        if @loss_value > 1
          dy[dy >= 0] = 1
          dy[dy < 0] = -1
@@ -126,6 +144,8 @@ module DNN
    end

    class SoftmaxCrossEntropy < Loss
+      include Layers::MergeLayerNode
+
      attr_accessor :eps

      class << self
@@ -141,13 +161,14 @@ module DNN
        @eps = eps
      end

-      def
+      def forward_node(y, t)
+        @t = t
        @x = SoftmaxCrossEntropy.softmax(y)
        -(t * Xumo::NMath.log(@x + @eps)).mean(0).sum
      end

-      def
-        @x - t
+      def backward_node(d)
+        @x - @t
      end

      def to_hash
@@ -160,11 +181,13 @@ module DNN
      end
    end

    class SigmoidCrossEntropy < Loss
+      include Layers::MergeLayerNode
+
      attr_accessor :eps

      class << self
        def sigmoid(y)
-          Layers::Sigmoid.new.
+          Layers::Sigmoid.new.forward_node(y)
        end

        alias activation sigmoid
@@ -175,13 +198,14 @@ module DNN
        @eps = eps
      end

-      def
+      def forward_node(y, t)
+        @t = t
        @x = SigmoidCrossEntropy.sigmoid(y)
        -(t * Xumo::NMath.log(@x + @eps) + (1 - t) * Xumo::NMath.log(1 - @x + @eps)).mean(0).sum
      end

-      def
-        @x - t
+      def backward_node(d)
+        @x - @t
      end

      def to_hash
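The losses.rb hunks above show the same pattern as the layer changes: each loss now includes Layers::MergeLayerNode, caches its inputs in forward_node, and computes the gradient from those cached values in backward_node, while Loss gains a class-level call helper that builds an instance and delegates to #call/#forward. A small, hypothetical usage sketch of that reworked API (not shipped with the gem; values chosen only for illustration):

  # Illustrative only: exercising MeanSquaredError's new node methods directly.
  y = Numo::SFloat[[0.2, 0.8], [0.6, 0.4]]   # predictions
  t = Numo::SFloat[[0.0, 1.0], [1.0, 0.0]]   # targets

  mse = DNN::Losses::MeanSquaredError.new

  # forward_node works on plain arrays and caches @y and @t, so the backward
  # pass no longer needs y and t passed back in:
  value = mse.forward_node(y, t)   # 0.5 * ((y - t)**2).mean(0).sum
  grad  = mse.backward_node(1.0)   # y - t, read from the cached @y and @t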