rann 0.1.0 → 0.2.0
- checksums.yaml +4 -4
- data/README.md +4 -5
- data/bench/xor_benchmark.rb +55 -0
- data/lib/rann/backprop.rb +106 -101
- data/lib/rann/gradient_checker.rb +3 -1
- data/lib/rann/lstm.rb +28 -27
- data/lib/rann/network.rb +4 -1
- data/lib/rann/neuron.rb +1 -1
- data/lib/rann/optimisers/adagrad.rb +21 -0
- data/lib/rann/optimisers/rmsprop.rb +22 -0
- data/lib/rann/product_neuron.rb +4 -2
- data/lib/rann/version.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 199b82878ecd7e186aca7df6529af6b725b049a5
+  data.tar.gz: 5a61a9fb308c3c54055a61e516aa370f152170e8
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8d02e3a788f9be66b2912edc21b0b807a6ba16ea867ac6ecb03454988e35e11971f8e39d1938324b834a5241df1c85775240b7665fa566b109ed2578d5fb23e3
+  data.tar.gz: 07b770013594c1b6e9a42ecdd812f2b2e12b3d902d2608d23ad24b73d44e270fb5bfb84636018405d5af7ea94580f89d5d2a09dc6f00917c987c336116a08442
data/README.md
CHANGED
@@ -34,17 +34,16 @@ ruby examples/xor.rb
 
 ## TODO
 
-So much.
+So much.
 
+- Matrix calculations (WIP)
 - Convenience methods for setting up standard network topologies, crucially,
-  layers
-- Batch normalization/drop out/early stopping
+  layers (WIP)
+- Batch normalization/drop out/early stopping (WIP, dep. on matrix)
 - Hyperparameter optimisation
 - Other adaptive learning rate algorithms (Adadelta, Adam, etc?)
 - Explore matrix operations and other ways to optimise performance of algorithms
 - RPROP?
-- Use enumerable-statistics gem?
-- Speed up by adding a reduce step to the parallel gem?
 - More examples
 - Tests
 
data/bench/xor_benchmark.rb
ADDED
@@ -0,0 +1,55 @@
+require "bundler/setup"
+require "rann"
+
+xor_inputs = [[0,0],[0,1],[1,0],[1,1]]
+xor_targets = [[0],[1],[1],[0]]
+
+time = Time.now.to_i
+results =
+  Array.new(100) do |j|
+    # inputs
+    inputs = Array.new(2){ |i| RANN::Neuron.new "input #{i}", 0, :input }
+
+    # hidden layer
+    hiddens = Array.new(3){ |i| RANN::Neuron.new "hidden #{i}", 3 }
+    bias = RANN::Neuron.new "bias", 0, :bias
+
+    # output layer
+    output = RANN::Neuron.new "output", 3, :output, :sig
+
+    # connect it all
+    connections = []
+    hiddens.each do |h|
+      inputs.each do |i|
+        connections.push RANN::Connection.new i, h
+      end
+      connections.push RANN::Connection.new bias, h
+      connections.push RANN::Connection.new h, output
+    end
+
+    network = RANN::Network.new connections
+    backprop = RANN::Backprop.new network
+
+    i = 0
+    loop do
+      i += 1
+      sample_index = (rand * xor_inputs.size).to_i
+
+      avg_error =
+        backprop.run_batch(
+          [xor_inputs[sample_index].map(&:to_d)],
+          [xor_targets[sample_index].map(&:to_d)],
+          processes: 0,
+          checking: false
+        )
+
+      break if avg_error < 0.0001
+    end
+
+    puts j
+    i
+  end
+
+taken = Time.now.to_i - time
+puts results.reduce(:+).fdiv(results.size).round(2)
+puts "in #{taken}s"
data/lib/rann/backprop.rb
CHANGED
@@ -1,6 +1,10 @@
+require "bigdecimal"
+require "bigdecimal/util"
 require "parallel"
 require "rann/gradient_checker"
 require "rann/util/array_ext"
+require "rann/optimisers/adagrad"
+require "rann/optimisers/rmsprop"
 
 module RANN
   class Backprop
@@ -14,27 +18,16 @@ module RANN
       step: ->(_){ 0.to_d },
     }
 
-
-
-
-
-
-
-
-    attr_accessor :network, :lr, :velocities
-
-    def initialize network, restore = {}
-      @network = network
-      @connections_hash = network.connections.each.with_object({}){ |c, h| h[c.id] = c }
-      @lr = LEARNING_RATE
-      @friction = FRICTION
-      @velocities = Hash.new(BigDecimal.new('0'))
-      @historical_gradient = (restore[:historical_gradient] || {}).tap{ |h| h.default = 0.to_d }
-      @historical_update = Hash.new(MASTER_STEP_SIZE)
-      @batch_count = BigDecimal.new('0')
+    attr_accessor :network
+
+    def initialize network, opts = {}, restore = {}
+      @network = network
+      @connections_hash = network.connections.each.with_object({}){ |c, h| h[c.id] = c }
+      @optimiser = RANN::Optimisers.const_get(opts[:optimiser] || 'RMSProp').new opts, restore
+      @batch_count = 0.to_d
     end
 
-    def run_batch
+    def run_batch inputs, targets, opts = {}
       @batch_count += 1
 
       batch_size = inputs.size
@@ -43,37 +36,47 @@ module RANN
 
       # force longer bits of work per iteration, to maximise CPU usage
       # less marshalling data etc, more work.
-      grouped_inputs = in_groups
-
-
-      group_avg_gradients
-      group_avg_error = 0.to_d
+      grouped_inputs = in_groups(inputs, [1, opts[:processes]].max * 10, false).reject &:empty?
+      reduce_proc =
+        lambda do |_, _, result|
+          group_avg_gradients, group_avg_error = result
 
-
-
+          avg_gradients.merge!(group_avg_gradients){ |_, o, n| o + n }
+          avg_batch_error += group_avg_error
+        end
 
-
-
-
-
-
+      Parallel.each_with_index(
+        grouped_inputs,
+        in_processes: opts[:processes],
+        finish: reduce_proc
+      ) do |inputs, i|
+        group_avg_gradients = Hash.new{ |h, k| h[k] = 0.to_d }
+        group_avg_error = 0.to_d
 
-
-
+        inputs.each_with_index do |input, j|
+          gradients, error = Backprop.run_single network, input, targets[i + j]
+
+          gradients.each do |cid, g|
+            group_avg_gradients[cid] += g.div batch_size, 10
+          end
+          group_avg_error += error.div batch_size, 10
         end
 
-
-
-        avg_batch_error += group_avg_error
+        group_avg_gradients.default_proc = nil
+        [group_avg_gradients, group_avg_error]
       end
 
       if opts[:checking]
        # check assumes batchsize 1 for now
        sorted_gradients = avg_gradients.values_at *network.connections.map(&:id)
-
+        invalid = GradientChecker.check network, inputs.first, targets.first, sorted_gradients
+        if invalid.empty?
          puts "gradient valid"
        else
-          puts "
+          puts "gradients INVALID for connections:"
+          invalid.each do |i|
+            puts "#{network.connections[i].input_neuron.name} -> #{network.connections[i].output_neuron.name}"
+          end
        end
      end
 
@@ -81,7 +84,7 @@ module RANN
       con = @connections_hash[con_id]
       next if con.locked?
 
-      update =
+      update = @optimiser.update gradient, con.id
 
       con.weight += update
     end
@@ -107,60 +110,68 @@ module RANN
       error = mse targets, outputs
 
       # backward pass with unravelling for recurrent networks
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+      node_deltas = Hash.new{ |h, k| h[k] = Hash.new(0.to_d) }
+      gradients = Hash.new(0)
+
+      initial_timestep = inputs.size - 1
+      neuron_stack = network.output_neurons.map{ |n| [n, initial_timestep] }
+
+      while current = neuron_stack.shift
+        neuron, timestep = current
+        next if node_deltas[timestep].key? neuron
+
+        from_here = bptt_connecting_to neuron, network, timestep
+        neuron_stack.push *from_here
+
+        # neuron delta is summation of neuron deltas deltas for the connections
+        # from this neuron
+        node_delta =
+          if neuron.output?
+            output_index = network.output_neurons.index neuron
+            activation_derivative = ACTIVATION_DERIVATIVES[neuron.activation_function]
+            mse_delta targets[output_index], outputs[output_index], activation_derivative
+          else
+            sum_of_deltas =
+              network.connections_from(neuron).reduce 0.to_d do |m, c|
+                out_timestep = c.output_neuron.context? ? timestep + 1 : timestep
+                output_node_delta = node_deltas[out_timestep][c.output_neuron.id]
+
+                # connection delta is the output neuron delta multiplied by the
+                # connection's weight
+                connection_delta =
+                  if c.output_neuron.is_a? ProductNeuron
+                    intermediate = states[out_timestep][:intermediates][c.output_neuron.id]
+                    output_node_delta.mult intermediate.div(states[timestep][:values][c.input_neuron.id], 10), 10
+                  else
+                    output_node_delta.mult c.weight, 10
+                  end
+
+                m + connection_delta
+              end
 
-
-
-
-            .mult(sum_of_deltas, 10)
-          end
+            ACTIVATION_DERIVATIVES[neuron.activation_function]
+              .call(states[timestep][:values][neuron.id])
+              .mult(sum_of_deltas, 10)
           end
-        end
-      end
 
-
+        node_deltas[timestep][neuron.id] = node_delta
 
-
-
-        next if con.output_neuron.context?
+        network.connections_to(neuron).each do |c|
+          in_timestep = neuron.context? ? timestep - 1 : timestep
 
-
-
-
-
-
-
-
-
+          # connection gradient is the output neuron delta multipled by the
+          # connection's input neuron value.
+          gradient =
+            if c.output_neuron.is_a? ProductNeuron
+              intermediate = states[timestep][:intermediates][c.output_neuron.id]
+              node_delta.mult intermediate.div(c.weight, 10), 10
+            elsif c.input_neuron.context? && timestep == 0
+              0.to_d
+            else
+              node_delta.mult states[in_timestep][:values][c.input_neuron.id], 10
+            end
 
-
-        end
+          gradients[c.id] += gradient
         end
       end
 
@@ -200,22 +211,16 @@ module RANN
       step_one.mult step_two, 10
     end
 
-    def self.
-      # halt traversal if
-      return if
-
-      bptt_connecting_to(from, network, timestep, deltas).each do |n, c|
-        yield n, c
+    def self.bptt_connecting_to neuron, network, timestep
+      # halt traversal if we're at a context and we're at the base timestep
+      return [] if neuron.context? && timestep == 0
 
-        traverse from: n, network: network, timestep: timestep, deltas: deltas, &block
-      end
-    end
-
-    def self.bptt_connecting_to neuron, network, timestep, deltas
       network.connections_to(neuron).each.with_object [] do |c, a|
-
-
-
+        # don't enqueue connections from inputs
+        next if c.input_neuron.input?
+
+        timestep -= timestep if neuron.context?
+        a << [c.input_neuron, timestep]
       end
     end
   end
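Taken together, the backprop changes above move all optimiser state out of Backprop into a pluggable optimiser object and make run_batch take the batch's inputs and targets directly. The following is a minimal sketch of driving the reworked API, not part of this diff: the two-neuron network and its values are made up for illustration, and the option names come straight from the constructor shown above.

    require "rann"

    # Tiny made-up network: one input wired straight to a sigmoid output.
    input   = RANN::Neuron.new "in", 0, :input
    output  = RANN::Neuron.new "out", 1, :output, :sig
    network = RANN::Network.new [RANN::Connection.new(input, output)]

    # The optimiser and its hyperparameters now ride in via the opts hash;
    # 'RMSProp' is the default, 'AdaGrad' selects the other new optimiser.
    backprop = RANN::Backprop.new network, optimiser: "AdaGrad", learning_rate: 0.1.to_d

    # run_batch takes the batch inputs and targets explicitly and returns the
    # average error, exactly as bench/xor_benchmark.rb does above.
    avg_error = backprop.run_batch [[1.to_d]], [[1.to_d]], processes: 0, checking: false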
data/lib/rann/gradient_checker.rb
CHANGED
@@ -24,7 +24,9 @@ module RANN
      gradapprox[i] = (error_thetaplus - error_thetaminus).div(EPSILON.mult(2, 10), 10)
    end
 
-    gradapprox.each.with_index.
+    gradapprox.each.with_index.with_object [] do |(ga, i), res|
+      res << i unless in_epsilon? ga, dvec[i]
+    end
  end
 
  def self.error outputs, targets
data/lib/rann/lstm.rb
CHANGED
@@ -8,44 +8,46 @@ module RANN
   class LSTM
     attr_reader :network, :inputs, :outputs, :name
 
-    def initialize name
+    def initialize name, size
       @name = name
       @network = RANN::Network.new
       @inputs = []
       @outputs = []
+      @size = size
+      init
     end
 
     def init
-
-
-
-
-
-
+      input_bias = RANN::Neuron.new("LSTM #{name} Input Bias", 0, :bias).tap{ |n| @network.add n }
+      @size.times do |j|
+        input = RANN::Neuron.new("LSTM #{name} Input #{j}", 0, :standard).tap{ |n| @network.add n }
+        @inputs << input
+
+        f = RANN::Neuron.new("LSTM #{name} F #{j}", 3, :standard, :sig).tap{ |n| @network.add n }
+        i = RANN::Neuron.new("LSTM #{name} I #{j}", 4, :standard, :sig).tap{ |n| @network.add n }
+        g = RANN::Neuron.new("LSTM #{name} G #{j}", 3, :standard, :tanh).tap{ |n| @network.add n }
+        o = RANN::Neuron.new("LSTM #{name} O #{j}", 3, :standard, :sig).tap{ |n| @network.add n }
+        bias_f = RANN::Neuron.new("LSTM #{name} Bias F #{j}", 0, :bias).tap do |n|
          @network.add n
-          n.value = 1.to_d
        end
-        bias_i = RANN::Neuron.new("LSTM #{name} Bias I #{
+        bias_i = RANN::Neuron.new("LSTM #{name} Bias I #{j}", 0, :bias).tap do |n|
          @network.add n
-          n.value = 1.to_d
        end
-        bias_g = RANN::Neuron.new("LSTM #{name} Bias G #{
+        bias_g = RANN::Neuron.new("LSTM #{name} Bias G #{j}", 0, :bias).tap do |n|
          @network.add n
-          n.value = 1.to_d
        end
-        bias_o = RANN::Neuron.new("LSTM #{name} Bias O #{
+        bias_o = RANN::Neuron.new("LSTM #{name} Bias O #{j}", 0, :bias).tap do |n|
          @network.add n
-          n.value = 1.to_d
        end
-        memory_product = RANN::ProductNeuron.new("LSTM #{name} Mem Product #{
-        i_g_product = RANN::ProductNeuron.new("LSTM #{name} Hidden 2/3 Product #{
-        memory_standard = RANN::Neuron.new("LSTM #{name} Mem Standard #{
-        memory_tanh = RANN::Neuron.new("LSTM #{name} Mem Tanh #{
-        memory_o_product = RANN::ProductNeuron.new("LSTM #{name} Mem/Hidden 4 Product #{
-        output = RANN::Neuron.new("LSTM #{name} Output #{
+        memory_product = RANN::ProductNeuron.new("LSTM #{name} Mem Product #{j}", 2, :standard, :linear).tap{ |n| @network.add n }
+        i_g_product = RANN::ProductNeuron.new("LSTM #{name} Hidden 2/3 Product #{j}", 2, :standard, :linear).tap{ |n| @network.add n }
+        memory_standard = RANN::Neuron.new("LSTM #{name} Mem Standard #{j}", 2, :standard, :linear).tap{ |n| @network.add n }
+        memory_tanh = RANN::Neuron.new("LSTM #{name} Mem Tanh #{j}", 1, :standard, :tanh).tap{ |n| @network.add n }
+        memory_o_product = RANN::ProductNeuron.new("LSTM #{name} Mem/Hidden 4 Product #{j}", 2, :standard, :linear).tap{ |n| @network.add n }
+        output = RANN::Neuron.new("LSTM #{name} Output #{j}", 1, :standard, :linear).tap{ |n| @network.add n }
        @outputs << output
-        memory_context = RANN::Neuron.new("LSTM #{name} Mem Context #{
-        output_context = RANN::Neuron.new("LSTM #{name} Output Context #{
+        memory_context = RANN::Neuron.new("LSTM #{name} Mem Context #{j}", 1, :context).tap{ |n| @network.add n }
+        output_context = RANN::Neuron.new("LSTM #{name} Output Context #{j}", 1, :context).tap{ |n| @network.add n }
 
        @network.add RANN::LockedConnection.new input, f, 1
        @network.add RANN::LockedConnection.new input, i, 1
@@ -72,15 +74,14 @@ module RANN
        @network.add RANN::Connection.new bias_i, i
        @network.add RANN::Connection.new bias_g, g
        @network.add RANN::Connection.new bias_o, o
+        @network.add RANN::Connection.new input_bias, input
      end
    end
 
    def add_input neuron
-
-
-
-      connection = RANN::Connection.new neuron, input
-      @network.add connection
+      @inputs.each do |input|
+        @network.add RANN::Connection.new neuron, input
+      end
    end
  end
end
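For context on the constructor change above: an LSTM block is now sized at construction time, and init wires up its gates from within the constructor. A rough usage sketch follows; it is not part of this diff, the names are invented, and it only exercises the methods visible above.

    require "rann"
    require "rann/lstm"

    # Build a two-cell LSTM block; init now runs from the constructor.
    lstm = RANN::LSTM.new "demo", 2

    # add_input fans one external neuron out to every one of the block's inputs.
    external = RANN::Neuron.new "external input", 0, :input
    lstm.add_input external

    lstm.inputs.size   # => 2, one input neuron per cell
    lstm.outputs.size  # => 2, one output neuron per cell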
data/lib/rann/network.rb
CHANGED
@@ -97,7 +97,10 @@ module RANN
    end
 
    def state
-
+      {
+        values: neurons.each.with_object({}){ |n, s| s[n.id] = n.value },
+        intermediates: neurons.select{ |n| n.is_a? ProductNeuron }.each.with_object({}){ |n, s| s[n.id] = n.intermediate }
+      }
    end
 
    def connections_to neuron
data/lib/rann/neuron.rb
CHANGED
@@ -5,7 +5,7 @@ require "bigdecimal/util"
 module RANN
   class Neuron
     ACTIVATION_FUNCTIONS = {
-      sig: ->(v){ 1.to_d.div(1 +
+      sig: ->(v){ 1.to_d.div(1 + Math::E.to_d.power(-v, 10), 10) },
       tanh: ->(v){ Math.tanh(v).to_d(10) },
       relu: ->(v){ [0.to_d, v].max },
      linear: ->(v){ v },
data/lib/rann/optimisers/adagrad.rb
ADDED
@@ -0,0 +1,21 @@
+require "bigdecimal"
+require "bigdecimal/util"
+
+# refactor to matrix stuff blah blah
+module RANN
+  module Optimisers
+    class AdaGrad
+      def initialize opts = {}, restore = {}
+        @fudge_factor = opts[:fudge_factor] || 0.00000001.to_d
+        @learning_rate = opts[:learning_rate] || 0.1.to_d
+        @historical_gradient = (restore[:historical_gradient] || {}).tap{ |h| h.default = 0.to_d }
+      end
+
+      def update grad, cid
+        @historical_gradient[cid] = @historical_gradient[cid] + grad.power(2, 10)
+
+        grad.mult(- @learning_rate.div(@fudge_factor + @historical_gradient[cid].sqrt(10), 10), 10)
+      end
+    end
+  end
+end
data/lib/rann/optimisers/rmsprop.rb
ADDED
@@ -0,0 +1,22 @@
+require "bigdecimal"
+require "bigdecimal/util"
+
+# refactor to matrix stuff blah blah
+module RANN
+  module Optimisers
+    class RMSProp
+      def initialize opts = {}, restore = {}
+        @decay = opts[:decay] || 0.9.to_d
+        @fudge_factor = opts[:fudge_factor] || 0.00000001.to_d
+        @learning_rate = opts[:learning_rate] || 0.01.to_d
+        @historical_gradient = (restore[:historical_gradient] || {}).tap{ |h| h.default = 0.to_d }
+      end
+
+      def update grad, cid
+        @historical_gradient[cid] = @decay.mult(@historical_gradient[cid], 10) + (1 - @decay).mult(grad.power(2, 10), 10)
+
+        grad.mult(- @learning_rate.div(@fudge_factor + @historical_gradient[cid].sqrt(10), 10), 10)
+      end
+    end
+  end
+end
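Both new optimisers expose the same update(gradient, connection_id) interface that Backprop calls once per connection, returning the signed weight step. A standalone sketch of the RMSProp accumulation, with purely illustrative values, not taken from the gem's own examples:

    require "rann/optimisers/rmsprop"

    optimiser = RANN::Optimisers::RMSProp.new learning_rate: 0.01.to_d, decay: 0.9.to_d

    # Each call decays the running squared-gradient average for this connection,
    # then returns grad * -learning_rate / (fudge_factor + sqrt(running_average)).
    step = optimiser.update 0.5.to_d, :some_connection_id

    # step is a BigDecimal; Backprop adds it to the connection's weight
    # via `con.weight += update`.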
data/lib/rann/product_neuron.rb
CHANGED
@@ -2,9 +2,11 @@ require "rann/neuron"
 
 module RANN
   class ProductNeuron < Neuron
+    attr_accessor :intermediate
+
     def set_value!
-      intermediate = incoming.reduce{ |i, m| m.mult
-      self.value
+      @intermediate = incoming.reduce{ |i, m| m.mult(i, 10) }
+      self.value = ACTIVATION_FUNCTIONS[activation_function].call @intermediate
    end
  end
end
data/lib/rann/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rann
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.2.0
 platform: ruby
 authors:
 - Michael Campbell
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-11-
+date: 2017-11-24 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: parallel
@@ -95,6 +95,7 @@ files:
 - LICENCE
 - README.md
 - Rakefile
+- bench/xor_benchmark.rb
 - bin/console
 - bin/setup
 - examples/xor.rb
@@ -106,6 +107,8 @@ files:
 - lib/rann/lstm.rb
 - lib/rann/network.rb
 - lib/rann/neuron.rb
+- lib/rann/optimisers/adagrad.rb
+- lib/rann/optimisers/rmsprop.rb
 - lib/rann/product_neuron.rb
 - lib/rann/util/array_ext.rb
 - lib/rann/version.rb