rann 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -5
- data/bench/xor_benchmark.rb +55 -0
- data/lib/rann/backprop.rb +106 -101
- data/lib/rann/gradient_checker.rb +3 -1
- data/lib/rann/lstm.rb +28 -27
- data/lib/rann/network.rb +4 -1
- data/lib/rann/neuron.rb +1 -1
- data/lib/rann/optimisers/adagrad.rb +21 -0
- data/lib/rann/optimisers/rmsprop.rb +22 -0
- data/lib/rann/product_neuron.rb +4 -2
- data/lib/rann/version.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 199b82878ecd7e186aca7df6529af6b725b049a5
+  data.tar.gz: 5a61a9fb308c3c54055a61e516aa370f152170e8
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8d02e3a788f9be66b2912edc21b0b807a6ba16ea867ac6ecb03454988e35e11971f8e39d1938324b834a5241df1c85775240b7665fa566b109ed2578d5fb23e3
+  data.tar.gz: 07b770013594c1b6e9a42ecdd812f2b2e12b3d902d2608d23ad24b73d44e270fb5bfb84636018405d5af7ea94580f89d5d2a09dc6f00917c987c336116a08442
data/README.md
CHANGED
@@ -34,17 +34,16 @@ ruby examples/xor.rb
 
 ## TODO
 
-So much.
+So much.
 
+- Matrix calculations (WIP)
 - Convenience methods for setting up standard network topologies, crucially,
-  layers
-- Batch normalization/drop out/early stopping
+  layers (WIP)
+- Batch normalization/drop out/early stopping (WIP, dep. on matrix)
 - Hyperparameter optimisation
 - Other adaptive learning rate algorithms (Adadelta, Adam, etc?)
 - Explore matrix operations and other ways to optimise performance of algorithms
 - RPROP?
-- Use enumerable-statistics gem?
-- Speed up by adding a reduce step to the parallel gem?
 - More examples
 - Tests
 
data/bench/xor_benchmark.rb
ADDED
@@ -0,0 +1,55 @@
+require "bundler/setup"
+require "rann"
+
+xor_inputs = [[0,0],[0,1],[1,0],[1,1]]
+xor_targets = [[0],[1],[1],[0]]
+
+time = Time.now.to_i
+results =
+  Array.new(100) do |j|
+    # inputs
+    inputs = Array.new(2){ |i| RANN::Neuron.new "input #{i}", 0, :input }
+
+    # hidden layer
+    hiddens = Array.new(3){ |i| RANN::Neuron.new "hidden #{i}", 3 }
+    bias = RANN::Neuron.new "bias", 0, :bias
+
+    # output layer
+    output = RANN::Neuron.new "output", 3, :output, :sig
+
+    # connect it all
+    connections = []
+    hiddens.each do |h|
+      inputs.each do |i|
+        connections.push RANN::Connection.new i, h
+      end
+      connections.push RANN::Connection.new bias, h
+      connections.push RANN::Connection.new h, output
+    end
+
+    network = RANN::Network.new connections
+    backprop = RANN::Backprop.new network
+
+    i = 0
+    loop do
+      i += 1
+      sample_index = (rand * xor_inputs.size).to_i
+
+      avg_error =
+        backprop.run_batch(
+          [xor_inputs[sample_index].map(&:to_d)],
+          [xor_targets[sample_index].map(&:to_d)],
+          processes: 0,
+          checking: false
+        )
+
+      break if avg_error < 0.0001
+    end
+
+    puts j
+    i
+  end
+
+taken = Time.now.to_i - time
+puts results.reduce(:+).fdiv(results.size).round(2)
+puts "in #{taken}s"
data/lib/rann/backprop.rb
CHANGED
@@ -1,6 +1,10 @@
+require "bigdecimal"
+require "bigdecimal/util"
 require "parallel"
 require "rann/gradient_checker"
 require "rann/util/array_ext"
+require "rann/optimisers/adagrad"
+require "rann/optimisers/rmsprop"
 
 module RANN
   class Backprop
@@ -14,27 +18,16 @@ module RANN
       step: ->(_){ 0.to_d },
     }
 
-
-
-
-
-
-
-
-    attr_accessor :network, :lr, :velocities
-
-    def initialize network, restore = {}
-      @network = network
-      @connections_hash = network.connections.each.with_object({}){ |c, h| h[c.id] = c }
-      @lr = LEARNING_RATE
-      @friction = FRICTION
-      @velocities = Hash.new(BigDecimal.new('0'))
-      @historical_gradient = (restore[:historical_gradient] || {}).tap{ |h| h.default = 0.to_d }
-      @historical_update = Hash.new(MASTER_STEP_SIZE)
-      @batch_count = BigDecimal.new('0')
+    attr_accessor :network
+
+    def initialize network, opts = {}, restore = {}
+      @network = network
+      @connections_hash = network.connections.each.with_object({}){ |c, h| h[c.id] = c }
+      @optimiser = RANN::Optimisers.const_get(opts[:optimiser] || 'RMSProp').new opts, restore
+      @batch_count = 0.to_d
     end
 
-    def run_batch
+    def run_batch inputs, targets, opts = {}
       @batch_count += 1
 
       batch_size = inputs.size
@@ -43,37 +36,47 @@ module RANN
 
       # force longer bits of work per iteration, to maximise CPU usage
       # less marshalling data etc, more work.
-      grouped_inputs = in_groups
-
-
-      group_avg_gradients
-      group_avg_error = 0.to_d
+      grouped_inputs = in_groups(inputs, [1, opts[:processes]].max * 10, false).reject &:empty?
+      reduce_proc =
+        lambda do |_, _, result|
+          group_avg_gradients, group_avg_error = result
 
-
-
+          avg_gradients.merge!(group_avg_gradients){ |_, o, n| o + n }
+          avg_batch_error += group_avg_error
+        end
 
-
-
-
-
-
+      Parallel.each_with_index(
+        grouped_inputs,
+        in_processes: opts[:processes],
+        finish: reduce_proc
+      ) do |inputs, i|
+        group_avg_gradients = Hash.new{ |h, k| h[k] = 0.to_d }
+        group_avg_error = 0.to_d
 
-
-
+        inputs.each_with_index do |input, j|
+          gradients, error = Backprop.run_single network, input, targets[i + j]
+
+          gradients.each do |cid, g|
+            group_avg_gradients[cid] += g.div batch_size, 10
+          end
+          group_avg_error += error.div batch_size, 10
         end
 
-
-
-        avg_batch_error += group_avg_error
+        group_avg_gradients.default_proc = nil
+        [group_avg_gradients, group_avg_error]
       end
 
      if opts[:checking]
        # check assumes batchsize 1 for now
        sorted_gradients = avg_gradients.values_at *network.connections.map(&:id)
-
+        invalid = GradientChecker.check network, inputs.first, targets.first, sorted_gradients
+        if invalid.empty?
          puts "gradient valid"
        else
-          puts "
+          puts "gradients INVALID for connections:"
+          invalid.each do |i|
+            puts "#{network.connections[i].input_neuron.name} -> #{network.connections[i].output_neuron.name}"
+          end
        end
      end
 
@@ -81,7 +84,7 @@
        con = @connections_hash[con_id]
        next if con.locked?
 
-        update =
+        update = @optimiser.update gradient, con.id
 
        con.weight += update
      end
@@ -107,60 +110,68 @@
      error = mse targets, outputs
 
      # backward pass with unravelling for recurrent networks
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+      node_deltas = Hash.new{ |h, k| h[k] = Hash.new(0.to_d) }
+      gradients = Hash.new(0)
+
+      initial_timestep = inputs.size - 1
+      neuron_stack = network.output_neurons.map{ |n| [n, initial_timestep] }
+
+      while current = neuron_stack.shift
+        neuron, timestep = current
+        next if node_deltas[timestep].key? neuron
+
+        from_here = bptt_connecting_to neuron, network, timestep
+        neuron_stack.push *from_here
+
+        # neuron delta is summation of neuron deltas deltas for the connections
+        # from this neuron
+        node_delta =
+          if neuron.output?
+            output_index = network.output_neurons.index neuron
+            activation_derivative = ACTIVATION_DERIVATIVES[neuron.activation_function]
+            mse_delta targets[output_index], outputs[output_index], activation_derivative
+          else
+            sum_of_deltas =
+              network.connections_from(neuron).reduce 0.to_d do |m, c|
+                out_timestep = c.output_neuron.context? ? timestep + 1 : timestep
+                output_node_delta = node_deltas[out_timestep][c.output_neuron.id]
+
+                # connection delta is the output neuron delta multiplied by the
+                # connection's weight
+                connection_delta =
+                  if c.output_neuron.is_a? ProductNeuron
+                    intermediate = states[out_timestep][:intermediates][c.output_neuron.id]
+                    output_node_delta.mult intermediate.div(states[timestep][:values][c.input_neuron.id], 10), 10
+                  else
+                    output_node_delta.mult c.weight, 10
+                  end
+
+                m + connection_delta
+              end
 
-
-
-
-            .mult(sum_of_deltas, 10)
-        end
+            ACTIVATION_DERIVATIVES[neuron.activation_function]
+              .call(states[timestep][:values][neuron.id])
+              .mult(sum_of_deltas, 10)
          end
-        end
-      end
 
-
+        node_deltas[timestep][neuron.id] = node_delta
 
-
-
-        next if con.output_neuron.context?
+        network.connections_to(neuron).each do |c|
+          in_timestep = neuron.context? ? timestep - 1 : timestep
 
-
-
-
-
-
-
-
-
+          # connection gradient is the output neuron delta multipled by the
+          # connection's input neuron value.
+          gradient =
+            if c.output_neuron.is_a? ProductNeuron
+              intermediate = states[timestep][:intermediates][c.output_neuron.id]
+              node_delta.mult intermediate.div(c.weight, 10), 10
+            elsif c.input_neuron.context? && timestep == 0
+              0.to_d
+            else
+              node_delta.mult states[in_timestep][:values][c.input_neuron.id], 10
+            end
 
-
-        end
+          gradients[c.id] += gradient
        end
      end
 
@@ -200,22 +211,16 @@
      step_one.mult step_two, 10
    end
 
-    def self.
-      # halt traversal if
-      return if
-
-      bptt_connecting_to(from, network, timestep, deltas).each do |n, c|
-        yield n, c
+    def self.bptt_connecting_to neuron, network, timestep
+      # halt traversal if we're at a context and we're at the base timestep
+      return [] if neuron.context? && timestep == 0
 
-        traverse from: n, network: network, timestep: timestep, deltas: deltas, &block
-      end
-    end
-
-    def self.bptt_connecting_to neuron, network, timestep, deltas
      network.connections_to(neuron).each.with_object [] do |c, a|
-
-
-
+        # don't enqueue connections from inputs
+        next if c.input_neuron.input?
+
+        timestep -= timestep if neuron.context?
+        a << [c.input_neuron, timestep]
      end
    end
  end
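Taken together, these hunks change the public Backprop API: the constructor now takes an options hash whose :optimiser key names a class under RANN::Optimisers (defaulting to 'RMSProp') and forwards the remaining options to it, and run_batch now receives the batch inputs and targets explicitly. A minimal usage sketch based only on the signatures above; `network` is assumed to be an already-built two-input, one-output RANN::Network, as in bench/xor_benchmark.rb:

    require "rann"

    # the options hash is passed through to the optimiser, so :learning_rate etc. apply there
    backprop = RANN::Backprop.new network, optimiser: 'AdaGrad', learning_rate: 0.05.to_d

    avg_error = backprop.run_batch(
      [[0.to_d, 1.to_d], [1.to_d, 1.to_d]],  # batch of input vectors
      [[1.to_d], [0.to_d]],                  # matching target vectors
      processes: 0,                          # run in-process, as the benchmark does
      checking:  false
    )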
data/lib/rann/gradient_checker.rb
CHANGED
@@ -24,7 +24,9 @@ module RANN
      gradapprox[i] = (error_thetaplus - error_thetaminus).div(EPSILON.mult(2, 10), 10)
    end
 
-    gradapprox.each.with_index.
+    gradapprox.each.with_index.with_object [] do |(ga, i), res|
+      res << i unless in_epsilon? ga, dvec[i]
+    end
  end
 
  def self.error outputs, targets
data/lib/rann/lstm.rb
CHANGED
@@ -8,44 +8,46 @@ module RANN
  class LSTM
    attr_reader :network, :inputs, :outputs, :name
 
-    def initialize name
+    def initialize name, size
      @name = name
      @network = RANN::Network.new
      @inputs = []
      @outputs = []
+      @size = size
+      init
    end
 
    def init
-
-
-
-
-
-
+      input_bias = RANN::Neuron.new("LSTM #{name} Input Bias", 0, :bias).tap{ |n| @network.add n }
+      @size.times do |j|
+        input = RANN::Neuron.new("LSTM #{name} Input #{j}", 0, :standard).tap{ |n| @network.add n }
+        @inputs << input
+
+        f = RANN::Neuron.new("LSTM #{name} F #{j}", 3, :standard, :sig).tap{ |n| @network.add n }
+        i = RANN::Neuron.new("LSTM #{name} I #{j}", 4, :standard, :sig).tap{ |n| @network.add n }
+        g = RANN::Neuron.new("LSTM #{name} G #{j}", 3, :standard, :tanh).tap{ |n| @network.add n }
+        o = RANN::Neuron.new("LSTM #{name} O #{j}", 3, :standard, :sig).tap{ |n| @network.add n }
+        bias_f = RANN::Neuron.new("LSTM #{name} Bias F #{j}", 0, :bias).tap do |n|
          @network.add n
-          n.value = 1.to_d
        end
-        bias_i = RANN::Neuron.new("LSTM #{name} Bias I #{
+        bias_i = RANN::Neuron.new("LSTM #{name} Bias I #{j}", 0, :bias).tap do |n|
          @network.add n
-          n.value = 1.to_d
        end
-        bias_g = RANN::Neuron.new("LSTM #{name} Bias G #{
+        bias_g = RANN::Neuron.new("LSTM #{name} Bias G #{j}", 0, :bias).tap do |n|
          @network.add n
-          n.value = 1.to_d
        end
-        bias_o = RANN::Neuron.new("LSTM #{name} Bias O #{
+        bias_o = RANN::Neuron.new("LSTM #{name} Bias O #{j}", 0, :bias).tap do |n|
          @network.add n
-          n.value = 1.to_d
        end
-        memory_product = RANN::ProductNeuron.new("LSTM #{name} Mem Product #{
-        i_g_product = RANN::ProductNeuron.new("LSTM #{name} Hidden 2/3 Product #{
-        memory_standard = RANN::Neuron.new("LSTM #{name} Mem Standard #{
-        memory_tanh = RANN::Neuron.new("LSTM #{name} Mem Tanh #{
-        memory_o_product = RANN::ProductNeuron.new("LSTM #{name} Mem/Hidden 4 Product #{
-        output = RANN::Neuron.new("LSTM #{name} Output #{
+        memory_product = RANN::ProductNeuron.new("LSTM #{name} Mem Product #{j}", 2, :standard, :linear).tap{ |n| @network.add n }
+        i_g_product = RANN::ProductNeuron.new("LSTM #{name} Hidden 2/3 Product #{j}", 2, :standard, :linear).tap{ |n| @network.add n }
+        memory_standard = RANN::Neuron.new("LSTM #{name} Mem Standard #{j}", 2, :standard, :linear).tap{ |n| @network.add n }
+        memory_tanh = RANN::Neuron.new("LSTM #{name} Mem Tanh #{j}", 1, :standard, :tanh).tap{ |n| @network.add n }
+        memory_o_product = RANN::ProductNeuron.new("LSTM #{name} Mem/Hidden 4 Product #{j}", 2, :standard, :linear).tap{ |n| @network.add n }
+        output = RANN::Neuron.new("LSTM #{name} Output #{j}", 1, :standard, :linear).tap{ |n| @network.add n }
        @outputs << output
-        memory_context = RANN::Neuron.new("LSTM #{name} Mem Context #{
-        output_context = RANN::Neuron.new("LSTM #{name} Output Context #{
+        memory_context = RANN::Neuron.new("LSTM #{name} Mem Context #{j}", 1, :context).tap{ |n| @network.add n }
+        output_context = RANN::Neuron.new("LSTM #{name} Output Context #{j}", 1, :context).tap{ |n| @network.add n }
 
        @network.add RANN::LockedConnection.new input, f, 1
        @network.add RANN::LockedConnection.new input, i, 1
@@ -72,15 +74,14 @@ module RANN
        @network.add RANN::Connection.new bias_i, i
        @network.add RANN::Connection.new bias_g, g
        @network.add RANN::Connection.new bias_o, o
+        @network.add RANN::Connection.new input_bias, input
      end
    end
 
    def add_input neuron
-
-
-
-      connection = RANN::Connection.new neuron, input
-      @network.add connection
+      @inputs.each do |input|
+        @network.add RANN::Connection.new neuron, input
+      end
    end
  end
end
data/lib/rann/network.rb
CHANGED
@@ -97,7 +97,10 @@ module RANN
    end
 
    def state
-
+      {
+        values: neurons.each.with_object({}){ |n, s| s[n.id] = n.value },
+        intermediates: neurons.select{ |n| n.is_a? ProductNeuron }.each.with_object({}){ |n, s| s[n.id] = n.intermediate }
+      }
    end
 
    def connections_to neuron
data/lib/rann/neuron.rb
CHANGED
@@ -5,7 +5,7 @@ require "bigdecimal/util"
 module RANN
  class Neuron
    ACTIVATION_FUNCTIONS = {
-      sig: ->(v){ 1.to_d.div(1 +
+      sig: ->(v){ 1.to_d.div(1 + Math::E.to_d.power(-v, 10), 10) },
      tanh: ->(v){ Math.tanh(v).to_d(10) },
      relu: ->(v){ [0.to_d, v].max },
      linear: ->(v){ v },
data/lib/rann/optimisers/adagrad.rb
ADDED
@@ -0,0 +1,21 @@
+require "bigdecimal"
+require "bigdecimal/util"
+
+# refactor to matrix stuff blah blah
+module RANN
+  module Optimisers
+    class AdaGrad
+      def initialize opts = {}, restore = {}
+        @fudge_factor        = opts[:fudge_factor]  || 0.00000001.to_d
+        @learning_rate       = opts[:learning_rate] || 0.1.to_d
+        @historical_gradient = (restore[:historical_gradient] || {}).tap{ |h| h.default = 0.to_d }
+      end
+
+      def update grad, cid
+        @historical_gradient[cid] = @historical_gradient[cid] + grad.power(2, 10)
+
+        grad.mult(- @learning_rate.div(@fudge_factor + @historical_gradient[cid].sqrt(10), 10), 10)
+      end
+    end
+  end
+end
data/lib/rann/optimisers/rmsprop.rb
ADDED
@@ -0,0 +1,22 @@
+require "bigdecimal"
+require "bigdecimal/util"
+
+# refactor to matrix stuff blah blah
+module RANN
+  module Optimisers
+    class RMSProp
+      def initialize opts = {}, restore = {}
+        @decay               = opts[:decay]         || 0.9.to_d
+        @fudge_factor        = opts[:fudge_factor]  || 0.00000001.to_d
+        @learning_rate       = opts[:learning_rate] || 0.01.to_d
+        @historical_gradient = (restore[:historical_gradient] || {}).tap{ |h| h.default = 0.to_d }
+      end
+
+      def update grad, cid
+        @historical_gradient[cid] = @decay.mult(@historical_gradient[cid], 10) + (1 - @decay).mult(grad.power(2, 10), 10)
+
+        grad.mult(- @learning_rate.div(@fudge_factor + @historical_gradient[cid].sqrt(10), 10), 10)
+      end
+    end
+  end
+end
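Both optimisers keep a per-connection accumulator of squared gradients keyed by connection id and return the signed weight update; AdaGrad only ever grows its accumulator, while RMSProp decays it with @decay so the effective step size can recover. A small sketch exercising RMSProp on its own (the connection id is just a hash key here, not a real connection):

    require "rann/optimisers/rmsprop"

    opt = RANN::Optimisers::RMSProp.new learning_rate: 0.01.to_d, decay: 0.9.to_d
    3.times { puts opt.update(0.5.to_d, :some_connection).to_f }
    # the accumulator grows on each call, so the returned update shrinks:
    # roughly -0.0316, -0.0229, -0.0192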
data/lib/rann/product_neuron.rb
CHANGED
@@ -2,9 +2,11 @@ require "rann/neuron"
 
 module RANN
  class ProductNeuron < Neuron
+    attr_accessor :intermediate
+
    def set_value!
-      intermediate = incoming.reduce{ |i, m| m.mult
-      self.value
+      @intermediate = incoming.reduce{ |i, m| m.mult(i, 10) }
+      self.value = ACTIVATION_FUNCTIONS[activation_function].call @intermediate
    end
  end
end
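Storing the pre-activation product in #intermediate is what lets Network#state snapshot it (the :intermediates hash above) and what the ProductNeuron branches in Backprop.run_single rely on when they divide the intermediate by a connection's input value or weight during the backward pass.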
data/lib/rann/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rann
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.2.0
 platform: ruby
 authors:
 - Michael Campbell
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-11-
+date: 2017-11-24 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: parallel
@@ -95,6 +95,7 @@ files:
 - LICENCE
 - README.md
 - Rakefile
+- bench/xor_benchmark.rb
 - bin/console
 - bin/setup
 - examples/xor.rb
@@ -106,6 +107,8 @@ files:
 - lib/rann/lstm.rb
 - lib/rann/network.rb
 - lib/rann/neuron.rb
+- lib/rann/optimisers/adagrad.rb
+- lib/rann/optimisers/rmsprop.rb
 - lib/rann/product_neuron.rb
 - lib/rann/util/array_ext.rb
 - lib/rann/version.rb