rann 0.1.0 → 0.2.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: b474ce98e49bb5067d2fc8ddba98a9c6f887c618
-  data.tar.gz: feb50a3a4804494c0a4c2551e7a2b3807b89dd69
+  metadata.gz: 199b82878ecd7e186aca7df6529af6b725b049a5
+  data.tar.gz: 5a61a9fb308c3c54055a61e516aa370f152170e8
 SHA512:
-  metadata.gz: fc4f352df3b64af7d6b4c513d0aefd343ec35b4b6b599e7154cdbb7eefd06365023c2192c1736e3d7298a6895b388db3047387cfb4e52e7136f97eb09a320e5d
-  data.tar.gz: eec1f19c738594b4b29c5de6da0268d249c4eaaddec7f243919f96ee73331025e571a52d215e894255c79f2451b55de5eb43caaa53a91d898048548190ee355e
+  metadata.gz: 8d02e3a788f9be66b2912edc21b0b807a6ba16ea867ac6ecb03454988e35e11971f8e39d1938324b834a5241df1c85775240b7665fa566b109ed2578d5fb23e3
+  data.tar.gz: 07b770013594c1b6e9a42ecdd812f2b2e12b3d902d2608d23ad24b73d44e270fb5bfb84636018405d5af7ea94580f89d5d2a09dc6f00917c987c336116a08442
data/README.md CHANGED
@@ -34,17 +34,16 @@ ruby examples/xor.rb
 
 ## TODO
 
-So much. So much.
+So much.
 
+- Matrix calculations (WIP)
 - Convenience methods for setting up standard network topologies, crucially,
-  layers
-- Batch normalization/drop out/early stopping
+  layers (WIP)
+- Batch normalization/drop out/early stopping (WIP, dep. on matrix)
 - Hyperparameter optimisation
 - Other adaptive learning rate algorithms (Adadelta, Adam, etc?)
 - Explore matrix operations and other ways to optimise performance of algorithms
 - RPROP?
-- Use enumerable-statistics gem?
-- Speed up by adding a reduce step to the parallel gem?
 - More examples
 - Tests
 
data/bench/xor_benchmark.rb ADDED
@@ -0,0 +1,55 @@
+require "bundler/setup"
+require "rann"
+
+xor_inputs = [[0,0],[0,1],[1,0],[1,1]]
+xor_targets = [[0],[1],[1],[0]]
+
+time = Time.now.to_i
+results =
+  Array.new(100) do |j|
+    # inputs
+    inputs = Array.new(2){ |i| RANN::Neuron.new "input #{i}", 0, :input }
+
+    # hidden layer
+    hiddens = Array.new(3){ |i| RANN::Neuron.new "hidden #{i}", 3 }
+    bias = RANN::Neuron.new "bias", 0, :bias
+
+    # output layer
+    output = RANN::Neuron.new "output", 3, :output, :sig
+
+    # connect it all
+    connections = []
+    hiddens.each do |h|
+      inputs.each do |i|
+        connections.push RANN::Connection.new i, h
+      end
+      connections.push RANN::Connection.new bias, h
+      connections.push RANN::Connection.new h, output
+    end
+
+    network = RANN::Network.new connections
+    backprop = RANN::Backprop.new network
+
+    i = 0
+    loop do
+      i += 1
+      sample_index = (rand * xor_inputs.size).to_i
+
+      avg_error =
+        backprop.run_batch(
+          [xor_inputs[sample_index].map(&:to_d)],
+          [xor_targets[sample_index].map(&:to_d)],
+          processes: 0,
+          checking: false
+        )
+
+      break if avg_error < 0.0001
+    end
+
+    puts j
+    i
+  end
+
+taken = Time.now.to_i - time
+puts results.reduce(:+).fdiv(results.size).round(2)
+puts "in #{taken}s"
data/lib/rann/backprop.rb CHANGED
@@ -1,6 +1,10 @@
+require "bigdecimal"
+require "bigdecimal/util"
 require "parallel"
 require "rann/gradient_checker"
 require "rann/util/array_ext"
+require "rann/optimisers/adagrad"
+require "rann/optimisers/rmsprop"
 
 module RANN
   class Backprop
@@ -14,27 +18,16 @@ module RANN
       step: ->(_){ 0.to_d },
     }
 
-    DECAY = BigDecimal.new('0.9')
-    MASTER_STEP_SIZE = BigDecimal.new('0.01')
-    FUDGE_FACTOR = BigDecimal.new('0.00000001')
-    LEARNING_RATE = BigDecimal.new('0.01')
-    FRICTION = BigDecimal.new('0.8')
-    NUM_ITERATIONS_BEFORE_LR_ANNEALING = BigDecimal.new('10')
-
-    attr_accessor :network, :lr, :velocities
-
-    def initialize network, restore = {}
-      @network = network
-      @connections_hash = network.connections.each.with_object({}){ |c, h| h[c.id] = c }
-      @lr = LEARNING_RATE
-      @friction = FRICTION
-      @velocities = Hash.new(BigDecimal.new('0'))
-      @historical_gradient = (restore[:historical_gradient] || {}).tap{ |h| h.default = 0.to_d }
-      @historical_update = Hash.new(MASTER_STEP_SIZE)
-      @batch_count = BigDecimal.new('0')
+    attr_accessor :network
+
+    def initialize network, opts = {}, restore = {}
+      @network = network
+      @connections_hash = network.connections.each.with_object({}){ |c, h| h[c.id] = c }
+      @optimiser = RANN::Optimisers.const_get(opts[:optimiser] || 'RMSProp').new opts, restore
+      @batch_count = 0.to_d
     end
 
-    def run_batch(inputs, targets, opts = {})
+    def run_batch inputs, targets, opts = {}
       @batch_count += 1
 
       batch_size = inputs.size
@@ -43,37 +36,47 @@ module RANN
 
       # force longer bits of work per iteration, to maximise CPU usage
       # less marshalling data etc, more work.
-      grouped_inputs = in_groups inputs, [1, opts[:processes]].max * 10, false
-      grouped_results =
-        Parallel.map_with_index grouped_inputs, in_processes: opts[:processes] do |inputs, i|
-          group_avg_gradients = Hash.new{ |h, k| h[k] = 0.to_d }
-          group_avg_error = 0.to_d
+      grouped_inputs = in_groups(inputs, [1, opts[:processes]].max * 10, false).reject &:empty?
+      reduce_proc =
+        lambda do |_, _, result|
+          group_avg_gradients, group_avg_error = result
 
-          inputs.each do |input|
-            gradients, error = Backprop.run_single(network, input, targets[i])
+          avg_gradients.merge!(group_avg_gradients){ |_, o, n| o + n }
+          avg_batch_error += group_avg_error
+        end
 
-            gradients.each do |cid, g|
-              group_avg_gradients[cid] += g.div batch_size, 10
-            end
-            group_avg_error += error.div batch_size, 10
-          end
+      Parallel.each_with_index(
+        grouped_inputs,
+        in_processes: opts[:processes],
+        finish: reduce_proc
+      ) do |inputs, i|
+        group_avg_gradients = Hash.new{ |h, k| h[k] = 0.to_d }
+        group_avg_error = 0.to_d
 
-          group_avg_gradients.default_proc = nil
-          [group_avg_gradients, group_avg_error]
+        inputs.each_with_index do |input, j|
+          gradients, error = Backprop.run_single network, input, targets[i + j]
+
+          gradients.each do |cid, g|
+            group_avg_gradients[cid] += g.div batch_size, 10
+          end
+          group_avg_error += error.div batch_size, 10
         end
 
-      grouped_results.each do |group_avg_gradients, group_avg_error|
-        avg_gradients.merge!(group_avg_gradients){ |_, o, n| o + n }
-        avg_batch_error += group_avg_error
+        group_avg_gradients.default_proc = nil
+        [group_avg_gradients, group_avg_error]
       end
 
       if opts[:checking]
         # check assumes batchsize 1 for now
         sorted_gradients = avg_gradients.values_at *network.connections.map(&:id)
-        if GradientChecker.check network, inputs.first, targets.first, sorted_gradients
+        invalid = GradientChecker.check network, inputs.first, targets.first, sorted_gradients
+        if invalid.empty?
           puts "gradient valid"
         else
-          puts "gradient INVALID"
+          puts "gradients INVALID for connections:"
+          invalid.each do |i|
+            puts "#{network.connections[i].input_neuron.name} -> #{network.connections[i].output_neuron.name}"
+          end
         end
       end
 
@@ -81,7 +84,7 @@ module RANN
        con = @connections_hash[con_id]
        next if con.locked?
 
-        update = adagrad gradient, con.id
+        update = @optimiser.update gradient, con.id
 
        con.weight += update
      end
@@ -107,60 +110,68 @@ module RANN
       error = mse targets, outputs
 
       # backward pass with unravelling for recurrent networks
-      deltas = Hash.new{ |h, k| h[k] = Hash.new(0.to_d) }
-
-      # outputs first
-      network.output_neurons.each.with_index do |o, i|
-        activation_derivative = ACTIVATION_DERIVATIVES[o.activation_function]
-
-        deltas[0][o.id] = mse_delta(targets[i], outputs[i], activation_derivative)
-      end
-
-      # remove this push mechanism, shouldn't be necessary and uses extra memory.
-      incoming_deltas = Hash.new{ |h, k| h[k] = Hash.new{ |h, k| h[k] = [] } }
-      # each timestep backwards through time
-      (inputs.size - 1).downto 0 do |t|
-        network.output_neurons.each do |o|
-          traverse from: o, network: network, timestep: t, deltas: deltas do |other, con|
-            if other.context?
-              this_t = t - 1
-              other = o
-            else
-              this_t = t
-            end
-
-            incoming_deltas[this_t][other.id] <<
-              deltas[t][o.id].mult(con.weight, 10)
-
-            if incoming_deltas[this_t][other.id].size == network.connections_from(other).size
-              sum_of_deltas = incoming_deltas[this_t][other.id].reduce(:+)
+      node_deltas = Hash.new{ |h, k| h[k] = Hash.new(0.to_d) }
+      gradients = Hash.new(0)
+
+      initial_timestep = inputs.size - 1
+      neuron_stack = network.output_neurons.map{ |n| [n, initial_timestep] }
+
+      while current = neuron_stack.shift
+        neuron, timestep = current
+        next if node_deltas[timestep].key? neuron
+
+        from_here = bptt_connecting_to neuron, network, timestep
+        neuron_stack.push *from_here
+
+        # neuron delta is summation of neuron deltas deltas for the connections
+        # from this neuron
+        node_delta =
+          if neuron.output?
+            output_index = network.output_neurons.index neuron
+            activation_derivative = ACTIVATION_DERIVATIVES[neuron.activation_function]
+            mse_delta targets[output_index], outputs[output_index], activation_derivative
+          else
+            sum_of_deltas =
+              network.connections_from(neuron).reduce 0.to_d do |m, c|
+                out_timestep = c.output_neuron.context? ? timestep + 1 : timestep
+                output_node_delta = node_deltas[out_timestep][c.output_neuron.id]
+
+                # connection delta is the output neuron delta multiplied by the
+                # connection's weight
+                connection_delta =
+                  if c.output_neuron.is_a? ProductNeuron
+                    intermediate = states[out_timestep][:intermediates][c.output_neuron.id]
+                    output_node_delta.mult intermediate.div(states[timestep][:values][c.input_neuron.id], 10), 10
+                  else
+                    output_node_delta.mult c.weight, 10
+                  end
+
+                m + connection_delta
+              end
 
-              deltas[this_t][other.id] =
-                ACTIVATION_DERIVATIVES[other.activation_function]
-                .call(states[this_t][other.id])
-                .mult(sum_of_deltas, 10)
-            end
+            ACTIVATION_DERIVATIVES[neuron.activation_function]
+              .call(states[timestep][:values][neuron.id])
+              .mult(sum_of_deltas, 10)
           end
-        end
-      end
 
-      gradients = {}
+        node_deltas[timestep][neuron.id] = node_delta
 
-      network.connections.each_with_index do |con, i|
-        gradients[con.id] = 0.to_d
-        next if con.output_neuron.context?
+        network.connections_to(neuron).each do |c|
+          in_timestep = neuron.context? ? timestep - 1 : timestep
 
-        (inputs.size - 1).downto 0 do |t|
-          if nd = deltas[t][con.output_neuron.id]
-            gradient =
-              if con.input_neuron.context?
-                t == 0 ? 0.to_d : nd.mult(states[t - 1][con.input_neuron.id], 10)
-              else
-                nd.mult states[t][con.input_neuron.id], 10
-              end
+          # connection gradient is the output neuron delta multipled by the
+          # connection's input neuron value.
+          gradient =
+            if c.output_neuron.is_a? ProductNeuron
+              intermediate = states[timestep][:intermediates][c.output_neuron.id]
+              node_delta.mult intermediate.div(c.weight, 10), 10
+            elsif c.input_neuron.context? && timestep == 0
+              0.to_d
+            else
+              node_delta.mult states[in_timestep][:values][c.input_neuron.id], 10
+            end
 
-            gradients[con.id] += gradient
-          end
+          gradients[c.id] += gradient
         end
       end
 
@@ -200,22 +211,16 @@ module RANN
       step_one.mult step_two, 10
     end
 
-    def self.traverse from:, network:, timestep:, deltas:, &block
-      # halt traversal if reached next timestep.
-      return if from.context?
-
-      bptt_connecting_to(from, network, timestep, deltas).each do |n, c|
-        yield n, c
+    def self.bptt_connecting_to neuron, network, timestep
+      # halt traversal if we're at a context and we're at the base timestep
+      return [] if neuron.context? && timestep == 0
 
-        traverse from: n, network: network, timestep: timestep, deltas: deltas, &block
-      end
-    end
-
-    def self.bptt_connecting_to neuron, network, timestep, deltas
       network.connections_to(neuron).each.with_object [] do |c, a|
-        unless c.input_neuron.input? || deltas[timestep].key?(c.input_neuron.id)
-          a << [c.input_neuron, c]
-        end
+        # don't enqueue connections from inputs
+        next if c.input_neuron.input?
+
+        timestep -= timestep if neuron.context?
+        a << [c.input_neuron, timestep]
      end
    end
  end
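For context, the net effect of the backprop.rb changes above is that weight updates are delegated to a pluggable optimiser looked up by name, with RMSProp as the default. A minimal, hypothetical construction sketch (network wiring borrowed from the benchmark script above; option keys as read by the optimiser classes shown further down):

```ruby
require "bundler/setup"
require "rann"

# two inputs feeding a single sigmoid output: just enough to build a Backprop
a   = RANN::Neuron.new "a", 0, :input
b   = RANN::Neuron.new "b", 0, :input
out = RANN::Neuron.new "out", 2, :output, :sig

network = RANN::Network.new [
  RANN::Connection.new(a, out),
  RANN::Connection.new(b, out),
]

# the optimiser is selected via opts[:optimiser]; omit it to get RMSProp
backprop = RANN::Backprop.new network, optimiser: "AdaGrad", learning_rate: 0.1.to_d
```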
data/lib/rann/gradient_checker.rb CHANGED
@@ -24,7 +24,9 @@ module RANN
       gradapprox[i] = (error_thetaplus - error_thetaminus).div(EPSILON.mult(2, 10), 10)
     end
 
-    gradapprox.each.with_index.all?{ |ga, i| in_epsilon? ga, dvec[i] }
+    gradapprox.each.with_index.with_object [] do |(ga, i), res|
+      res << i unless in_epsilon? ga, dvec[i]
+    end
   end
 
   def self.error outputs, targets
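For context, `GradientChecker.check` now returns the indices of gradients that fall outside the epsilon tolerance instead of a single boolean, which is how `run_batch` above builds its per-connection report. A minimal, hypothetical sketch of consuming that return value (variable names are illustrative):

```ruby
# hypothetical caller; network, input, target and analytic_gradients prepared as in Backprop#run_batch
invalid = RANN::GradientChecker.check network, input, target, analytic_gradients

if invalid.empty?
  puts "gradient valid"
else
  invalid.each{ |i| puts "check failed for connection #{network.connections[i].id}" }
end
```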
data/lib/rann/lstm.rb CHANGED
@@ -8,44 +8,46 @@ module RANN
   class LSTM
     attr_reader :network, :inputs, :outputs, :name
 
-    def initialize name
+    def initialize name, size
       @name = name
       @network = RANN::Network.new
       @inputs = []
       @outputs = []
+      @size = size
+      init
     end
 
     def init
-      @inputs.each.with_index do |input, i|
-        f = RANN::Neuron.new("LSTM #{name} F #{i}", 3, :standard, :sig).tap{ |n| @network.add n }
-        i = RANN::Neuron.new("LSTM #{name} I #{i}", 4, :standard, :sig).tap{ |n| @network.add n }
-        g = RANN::Neuron.new("LSTM #{name} G #{i}", 3, :standard, :tanh).tap{ |n| @network.add n }
-        o = RANN::Neuron.new("LSTM #{name} O #{i}", 3, :standard, :sig).tap{ |n| @network.add n }
-        bias_f = RANN::Neuron.new("LSTM #{name} Bias F #{i}", 0, :bias).tap do |n|
+      input_bias = RANN::Neuron.new("LSTM #{name} Input Bias", 0, :bias).tap{ |n| @network.add n }
+      @size.times do |j|
+        input = RANN::Neuron.new("LSTM #{name} Input #{j}", 0, :standard).tap{ |n| @network.add n }
+        @inputs << input
+
+        f = RANN::Neuron.new("LSTM #{name} F #{j}", 3, :standard, :sig).tap{ |n| @network.add n }
+        i = RANN::Neuron.new("LSTM #{name} I #{j}", 4, :standard, :sig).tap{ |n| @network.add n }
+        g = RANN::Neuron.new("LSTM #{name} G #{j}", 3, :standard, :tanh).tap{ |n| @network.add n }
+        o = RANN::Neuron.new("LSTM #{name} O #{j}", 3, :standard, :sig).tap{ |n| @network.add n }
+        bias_f = RANN::Neuron.new("LSTM #{name} Bias F #{j}", 0, :bias).tap do |n|
           @network.add n
-          n.value = 1.to_d
         end
-        bias_i = RANN::Neuron.new("LSTM #{name} Bias I #{i}", 0, :bias).tap do |n|
+        bias_i = RANN::Neuron.new("LSTM #{name} Bias I #{j}", 0, :bias).tap do |n|
          @network.add n
-          n.value = 1.to_d
        end
-        bias_g = RANN::Neuron.new("LSTM #{name} Bias G #{i}", 0, :bias).tap do |n|
+        bias_g = RANN::Neuron.new("LSTM #{name} Bias G #{j}", 0, :bias).tap do |n|
          @network.add n
-          n.value = 1.to_d
        end
-        bias_o = RANN::Neuron.new("LSTM #{name} Bias O #{i}", 0, :bias).tap do |n|
+        bias_o = RANN::Neuron.new("LSTM #{name} Bias O #{j}", 0, :bias).tap do |n|
          @network.add n
-          n.value = 1.to_d
        end
-        memory_product = RANN::ProductNeuron.new("LSTM #{name} Mem Product #{i}", 2, :standard, :linear).tap{ |n| @network.add n }
-        i_g_product = RANN::ProductNeuron.new("LSTM #{name} Hidden 2/3 Product #{i}", 2, :standard, :linear).tap{ |n| @network.add n }
-        memory_standard = RANN::Neuron.new("LSTM #{name} Mem Standard #{i}", 2, :standard, :linear).tap{ |n| @network.add n }
-        memory_tanh = RANN::Neuron.new("LSTM #{name} Mem Tanh #{i}", 1, :standard, :tanh).tap{ |n| @network.add n }
-        memory_o_product = RANN::ProductNeuron.new("LSTM #{name} Mem/Hidden 4 Product #{i}", 2, :standard, :linear).tap{ |n| @network.add n }
-        output = RANN::Neuron.new("LSTM #{name} Output #{i}", 1, :standard, :linear).tap{ |n| @network.add n }
+        memory_product = RANN::ProductNeuron.new("LSTM #{name} Mem Product #{j}", 2, :standard, :linear).tap{ |n| @network.add n }
+        i_g_product = RANN::ProductNeuron.new("LSTM #{name} Hidden 2/3 Product #{j}", 2, :standard, :linear).tap{ |n| @network.add n }
+        memory_standard = RANN::Neuron.new("LSTM #{name} Mem Standard #{j}", 2, :standard, :linear).tap{ |n| @network.add n }
+        memory_tanh = RANN::Neuron.new("LSTM #{name} Mem Tanh #{j}", 1, :standard, :tanh).tap{ |n| @network.add n }
+        memory_o_product = RANN::ProductNeuron.new("LSTM #{name} Mem/Hidden 4 Product #{j}", 2, :standard, :linear).tap{ |n| @network.add n }
+        output = RANN::Neuron.new("LSTM #{name} Output #{j}", 1, :standard, :linear).tap{ |n| @network.add n }
         @outputs << output
-        memory_context = RANN::Neuron.new("LSTM #{name} Mem Context #{i}", 1, :context).tap{ |n| @network.add n }
-        output_context = RANN::Neuron.new("LSTM #{name} Output Context #{i}", 1, :context).tap{ |n| @network.add n }
+        memory_context = RANN::Neuron.new("LSTM #{name} Mem Context #{j}", 1, :context).tap{ |n| @network.add n }
+        output_context = RANN::Neuron.new("LSTM #{name} Output Context #{j}", 1, :context).tap{ |n| @network.add n }
 
         @network.add RANN::LockedConnection.new input, f, 1
         @network.add RANN::LockedConnection.new input, i, 1
@@ -72,15 +74,14 @@ module RANN
         @network.add RANN::Connection.new bias_i, i
         @network.add RANN::Connection.new bias_g, g
         @network.add RANN::Connection.new bias_o, o
+        @network.add RANN::Connection.new input_bias, input
       end
     end
 
     def add_input neuron
-      input = RANN::Neuron.new "LSTM #{name} Input #{neuron.name}", 0, :standard, :linear
-      @network.add input
-      @inputs << input
-      connection = RANN::Connection.new neuron, input
-      @network.add connection
+      @inputs.each do |input|
+        @network.add RANN::Connection.new neuron, input
+      end
    end
  end
 end
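For context, the lstm.rb changes above move cell construction into the constructor (which now takes an explicit size and calls `init` itself) and make `add_input` fan an external neuron out to every cell input. A minimal, hypothetical wiring sketch:

```ruby
require "rann"
require "rann/lstm"

# an LSTM block named "l1" with two cells; the constructor now builds the cells itself
lstm = RANN::LSTM.new "l1", 2

# an external input neuron, connected by add_input to every cell input of the block
x = RANN::Neuron.new "x", 0, :input
lstm.add_input x

# the block's internal network and output neurons stay exposed for further wiring
lstm.network
lstm.outputs
```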
data/lib/rann/network.rb CHANGED
@@ -97,7 +97,10 @@ module RANN
     end
 
     def state
-      neurons.each.with_object({}){ |n, s| s[n.id] = n.value }
+      {
+        values: neurons.each.with_object({}){ |n, s| s[n.id] = n.value },
+        intermediates: neurons.select{ |n| n.is_a? ProductNeuron }.each.with_object({}){ |n, s| s[n.id] = n.intermediate }
+      }
     end
 
     def connections_to neuron
data/lib/rann/neuron.rb CHANGED
@@ -5,7 +5,7 @@ require "bigdecimal/util"
 module RANN
   class Neuron
     ACTIVATION_FUNCTIONS = {
-      sig: ->(v){ 1.to_d.div(1 + (Math::E ** -v), 10) },
+      sig: ->(v){ 1.to_d.div(1 + Math::E.to_d.power(-v, 10), 10) },
      tanh: ->(v){ Math.tanh(v).to_d(10) },
      relu: ->(v){ [0.to_d, v].max },
      linear: ->(v){ v },
data/lib/rann/optimisers/adagrad.rb ADDED
@@ -0,0 +1,21 @@
+require "bigdecimal"
+require "bigdecimal/util"
+
+# refactor to matrix stuff blah blah
+module RANN
+  module Optimisers
+    class AdaGrad
+      def initialize opts = {}, restore = {}
+        @fudge_factor = opts[:fudge_factor] || 0.00000001.to_d
+        @learning_rate = opts[:learning_rate] || 0.1.to_d
+        @historical_gradient = (restore[:historical_gradient] || {}).tap{ |h| h.default = 0.to_d }
+      end
+
+      def update grad, cid
+        @historical_gradient[cid] = @historical_gradient[cid] + grad.power(2, 10)
+
+        grad.mult(- @learning_rate.div(@fudge_factor + @historical_gradient[cid].sqrt(10), 10), 10)
+      end
+    end
+  end
+end
data/lib/rann/optimisers/rmsprop.rb ADDED
@@ -0,0 +1,22 @@
+require "bigdecimal"
+require "bigdecimal/util"
+
+# refactor to matrix stuff blah blah
+module RANN
+  module Optimisers
+    class RMSProp
+      def initialize opts = {}, restore = {}
+        @decay = opts[:decay] || 0.9.to_d
+        @fudge_factor = opts[:fudge_factor] || 0.00000001.to_d
+        @learning_rate = opts[:learning_rate] || 0.01.to_d
+        @historical_gradient = (restore[:historical_gradient] || {}).tap{ |h| h.default = 0.to_d }
+      end
+
+      def update grad, cid
+        @historical_gradient[cid] = @decay.mult(@historical_gradient[cid], 10) + (1 - @decay).mult(grad.power(2, 10), 10)
+
+        grad.mult(- @learning_rate.div(@fudge_factor + @historical_gradient[cid].sqrt(10), 10), 10)
+      end
+    end
+  end
+end
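For context, both new optimiser classes expose the same two-argument `update` interface (gradient and connection id) and return the signed weight delta, which is how `Backprop` applies them above. A minimal standalone sketch, assuming BigDecimal inputs and the option keys defined in the class:

```ruby
require "bigdecimal"
require "bigdecimal/util"
require "rann/optimisers/rmsprop"

# option values here are illustrative; defaults are used when keys are omitted
optimiser = RANN::Optimisers::RMSProp.new learning_rate: 0.01.to_d, decay: 0.9.to_d

# returns the update to add to the connection's weight for this gradient
delta = optimiser.update "0.5".to_d, :some_connection_id
```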
data/lib/rann/product_neuron.rb CHANGED
@@ -2,9 +2,11 @@ require "rann/neuron"
 
 module RANN
   class ProductNeuron < Neuron
+    attr_accessor :intermediate
+
     def set_value!
-      intermediate = incoming.reduce{ |i, m| m.mult i, 10 }
-      self.value = ACTIVATION_FUNCTIONS[activation_function].call intermediate
+      @intermediate = incoming.reduce{ |i, m| m.mult(i, 10) }
+      self.value = ACTIVATION_FUNCTIONS[activation_function].call @intermediate
    end
  end
 end
data/lib/rann/version.rb CHANGED
@@ -1,3 +1,3 @@
 module RANN
-  VERSION = "0.1.0"
+  VERSION = "0.2.0"
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rann
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.2.0
 platform: ruby
 authors:
 - Michael Campbell
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-11-08 00:00:00.000000000 Z
+date: 2017-11-24 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: parallel
@@ -95,6 +95,7 @@ files:
 - LICENCE
 - README.md
 - Rakefile
+- bench/xor_benchmark.rb
 - bin/console
 - bin/setup
 - examples/xor.rb
@@ -106,6 +107,8 @@ files:
 - lib/rann/lstm.rb
 - lib/rann/network.rb
 - lib/rann/neuron.rb
+- lib/rann/optimisers/adagrad.rb
+- lib/rann/optimisers/rmsprop.rb
 - lib/rann/product_neuron.rb
 - lib/rann/util/array_ext.rb
 - lib/rann/version.rb