rann 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: b474ce98e49bb5067d2fc8ddba98a9c6f887c618
- data.tar.gz: feb50a3a4804494c0a4c2551e7a2b3807b89dd69
+ metadata.gz: 199b82878ecd7e186aca7df6529af6b725b049a5
+ data.tar.gz: 5a61a9fb308c3c54055a61e516aa370f152170e8
  SHA512:
- metadata.gz: fc4f352df3b64af7d6b4c513d0aefd343ec35b4b6b599e7154cdbb7eefd06365023c2192c1736e3d7298a6895b388db3047387cfb4e52e7136f97eb09a320e5d
- data.tar.gz: eec1f19c738594b4b29c5de6da0268d249c4eaaddec7f243919f96ee73331025e571a52d215e894255c79f2451b55de5eb43caaa53a91d898048548190ee355e
+ metadata.gz: 8d02e3a788f9be66b2912edc21b0b807a6ba16ea867ac6ecb03454988e35e11971f8e39d1938324b834a5241df1c85775240b7665fa566b109ed2578d5fb23e3
+ data.tar.gz: 07b770013594c1b6e9a42ecdd812f2b2e12b3d902d2608d23ad24b73d44e270fb5bfb84636018405d5af7ea94580f89d5d2a09dc6f00917c987c336116a08442
data/README.md CHANGED
@@ -34,17 +34,16 @@ ruby examples/xor.rb
 
  ## TODO
 
- So much. So much.
+ So much.
 
+ - Matrix calculations (WIP)
  - Convenience methods for setting up standard network topologies, crucially,
- layers
- - Batch normalization/drop out/early stopping
+ layers (WIP)
+ - Batch normalization/drop out/early stopping (WIP, dep. on matrix)
  - Hyperparameter optimisation
  - Other adaptive learning rate algorithms (Adadelta, Adam, etc?)
  - Explore matrix operations and other ways to optimise performance of algorithms
  - RPROP?
- - Use enumerable-statistics gem?
- - Speed up by adding a reduce step to the parallel gem?
  - More examples
  - Tests
 
data/bench/xor_benchmark.rb ADDED
@@ -0,0 +1,55 @@
+ require "bundler/setup"
+ require "rann"
+
+ xor_inputs = [[0,0],[0,1],[1,0],[1,1]]
+ xor_targets = [[0],[1],[1],[0]]
+
+ time = Time.now.to_i
+ results =
+ Array.new(100) do |j|
+ # inputs
+ inputs = Array.new(2){ |i| RANN::Neuron.new "input #{i}", 0, :input }
+
+ # hidden layer
+ hiddens = Array.new(3){ |i| RANN::Neuron.new "hidden #{i}", 3 }
+ bias = RANN::Neuron.new "bias", 0, :bias
+
+ # output layer
+ output = RANN::Neuron.new "output", 3, :output, :sig
+
+ # connect it all
+ connections = []
+ hiddens.each do |h|
+ inputs.each do |i|
+ connections.push RANN::Connection.new i, h
+ end
+ connections.push RANN::Connection.new bias, h
+ connections.push RANN::Connection.new h, output
+ end
+
+ network = RANN::Network.new connections
+ backprop = RANN::Backprop.new network
+
+ i = 0
+ loop do
+ i += 1
+ sample_index = (rand * xor_inputs.size).to_i
+
+ avg_error =
+ backprop.run_batch(
+ [xor_inputs[sample_index].map(&:to_d)],
+ [xor_targets[sample_index].map(&:to_d)],
+ processes: 0,
+ checking: false
+ )
+
+ break if avg_error < 0.0001
+ end
+
+ puts j
+ i
+ end
+
+ taken = Time.now.to_i - time
+ puts results.reduce(:+).fdiv(results.size).round(2)
+ puts "in #{taken}s"
data/lib/rann/backprop.rb CHANGED
@@ -1,6 +1,10 @@
+ require "bigdecimal"
+ require "bigdecimal/util"
  require "parallel"
  require "rann/gradient_checker"
  require "rann/util/array_ext"
+ require "rann/optimisers/adagrad"
+ require "rann/optimisers/rmsprop"
 
  module RANN
  class Backprop
@@ -14,27 +18,16 @@ module RANN
  step: ->(_){ 0.to_d },
  }
 
- DECAY = BigDecimal.new('0.9')
- MASTER_STEP_SIZE = BigDecimal.new('0.01')
- FUDGE_FACTOR = BigDecimal.new('0.00000001')
- LEARNING_RATE = BigDecimal.new('0.01')
- FRICTION = BigDecimal.new('0.8')
- NUM_ITERATIONS_BEFORE_LR_ANNEALING = BigDecimal.new('10')
-
- attr_accessor :network, :lr, :velocities
-
- def initialize network, restore = {}
- @network = network
- @connections_hash = network.connections.each.with_object({}){ |c, h| h[c.id] = c }
- @lr = LEARNING_RATE
- @friction = FRICTION
- @velocities = Hash.new(BigDecimal.new('0'))
- @historical_gradient = (restore[:historical_gradient] || {}).tap{ |h| h.default = 0.to_d }
- @historical_update = Hash.new(MASTER_STEP_SIZE)
- @batch_count = BigDecimal.new('0')
+ attr_accessor :network
+
+ def initialize network, opts = {}, restore = {}
+ @network = network
+ @connections_hash = network.connections.each.with_object({}){ |c, h| h[c.id] = c }
+ @optimiser = RANN::Optimisers.const_get(opts[:optimiser] || 'RMSProp').new opts, restore
+ @batch_count = 0.to_d
  end
 
- def run_batch(inputs, targets, opts = {})
+ def run_batch inputs, targets, opts = {}
  @batch_count += 1
 
  batch_size = inputs.size
@@ -43,37 +36,47 @@ module RANN
 
  # force longer bits of work per iteration, to maximise CPU usage
  # less marshalling data etc, more work.
- grouped_inputs = in_groups inputs, [1, opts[:processes]].max * 10, false
- grouped_results =
- Parallel.map_with_index grouped_inputs, in_processes: opts[:processes] do |inputs, i|
- group_avg_gradients = Hash.new{ |h, k| h[k] = 0.to_d }
- group_avg_error = 0.to_d
+ grouped_inputs = in_groups(inputs, [1, opts[:processes]].max * 10, false).reject &:empty?
+ reduce_proc =
+ lambda do |_, _, result|
+ group_avg_gradients, group_avg_error = result
 
- inputs.each do |input|
- gradients, error = Backprop.run_single(network, input, targets[i])
+ avg_gradients.merge!(group_avg_gradients){ |_, o, n| o + n }
+ avg_batch_error += group_avg_error
+ end
 
- gradients.each do |cid, g|
- group_avg_gradients[cid] += g.div batch_size, 10
- end
- group_avg_error += error.div batch_size, 10
- end
+ Parallel.each_with_index(
+ grouped_inputs,
+ in_processes: opts[:processes],
+ finish: reduce_proc
+ ) do |inputs, i|
+ group_avg_gradients = Hash.new{ |h, k| h[k] = 0.to_d }
+ group_avg_error = 0.to_d
 
- group_avg_gradients.default_proc = nil
- [group_avg_gradients, group_avg_error]
+ inputs.each_with_index do |input, j|
+ gradients, error = Backprop.run_single network, input, targets[i + j]
+
+ gradients.each do |cid, g|
+ group_avg_gradients[cid] += g.div batch_size, 10
+ end
+ group_avg_error += error.div batch_size, 10
  end
 
- grouped_results.each do |group_avg_gradients, group_avg_error|
- avg_gradients.merge!(group_avg_gradients){ |_, o, n| o + n }
- avg_batch_error += group_avg_error
+ group_avg_gradients.default_proc = nil
+ [group_avg_gradients, group_avg_error]
  end
 
  if opts[:checking]
  # check assumes batchsize 1 for now
  sorted_gradients = avg_gradients.values_at *network.connections.map(&:id)
- if GradientChecker.check network, inputs.first, targets.first, sorted_gradients
+ invalid = GradientChecker.check network, inputs.first, targets.first, sorted_gradients
+ if invalid.empty?
  puts "gradient valid"
  else
- puts "gradient INVALID"
+ puts "gradients INVALID for connections:"
+ invalid.each do |i|
+ puts "#{network.connections[i].input_neuron.name} -> #{network.connections[i].output_neuron.name}"
+ end
  end
  end
 
@@ -81,7 +84,7 @@ module RANN
  con = @connections_hash[con_id]
  next if con.locked?
 
- update = adagrad gradient, con.id
+ update = @optimiser.update gradient, con.id
 
  con.weight += update
  end
@@ -107,60 +110,68 @@ module RANN
  error = mse targets, outputs
 
  # backward pass with unravelling for recurrent networks
- deltas = Hash.new{ |h, k| h[k] = Hash.new(0.to_d) }
-
- # outputs first
- network.output_neurons.each.with_index do |o, i|
- activation_derivative = ACTIVATION_DERIVATIVES[o.activation_function]
-
- deltas[0][o.id] = mse_delta(targets[i], outputs[i], activation_derivative)
- end
-
- # remove this push mechanism, shouldn't be necessary and uses extra memory.
- incoming_deltas = Hash.new{ |h, k| h[k] = Hash.new{ |h, k| h[k] = [] } }
- # each timestep backwards through time
- (inputs.size - 1).downto 0 do |t|
- network.output_neurons.each do |o|
- traverse from: o, network: network, timestep: t, deltas: deltas do |other, con|
- if other.context?
- this_t = t - 1
- other = o
- else
- this_t = t
- end
-
- incoming_deltas[this_t][other.id] <<
- deltas[t][o.id].mult(con.weight, 10)
-
- if incoming_deltas[this_t][other.id].size == network.connections_from(other).size
- sum_of_deltas = incoming_deltas[this_t][other.id].reduce(:+)
+ node_deltas = Hash.new{ |h, k| h[k] = Hash.new(0.to_d) }
+ gradients = Hash.new(0)
+
+ initial_timestep = inputs.size - 1
+ neuron_stack = network.output_neurons.map{ |n| [n, initial_timestep] }
+
+ while current = neuron_stack.shift
+ neuron, timestep = current
+ next if node_deltas[timestep].key? neuron
+
+ from_here = bptt_connecting_to neuron, network, timestep
+ neuron_stack.push *from_here
+
+ # neuron delta is summation of neuron deltas deltas for the connections
+ # from this neuron
+ node_delta =
+ if neuron.output?
+ output_index = network.output_neurons.index neuron
+ activation_derivative = ACTIVATION_DERIVATIVES[neuron.activation_function]
+ mse_delta targets[output_index], outputs[output_index], activation_derivative
+ else
+ sum_of_deltas =
+ network.connections_from(neuron).reduce 0.to_d do |m, c|
+ out_timestep = c.output_neuron.context? ? timestep + 1 : timestep
+ output_node_delta = node_deltas[out_timestep][c.output_neuron.id]
+
+ # connection delta is the output neuron delta multiplied by the
+ # connection's weight
+ connection_delta =
+ if c.output_neuron.is_a? ProductNeuron
+ intermediate = states[out_timestep][:intermediates][c.output_neuron.id]
+ output_node_delta.mult intermediate.div(states[timestep][:values][c.input_neuron.id], 10), 10
+ else
+ output_node_delta.mult c.weight, 10
+ end
+
+ m + connection_delta
+ end
 
- deltas[this_t][other.id] =
- ACTIVATION_DERIVATIVES[other.activation_function]
- .call(states[this_t][other.id])
- .mult(sum_of_deltas, 10)
- end
+ ACTIVATION_DERIVATIVES[neuron.activation_function]
+ .call(states[timestep][:values][neuron.id])
+ .mult(sum_of_deltas, 10)
  end
- end
- end
 
- gradients = {}
+ node_deltas[timestep][neuron.id] = node_delta
 
- network.connections.each_with_index do |con, i|
- gradients[con.id] = 0.to_d
- next if con.output_neuron.context?
+ network.connections_to(neuron).each do |c|
+ in_timestep = neuron.context? ? timestep - 1 : timestep
 
- (inputs.size - 1).downto 0 do |t|
- if nd = deltas[t][con.output_neuron.id]
- gradient =
- if con.input_neuron.context?
- t == 0 ? 0.to_d : nd.mult(states[t - 1][con.input_neuron.id], 10)
- else
- nd.mult states[t][con.input_neuron.id], 10
- end
+ # connection gradient is the output neuron delta multipled by the
+ # connection's input neuron value.
+ gradient =
+ if c.output_neuron.is_a? ProductNeuron
+ intermediate = states[timestep][:intermediates][c.output_neuron.id]
+ node_delta.mult intermediate.div(c.weight, 10), 10
+ elsif c.input_neuron.context? && timestep == 0
+ 0.to_d
+ else
+ node_delta.mult states[in_timestep][:values][c.input_neuron.id], 10
+ end
 
- gradients[con.id] += gradient
- end
+ gradients[c.id] += gradient
  end
  end
 
@@ -200,22 +211,16 @@ module RANN
  step_one.mult step_two, 10
  end
 
- def self.traverse from:, network:, timestep:, deltas:, &block
- # halt traversal if reached next timestep.
- return if from.context?
-
- bptt_connecting_to(from, network, timestep, deltas).each do |n, c|
- yield n, c
+ def self.bptt_connecting_to neuron, network, timestep
+ # halt traversal if we're at a context and we're at the base timestep
+ return [] if neuron.context? && timestep == 0
 
- traverse from: n, network: network, timestep: timestep, deltas: deltas, &block
- end
- end
-
- def self.bptt_connecting_to neuron, network, timestep, deltas
  network.connections_to(neuron).each.with_object [] do |c, a|
- unless c.input_neuron.input? || deltas[timestep].key?(c.input_neuron.id)
- a << [c.input_neuron, c]
- end
+ # don't enqueue connections from inputs
+ next if c.input_neuron.input?
+
+ timestep -= timestep if neuron.context?
+ a << [c.input_neuron, timestep]
 
  end
  end
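
The rewritten constructor above swaps the hard-coded AdaGrad hyperparameters for a pluggable optimiser chosen through the options hash (RMSProp by default). A minimal usage sketch of the new signature, assuming a `network` assembled as in the xor benchmark above; the option keys are the ones read by the optimiser classes further down, and the values here are illustrative only:

    require "rann"

    backprop  = RANN::Backprop.new network, optimiser: 'AdaGrad', learning_rate: "0.05".to_d
    avg_error = backprop.run_batch [[0.to_d, 1.to_d]], [[1.to_d]], processes: 0, checking: false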
data/lib/rann/gradient_checker.rb CHANGED
@@ -24,7 +24,9 @@ module RANN
  gradapprox[i] = (error_thetaplus - error_thetaminus).div(EPSILON.mult(2, 10), 10)
  end
 
- gradapprox.each.with_index.all?{ |ga, i| in_epsilon? ga, dvec[i] }
+ gradapprox.each.with_index.with_object [] do |(ga, i), res|
+ res << i unless in_epsilon? ga, dvec[i]
+ end
 
  end
  def self.error outputs, targets
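
`GradientChecker.check` now returns the indices of connections whose backpropagated gradient falls outside epsilon of the numeric estimate, instead of a single boolean; an empty array means every gradient checked out, which is what the `invalid.empty?` branch in backprop.rb above relies on. A rough sketch of the calling pattern, mirroring that code (`network`, `inputs`, `targets` and `sorted_gradients` as in run_batch):

    invalid = RANN::GradientChecker.check network, inputs.first, targets.first, sorted_gradients
    if invalid.empty?
      puts "gradient valid"
    else
      invalid.each { |i| puts network.connections[i].id }
    end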
data/lib/rann/lstm.rb CHANGED
@@ -8,44 +8,46 @@ module RANN
  class LSTM
  attr_reader :network, :inputs, :outputs, :name
 
- def initialize name
+ def initialize name, size
  @name = name
  @network = RANN::Network.new
  @inputs = []
  @outputs = []
+ @size = size
+ init
  end
 
  def init
- @inputs.each.with_index do |input, i|
- f = RANN::Neuron.new("LSTM #{name} F #{i}", 3, :standard, :sig).tap{ |n| @network.add n }
- i = RANN::Neuron.new("LSTM #{name} I #{i}", 4, :standard, :sig).tap{ |n| @network.add n }
- g = RANN::Neuron.new("LSTM #{name} G #{i}", 3, :standard, :tanh).tap{ |n| @network.add n }
- o = RANN::Neuron.new("LSTM #{name} O #{i}", 3, :standard, :sig).tap{ |n| @network.add n }
- bias_f = RANN::Neuron.new("LSTM #{name} Bias F #{i}", 0, :bias).tap do |n|
+ input_bias = RANN::Neuron.new("LSTM #{name} Input Bias", 0, :bias).tap{ |n| @network.add n }
+ @size.times do |j|
+ input = RANN::Neuron.new("LSTM #{name} Input #{j}", 0, :standard).tap{ |n| @network.add n }
+ @inputs << input
+
+ f = RANN::Neuron.new("LSTM #{name} F #{j}", 3, :standard, :sig).tap{ |n| @network.add n }
+ i = RANN::Neuron.new("LSTM #{name} I #{j}", 4, :standard, :sig).tap{ |n| @network.add n }
+ g = RANN::Neuron.new("LSTM #{name} G #{j}", 3, :standard, :tanh).tap{ |n| @network.add n }
+ o = RANN::Neuron.new("LSTM #{name} O #{j}", 3, :standard, :sig).tap{ |n| @network.add n }
+ bias_f = RANN::Neuron.new("LSTM #{name} Bias F #{j}", 0, :bias).tap do |n|
  @network.add n
- n.value = 1.to_d
  end
- bias_i = RANN::Neuron.new("LSTM #{name} Bias I #{i}", 0, :bias).tap do |n|
+ bias_i = RANN::Neuron.new("LSTM #{name} Bias I #{j}", 0, :bias).tap do |n|
  @network.add n
- n.value = 1.to_d
  end
- bias_g = RANN::Neuron.new("LSTM #{name} Bias G #{i}", 0, :bias).tap do |n|
+ bias_g = RANN::Neuron.new("LSTM #{name} Bias G #{j}", 0, :bias).tap do |n|
  @network.add n
- n.value = 1.to_d
  end
- bias_o = RANN::Neuron.new("LSTM #{name} Bias O #{i}", 0, :bias).tap do |n|
+ bias_o = RANN::Neuron.new("LSTM #{name} Bias O #{j}", 0, :bias).tap do |n|
  @network.add n
- n.value = 1.to_d
  end
- memory_product = RANN::ProductNeuron.new("LSTM #{name} Mem Product #{i}", 2, :standard, :linear).tap{ |n| @network.add n }
- i_g_product = RANN::ProductNeuron.new("LSTM #{name} Hidden 2/3 Product #{i}", 2, :standard, :linear).tap{ |n| @network.add n }
- memory_standard = RANN::Neuron.new("LSTM #{name} Mem Standard #{i}", 2, :standard, :linear).tap{ |n| @network.add n }
- memory_tanh = RANN::Neuron.new("LSTM #{name} Mem Tanh #{i}", 1, :standard, :tanh).tap{ |n| @network.add n }
- memory_o_product = RANN::ProductNeuron.new("LSTM #{name} Mem/Hidden 4 Product #{i}", 2, :standard, :linear).tap{ |n| @network.add n }
- output = RANN::Neuron.new("LSTM #{name} Output #{i}", 1, :standard, :linear).tap{ |n| @network.add n }
+ memory_product = RANN::ProductNeuron.new("LSTM #{name} Mem Product #{j}", 2, :standard, :linear).tap{ |n| @network.add n }
+ i_g_product = RANN::ProductNeuron.new("LSTM #{name} Hidden 2/3 Product #{j}", 2, :standard, :linear).tap{ |n| @network.add n }
+ memory_standard = RANN::Neuron.new("LSTM #{name} Mem Standard #{j}", 2, :standard, :linear).tap{ |n| @network.add n }
+ memory_tanh = RANN::Neuron.new("LSTM #{name} Mem Tanh #{j}", 1, :standard, :tanh).tap{ |n| @network.add n }
+ memory_o_product = RANN::ProductNeuron.new("LSTM #{name} Mem/Hidden 4 Product #{j}", 2, :standard, :linear).tap{ |n| @network.add n }
+ output = RANN::Neuron.new("LSTM #{name} Output #{j}", 1, :standard, :linear).tap{ |n| @network.add n }
  @outputs << output
- memory_context = RANN::Neuron.new("LSTM #{name} Mem Context #{i}", 1, :context).tap{ |n| @network.add n }
- output_context = RANN::Neuron.new("LSTM #{name} Output Context #{i}", 1, :context).tap{ |n| @network.add n }
+ memory_context = RANN::Neuron.new("LSTM #{name} Mem Context #{j}", 1, :context).tap{ |n| @network.add n }
+ output_context = RANN::Neuron.new("LSTM #{name} Output Context #{j}", 1, :context).tap{ |n| @network.add n }
 
  @network.add RANN::LockedConnection.new input, f, 1
  @network.add RANN::LockedConnection.new input, i, 1
@@ -72,15 +74,14 @@ module RANN
  @network.add RANN::Connection.new bias_i, i
  @network.add RANN::Connection.new bias_g, g
  @network.add RANN::Connection.new bias_o, o
+ @network.add RANN::Connection.new input_bias, input
  end
  end
 
  def add_input neuron
- input = RANN::Neuron.new "LSTM #{name} Input #{neuron.name}", 0, :standard, :linear
- @network.add input
- @inputs << input
- connection = RANN::Connection.new neuron, input
- @network.add connection
+ @inputs.each do |input|
+ @network.add RANN::Connection.new neuron, input
+ end
  end
  end
  end
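
`RANN::LSTM.new` now takes the number of units and builds its input neurons (plus a shared input bias) up front in `init`, so `add_input` simply wires an external neuron into every existing LSTM input instead of creating a new one per call. A short sketch of the new shape of the API; the external neuron here is illustrative:

    lstm = RANN::LSTM.new "l1", 2              # two units, inputs created internally
    feature = RANN::Neuron.new "feature", 0, :input
    lstm.add_input feature                     # connects `feature` to both LSTM inputs
    lstm.outputs                               # one output neuron per unit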
data/lib/rann/network.rb CHANGED
@@ -97,7 +97,10 @@ module RANN
  end
 
  def state
- neurons.each.with_object({}){ |n, s| s[n.id] = n.value }
+ {
+ values: neurons.each.with_object({}){ |n, s| s[n.id] = n.value },
+ intermediates: neurons.select{ |n| n.is_a? ProductNeuron }.each.with_object({}){ |n, s| s[n.id] = n.intermediate }
+ }
  end
 
  def connections_to neuron
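
`Network#state` now captures two things per timestep: every neuron's value under `:values`, and each ProductNeuron's pre-activation product under `:intermediates`, which the rewritten `Backprop.run_single` reads as `states[t][:values]` and `states[t][:intermediates]`. Roughly, with made-up neuron ids and values:

    network.state
    # => { values:        { 1 => 0.5, 2 => 0.25, 3 => 0.125 },
    #      intermediates: { 3 => 0.125 } }   # only ProductNeuron ids appear here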
data/lib/rann/neuron.rb CHANGED
@@ -5,7 +5,7 @@ require "bigdecimal/util"
  module RANN
  class Neuron
  ACTIVATION_FUNCTIONS = {
- sig: ->(v){ 1.to_d.div(1 + (Math::E ** -v), 10) },
+ sig: ->(v){ 1.to_d.div(1 + Math::E.to_d.power(-v, 10), 10) },
  tanh: ->(v){ Math.tanh(v).to_d(10) },
  relu: ->(v){ [0.to_d, v].max },
  linear: ->(v){ v },
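
The sigmoid now converts `Math::E` to BigDecimal before exponentiating and uses `power`/`div` with an explicit precision, instead of applying `**` to the Float constant. A small sketch of the new expression in isolation (the input value is arbitrary):

    require "bigdecimal"
    require "bigdecimal/util"

    v = "0.5".to_d
    1.to_d.div(1 + Math::E.to_d.power(-v, 10), 10)   # => BigDecimal, ~0.622459331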
data/lib/rann/optimisers/adagrad.rb ADDED
@@ -0,0 +1,21 @@
+ require "bigdecimal"
+ require "bigdecimal/util"
+
+ # refactor to matrix stuff blah blah
+ module RANN
+ module Optimisers
+ class AdaGrad
+ def initialize opts = {}, restore = {}
+ @fudge_factor = opts[:fudge_factor] || 0.00000001.to_d
+ @learning_rate = opts[:learning_rate] || 0.1.to_d
+ @historical_gradient = (restore[:historical_gradient] || {}).tap{ |h| h.default = 0.to_d }
+ end
+
+ def update grad, cid
+ @historical_gradient[cid] = @historical_gradient[cid] + grad.power(2, 10)
+
+ grad.mult(- @learning_rate.div(@fudge_factor + @historical_gradient[cid].sqrt(10), 10), 10)
+ end
+ end
+ end
+ end
data/lib/rann/optimisers/rmsprop.rb ADDED
@@ -0,0 +1,22 @@
+ require "bigdecimal"
+ require "bigdecimal/util"
+
+ # refactor to matrix stuff blah blah
+ module RANN
+ module Optimisers
+ class RMSProp
+ def initialize opts = {}, restore = {}
+ @decay = opts[:decay] || 0.9.to_d
+ @fudge_factor = opts[:fudge_factor] || 0.00000001.to_d
+ @learning_rate = opts[:learning_rate] || 0.01.to_d
+ @historical_gradient = (restore[:historical_gradient] || {}).tap{ |h| h.default = 0.to_d }
+ end
+
+ def update grad, cid
+ @historical_gradient[cid] = @decay.mult(@historical_gradient[cid], 10) + (1 - @decay).mult(grad.power(2, 10), 10)
+
+ grad.mult(- @learning_rate.div(@fudge_factor + @historical_gradient[cid].sqrt(10), 10), 10)
+ end
+ end
+ end
+ end
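
Both new optimisers expose the same interface: `update(gradient, connection_id)` accumulates a per-connection gradient history and returns the signed weight step that `Backprop#run_batch` adds to the connection weight. A standalone sketch with illustrative numbers, assuming the gem's lib directory is on the load path; the connection id can be any hashable key:

    require "rann/optimisers/rmsprop"

    opt  = RANN::Optimisers::RMSProp.new learning_rate: "0.01".to_d, decay: "0.9".to_d
    step = opt.update "0.25".to_d, :some_connection_id
    # step is negative for a positive gradient, so the weight moves downhill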
data/lib/rann/product_neuron.rb CHANGED
@@ -2,9 +2,11 @@ require "rann/neuron"
 
  module RANN
  class ProductNeuron < Neuron
+ attr_accessor :intermediate
+
  def set_value!
- intermediate = incoming.reduce{ |i, m| m.mult i, 10 }
- self.value = ACTIVATION_FUNCTIONS[activation_function].call intermediate
+ @intermediate = incoming.reduce{ |i, m| m.mult(i, 10) }
+ self.value = ACTIVATION_FUNCTIONS[activation_function].call @intermediate
  end
  end
  end
data/lib/rann/version.rb CHANGED
@@ -1,3 +1,3 @@
  module RANN
- VERSION = "0.1.0"
+ VERSION = "0.2.0"
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: rann
  version: !ruby/object:Gem::Version
- version: 0.1.0
+ version: 0.2.0
  platform: ruby
  authors:
  - Michael Campbell
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2017-11-08 00:00:00.000000000 Z
+ date: 2017-11-24 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: parallel
@@ -95,6 +95,7 @@ files:
  - LICENCE
  - README.md
  - Rakefile
+ - bench/xor_benchmark.rb
  - bin/console
  - bin/setup
  - examples/xor.rb
@@ -106,6 +107,8 @@ files:
  - lib/rann/lstm.rb
  - lib/rann/network.rb
  - lib/rann/neuron.rb
+ - lib/rann/optimisers/adagrad.rb
+ - lib/rann/optimisers/rmsprop.rb
  - lib/rann/product_neuron.rb
  - lib/rann/util/array_ext.rb
  - lib/rann/version.rb