backprop 0.0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +214 -0
- data/Rakefile +22 -0
- data/VERSION +1 -0
- data/backprop.gemspec +17 -0
- data/demo/celsius.rb +11 -0
- data/demo/lol.rb +56 -0
- data/demo/loss.rb +64 -0
- data/demo/neuron.rb +61 -0
- data/lib/backprop.rb +146 -0
- data/lib/perceptron.rb +119 -0
- data/test/backprop.rb +202 -0
- metadata +53 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 30d4ab63e0502df289e6e648ad5c04c5d0ffe4b29a0bd5fce2053809c4879ddd
|
4
|
+
data.tar.gz: 943142da82fb2a4fd4adad13f55a1fe1f1e1713e71128a5c02c48887cc7aa4cb
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: fedcb937e83efec000f8cc944e53b8cf8e61eb5e2ea4eccb60d4431ae230e9a7607adf4172a61cc14e32bfa41344538439c3538d5f4f52b2936df6ddf70827c1
|
7
|
+
data.tar.gz: 8d75ed673305ba9213840974a314caef65029156a23f56040dd99035166b8bb357fc44adff9f79a640d343fd84788a98422d7099b49e651016fa18aa48385108
|
data/README.md
ADDED
@@ -0,0 +1,214 @@
|
|
1
|
+
# Backward Propagation
|
2
|
+
|
3
|
+
This is a reimplementation of Andrej Karpathy's
|
4
|
+
[micrograd](https://github.com/karpathy/micrograd) in Ruby.
|
5
|
+
It has been further simplified and some liberties have been taken with naming.
|
6
|
+
|
7
|
+
# Rationale
|
8
|
+
|
9
|
+
This can be used to train neural nets, typically to minimize a loss function.
|
10
|
+
An efficient way to do this is via gradient descent.
|
11
|
+
Mathematical derivatives and the chain rule from calculus are used to determine
|
12
|
+
inputs with the greatest influence on the output.
|
13
|
+
The inputs are manipulated to minimize the output, represented as the loss
|
14
|
+
function.
|
15
|
+
That is, the output of the neural net is a prediction.
|
16
|
+
The error or loss (prediction compared to the ideal, or known output) is
|
17
|
+
computed for a variety of cases, and the network weights are adjusted to
|
18
|
+
better match the desired output.
|
19
|
+
The smallest loss implies the best performance at a given objective.
|
20
|
+
|
21
|
+
# Examples
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
require 'backprop'
|
25
|
+
|
26
|
+
include BackProp
|
27
|
+
|
28
|
+
# F = ma
|
29
|
+
|
30
|
+
mass = Value.new(25, label: 'mass')
|
31
|
+
acc = Value.new(10, label: 'acc')
|
32
|
+
force = mass * acc
|
33
|
+
force.label = 'force'
|
34
|
+
p force
|
35
|
+
```
|
36
|
+
|
37
|
+
```
|
38
|
+
force(value=250 gradient=0 *(mass=25, acc=10))
|
39
|
+
mass(value=25 gradient=0)
|
40
|
+
acc(value=10 gradient=0)
|
41
|
+
```
|
42
|
+
|
43
|
+
Use backward propagation to determine the gradient of each Value (the derivative
|
44
|
+
of the caller of `#backward` with respect to that Value):
|
45
|
+
|
46
|
+
```ruby
|
47
|
+
force.backward
|
48
|
+
p force
|
49
|
+
```
|
50
|
+
|
51
|
+
```
|
52
|
+
force(value=250 gradient=1.0 *(mass=25, acc=10))
|
53
|
+
mass(value=25 gradient=10.0)
|
54
|
+
acc(value=10 gradient=25.0)
|
55
|
+
```
|
56
|
+
|
57
|
+
The gradients have been updated, and the output gradient is 1.0.
|
58
|
+
We have a tree structure, where our inputs, mass and acceleration, are
|
59
|
+
leaf nodes, and they combine via multiplication to make a parent node, or
|
60
|
+
root node in this case, force.
|
61
|
+
By wrapping our numbers in the Value class, whenever we calculate a result,
|
62
|
+
we have a tree structure representing that expression, and we can easily
|
63
|
+
calculate derivatives for every node in the tree.
|
64
|
+
|
65
|
+
# Neural Networks
|
66
|
+
|
67
|
+
## Neuron
|
68
|
+
|
69
|
+
A neuron has a number of inputs which it combines to yield a single output.
|
70
|
+
Traditionally, each input has a weight, and the neuron itself has a bias, or
|
71
|
+
a fixed amount which is added to the weighted sum of the inputs.
|
72
|
+
Sum each input value times its input weight, add the bias, and apply an
|
73
|
+
*activation function* which "normalizes" the output to a predictable value,
|
74
|
+
typically between -1.0 and 1.0.
|
75
|
+
In other words, if you send the right combination of signals, you can get the
|
76
|
+
neuron to "fire".
|
77
|
+
|
78
|
+
```ruby
|
79
|
+
require 'perceptron'
|
80
|
+
|
81
|
+
include BackProp
|
82
|
+
|
83
|
+
# create a new neuron with 3 inputs; initial weights and bias are random
|
84
|
+
n = Neuron.new(3)
|
85
|
+
|
86
|
+
puts n
|
87
|
+
#=> N(-0.098, 1.000, 0.064) (0.468 relu)
|
88
|
+
|
89
|
+
p n
|
90
|
+
#=> -0.098| 0.000 1.000| 0.000 0.064| 0.000 0.468| 0.000
|
91
|
+
|
92
|
+
# send 0 to each input
|
93
|
+
output = n.apply(0)
|
94
|
+
|
95
|
+
puts output
|
96
|
+
#=> 0.468
|
97
|
+
|
98
|
+
# output is non-negative due to rectified linear unit (ReLU) activation function
|
99
|
+
output.value >= 0 #=> true
|
100
|
+
|
101
|
+
# if bias is positive, zero input should result in bias
|
102
|
+
(n.bias.value >= 0) ? (output.value == n.bias.value) : (output.value == 0) #=> true
|
103
|
+
```
|
104
|
+
|
105
|
+
## Layer
|
106
|
+
|
107
|
+
A layer is composed of several neurons.
|
108
|
+
Each neuron has the same number of inputs, so the layer has just a single
|
109
|
+
number of inputs.
|
110
|
+
Each input is sent to each neuron in the layer.
|
111
|
+
If one layer is to feed into another, then the other layer's neurons must have
|
112
|
+
an input count that matches the one layer's neuron count.
|
113
|
+
|
114
|
+
```ruby
|
115
|
+
require 'perceptron'
|
116
|
+
|
117
|
+
include BackProp
|
118
|
+
|
119
|
+
# create a new layer of 4 neurons with 3 inputs
|
120
|
+
l = Layer.new(3, 4)
|
121
|
+
|
122
|
+
puts l
|
123
|
+
```
|
124
|
+
|
125
|
+
```
|
126
|
+
N(0.957, 0.650, 0.995) (-0.530 relu)
|
127
|
+
N(-0.482, 0.272, -0.467) (0.905 relu)
|
128
|
+
N(-0.083, -0.519, -0.921) (-0.811 relu)
|
129
|
+
N(-0.369, -0.688, -0.097) (0.122 relu)
|
130
|
+
```
|
131
|
+
|
132
|
+
```ruby
|
133
|
+
# send 0 to each input
|
134
|
+
output = l.apply(0)
|
135
|
+
|
136
|
+
# returns an array of outputs, one for each neuron
|
137
|
+
output.size == 4 #=> true
|
138
|
+
|
139
|
+
puts output.map(&:value).join(', ')
|
140
|
+
#=> 0.0, 0.90522363833711, 0.0, 0.12226124806686789
|
141
|
+
```
|
142
|
+
|
143
|
+
## Multilayer Perceptron (MLP)
|
144
|
+
|
145
|
+
First, define a number of inputs. Say 5 inputs, like temperature, etc.
|
146
|
+
Often we want a single output, which is the simple case.
|
147
|
+
Multiple outputs are possible but more complicated.
|
148
|
+
A single output could represent the recommended setting on a thermostat.
|
149
|
+
We can define multiple layers of neurons for our neural net which will feed
|
150
|
+
on inputs and yield outputs.
|
151
|
+
|
152
|
+
```ruby
|
153
|
+
require 'perceptron'
|
154
|
+
|
155
|
+
include BackProp
|
156
|
+
|
157
|
+
# create a network with 3 inputs, 2 layers of 4 neurons, and one output neuron
|
158
|
+
n = MLP.new(3, [4, 4, 1])
|
159
|
+
|
160
|
+
puts n
|
161
|
+
```
|
162
|
+
|
163
|
+
```
|
164
|
+
N(0.660, 0.250, -0.387) (-0.677 relu)
|
165
|
+
N(0.931, 0.202, 0.596) (0.861 relu)
|
166
|
+
N(0.101, 0.611, 0.885) (-0.295 relu)
|
167
|
+
N(-0.858, 0.136, 0.091) (-0.309 relu)
|
168
|
+
|
169
|
+
N(-0.594, 0.178, 0.484, -0.208) (0.515 relu)
|
170
|
+
N(-0.295, -0.899, 0.437, -0.812) (-0.200 relu)
|
171
|
+
N(-0.478, 0.230, -0.971, 0.897) (-0.858 relu)
|
172
|
+
N(0.636, 0.719, -0.857, -0.546) (-0.338 relu)
|
173
|
+
|
174
|
+
N(0.962, 0.529, 0.475, -0.837) (-0.362 relu)
|
175
|
+
```
|
176
|
+
|
177
|
+
```ruby
|
178
|
+
# the first layer has 4 neurons, 3 inputs
|
179
|
+
n.layers[0].neurons.size == 4 #=> true
|
180
|
+
n.layers[0].neurons[0].weights.size == 3 #=> true
|
181
|
+
|
182
|
+
# next layer has 4 neurons, 4 inputs
|
183
|
+
n.layers[1].neurons.size == 4 #=> true
|
184
|
+
n.layers[1].neurons[0].weights.size == 4 #=> true
|
185
|
+
|
186
|
+
# final layer has 1 neuron, 4 inputs
|
187
|
+
n.layers[2].neurons.size == 1 #=> true
|
188
|
+
n.layers[2].neurons[0].weights.size == 4 #=> true
|
189
|
+
|
190
|
+
# send 0 to each input
|
191
|
+
output = n.apply(0).first
|
192
|
+
|
193
|
+
# returns an output value corresponding to the output neuron
|
194
|
+
# output is non-negative due to ReLU
|
195
|
+
output.value >= 0 #=> true
|
196
|
+
|
197
|
+
puts output
|
198
|
+
#=> 0.045
|
199
|
+
```
|
200
|
+
|
201
|
+
## Gradient Descent
|
202
|
+
|
203
|
+
Loop:
|
204
|
+
|
205
|
+
1. Backward propagate the gradients
|
206
|
+
(derivatives for each value with respect to the output value)
|
207
|
+
2. Adjust all weights slightly, according to their gradients.
|
208
|
+
3. Run the network forward to generate a new output.
|
209
|
+
The loss should be smaller.
|
210
|
+
The new output should be closer to the desired output.
|
211
|
+
|
212
|
+
## Further Reading
|
213
|
+
|
214
|
+
* [demo/loss.rb](demo/loss.rb)
|
data/Rakefile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'rake/testtask'
|
2
|
+
|
3
|
+
Rake::TestTask.new :test do |t|
|
4
|
+
t.pattern = "test/*.rb"
|
5
|
+
t.warning = true
|
6
|
+
end
|
7
|
+
|
8
|
+
#
|
9
|
+
# GEM BUILD / PUBLISH
|
10
|
+
#
|
11
|
+
|
12
|
+
begin
|
13
|
+
require 'buildar'
|
14
|
+
|
15
|
+
Buildar.new do |b|
|
16
|
+
b.gemspec_file = 'backprop.gemspec'
|
17
|
+
b.version_file = 'VERSION'
|
18
|
+
b.use_git = true
|
19
|
+
end
|
20
|
+
rescue LoadError
|
21
|
+
warn "buildar tasks unavailable"
|
22
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.0.1
|
data/backprop.gemspec
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
Gem::Specification.new do |s|
|
2
|
+
s.name = 'backprop'
|
3
|
+
s.summary = "WIP"
|
4
|
+
s.description = "WIP"
|
5
|
+
s.authors = ["Rick Hull"]
|
6
|
+
s.homepage = "https://github.com/rickhull/backprop"
|
7
|
+
s.license = "LGPL-3.0"
|
8
|
+
|
9
|
+
s.required_ruby_version = "> 2"
|
10
|
+
|
11
|
+
s.version = File.read(File.join(__dir__, 'VERSION')).chomp
|
12
|
+
|
13
|
+
s.files = %w[backprop.gemspec VERSION README.md Rakefile]
|
14
|
+
s.files += Dir['lib/**/*.rb']
|
15
|
+
s.files += Dir['test/**/*.rb']
|
16
|
+
s.files += Dir['demo/**/*.rb']
|
17
|
+
end
|
data/demo/celsius.rb
ADDED
data/demo/lol.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'backprop'
|
2
|
+
|
3
|
+
include BackProp
|
4
|
+
|
5
|
+
a = Value.new(2, label: :a)
|
6
|
+
b = Value.new(-3, label: :b)
|
7
|
+
c = Value.new(10, label: :c)
|
8
|
+
e = a * b; e.label = :e
|
9
|
+
d = e + c; d.label = :d
|
10
|
+
f = Value.new(-2, label: :f)
|
11
|
+
l = d * f; l.label = :L
|
12
|
+
|
13
|
+
puts "Setup:"
|
14
|
+
p l
|
15
|
+
puts
|
16
|
+
|
17
|
+
|
18
|
+
puts "Calculate gradient by hand:"
|
19
|
+
|
20
|
+
l.gradient = 1.0
|
21
|
+
|
22
|
+
# l = d * f; derivative dl/dd = f; dl/df = d
|
23
|
+
f.gradient = d.value
|
24
|
+
d.gradient = f.value
|
25
|
+
|
26
|
+
|
27
|
+
# now c.gradient
|
28
|
+
# that is dL/dc
|
29
|
+
|
30
|
+
# dL/dd is -2
|
31
|
+
# dd/dc is 1
|
32
|
+
# by chain rule (multiply): dL/dc is -2 * 1 = -2
|
33
|
+
|
34
|
+
c.gradient = d.gradient * l.gradient
|
35
|
+
e.gradient = d.gradient * l.gradient
|
36
|
+
|
37
|
+
# now b.gradient (and a.gradient)
|
38
|
+
# e = a * b
|
39
|
+
|
40
|
+
# dL/da = dL/de * de/da
|
41
|
+
a.gradient = e.gradient * b.value
|
42
|
+
b.gradient = e.gradient * a.value
|
43
|
+
|
44
|
+
p l
|
45
|
+
puts
|
46
|
+
|
47
|
+
puts "Reset gradients"
|
48
|
+
l.reset_gradient
|
49
|
+
p l
|
50
|
+
puts
|
51
|
+
|
52
|
+
puts "Calculate gradient via backward:"
|
53
|
+
|
54
|
+
l.backward
|
55
|
+
|
56
|
+
p l
|
data/demo/loss.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# Demo: train a small multilayer perceptron as a binary classifier on random
# data, printing the loss on every iteration and pausing periodically so the
# outputs, predictions and network state can be inspected.

require 'perceptron'

include BackProp

num_inputs = 3
num_examples = 6
net_structure = [4, 4, 1]
gradient_step = 0.1
iterations = 999
afn = [:tanh, :sigmoid, :relu].sample

# binary classifier; num_examples sets of inputs that each map to 1 or 0
inputs = BackProp.rand_inputs(num_inputs, num_examples, (-1.0..1.0))
outputs = BackProp.rand_outputs(num_examples, 2)
predictions = []

n = MLP.new(num_inputs, net_structure, activation: afn)

puts "Training Cases:"
inputs.each.with_index { |input, i|
  puts format("%s = %s", input.join(', '), outputs[i].value.inspect)
}
puts

puts "Neural Net:"
puts n
puts

puts "Press Enter to continue"
gets

# the number of training rounds is controlled by `iterations` above
iterations.times { |i|
  # 1. apply inputs to the net to yield predictions
  # 2. calculate the loss
  # 3. backward propagate the gradients
  # 4. adjust every neuron in the direction of minimizing loss

  # 1. apply inputs
  # MLP#apply returns an array (one Value per output neuron); take the only one
  predictions = inputs.map { |input| n.apply(input).first }

  # 2. calculate loss
  loss = BackProp.mean_squared_error(outputs, predictions)
  puts loss

  # 3. propagate the derivatives (gradients) backwards
  loss.backward

  # output every so often
  if i % 100 == 0
    p outputs.map(&:value)
    p predictions.map(&:value)
    puts
    p n
    gets
  end

  # 4. adjust all weights and biases towards minimizing loss function
  n.descend(gradient_step)
}

p outputs.map(&:value)
p predictions.map(&:value)
puts n
p n
|
data/demo/neuron.rb
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'backprop'
|
2
|
+
|
3
|
+
include BackProp
|
4
|
+
|
5
|
+
# inputs x1, x2
|
6
|
+
x1 = Value.new(2, label: :x1)
|
7
|
+
x2 = Value.new(0, label: :x2)
|
8
|
+
|
9
|
+
# weights w1, w2
|
10
|
+
w1 = Value.new(-3, label: :w1)
|
11
|
+
w2 = Value.new(1, label: :w2)
|
12
|
+
|
13
|
+
# neuron bias
|
14
|
+
b = Value.new(6.8813735870195432, label: :b)
|
15
|
+
|
16
|
+
xw1 = x1*w1; xw1.label = :xw1
|
17
|
+
xw2 = x2*w2; xw2.label = :xw2
|
18
|
+
|
19
|
+
sum = xw1 + xw2; sum.label = :sum
|
20
|
+
n = sum + b; n.label = :n
|
21
|
+
|
22
|
+
o = n.tanh; o.label = :o
|
23
|
+
|
24
|
+
puts "Calculate gradient by hand:"
|
25
|
+
o.gradient = 1
|
26
|
+
|
27
|
+
# do/dn
|
28
|
+
# d/dx tanh x = 1 - tanh(x)^2
|
29
|
+
|
30
|
+
# 1 - o**2
|
31
|
+
|
32
|
+
n.gradient = 1 - o.value ** 2
|
33
|
+
|
34
|
+
# n = sum + b
|
35
|
+
sum.gradient = n.gradient
|
36
|
+
b.gradient = n.gradient
|
37
|
+
|
38
|
+
# sum = xw1 + xw2
|
39
|
+
xw1.gradient = sum.gradient
|
40
|
+
xw2.gradient = sum.gradient
|
41
|
+
|
42
|
+
# xw1 = x1 * w1
|
43
|
+
x1.gradient = xw1.gradient * w1.value
|
44
|
+
w1.gradient = xw1.gradient * x1.value
|
45
|
+
|
46
|
+
# xw2 = x2 * w2
|
47
|
+
x2.gradient = xw2.gradient * w2.value
|
48
|
+
w2.gradient = xw2.gradient * x2.value
|
49
|
+
|
50
|
+
p o
|
51
|
+
puts
|
52
|
+
|
53
|
+
puts "Reset gradient:"
|
54
|
+
o.reset_gradient
|
55
|
+
p o
|
56
|
+
puts
|
57
|
+
|
58
|
+
puts "Calculate gradient via backprop:"
|
59
|
+
o.backward
|
60
|
+
p o
|
61
|
+
puts
|
data/lib/backprop.rb
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
module BackProp
  # A scalar node in an expression graph.
  #
  # Every arithmetic operation on a Value yields a new Value that remembers
  # its operands (children), the operation that produced it (op), and a
  # lambda (backstep) which applies the chain rule to push this node's
  # gradient down to its children.  Calling #backward on a result fills in
  # the gradient of every Value that contributed to it.
  class Value
    # Return _other_ untouched if it is already a Value; otherwise wrap it.
    def self.wrap(other)
      other.is_a?(Value) ? other : Value.new(other)
    end

    attr_reader :children
    attr_accessor :value, :label, :gradient, :backstep, :op

    # float:    anything responding to #to_f
    # label:    optional name used by #to_s and #display
    # op:       the operation that produced this value; required if and only
    #           if children are given
    # children: the operand Values this value was computed from
    #
    # Raises RuntimeError when op and children are inconsistent.
    def initialize(float, label: '', op: nil, children: [])
      @value = float.to_f
      # Float zero, consistent with #reset_gradient
      @gradient = 0.0
      @children = children
      if @children.empty?
        raise "op #{op.inspect} has no children" unless op.nil?
      else
        raise "op is required" if op.nil?
      end
      @op = op
      @label = label
      # leaf values have nothing to propagate
      @backstep = -> {}
    end

    # Short form: "label=1.234", or just "1.234" without a label
    def to_s
      @label.empty? ? ("%.3f" % @value) : format("%s=%.3f", @label, @value)
    end

    # One-line description of this node: value, gradient, and (for computed
    # values) the op along with its operands
    def display
      format("%s(%.3f gradient=%.3f",
             @label.empty? ? @op || 'Value' : @label, @value, @gradient) +
        (@op.nil? ? '' :
           format(" %s(%s)", @op, @children.join(', '))) + ')'
    end

    # #display for this node followed by the #inspect of each child
    def inspect
      @children.empty? ? self.display :
        [self.display, @children.map(&:inspect).join("\n\t")].join("\n\t")
    end

    #
    # Primary operations; notice every Value.new(op:) also defines a backstep
    # The backstep closes over the environment of the method so it can
    # refer to values present when the method executes
    #

    def +(other)
      other = Value.wrap(other)
      val = Value.new(@value + other.value, children: [self, other], op: :+)
      val.backstep = -> {
        # gradients accumulate to handle a value used multiple times
        self.gradient += val.gradient
        other.gradient += val.gradient
      }
      val
    end

    def *(other)
      other = Value.wrap(other)
      val = Value.new(@value * other.value, children: [self, other], op: :*)
      val.backstep = -> {
        self.gradient += val.gradient * other.value
        other.gradient += val.gradient * self.value
      }
      val
    end

    # Mostly we are squaring(2) or dividing(-1)
    # The exponent must be a plain number; a Value exponent raises
    def **(other)
      raise("Value is not supported") if other.is_a? Value
      val = Value.new(@value ** other, children: [self], op: :**)
      val.backstep = -> {
        # power rule: d/dx x^n = n * x^(n-1)
        self.gradient += val.gradient * (other * self.value ** (other - 1))
      }
      val
    end

    def exp
      val = Value.new(Math.exp(@value), children: [self], op: :exp)
      val.backstep = -> {
        # d/dx e^x = e^x, which is val.value
        self.gradient += val.gradient * val.value
      }
      val
    end

    #
    # Secondary operations defined in terms of primary
    #

    def -(other)
      self + (Value.wrap(other) * Value.new(-1))
    end

    def /(other)
      self * (Value.wrap(other) ** -1)
    end

    #
    # Activation functions
    #

    def tanh
      val = Value.new(Math.tanh(@value), children: [self], op: :tanh)
      val.backstep = -> {
        # d/dx tanh(x) = 1 - tanh(x)^2
        self.gradient += val.gradient * (1 - val.value ** 2)
      }
      val
    end

    # 1 / (1 + e^-x)
    def sigmoid
      ((self * -1).exp + 1) ** -1
    end

    # rectified linear unit; not susceptible to vanishing gradient like above
    def relu
      neg = @value < 0
      val = Value.new(neg ? 0 : @value, children: [self], op: :relu)
      val.backstep = -> {
        self.gradient += val.gradient * (neg ? 0 : 1)
      }
      val
    end

    #
    # Backward propagation
    #

    # Compute the derivative of this value with respect to every value that
    # contributed to it.  Existing gradients are cleared first, so calling
    # this repeatedly does not accumulate.  Returns self.
    def backward
      reset_gradient
      @gradient = 1.0
      backprop
    end

    # Zero the gradient of this value and of everything below it.
    # Returns self.
    def reset_gradient
      topological_order.each { |node| node.gradient = 0.0 }
      self
    end

    # Run each node's backstep exactly once, parents before children, so a
    # value that feeds several consumers has received all of its gradient
    # before passing it on.  A naive recursive walk would re-run the backstep
    # of a shared node once per path and over-count.  Returns self.
    def backprop
      topological_order.each { |node| node.backstep.call }
      self
    end

    private

    # Every node reachable from self, each listed once, ordered so that a
    # node always appears before any of its children.  Iterative depth-first
    # search (reverse post-order), so deep graphs do not exhaust the stack.
    def topological_order
      seen = {}.compare_by_identity
      postorder = []
      stack = [[self, false]]
      until stack.empty?
        node, expanded = stack.pop
        if expanded
          # all descendants have been emitted by now
          postorder << node
        elsif !seen.key?(node)
          seen[node] = true
          stack << [node, true]
          node.children.each { |child|
            stack << [child, false] unless seen.key?(child)
          }
        end
      end
      postorder.reverse
    end
  end
end
|
data/lib/perceptron.rb
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'backprop'
|
2
|
+
|
3
|
+
module BackProp
  # A neuron holds one weight per input plus a bias, all stored as Values.
  # Applying it computes activation(sum(weight * input) + bias).
  class Neuron
    # available activation functions for Value objects
    ACTIVATION = {
      tanh: :tanh,
      sigmoid: :sigmoid,
      relu: :relu,
    }

    attr_reader :weights, :bias, :activation

    # Weights and bias start at random values in -1.0..1.0.
    # Raises KeyError for an activation not listed in ACTIVATION.
    def initialize(input_count, activation: :relu)
      @weights = Array.new(input_count) { Value.new(rand(-1.0..1.0)) }
      @bias = Value.new(rand(-1.0..1.0))
      @activation = ACTIVATION.fetch(activation)
    end

    # x may be a single item, which is sent to every input, or a collection
    # with one item per weight.  Returns the activated output Value.
    # Raises ArgumentError if a sized collection does not match the number
    # of weights; a missing item would otherwise silently count as zero.
    def apply(x = 0)
      x = Array.new(@weights.size) { x } unless x.is_a? Enumerable
      given = x.respond_to?(:size) ? x.size : nil
      if given && given != @weights.size
        raise ArgumentError,
              format("expected %d inputs, got %d", @weights.size, given)
      end
      sum = @weights.map.with_index { |w, i|
        w * x[i]
      }.inject(Value.new(0)) { |memo, val| memo + val } + @bias
      sum.send(@activation)
    end

    # Move every weight and the bias against its gradient, scaled by
    # step_size.  Returns self.
    def descend(step_size)
      (@weights + [@bias]).each { |p|
        p.value += (-1 * step_size * p.gradient)
      }
      self
    end

    def to_s
      format("N(%s)\t(%s %s)", @weights.join(', '), @bias, @activation)
    end

    # value|gradient pairs for each weight, then for the bias
    def inspect
      fmt = "% .3f|% .3f"
      @weights.map { |w| format(fmt, w.value, w.gradient) }.join("\t") +
        "\t" + format(fmt, @bias.value, @bias.gradient)
    end
  end

  # A layer is a group of neurons which all receive the same inputs
  class Layer
    attr_reader :neurons

    # output_count neurons, each accepting input_count inputs
    def initialize(input_count, output_count, activation: :relu)
      @neurons = Array.new(output_count) {
        Neuron.new(input_count, activation: activation)
      }
    end

    # Returns an array with one output Value per neuron
    def apply(x = 0)
      @neurons.map { |n| n.apply(x) }
    end

    def descend(step_size)
      @neurons.each { |n| n.descend(step_size) }
      self
    end

    def to_s
      @neurons.join("\n")
    end

    def inspect
      @neurons.map(&:inspect).join("\n")
    end
  end

  # Multilayer perceptron: a chain of layers where each layer's outputs are
  # the next layer's inputs
  class MLP
    attr_reader :layers

    # MLP.new(3, [4, 4, 1])
    def initialize(input_count, output_counts, activation: :relu)
      # each consecutive pair of sizes is (inputs, neurons) for one layer
      @layers = [input_count, *output_counts].each_cons(2).map { |ins, outs|
        Layer.new(ins, outs, activation: activation)
      }
    end

    # Feed x through every layer in turn.  Always returns the final layer's
    # array of outputs, even when that layer has only one neuron.
    def apply(x = 0)
      @layers.inject(x) { |input, layer| layer.apply(input) }
    end

    def descend(step_size)
      @layers.each { |l| l.descend(step_size) }
      self
    end

    def to_s
      @layers.join("\n\n")
    end

    def inspect
      @layers.map(&:inspect).join("\n\n")
    end
  end

  # Mean of the squared pairwise differences, returned as a Value.
  # a1 must contain Values; a2 may contain Values or plain numbers.
  # Raises ArgumentError when the sizes differ (a missing item would
  # silently count as zero) or when there is nothing to average.
  def self.mean_squared_error(a1, a2)
    if a1.size != a2.size
      raise ArgumentError,
            format("size mismatch: %d vs %d", a1.size, a2.size)
    end
    raise ArgumentError, "at least one value is required" if a1.empty?
    a1.map.with_index { |a, i|
      (a - a2[i]) ** 2
    }.inject(Value.new(0)) { |memo, val| memo + val } / a1.size
  end

  # num_examples arrays, each holding num_inputs Values of rand(rand_arg)
  def self.rand_inputs(num_inputs, num_examples, rand_arg)
    Array.new(num_examples) {
      Array.new(num_inputs) { Value.new rand(rand_arg) }
    }
  end

  # num_examples Values of rand(rand_arg)
  def self.rand_outputs(num_examples, rand_arg)
    Array.new(num_examples) { Value.new rand(rand_arg) }
  end
end
|
data/test/backprop.rb
ADDED
@@ -0,0 +1,202 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'backprop'
|
3
|
+
|
4
|
+
include BackProp
|
5
|
+
|
6
|
+
describe Value do
|
7
|
+
describe "basics" do
|
8
|
+
before do
|
9
|
+
@flt = 2.3
|
10
|
+
@v = Value.new(2.3)
|
11
|
+
end
|
12
|
+
|
13
|
+
it "wraps numeric values, primarily floats" do
|
14
|
+
expect(@v).must_be_kind_of Value
|
15
|
+
expect(@v.value).must_be_kind_of Float
|
16
|
+
expect(@v.value).must_equal @flt
|
17
|
+
end
|
18
|
+
|
19
|
+
it "has several string representations" do
|
20
|
+
expect(@v.to_s).must_be_kind_of String
|
21
|
+
expect(@v.display).must_be_kind_of String
|
22
|
+
expect(@v.inspect).must_be_kind_of String
|
23
|
+
end
|
24
|
+
|
25
|
+
it "creates a tree structure when joined by an operator" do
|
26
|
+
expect(@v.children).must_be_empty
|
27
|
+
sum = @v + 3
|
28
|
+
expect(sum).must_be_kind_of Value
|
29
|
+
expect(sum.children).wont_be_empty
|
30
|
+
expect(sum.op).must_equal :+
|
31
|
+
expect(sum.value).must_be_within_epsilon(@flt + 3)
|
32
|
+
end
|
33
|
+
|
34
|
+
it "keeps track of a gradient value, initialized to zero" do
|
35
|
+
expect(@v.gradient).must_equal 0
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
describe "operations" do
|
40
|
+
it "updates the gradient value when used in a calculation" do
|
41
|
+
|
42
|
+
end
|
43
|
+
|
44
|
+
describe "addition" do
|
45
|
+
before do
|
46
|
+
@a = Value.new(1.0)
|
47
|
+
@b = Value.new(2.0)
|
48
|
+
@sum = @a + @b
|
49
|
+
end
|
50
|
+
|
51
|
+
it "yields a Value" do
|
52
|
+
expect(@sum).must_be_kind_of Value
|
53
|
+
expect(@sum.value).must_be_within_epsilon 3.0
|
54
|
+
end
|
55
|
+
|
56
|
+
it "has a sum parent with _a_ and _b_ as children" do
|
57
|
+
expect(@sum.children).must_include @a
|
58
|
+
expect(@sum.children).must_include @b
|
59
|
+
expect(@sum.op).must_equal :+
|
60
|
+
end
|
61
|
+
|
62
|
+
it "updates child gradients upon back propagation" do
|
63
|
+
expect(@a.gradient).must_equal 0
|
64
|
+
expect(@b.gradient).must_equal 0
|
65
|
+
expect(@sum.gradient).must_equal 0
|
66
|
+
|
67
|
+
@sum.backward
|
68
|
+
expect(@sum.gradient).must_equal 1 # by definition
|
69
|
+
expect(@a.gradient).must_equal 1 # via chain rule for addition
|
70
|
+
expect(@b.gradient).must_equal 1 # via chain rule for addition
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
describe "multiplication" do
|
75
|
+
before do
|
76
|
+
@a = Value.new(-1)
|
77
|
+
@b = Value.new(2.5)
|
78
|
+
@prod = @a * @b
|
79
|
+
end
|
80
|
+
|
81
|
+
it "yields a Value" do
|
82
|
+
expect(@prod).must_be_kind_of Value
|
83
|
+
expect(@prod.value).must_be_within_epsilon(-2.5)
|
84
|
+
end
|
85
|
+
|
86
|
+
it "has a prod parent with _a_ and _b_ as children" do
  # both operands must be recorded as children of the product
  expect(@prod.children).must_include @a
  expect(@prod.children).must_include @b
  expect(@prod.op).must_equal :*
end
|
91
|
+
|
92
|
+
it "updates child gradients upon back propagation" do
|
93
|
+
expect(@a.gradient).must_equal 0
|
94
|
+
expect(@b.gradient).must_equal 0
|
95
|
+
expect(@prod.gradient).must_equal 0
|
96
|
+
|
97
|
+
@prod.backward
|
98
|
+
expect(@prod.gradient).must_equal 1
|
99
|
+
expect(@a.gradient).must_equal @b.value # via chain rule
|
100
|
+
expect(@b.gradient).must_equal @a.value # via chain rule
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
describe "subtraction" do
|
105
|
+
before do
|
106
|
+
@a = Value.new(10)
|
107
|
+
@b = Value.new(4)
|
108
|
+
@diff = @a - @b
|
109
|
+
end
|
110
|
+
|
111
|
+
it "combines addition with multiplication for negation" do
|
112
|
+
# @a + @b * -1
|
113
|
+
expect(@diff.value).must_be_within_epsilon 6.0
|
114
|
+
expect(@diff.op).wont_equal :-
|
115
|
+
expect(@diff.op).must_equal :+
|
116
|
+
expect(@diff.children).must_include @a
|
117
|
+
expect(@diff.children).wont_include @b
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
describe "pow" do
|
122
|
+
before do
|
123
|
+
@a = Value.new 2
|
124
|
+
@b = 10
|
125
|
+
@pow = @a ** @b
|
126
|
+
end
|
127
|
+
|
128
|
+
it "does not work with right-side Values" do
|
129
|
+
expect { @a ** Value.new(3) }.must_raise
|
130
|
+
end
|
131
|
+
|
132
|
+
it "yields a Value" do
|
133
|
+
expect(@pow).must_be_kind_of Value
|
134
|
+
expect(@pow.value).must_be_within_epsilon 1024.0
|
135
|
+
end
|
136
|
+
|
137
|
+
it "has a pow parent without _b_ in children" do
|
138
|
+
expect(@pow.children).must_include @a
|
139
|
+
expect(@pow.children).wont_include @b
|
140
|
+
expect(@pow.op).must_equal :**
|
141
|
+
end
|
142
|
+
|
143
|
+
it "updates child gradient upon back propagation" do
|
144
|
+
expect(@a.gradient).must_equal 0
|
145
|
+
expect(@pow.gradient).must_equal 0
|
146
|
+
|
147
|
+
@pow.backward
|
148
|
+
expect(@pow.gradient).must_equal 1
|
149
|
+
expect(@a.gradient).must_be_within_epsilon @b * @a.value ** (@b - 1)
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
describe "division" do
|
154
|
+
before do
|
155
|
+
@a = Value.new 19.1
|
156
|
+
@b = Value.new 2.3
|
157
|
+
@quot = @a / @b
|
158
|
+
end
|
159
|
+
|
160
|
+
it "uses pow(-1)" do
|
161
|
+
# @a * @b ** -1
|
162
|
+
expect(@quot.value).must_be_within_epsilon(19.1 / 2.3)
|
163
|
+
expect(@quot.op).wont_equal :/
|
164
|
+
expect(@quot.op).must_equal :*
|
165
|
+
expect(@quot.children).must_include @a
|
166
|
+
expect(@quot.children).wont_include @b
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
describe "exp" do
|
171
|
+
before do
|
172
|
+
@a = Value.new 2.4
|
173
|
+
@exp = @a.exp
|
174
|
+
end
|
175
|
+
|
176
|
+
it "yields a Value" do
|
177
|
+
expect(@exp).must_be_kind_of Value
|
178
|
+
expect(@exp.value).must_be_within_epsilon Math.exp(2.4)
|
179
|
+
end
|
180
|
+
|
181
|
+
it "has exp parent with _a_ in children" do
|
182
|
+
expect(@exp.children).must_include @a
|
183
|
+
expect(@exp.op).must_equal :exp
|
184
|
+
end
|
185
|
+
|
186
|
+
it "updates child gradient upon back propagation" do
|
187
|
+
expect(@a.gradient).must_equal 0
|
188
|
+
expect(@exp.gradient).must_equal 0
|
189
|
+
|
190
|
+
@exp.backward
|
191
|
+
expect(@exp.gradient).must_equal 1
|
192
|
+
expect(@a.gradient).must_equal @exp.value # chain rule / derivative
|
193
|
+
end
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
describe "activation functions" do
|
198
|
+
end
|
199
|
+
|
200
|
+
describe "backward propagation" do
|
201
|
+
end
|
202
|
+
end
|
metadata
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: backprop
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Rick Hull
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 1980-01-01 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: WIP
|
14
|
+
email:
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- README.md
|
20
|
+
- Rakefile
|
21
|
+
- VERSION
|
22
|
+
- backprop.gemspec
|
23
|
+
- demo/celsius.rb
|
24
|
+
- demo/lol.rb
|
25
|
+
- demo/loss.rb
|
26
|
+
- demo/neuron.rb
|
27
|
+
- lib/backprop.rb
|
28
|
+
- lib/perceptron.rb
|
29
|
+
- test/backprop.rb
|
30
|
+
homepage: https://github.com/rickhull/backprop
|
31
|
+
licenses:
|
32
|
+
- LGPL-3.0
|
33
|
+
metadata: {}
|
34
|
+
post_install_message:
|
35
|
+
rdoc_options: []
|
36
|
+
require_paths:
|
37
|
+
- lib
|
38
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - ">"
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '2'
|
43
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
requirements: []
|
49
|
+
rubygems_version: 3.2.26
|
50
|
+
signing_key:
|
51
|
+
specification_version: 4
|
52
|
+
summary: WIP
|
53
|
+
test_files: []
|