backprop 0.0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +214 -0
- data/Rakefile +22 -0
- data/VERSION +1 -0
- data/backprop.gemspec +17 -0
- data/demo/celsius.rb +11 -0
- data/demo/lol.rb +56 -0
- data/demo/loss.rb +64 -0
- data/demo/neuron.rb +61 -0
- data/lib/backprop.rb +146 -0
- data/lib/perceptron.rb +119 -0
- data/test/backprop.rb +202 -0
- metadata +53 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 30d4ab63e0502df289e6e648ad5c04c5d0ffe4b29a0bd5fce2053809c4879ddd
|
4
|
+
data.tar.gz: 943142da82fb2a4fd4adad13f55a1fe1f1e1713e71128a5c02c48887cc7aa4cb
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: fedcb937e83efec000f8cc944e53b8cf8e61eb5e2ea4eccb60d4431ae230e9a7607adf4172a61cc14e32bfa41344538439c3538d5f4f52b2936df6ddf70827c1
|
7
|
+
data.tar.gz: 8d75ed673305ba9213840974a314caef65029156a23f56040dd99035166b8bb357fc44adff9f79a640d343fd84788a98422d7099b49e651016fa18aa48385108
|
data/README.md
ADDED
@@ -0,0 +1,214 @@
|
|
1
|
+
# Backward Propagation
|
2
|
+
|
3
|
+
This is a reimplementation of Andrej Karpathy's
|
4
|
+
[micrograd](https://github.com/karpathy/micrograd) in Ruby.
|
5
|
+
It has been further simplified and some liberties have been taken with naming.
|
6
|
+
|
7
|
+
# Rationale
|
8
|
+
|
9
|
+
This can be used to train neural nets, typically to minimize a loss function.
|
10
|
+
An efficient way to do this is via gradient descent.
|
11
|
+
Mathematical derivatives and the chain rule from calculus are used to determine
|
12
|
+
inputs with the greatest influence on the output.
|
13
|
+
The inputs are manipulated to minimize the output, represented as the loss
|
14
|
+
function.
|
15
|
+
That is, the output of the neural net is a prediction.
|
16
|
+
The error or loss (prediction compared to the ideal, or known output) is
|
17
|
+
computed for a variety of cases, and the network weights are adjusted to
|
18
|
+
better match the desired output.
|
19
|
+
The smallest loss implies the best performance at a given objective.
|
20
|
+
|
21
|
+
# Examples
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
require 'backprop'
|
25
|
+
|
26
|
+
include BackProp
|
27
|
+
|
28
|
+
# F = ma
|
29
|
+
|
30
|
+
mass = Value.new(25, label: 'mass')
|
31
|
+
acc = Value.new(10, label: 'acc')
|
32
|
+
force = mass * acc
|
33
|
+
force.label = 'force'
|
34
|
+
p force
|
35
|
+
```
|
36
|
+
|
37
|
+
```
|
38
|
+
force(value=250 gradient=0 *(mass=25, acc=10))
|
39
|
+
mass(value=25 gradient=0)
|
40
|
+
acc(value=10 gradient=0)
|
41
|
+
```
|
42
|
+
|
43
|
+
Use backward propagation to determine the gradient (derivative with respect
|
44
|
+
to the caller of `#backward`) for each Value:
|
45
|
+
|
46
|
+
```ruby
|
47
|
+
force.backward
|
48
|
+
p force
|
49
|
+
```
|
50
|
+
|
51
|
+
```
|
52
|
+
force(value=250 gradient=1.0 *(mass=25, acc=10))
|
53
|
+
mass(value=25 gradient=10.0)
|
54
|
+
acc(value=10 gradient=25.0)
|
55
|
+
```
|
56
|
+
|
57
|
+
The gradients have been updated, and the output gradient is 1.0.
|
58
|
+
We have a tree structure, where our inputs, mass and acceleration, are
|
59
|
+
leaf nodes, and they combine via multiplication to make a parent node, or
|
60
|
+
root node in this case, force.
|
61
|
+
By wrapping our numbers in the Value class, whenever we calculate a result,
|
62
|
+
we have a tree structure representing that expression, and we can easily
|
63
|
+
calculate derivatives for every node in the tree.
|
64
|
+
|
65
|
+
# Neural Networks
|
66
|
+
|
67
|
+
## Neuron
|
68
|
+
|
69
|
+
A neuron has a number of inputs which it combines to yield a single output.
|
70
|
+
Traditionally, each input has a weight, and the neuron itself has a bias, or
|
71
|
+
a fixed amount which is added to the weighted sum of the inputs when computing the output.
|
72
|
+
Sum each input value times its input weight, add the bias, and apply an
|
73
|
+
*activation function* which "normalizes" the output to a predictable value,
|
74
|
+
typically between -1.0 and 1.0.
|
75
|
+
In other words, if you send the right combination of signals, you can get the
|
76
|
+
neuron to "fire".
|
77
|
+
|
78
|
+
```ruby
|
79
|
+
require 'perceptron'
|
80
|
+
|
81
|
+
include BackProp
|
82
|
+
|
83
|
+
# create a new neuron with 3 inputs; initial weights and bias are random
|
84
|
+
n = Neuron.new(3)
|
85
|
+
|
86
|
+
puts n
|
87
|
+
#=> N(-0.098, 1.000, 0.064) (0.468 relu)
|
88
|
+
|
89
|
+
p n
|
90
|
+
#=> -0.098| 0.000 1.000| 0.000 0.064| 0.000 0.468| 0.000
|
91
|
+
|
92
|
+
# send 0 to each input
|
93
|
+
output = n.apply(0)
|
94
|
+
|
95
|
+
puts output
|
96
|
+
#=> 0.468
|
97
|
+
|
98
|
+
# output is positive due to rectified linear unit (ReLU) activation function
|
99
|
+
output.value >= 0 #=> true
|
100
|
+
|
101
|
+
# if bias is positive, zero input should result in bias
|
102
|
+
(n.bias.value >= 0) ? (output.value == n.bias.value) : (output.value == 0) #=> true
|
103
|
+
```
|
104
|
+
|
105
|
+
## Layer
|
106
|
+
|
107
|
+
A layer is composed of several neurons.
|
108
|
+
Each neuron has the same number of inputs, so the layer has just a single
|
109
|
+
number of inputs.
|
110
|
+
Each input is sent to each neuron in the layer.
|
111
|
+
If one layer is to feed into another, then the other layer's neurons must have
|
112
|
+
an input count that matches the one layer's neuron count.
|
113
|
+
|
114
|
+
```ruby
|
115
|
+
require 'perceptron'
|
116
|
+
|
117
|
+
include BackProp
|
118
|
+
|
119
|
+
# create a new layer of 4 neurons with 3 inputs
|
120
|
+
l = Layer.new(3, 4)
|
121
|
+
|
122
|
+
puts l
|
123
|
+
```
|
124
|
+
|
125
|
+
```
|
126
|
+
N(0.957, 0.650, 0.995) (-0.530 relu)
|
127
|
+
N(-0.482, 0.272, -0.467) (0.905 relu)
|
128
|
+
N(-0.083, -0.519, -0.921) (-0.811 relu)
|
129
|
+
N(-0.369, -0.688, -0.097) (0.122 relu)
|
130
|
+
```
|
131
|
+
|
132
|
+
```ruby
|
133
|
+
# send 0 to each input
|
134
|
+
output = l.apply(0)
|
135
|
+
|
136
|
+
# returns an array of outputs, one for each neuron
|
137
|
+
output.size == 4 #=> true
|
138
|
+
|
139
|
+
puts output.map(&:value).join(', ')
|
140
|
+
#=> 0.0, 0.90522363833711, 0.0, 0.12226124806686789
|
141
|
+
```
|
142
|
+
|
143
|
+
## Multilayer Perceptron (MLP)
|
144
|
+
|
145
|
+
First, define a number of inputs. Say 5 inputs, like temperature, etc.
|
146
|
+
Often we want a single output, which is the simple case.
|
147
|
+
Multiple outputs are possible but more complicated.
|
148
|
+
A single output could represent the recommended setting on a thermostat.
|
149
|
+
We can define multiple layers of neurons for our neural net which will feed
|
150
|
+
on inputs and yield outputs.
|
151
|
+
|
152
|
+
```ruby
|
153
|
+
require 'perceptron'
|
154
|
+
|
155
|
+
include BackProp
|
156
|
+
|
157
|
+
# create a network with 3 inputs, 2 layers of 4 neurons, and one output neuron
|
158
|
+
n = MLP.new(3, [4, 4, 1])
|
159
|
+
|
160
|
+
puts n
|
161
|
+
```
|
162
|
+
|
163
|
+
```
|
164
|
+
N(0.660, 0.250, -0.387) (-0.677 relu)
|
165
|
+
N(0.931, 0.202, 0.596) (0.861 relu)
|
166
|
+
N(0.101, 0.611, 0.885) (-0.295 relu)
|
167
|
+
N(-0.858, 0.136, 0.091) (-0.309 relu)
|
168
|
+
|
169
|
+
N(-0.594, 0.178, 0.484, -0.208) (0.515 relu)
|
170
|
+
N(-0.295, -0.899, 0.437, -0.812) (-0.200 relu)
|
171
|
+
N(-0.478, 0.230, -0.971, 0.897) (-0.858 relu)
|
172
|
+
N(0.636, 0.719, -0.857, -0.546) (-0.338 relu)
|
173
|
+
|
174
|
+
N(0.962, 0.529, 0.475, -0.837) (-0.362 relu)
|
175
|
+
```
|
176
|
+
|
177
|
+
```ruby
|
178
|
+
# the first layer has 4 neurons, 3 inputs
|
179
|
+
n.layers[0].neurons.size == 4 #=> true
|
180
|
+
n.layers[0].neurons[0].weights.size == 3 #=> true
|
181
|
+
|
182
|
+
# next layer has 4 neurons, 4 inputs
|
183
|
+
n.layers[1].neurons.size == 4 #=> true
|
184
|
+
n.layers[1].neurons[0].weights.size == 4 #=> true
|
185
|
+
|
186
|
+
# final layer has 1 neuron, 4 inputs
|
187
|
+
n.layers[2].neurons.size == 1 #=> true
|
188
|
+
n.layers[2].neurons[0].weights.size == 4 #=> true
|
189
|
+
|
190
|
+
# send 0 to each input
|
191
|
+
output = n.apply(0).first
|
192
|
+
|
193
|
+
# returns an output value corresponding to the output neuron
|
194
|
+
# output is non-negative due to ReLU
|
195
|
+
output.value >= 0 #=> true
|
196
|
+
|
197
|
+
puts output
|
198
|
+
#=> 0.045
|
199
|
+
```
|
200
|
+
|
201
|
+
## Gradient Descent
|
202
|
+
|
203
|
+
Loop:
|
204
|
+
|
205
|
+
1. Backward propagate the gradients
|
206
|
+
(derivatives for each value with respect to the output value)
|
207
|
+
2. Adjust all weights slightly, according to their gradients.
|
208
|
+
3. Run the network forward to generate a new output.
|
209
|
+
The loss should be smaller.
|
210
|
+
The new output should be closer to the desired output.
|
211
|
+
|
212
|
+
## Further Reading
|
213
|
+
|
214
|
+
* [demo/loss.rb](demo/loss.rb)
|
data/Rakefile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'rake/testtask'
|
2
|
+
|
3
|
+
Rake::TestTask.new :test do |t|
|
4
|
+
t.pattern = "test/*.rb"
|
5
|
+
t.warning = true
|
6
|
+
end
|
7
|
+
|
8
|
+
#
|
9
|
+
# GEM BUILD / PUBLISH
|
10
|
+
#
|
11
|
+
|
12
|
+
begin
|
13
|
+
require 'buildar'
|
14
|
+
|
15
|
+
Buildar.new do |b|
|
16
|
+
b.gemspec_file = 'backprop.gemspec'
|
17
|
+
b.version_file = 'VERSION'
|
18
|
+
b.use_git = true
|
19
|
+
end
|
20
|
+
rescue LoadError
|
21
|
+
warn "buildar tasks unavailable"
|
22
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.0.1
|
data/backprop.gemspec
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
Gem::Specification.new do |s|
|
2
|
+
s.name = 'backprop'
|
3
|
+
s.summary = "WIP"
|
4
|
+
s.description = "WIP"
|
5
|
+
s.authors = ["Rick Hull"]
|
6
|
+
s.homepage = "https://github.com/rickhull/backprop"
|
7
|
+
s.license = "LGPL-3.0"
|
8
|
+
|
9
|
+
s.required_ruby_version = "> 2"
|
10
|
+
|
11
|
+
s.version = File.read(File.join(__dir__, 'VERSION')).chomp
|
12
|
+
|
13
|
+
s.files = %w[backprop.gemspec VERSION README.md Rakefile]
|
14
|
+
s.files += Dir['lib/**/*.rb']
|
15
|
+
s.files += Dir['test/**/*.rb']
|
16
|
+
s.files += Dir['demo/**/*.rb']
|
17
|
+
end
|
data/demo/celsius.rb
ADDED
data/demo/lol.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'backprop'
|
2
|
+
|
3
|
+
include BackProp
|
4
|
+
|
5
|
+
a = Value.new(2, label: :a)
|
6
|
+
b = Value.new(-3, label: :b)
|
7
|
+
c = Value.new(10, label: :c)
|
8
|
+
e = a * b; e.label = :e
|
9
|
+
d = e + c; d.label = :d
|
10
|
+
f = Value.new(-2, label: :f)
|
11
|
+
l = d * f; l.label = :L
|
12
|
+
|
13
|
+
puts "Setup:"
|
14
|
+
p l
|
15
|
+
puts
|
16
|
+
|
17
|
+
|
18
|
+
puts "Calculate gradient by hand:"
|
19
|
+
|
20
|
+
l.gradient = 1.0
|
21
|
+
|
22
|
+
# l = d * f; derivative dl/dd = f; dl/df = d
|
23
|
+
f.gradient = d.value
|
24
|
+
d.gradient = f.value
|
25
|
+
|
26
|
+
|
27
|
+
# now c.gradient
|
28
|
+
# that is dL/dc
|
29
|
+
|
30
|
+
# dL/dd is -2
|
31
|
+
# dd/dc is 1
|
32
|
+
# by chain rule (multiply): dL/dc is -2 * 1 = -2
|
33
|
+
|
34
|
+
c.gradient = d.gradient * l.gradient
|
35
|
+
e.gradient = d.gradient * l.gradient
|
36
|
+
|
37
|
+
# now b.gradient (and a.gradient)
|
38
|
+
# e = a * b
|
39
|
+
|
40
|
+
# dL/da = dL/de * de/da
|
41
|
+
a.gradient = e.gradient * b.value
|
42
|
+
b.gradient = e.gradient * a.value
|
43
|
+
|
44
|
+
p l
|
45
|
+
puts
|
46
|
+
|
47
|
+
puts "Reset gradients"
|
48
|
+
l.reset_gradient
|
49
|
+
p l
|
50
|
+
puts
|
51
|
+
|
52
|
+
puts "Calculate gradient via backward:"
|
53
|
+
|
54
|
+
l.backward
|
55
|
+
|
56
|
+
p l
|
data/demo/loss.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# Demo: train a small multilayer perceptron on random data with gradient
# descent, printing the loss each iteration and pausing periodically so the
# network state can be inspected.

require 'perceptron'

include BackProp

num_inputs = 3
num_examples = 6
net_structure = [4, 4, 1]
gradient_step = 0.1
iterations = 999
afn = [:tanh, :sigmoid, :relu].sample

# binary classifier; num_examples sets of inputs that each map to 1 or 0
inputs = BackProp.rand_inputs(num_inputs, num_examples, (-1.0..1.0))
outputs = BackProp.rand_outputs(num_examples, 2)
predictions = []

n = MLP.new(num_inputs, net_structure, activation: afn)

puts "Training Cases:"
inputs.each.with_index { |input, i|
  puts format("%s = %s", input.join(', '), outputs[i].value.inspect)
}
puts

puts "Neural Net:"
puts n
puts

puts "Press Enter to continue"
gets

# the loop count comes from the `iterations` setting above rather than a
# repeated literal, so changing the setting actually changes the run length
iterations.times { |i|
  # 1. apply inputs to the net to yield predictions
  # 2. calculate the loss
  # 3. backward propagate the gradients
  # 4. adjust every neuron in the direction of minimizing loss

  # 1. apply inputs; the net returns an array of outputs, one per neuron in
  #    the final layer, and this net has a single output neuron
  predictions = inputs.map { |input| n.apply(input).first }

  # 2. calculate loss
  loss = BackProp.mean_squared_error(outputs, predictions)
  puts loss

  # 3. propagate the derivatives (gradients) backwards
  loss.backward

  # output every so often, then wait for Enter
  if i % 100 == 0
    p outputs.map(&:value)
    p predictions.map(&:value)
    puts
    p n
    gets
  end

  # 4. adjust all weights and biases towards minimizing loss function
  n.descend(gradient_step)
}

p outputs.map(&:value)
p predictions.map(&:value)
puts n
p n
|
data/demo/neuron.rb
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'backprop'
|
2
|
+
|
3
|
+
include BackProp
|
4
|
+
|
5
|
+
# inputs x1, x2
|
6
|
+
x1 = Value.new(2, label: :x1)
|
7
|
+
x2 = Value.new(0, label: :x2)
|
8
|
+
|
9
|
+
# weights w1, w2
|
10
|
+
w1 = Value.new(-3, label: :w1)
|
11
|
+
w2 = Value.new(1, label: :w2)
|
12
|
+
|
13
|
+
# neuron bias
|
14
|
+
b = Value.new(6.8813735870195432, label: :b)
|
15
|
+
|
16
|
+
xw1 = x1*w1; xw1.label = :xw1
|
17
|
+
xw2 = x2*w2; xw2.label = :xw2
|
18
|
+
|
19
|
+
sum = xw1 + xw2; sum.label = :sum
|
20
|
+
n = sum + b; n.label = :n
|
21
|
+
|
22
|
+
o = n.tanh; o.label = :o
|
23
|
+
|
24
|
+
puts "Calculate gradient by hand:"
|
25
|
+
o.gradient = 1
|
26
|
+
|
27
|
+
# do/dn
|
28
|
+
# d/dx tanh x = 1 - tanh(x)^2
|
29
|
+
|
30
|
+
# 1 - o**2
|
31
|
+
|
32
|
+
n.gradient = 1 - o.value ** 2
|
33
|
+
|
34
|
+
# n = sum + b
|
35
|
+
sum.gradient = n.gradient
|
36
|
+
b.gradient = n.gradient
|
37
|
+
|
38
|
+
# sum = xw1 + xw2
|
39
|
+
xw1.gradient = sum.gradient
|
40
|
+
xw2.gradient = sum.gradient
|
41
|
+
|
42
|
+
# xw1 = x1 * w1
|
43
|
+
x1.gradient = xw1.gradient * w1.value
|
44
|
+
w1.gradient = xw1.gradient * x1.value
|
45
|
+
|
46
|
+
# xw2 = x2 * w2
|
47
|
+
x2.gradient = xw2.gradient * w2.value
|
48
|
+
w2.gradient = xw2.gradient * x2.value
|
49
|
+
|
50
|
+
p o
|
51
|
+
puts
|
52
|
+
|
53
|
+
puts "Reset gradient:"
|
54
|
+
o.reset_gradient
|
55
|
+
p o
|
56
|
+
puts
|
57
|
+
|
58
|
+
puts "Calculate gradient via backprop:"
|
59
|
+
o.backward
|
60
|
+
p o
|
61
|
+
puts
|
data/lib/backprop.rb
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
module BackProp
  # A scalar wrapped as a node in an expression graph.
  #
  # Every operation on a Value returns a new Value that records its operands
  # (children), the operator symbol (op), and a lambda (backstep) which pushes
  # the result's gradient down to its operands via the chain rule.  Calling
  # #backward on a result fills in the gradient of every Value it depends on.
  class Value
    # Return _other_ unchanged when it is already a Value, otherwise wrap it.
    def self.wrap(other)
      other.is_a?(Value) ? other : Value.new(other)
    end

    attr_reader :children
    attr_accessor :value, :label, :gradient, :backstep, :op

    # float     - converted with #to_f and stored as the node's value
    # label:    - optional name used by #to_s and #display
    # op:       - operator symbol; must be given exactly when children are
    # children: - the operand Values this node was computed from
    #
    # Raises RuntimeError when op and children disagree.
    def initialize(float, label: '', op: nil, children: [])
      @value = float.to_f
      @gradient = 0
      @children = children
      if @children.empty?
        raise "op #{op.inspect} has no children" unless op.nil?
      elsif op.nil?
        raise "op is required"
      end
      @op = op
      @label = label
      @backstep = -> {}
    end

    # Short form: the value to 3 decimals, prefixed by the label if present.
    def to_s
      @label.empty? ? ("%.3f" % @value) : format("%s=%.3f", @label, @value)
    end

    # One-line description of this node: name, value, gradient and, for
    # computed nodes, the operator applied to its children.
    def display
      name = @label.empty? ? @op || 'Value' : @label
      head = format("%s(%.3f gradient=%.3f", name, @value, @gradient)
      tail = @op.nil? ? '' : format(" %s(%s)", @op, @children.join(', '))
      head + tail + ')'
    end

    # #display for this node followed by the #inspect of each child.
    def inspect
      return display if @children.empty?

      [display, @children.map(&:inspect).join("\n\t")].join("\n\t")
    end

    #
    # Primary operations; notice every Value.new(op:) also defines a backstep
    # The backstep closes over the environment of the method so it can
    # refer to values present when the method executes
    #

    def +(other)
      other = Value.wrap(other)
      val = Value.new(@value + other.value, children: [self, other], op: :+)
      val.backstep = -> {
        # gradients accumulate to handle a value used multiple times
        self.gradient += val.gradient
        other.gradient += val.gradient
      }
      val
    end

    def *(other)
      other = Value.wrap(other)
      val = Value.new(@value * other.value, children: [self, other], op: :*)
      val.backstep = -> {
        self.gradient += val.gradient * other.value
        other.gradient += val.gradient * self.value
      }
      val
    end

    # Mostly we are squaring(2) or dividing(-1)
    # The exponent must be a plain number; a Value exponent raises.
    def **(other)
      raise("Value is not supported") if other.is_a? Value
      val = Value.new(@value ** other, children: [self], op: :**)
      val.backstep = -> {
        # power rule: d/dx x^n = n * x^(n-1)
        self.gradient += val.gradient * (other * self.value ** (other - 1))
      }
      val
    end

    def exp
      val = Value.new(Math.exp(@value), children: [self], op: :exp)
      val.backstep = -> {
        # d/dx e^x = e^x, which is the value already computed
        self.gradient += val.gradient * val.value
      }
      val
    end

    #
    # Secondary operations defined in terms of primary
    #

    def -(other)
      self + (Value.wrap(other) * Value.new(-1))
    end

    def /(other)
      self * (Value.wrap(other) ** -1)
    end

    #
    # Activation functions
    #

    def tanh
      val = Value.new(Math.tanh(@value), children: [self], op: :tanh)
      val.backstep = -> {
        # d/dx tanh(x) = 1 - tanh(x)^2
        self.gradient += val.gradient * (1 - val.value ** 2)
      }
      val
    end

    # 1 / (1 + e^-x)
    def sigmoid
      ((self * -1).exp + 1) ** -1
    end

    # rectified linear unit; not susceptible to vanishing gradient like above
    def relu
      neg = @value < 0
      val = Value.new(neg ? 0 : @value, children: [self], op: :relu)
      val.backstep = -> {
        self.gradient += val.gradient * (neg ? 0 : 1)
      }
      val
    end

    #
    # Backward propagation
    #

    # Compute the gradient of this Value with respect to every Value it
    # depends on.  Existing gradients are cleared first; this node's own
    # gradient is 1.0 by definition.  Returns self.
    def backward
      reset_gradient
      @gradient = 1.0
      backprop
    end

    # Zero the gradient of this node and of every node reachable from it.
    # Each node is visited once, even when it is reachable by several paths.
    # Returns self.
    def reset_gradient
      topological_order.each { |node| node.gradient = 0.0 }
      self
    end

    # Run the backstep of this node and of every node reachable from it,
    # parents before children, exactly once each.  A node that feeds several
    # parents must receive all of their contributions before it passes its
    # own gradient on; running its backstep once per path would propagate
    # partial sums repeatedly and over-count.  Returns self.
    def backprop
      topological_order.reverse_each { |node| node.backstep.call }
      self
    end

    private

    # All nodes reachable from self (inclusive), ordered so that every node
    # appears after all of its children.  Nodes are compared by identity and
    # the walk is iterative, so deep graphs do not exhaust the call stack.
    def topological_order
      order = []
      seen = {}.compare_by_identity
      stack = [[self, false]]
      until stack.empty?
        node, expanded = stack.pop
        if expanded
          # all children have been emitted by now
          order << node
        elsif !seen.key?(node)
          seen[node] = true
          stack << [node, true]
          node.children.each { |child| stack << [child, false] }
        end
      end
      order
    end
  end
end
|
data/lib/perceptron.rb
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'backprop'
|
2
|
+
|
3
|
+
module BackProp
|
4
|
+
# A single neuron: one weight per input, a bias, and an activation function.
# Weights and bias are Values initialized randomly in -1.0..1.0.
class Neuron
  # available activation functions for Value objects
  ACTIVATION = {
    tanh: :tanh,
    sigmoid: :sigmoid,
    relu: :relu,
  }.freeze

  attr_reader :weights, :bias, :activation

  # input_count - number of inputs (and therefore weights)
  # activation: - a key of ACTIVATION; an unknown key raises KeyError
  def initialize(input_count, activation: :relu)
    @weights = Array.new(input_count) { Value.new(rand(-1.0..1.0)) }
    @bias = Value.new(rand(-1.0..1.0))
    @activation = ACTIVATION.fetch(activation)
  end

  # Compute the neuron's output for the given inputs.
  #
  # x - either one input per weight (an Enumerable), or a single scalar
  #     which is sent to every input
  #
  # Returns the activated Value of (sum of weight * input) + bias.
  # Raises ArgumentError when the number of inputs does not match the number
  # of weights; a missing input would otherwise be treated as 0.0 and
  # surplus inputs would be ignored without notice.
  def apply(x = 0)
    x = Array.new(@weights.size) { x } unless x.is_a?(Enumerable)
    inputs = x.to_a
    if inputs.size != @weights.size
      raise ArgumentError,
            "expected #{@weights.size} inputs, got #{inputs.size}"
    end
    products = @weights.zip(inputs).map { |w, input| w * input }
    sum = products.inject(Value.new(0)) { |memo, val| memo + val } + @bias
    sum.public_send(@activation)
  end

  # Move every weight and the bias against its gradient by step_size.
  # Returns self.
  def descend(step_size)
    (@weights + [@bias]).each { |p|
      p.value += (-1 * step_size * p.gradient)
    }
    self
  end

  def to_s
    format("N(%s)\t(%s %s)", @weights.join(', '), @bias, @activation)
  end

  # Tab-separated "value|gradient" pairs for each weight, then the bias.
  def inspect
    fmt = "% .3f|% .3f"
    @weights.map { |w| format(fmt, w.value, w.gradient) }.join("\t") +
      "\t" + format(fmt, @bias.value, @bias.gradient)
  end
end
|
45
|
+
|
46
|
+
# A group of neurons that all receive the same inputs.
class Layer
  attr_reader :neurons

  # Build output_count neurons, each accepting input_count inputs and using
  # the given activation.
  def initialize(input_count, output_count, activation: :relu)
    @neurons = Array.new(output_count) do
      Neuron.new(input_count, activation: activation)
    end
  end

  # Send x to every neuron; returns an array with one output per neuron.
  def apply(x = 0)
    @neurons.map do |neuron|
      neuron.apply(x)
    end
  end

  # Apply one gradient descent step to every neuron.  Returns self.
  def descend(step_size)
    @neurons.each do |neuron|
      neuron.descend(step_size)
    end
    self
  end

  # One neuron per line.
  def to_s
    @neurons.map(&:to_s).join("\n")
  end

  # One neuron's inspect output per line.
  def inspect
    lines = @neurons.map { |neuron| neuron.inspect }
    lines.join("\n")
  end
end
|
72
|
+
|
73
|
+
# Multilayer perceptron: a chain of layers where each layer's outputs are the
# next layer's inputs.
class MLP
  attr_reader :layers

  # MLP.new(3, [4, 4, 1])
  #
  # input_count   - number of inputs to the first layer
  # output_counts - neuron count for each layer, in order
  def initialize(input_count, output_counts, activation: :relu)
    # consecutive pairs of sizes give each layer's input and output counts
    sizes = [input_count, *output_counts]
    @layers = sizes.each_cons(2).map do |inputs, outputs|
      Layer.new(inputs, outputs, activation: activation)
    end
  end

  # Feed x through every layer in order.  The result is whatever the final
  # layer returns (an array of outputs), or x itself when there are no layers.
  def apply(x = 0)
    @layers.inject(x) do |signal, layer|
      layer.apply(signal)
    end
  end

  # Apply one gradient descent step to every layer.  Returns self.
  def descend(step_size)
    @layers.each do |layer|
      layer.descend(step_size)
    end
    self
  end

  # Layers separated by a blank line.
  def to_s
    @layers.map(&:to_s).join("\n\n")
  end

  # Each layer's inspect output, separated by a blank line.
  def inspect
    sections = @layers.map { |layer| layer.inspect }
    sections.join("\n\n")
  end
end
|
103
|
+
|
104
|
+
# Mean of the squared differences between corresponding elements.
#
# a1 - array of Values (e.g. the known outputs)
# a2 - array of the same size to compare against (e.g. the predictions)
#
# Returns a Value, so the result can be used for backward propagation.
# Raises ArgumentError when a1 is empty (the mean would be NaN) or when the
# arrays differ in size (a missing element would be treated as 0.0).
def self.mean_squared_error(a1, a2)
  raise ArgumentError, "cannot take the mean of zero values" if a1.empty?
  if a1.size != a2.size
    raise ArgumentError,
          "size mismatch: #{a1.size} values vs #{a2.size} values"
  end

  squares = a1.zip(a2).map { |a, b| (a - b) ** 2 }
  squares.inject(Value.new(0)) { |memo, val| memo + val } / a1.size
end
|
109
|
+
|
110
|
+
# Build num_examples rows, each holding num_inputs Values.
# Every value is drawn with rand(rand_arg).
def self.rand_inputs(num_inputs, num_examples, rand_arg)
  Array.new(num_examples) do
    Array.new(num_inputs) do
      Value.new(rand(rand_arg))
    end
  end
end
|
115
|
+
|
116
|
+
# Build num_examples Values, each drawn with rand(rand_arg).
def self.rand_outputs(num_examples, rand_arg)
  Array.new(num_examples) do
    Value.new(rand(rand_arg))
  end
end
|
119
|
+
end
|
data/test/backprop.rb
ADDED
@@ -0,0 +1,202 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'backprop'
|
3
|
+
|
4
|
+
include BackProp
|
5
|
+
|
6
|
+
describe Value do
|
7
|
+
describe "basics" do
|
8
|
+
before do
|
9
|
+
@flt = 2.3
|
10
|
+
@v = Value.new(2.3)
|
11
|
+
end
|
12
|
+
|
13
|
+
it "wraps numeric values, primarily floats" do
|
14
|
+
expect(@v).must_be_kind_of Value
|
15
|
+
expect(@v.value).must_be_kind_of Float
|
16
|
+
expect(@v.value).must_equal @flt
|
17
|
+
end
|
18
|
+
|
19
|
+
it "has several string representations" do
|
20
|
+
expect(@v.to_s).must_be_kind_of String
|
21
|
+
expect(@v.display).must_be_kind_of String
|
22
|
+
expect(@v.inspect).must_be_kind_of String
|
23
|
+
end
|
24
|
+
|
25
|
+
it "creates a tree structure when joined by an operator" do
|
26
|
+
expect(@v.children).must_be_empty
|
27
|
+
sum = @v + 3
|
28
|
+
expect(sum).must_be_kind_of Value
|
29
|
+
expect(sum.children).wont_be_empty
|
30
|
+
expect(sum.op).must_equal :+
|
31
|
+
expect(sum.value).must_be_within_epsilon(@flt + 3)
|
32
|
+
end
|
33
|
+
|
34
|
+
it "keeps track of a gradient value, initialized to zero" do
|
35
|
+
expect(@v.gradient).must_equal 0
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
describe "operations" do
|
40
|
+
it "updates the gradient value when used in a calculation" do
|
41
|
+
|
42
|
+
end
|
43
|
+
|
44
|
+
describe "addition" do
|
45
|
+
before do
|
46
|
+
@a = Value.new(1.0)
|
47
|
+
@b = Value.new(2.0)
|
48
|
+
@sum = @a + @b
|
49
|
+
end
|
50
|
+
|
51
|
+
it "yields a Value" do
|
52
|
+
expect(@sum).must_be_kind_of Value
|
53
|
+
expect(@sum.value).must_be_within_epsilon 3.0
|
54
|
+
end
|
55
|
+
|
56
|
+
it "has a sum parent with _a_ and _b_ as children" do
|
57
|
+
expect(@sum.children).must_include @a
|
58
|
+
expect(@sum.children).must_include @b
|
59
|
+
expect(@sum.op).must_equal :+
|
60
|
+
end
|
61
|
+
|
62
|
+
it "updates child gradients upon back propagation" do
|
63
|
+
expect(@a.gradient).must_equal 0
|
64
|
+
expect(@b.gradient).must_equal 0
|
65
|
+
expect(@sum.gradient).must_equal 0
|
66
|
+
|
67
|
+
@sum.backward
|
68
|
+
expect(@sum.gradient).must_equal 1 # by definition
|
69
|
+
expect(@a.gradient).must_equal 1 # via chain rule for addition
|
70
|
+
expect(@b.gradient).must_equal 1 # via chain rule for addition
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
describe "multiplication" do
  before do
    @a = Value.new(-1)
    @b = Value.new(2.5)
    @prod = @a * @b
  end

  it "yields a Value" do
    expect(@prod).must_be_kind_of Value
    expect(@prod.value).must_be_within_epsilon(-2.5)
  end

  it "has a prod parent with _a_ and _b_ as children" do
    # check both operands; asserting @a twice would leave @b unverified
    expect(@prod.children).must_include @a
    expect(@prod.children).must_include @b
    expect(@prod.op).must_equal :*
  end

  it "updates child gradients upon back propagation" do
    expect(@a.gradient).must_equal 0
    expect(@b.gradient).must_equal 0
    expect(@prod.gradient).must_equal 0

    @prod.backward
    expect(@prod.gradient).must_equal 1
    expect(@a.gradient).must_equal @b.value # via chain rule
    expect(@b.gradient).must_equal @a.value # via chain rule
  end
end
|
103
|
+
|
104
|
+
describe "subtraction" do
|
105
|
+
before do
|
106
|
+
@a = Value.new(10)
|
107
|
+
@b = Value.new(4)
|
108
|
+
@diff = @a - @b
|
109
|
+
end
|
110
|
+
|
111
|
+
it "combines addition with multiplication for negation" do
|
112
|
+
# @a + @b * -1
|
113
|
+
expect(@diff.value).must_be_within_epsilon 6.0
|
114
|
+
expect(@diff.op).wont_equal :-
|
115
|
+
expect(@diff.op).must_equal :+
|
116
|
+
expect(@diff.children).must_include @a
|
117
|
+
expect(@diff.children).wont_include @b
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
describe "pow" do
|
122
|
+
before do
|
123
|
+
@a = Value.new 2
|
124
|
+
@b = 10
|
125
|
+
@pow = @a ** @b
|
126
|
+
end
|
127
|
+
|
128
|
+
it "does not work with right-side Values" do
|
129
|
+
expect { @a ** Value.new(3) }.must_raise
|
130
|
+
end
|
131
|
+
|
132
|
+
it "yields a Value" do
|
133
|
+
expect(@pow).must_be_kind_of Value
|
134
|
+
expect(@pow.value).must_be_within_epsilon 1024.0
|
135
|
+
end
|
136
|
+
|
137
|
+
it "has a pow parent without _b_ in children" do
|
138
|
+
expect(@pow.children).must_include @a
|
139
|
+
expect(@pow.children).wont_include @b
|
140
|
+
expect(@pow.op).must_equal :**
|
141
|
+
end
|
142
|
+
|
143
|
+
it "updates child gradient upon back propagation" do
|
144
|
+
expect(@a.gradient).must_equal 0
|
145
|
+
expect(@pow.gradient).must_equal 0
|
146
|
+
|
147
|
+
@pow.backward
|
148
|
+
expect(@pow.gradient).must_equal 1
|
149
|
+
expect(@a.gradient).must_be_within_epsilon @b * @a.value ** (@b - 1)
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
describe "division" do
|
154
|
+
before do
|
155
|
+
@a = Value.new 19.1
|
156
|
+
@b = Value.new 2.3
|
157
|
+
@quot = @a / @b
|
158
|
+
end
|
159
|
+
|
160
|
+
it "uses pow(-1)" do
|
161
|
+
# @a * @b ** -1
|
162
|
+
expect(@quot.value).must_be_within_epsilon(19.1 / 2.3)
|
163
|
+
expect(@quot.op).wont_equal :/
|
164
|
+
expect(@quot.op).must_equal :*
|
165
|
+
expect(@quot.children).must_include @a
|
166
|
+
expect(@quot.children).wont_include @b
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
describe "exp" do
|
171
|
+
before do
|
172
|
+
@a = Value.new 2.4
|
173
|
+
@exp = @a.exp
|
174
|
+
end
|
175
|
+
|
176
|
+
it "yields a Value" do
|
177
|
+
expect(@exp).must_be_kind_of Value
|
178
|
+
expect(@exp.value).must_be_within_epsilon Math.exp(2.4)
|
179
|
+
end
|
180
|
+
|
181
|
+
it "has exp parent with _a_ in children" do
|
182
|
+
expect(@exp.children).must_include @a
|
183
|
+
expect(@exp.op).must_equal :exp
|
184
|
+
end
|
185
|
+
|
186
|
+
it "updates child gradient upon back propagation" do
|
187
|
+
expect(@a.gradient).must_equal 0
|
188
|
+
expect(@exp.gradient).must_equal 0
|
189
|
+
|
190
|
+
@exp.backward
|
191
|
+
expect(@exp.gradient).must_equal 1
|
192
|
+
expect(@a.gradient).must_equal @exp.value # chain rule / derivative
|
193
|
+
end
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
describe "activation functions" do
|
198
|
+
end
|
199
|
+
|
200
|
+
describe "backward propagation" do
|
201
|
+
end
|
202
|
+
end
|
metadata
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: backprop
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Rick Hull
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 1980-01-01 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: WIP
|
14
|
+
email:
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- README.md
|
20
|
+
- Rakefile
|
21
|
+
- VERSION
|
22
|
+
- backprop.gemspec
|
23
|
+
- demo/celsius.rb
|
24
|
+
- demo/lol.rb
|
25
|
+
- demo/loss.rb
|
26
|
+
- demo/neuron.rb
|
27
|
+
- lib/backprop.rb
|
28
|
+
- lib/perceptron.rb
|
29
|
+
- test/backprop.rb
|
30
|
+
homepage: https://github.com/rickhull/backprop
|
31
|
+
licenses:
|
32
|
+
- LGPL-3.0
|
33
|
+
metadata: {}
|
34
|
+
post_install_message:
|
35
|
+
rdoc_options: []
|
36
|
+
require_paths:
|
37
|
+
- lib
|
38
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - ">"
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '2'
|
43
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
requirements: []
|
49
|
+
rubygems_version: 3.2.26
|
50
|
+
signing_key:
|
51
|
+
specification_version: 4
|
52
|
+
summary: WIP
|
53
|
+
test_files: []
|