backprop 0.0.0.1 → 0.0.1.1
- checksums.yaml +4 -4
- data/README.md +7 -5
- data/VERSION +1 -1
- data/demo/loss.rb +1 -1
- data/lib/backprop.rb +35 -4
- data/lib/perceptron.rb +6 -11
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2c263e60c633a0cad1e917d5bc3754e9c642c7389de1b3aded81dc120bedf163
+  data.tar.gz: 3a1ebc7367aa0ba51ba30b8a88bfdcc71bb24726123c49091b43e9465a93c44a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 16fb1e7ae73410ac405934c103606f723677eaa481e9d993b98caaad14de99fc1cbc4cbc1f3b21c64fd630c9b3cba608b8005fd120f2410e5fd2a0e81cd1bdde
+  data.tar.gz: 02600d7f6ef729f60285b94d033c875aa1c067df52ffff7350ffbe253ebaa35f5bee08efecfba46d57d85eb3012b21c76f995212e4e478466ad30da3d6d03a9a
data/README.md
CHANGED
@@ -1,3 +1,5 @@
+[](https://github.com/rickhull/backprop/actions/workflows/test.yaml)
+
 # Backward Propagation
 
 This is a reimplementation of Andrej Karpathy's
@@ -202,12 +204,12 @@ puts output
 
 Loop:
 
-1.
+1. Run the network forward to generate a new output.
+2. Determine the loss; it should be smaller over time
+3. Backward propagate the gradients
 (derivatives for each value with respect to the output value)
-
-
-The loss should be smaller.
-The new output should be closer to the desired output.
+4. Adjust all weights slightly, according to their gradients.
+
 
 ## Further Reading
 
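The four-step loop in the README hunk above maps onto the API this release lands on: `Value#backward`, the new `Value#descend`, and the `parameters` methods added in data/lib/perceptron.rb (see the hunks below). A minimal training-loop sketch under those assumptions; the `MLP.new(inputs, layer_sizes)` constructor shape, the toy data, and `apply` returning the last layer's outputs as an array are all assumptions for illustration (the gem's real example lives in data/demo/loss.rb):

```ruby
require 'backprop'
require 'perceptron'

net     = BackProp::MLP.new(3, [4, 4, 1])      # constructor shape assumed
inputs  = [[2.0, 3.0, -1.0], [3.0, -1.0, 0.5]] # toy data, illustrative only
targets = [1.0, -1.0]

100.times do
  # 1. run the network forward to generate new outputs
  outputs = inputs.map { |x| net.apply(x).first }

  # 2. determine the loss (sum of squared error); it should shrink over time
  loss = outputs.zip(targets).map { |o, t| (o - t) ** 2 }.reduce(:+)

  # 3. backward propagate the gradients from the loss
  loss.backward

  # 4. adjust all weights slightly, according to their gradients
  net.parameters.each { |p| p.descend(0.1) }
end
```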
data/VERSION
CHANGED
@@ -1 +1 @@
-0.0.0.1
+0.0.1.1
data/demo/loss.rb
CHANGED
data/lib/backprop.rb
CHANGED
@@ -46,8 +46,17 @@ module BackProp
     def +(other)
       other = Value.wrap(other)
       val = Value.new(@value + other.value, children: [self, other], op: :+)
+
+      # What we're about to do here is pretty twisted. We're going to refer
+      # to this execution context in the definition of a lambda, but we'll
+      # evaluate it later.
+      # Backstep is a lambda attached to val, which will be the return value
+      # here. When val.backstep is called later, it will update the gradients
+      # on both self and other.
       val.backstep = -> {
-        # gradients accumulate
+        # gradients accumulate for handling a term used more than once
+        # chain rule says to multiply val's gradient and the op's derivative
+        # derivative of addition is 1.0; pass val's gradient to children
         self.gradient += val.gradient
         other.gradient += val.gradient
       }
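For context, a short illustration of what the backstep closure buys us, assuming `Value.new` accepts a bare number (as the constructor calls above suggest):

```ruby
a = BackProp::Value.new(2.0)
b = BackProp::Value.new(3.0)

c = a + b    # c's backstep closes over a and b; nothing is evaluated yet
c.backward   # seeds c.gradient = 1.0, then calls backstep down the tree
a.gradient   # => 1.0 (addition passes the gradient through unchanged)
b.gradient   # => 1.0

# accumulation matters when a term is used more than once
d = a + a
d.backward
a.gradient   # => 2.0 (one contribution per use of a)
```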
@@ -58,6 +67,7 @@ module BackProp
       other = Value.wrap(other)
       val = Value.new(@value * other.value, children: [self, other], op: :*)
       val.backstep = -> {
+        # derivative of multiplication is the opposite term
         self.gradient += val.gradient * other.value
         other.gradient += val.gradient * self.value
       }
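The "opposite term" comment is just the product rule; a quick numeric check under the same assumptions as above:

```ruby
a = BackProp::Value.new(2.0)
b = BackProp::Value.new(3.0)

c = a * b
c.backward
a.gradient   # => 3.0, i.e. b.value (d(ab)/da = b)
b.gradient   # => 2.0, i.e. a.value (d(ab)/db = a)
```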
@@ -65,15 +75,19 @@ module BackProp
     end
 
     # Mostly we are squaring(2) or dividing(-1)
+    # We don't support expressions, so Value is not supported for other
+    # This will look like a unary op in the tree
     def **(other)
       raise("Value is not supported") if other.is_a? Value
       val = Value.new(@value ** other, children: [self], op: :**)
       val.backstep = -> {
+        # accumulate, chain rule, derivative; as before
         self.gradient += val.gradient * (other * self.value ** (other - 1))
       }
       val
     end
 
+    # e^x - unary operation
     def exp
       val = Value.new(Math.exp(@value), children: [self], op: :exp)
       val.backstep = -> {
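The power and exp rules behave the same way; a small check of the expected gradients (the expected values follow from the standard derivatives, since exp's backstep body is outside this hunk):

```ruby
a = BackProp::Value.new(3.0)

sq = a ** 2       # the exponent must be a plain number, not a Value
sq.backward
a.gradient        # => 6.0, from 2 * 3.0 ** 1

e = a.exp
e.backward        # resets a.gradient first, then applies d/dx e^x = e^x
a.gradient        # => Math.exp(3.0), roughly 20.09
```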
@@ -84,6 +98,7 @@ module BackProp
 
     #
     # Secondary operations defined in terms of primary
+    # These return differentiable Values but with more steps
     #
 
     def -(other)
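This hunk shows only the comment and the start of `def -(other)`; the definitions themselves are not in the diff. One plausible shape for "secondary operations defined in terms of primary", consistent with the squaring(2)/dividing(-1) comment above, would be the following hypothetical reconstruction (not the gem's verbatim source):

```ruby
module BackProp
  class Value
    # hypothetical: subtraction as addition of a negated term
    def -(other)
      self + (Value.wrap(other) * -1)
    end

    # hypothetical: division as multiplication by an inverse power
    def /(other)
      self * (Value.wrap(other) ** -1)
    end
  end
end
```

Defined this way, each secondary op produces an ordinary Value whose gradient flows back through the primary ops it was built from, which is what "differentiable Values but with more steps" describes.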
@@ -96,6 +111,7 @@ module BackProp
 
     #
     # Activation functions
+    # Unary operations
     #
 
     def tanh
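A quick gradient check for the activation section, using the textbook derivative d/dx tanh(x) = 1 - tanh(x)^2 (the tanh implementation itself is outside this hunk):

```ruby
x = BackProp::Value.new(0.0)
y = x.tanh
y.backward
x.gradient   # => 1.0, since tanh(0) = 0 and 1 - 0**2 = 1
```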
@@ -125,22 +141,37 @@ module BackProp
     # Backward propagation
     #
 
+    # Generally, this is called on the final output, say of a loss function
+    # It will initialize the gradients and then update the gradients on
+    # all dependent Values via back propagation
     def backward
-      self.reset_gradient
-      @gradient = 1.0
-      self.backprop
+      self.reset_gradient # set gradient to zero on all descendants
+      @gradient = 1.0     # this node's gradient is 1.0
+      self.backprop       # call backstep on all descendants
     end
 
+    # recursive call; visits all descendants; sets gradient to zero
     def reset_gradient
       @gradient = 0.0
       @children.each(&:reset_gradient)
       self
     end
 
+    # recursive call; visits all descendants; updates gradients via backstep
     def backprop
       self.backstep.call
       @children.each(&:backprop)
       self
     end
+
+    def descend(step_size = 0.1)
+      @value += -1 * step_size * @gradient
+    end
+
+    def descend_recursive(step_size = 0.1)
+      self.descend(step_size)
+      @children.each { |c| c.descend_recursive(step_size) }
+      self
+    end
   end
 end
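Putting the new methods together, a single gradient-descent step on a toy expression might look like this (a sketch against the API above; the variable names and target are illustrative):

```ruby
w = BackProp::Value.new(0.5)
x = BackProp::Value.new(1.5)

loss = ((w * x) - 2.0) ** 2   # squared error against a target of 2.0
loss.backward                  # reset gradients, seed 1.0, walk backstep
w.descend(0.1)                 # nudge w against its gradient

# descend_recursive(0.1) on loss would walk the whole tree instead,
# moving inputs and constants as well as weights, so per-parameter
# descend is the finer-grained tool for training.
```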
data/lib/perceptron.rb
CHANGED
@@ -25,11 +25,8 @@ module BackProp
       sum.send(@activation)
     end
 
-    def
-
-      p.value += (-1 * step_size * p.gradient)
-      }
-      self
+    def parameters
+      @weights + [@bias]
     end
 
     def to_s
@@ -56,9 +53,8 @@ module BackProp
       @neurons.map { |n| n.apply(x) }
     end
 
-    def
-      @neurons.
-      self
+    def parameters
+      @neurons.map { |n| n.parameters }.flatten
     end
 
     def to_s
@@ -87,9 +83,8 @@ module BackProp
       x
     end
 
-    def
-      @layers.
-      self
+    def parameters
+      @layers.map { |l| l.parameters }.flatten
     end
 
     def to_s
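Taken together, the three `parameters` methods flatten the whole network into a single list of weight and bias Values, which pairs with `Value#descend` to replace the removed per-class update methods. A sketch, again assuming an `MLP.new(inputs, layer_sizes)` constructor and one weight per input plus a bias per neuron:

```ruby
# 2 inputs, a hidden layer of 3 neurons, 1 output neuron (shape is assumed)
net = BackProp::MLP.new(2, [3, 1])

# each neuron contributes its weights plus a bias:
# (2+1)*3 + (3+1)*1 = 13 trainable Values
net.parameters.size   # => 13

# after loss.backward, one update step is a one-liner at the call site
net.parameters.each { |p| p.descend(0.1) }
```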