rb-deeplearning-lib 0.0.1 → 0.0.2 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rb_deeplearning_lib/__init__.py +3 -3
- rb_deeplearning_lib/autogradient.py +275 -275
- rb_deeplearning_lib/neural_net.py +194 -194
- rb_deeplearning_lib/sequence.py +23 -23
- {rb_deeplearning_lib-0.0.1.dist-info → rb_deeplearning_lib-0.0.2.dist-info}/METADATA +37 -37
- rb_deeplearning_lib-0.0.2.dist-info/RECORD +9 -0
- {rb_deeplearning_lib-0.0.1.dist-info → rb_deeplearning_lib-0.0.2.dist-info}/licenses/LICENSE +20 -20
- rb_deeplearning_lib-0.0.1.dist-info/RECORD +0 -9
- {rb_deeplearning_lib-0.0.1.dist-info → rb_deeplearning_lib-0.0.2.dist-info}/WHEEL +0 -0
- {rb_deeplearning_lib-0.0.1.dist-info → rb_deeplearning_lib-0.0.2.dist-info}/top_level.txt +0 -0
rb_deeplearning_lib/__init__.py
CHANGED
@@ -1,3 +1,3 @@
Every line is marked removed and re-added, but the old and new text is identical. The file reads:

import autogradient
import sequence
import neural_net
rb_deeplearning_lib/autogradient.py
CHANGED
@@ -1,275 +1,275 @@
Every line of the file is marked removed and re-added, but the old and new text is identical. The file reads:

import numpy as np
class Values:
    def __init__(self, vals, grads = True):
        self.vals = np.array(vals)
        self._backward = lambda: None
        self.grad = np.zeros_like(vals)
        self.grad_flag = grads

    #Gemini code that handles wierd broadcasting errors
    @staticmethod
    def _broadcast_grad(grad_from_output, original_shape):
        """
        Adjusts the gradient by summing over broadcasted dimensions to match the original shape.
        Assumes `grad_from_output` has the shape of the broadcasted result.
        `original_shape` is the shape of the tensor *before* broadcasting.
        """

        if grad_from_output.shape == original_shape:
            return grad_from_output

        # If original was a scalar, sum all dimensions of the gradient
        if original_shape == (): # scalar
            return np.sum(grad_from_output)

        # Identify axes where original_shape has size 1 and grad_from_output has size > 1
        axes_to_sum = []

        # Handle cases where original_shape might have fewer dimensions than grad_from_output
        # e.g., (D,) broadcast to (N, D). original_shape is padded implicitly with 1s on the left.
        diff_ndim = grad_from_output.ndim - len(original_shape)
        padded_original_shape = (1,) * diff_ndim + original_shape

        for i in range(grad_from_output.ndim):
            if padded_original_shape[i] == 1 and grad_from_output.shape[i] > 1:
                axes_to_sum.append(i)

        if axes_to_sum:
            return np.sum(grad_from_output, axis=tuple(axes_to_sum), keepdims=True)

        return grad_from_output # No broadcasting to sum over, or complex broadcasting not handled by this simple logic.

    def __repr__(self):
        return "vals: " + self.vals.__repr__() + "\ngrads: "+ self.grad.__repr__()

    def __add__(self, other):
        other = other if isinstance(other, Values) else Values(other)

        out = Values(self.vals + other.vals)
        def backward():
            if self.grad_flag:
                self_grad_to_add = Values._broadcast_grad(out.grad, self.vals.shape)
                self.grad = self.grad + self_grad_to_add
                self._backward()
            if other.grad_flag:
                other_grad_to_add = Values._broadcast_grad(out.grad, other.vals.shape)
                other.grad = other.grad + other_grad_to_add
                other._backward()

        out._backward = backward
        return out
    def __radd__(self, other):
        return self + other

    def __neg__(self):
        return self * -1

    def __sub__(self, other):
        return self + (-other)

    def __rsub__(self, other):
        return other + (-self)

    def __mul__(self, other):
        other = other if isinstance(other, Values) else Values(other)

        out = Values(self.vals * other.vals)
        def backward():
            if self.grad_flag:
                # Gradient for self is `other.vals * out.grad`
                grad_term_for_self = other.vals * out.grad
                self.grad =self.grad + Values._broadcast_grad(grad_term_for_self, self.vals.shape)
            if other.grad_flag:
                # Gradient for other is `self.vals * out.grad`
                grad_term_for_other = self.vals * out.grad
                other.grad =other.grad + Values._broadcast_grad(grad_term_for_other, other.vals.shape)
                other._backward()
            if self.grad_flag:
                self._backward()

        out._backward = backward
        return out

    def __rmul__(self, other): #other * self
        return self * other

    def __truediv__(self, other):
        other = other if isinstance(other, Values) else Values(other)

        out = Values(self.vals / other.vals)
        def backward():
            if self.grad_flag:
                grad_term_for_self = out.grad / other.vals
                self.grad = self.grad + Values._broadcast_grad(grad_term_for_self, self.vals.shape)
            if other.grad_flag:
                grad_term_for_other = -out.grad * self.vals / (other.vals**2)
                other.grad = other.grad + Values._broadcast_grad(grad_term_for_other, other.vals.shape)
                other._backward()
            if self.grad_flag:
                self._backward()
        out._backward = backward
        return out

    def __rtruediv__(self, other):
        return Values(other) / self

    def __matmul__(self, other):
        other = other if isinstance(other, Values) else Values(other)
        out = Values(self.vals @ other.vals)
        def backward():
            if self.grad_flag:
                # out.grad @ other.vals.T matches self.vals shape, so no broadcast_grad needed here.
                self.grad = self.grad + out.grad @ other.vals.T
            if other.grad_flag:
                # self.vals.T @ out.grad matches other.vals shape, so no broadcast_grad needed here.
                other.grad = other.grad + self.vals.T @ out.grad
                other._backward()
            if self.grad_flag:
                self._backward()
        out._backward = backward
        return out

    def __rmatmul__(self, other):
        return Values(other) @ self

    def exp(self):
        out = Values(np.exp(self.vals))
        def backward():
            if self.grad_flag:
                grad_term = out.vals * out.grad
                self.grad = self.grad + Values._broadcast_grad(grad_term, self.vals.shape)
                self._backward()
        out._backward = backward
        return out

    def __pow__(self, other):
        assert isinstance(other, (int, float)), "only supporting int/float powers for now"
        out = Values(self.vals**other)
        def backward():
            if self.grad_flag:
                grad_term = other * (self.vals**(other - 1)) * out.grad
                self.grad =self.grad + Values._broadcast_grad(grad_term, self.vals.shape)
                self._backward()
        out._backward = backward
        return out

    def log(self):
        out = Values(np.log(self.vals))
        def backward():
            if self.grad_flag:
                grad_term = self.vals**-1 *out.grad
                self.grad = self.grad + Values._broadcast_grad(grad_term, self.vals.shape)
                self._backward()
        out._backward = backward
        return out

    def relu(self):
        out = Values(np.maximum(0,self.vals))
        def backward():
            if self.grad_flag:
                grad_term = out.grad * (self.vals > 0)
                self.grad = self.grad + Values._broadcast_grad(grad_term, self.vals.shape)
                self._backward()
        out._backward = backward
        return out

    def abs(self):
        out = Values(np.abs(self.vals))
        def backward():
            if self.grad_flag:
                # Gradient of abs(x) is np.sign(x)
                grad_term = out.grad * np.sign(self.vals)
                self.grad = self.grad + Values._broadcast_grad(grad_term, self.vals.shape)
                self._backward()
        out._backward = backward
        return out

    def sum(self, axis=None, keepdims=False):
        out_vals = np.sum(self.vals, axis=axis, keepdims=keepdims)
        out = Values(out_vals)
        saved_axis = axis
        saved_keepdims = keepdims
        original_shape = self.vals.shape

        def backward():
            if self.grad_flag:
                grad_to_distribute = out.grad

                if saved_axis is not None and not saved_keepdims:
                    new_shape = list(original_shape)
                    if isinstance(saved_axis, int):
                        new_shape[saved_axis] = 1
                    else:
                        for ax in saved_axis:
                            new_shape[ax] = 1
                    grad_to_distribute = grad_to_distribute.reshape(new_shape)

                self.grad = self.grad + Values._broadcast_grad(grad_to_distribute, self.vals.shape)
                self._backward()
        out._backward = backward
        return out

    def softmax(self, axis=-1):
        max_val = self.vals.max(axis=axis, keepdims=True)
        exp_vals = (self - max_val).exp()
        sum_exp_vals = exp_vals.sum(axis=axis, keepdims=True)
        out = Values(exp_vals / sum_exp_vals)

        def backward():
            if self.grad_flag:
                sum_grad_times_out = (out.vals * out.grad).sum(axis=axis, keepdims=True)
                grad_term = out.vals * (out.grad - sum_grad_times_out)
                self.grad = self.grad + Values._broadcast_grad(grad_term, self.vals.shape)
                self._backward()
        out._backward = backward
        return out

    def mean(self):
        out = Values(np.mean(self.vals))
        def backward():
            if self.grad_flag:
                # For mean, out.grad is a scalar. Adding a scalar to self.grad (an array)
                # implicitly broadcasts the scalar across all elements, which is the correct behavior
                # for the gradient of a mean operation.
                self.grad = self.grad + (out.grad / self.vals.size)
                self._backward()
        out._backward = backward
        return out

    def __call__(self):
        return self

    def __getitem__(self, item):
        out = Values(self.vals[item])
        saved_item = item
        def backward():
            if self.grad_flag:
                temp_grad = np.zeros_like(self.vals)
                temp_grad[saved_item] = out.grad
                self._backward()
        out._backward = backward
        return out

    def backward(self):
        self.grad = np.ones_like(self.vals)
        self._backward()

    def __getattr__(self, name):
        if name == "_":
            return self
        if name == "T" or name == "transpose":
            out = Values(self.vals.T)
            def backward():
                if self.grad_flag:
                    # out.grad is the shape of out.vals (transposed vals), so out.grad.T is the shape of self.vals.
                    # No broadcast_grad needed here.
                    self.grad = self.grad + out.grad.T
                    self._backward()
                return
            out._backward = backward
            return out

        if hasattr(self.vals, name) and not callable(getattr(self.vals, name)):
            return getattr(self.vals, name)

        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}' or attribute is not supported for automatic differentiation.")
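As a quick illustration of the autograd interface above, here is a minimal Python sketch. It is not part of the package diff; it assumes the Values class from autogradient.py is already in scope (the exact import path is not shown in the diff and is left as an assumption), and it uses only the methods listed above.

import numpy as np
# Values is assumed to be in scope, exactly as defined in autogradient.py above.

# Forward pass: a tiny affine map followed by relu, reduced to a scalar.
x = Values(np.array([[1.0, 2.0], [3.0, 4.0]]))
w = Values(np.array([[0.5], [0.5]]))
b = Values(np.array([[0.1]]))

loss = (x @ w + b).relu().mean()

# Backward pass: backward() seeds loss.grad with ones and walks the recorded _backward chain.
loss.backward()

print(loss.vals)   # scalar forward value
print(w.grad)      # d(loss)/d(w), accumulated on the weight tensor
print(b.grad)      # d(loss)/d(b), reduced back to b's (1, 1) shape by _broadcast_grad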
rb_deeplearning_lib/neural_net.py
CHANGED
@@ -1,194 +1,194 @@
Every line of the file is marked removed and re-added, but the old and new text is identical. The file reads:

class Layer:
    def __init__(self, input,out,activ="_",rangeW=(-1,1),rangeB=(-1,1)):
        self.weights = Values((rangeW[0]-rangeW[1])*np.random.rand(input,out)+rangeW[1])
        self.bias = Values((rangeB[0]-rangeB[1])*np.random.rand(1,out)+rangeB[1])
        self.activation = activ

    def __call__(self, x):
        y = x @ self.weights + self.bias
        if self.activation == "_": # No activation function
            return y
        else:
            # Get the method corresponding to the activation string and call it.
            # This will now correctly find methods like y.relu() or y.softmax().
            # If self.activation is not a valid method name, it will raise an AttributeError.
            activation_func = getattr(y, self.activation)
            return activation_func()

    def params(self):
        return self.weights, self.bias

    def updateParams(self, l_rate):
        self.weights.vals = self.weights.vals - l_rate * self.weights.grad
        self.bias.vals = self.bias.vals - l_rate * self.bias.grad
        self.weights.grad = self.weights.grad * 0
        self.bias.grad = self.bias.grad * 0

class Dense:
    def __init__(self, layNum, inL, midL, outL, activ="_",f_activ="_",rangeW=(-0.1,0.1),rangeB=(-0.1,0.1)):
        if layNum < 1:
            print("Dense can't have 0 layers or below.")
        elif layNum == 1:
            self.seq = Sequence([Layer(inL,outL,f_activ,rangeW,rangeB)])
        else:
            lays = []
            for i in range(layNum):
                if i == 0:
                    lays.append(Layer(inL,midL,activ,rangeW,rangeB))
                elif i == layNum-1:
                    lays.append(Layer(midL,outL,f_activ,rangeW,rangeB))
                else:
                    lays.append(Layer(midL,midL,activ,rangeW,rangeB))
            self.seq = Sequence(lays)

    def __call__(self, x):
        return self.seq(x)

    def params(self):
        return self.seq.params()
    def updateParams(self, l_rate):
        self.seq.updateParams(l_rate)

class Dropout:
    def __init__(self, size, chance):
        self.size = size
        self.rate = chance
        self.inTrain = False

    def __call__(self, x):
        r = self.rate
        if r <= 0.0 or not self.inTrain:
            return x
        elif r >= 1.0:
            return Values(np.zeros_like(x.vals))

        do = Values(np.random.choice([0,1/(1-r)],size=(self.size,),p=[r, 1-r]))
        return x * do

    def updateParams(self, l_rate): #here so errors don't occur when update params is called for the whole sequence
        return

#loss functions
def cross_entropy_loss(y_true, y_pred):
    loss = -(y_true * y_pred.log()).sum(axis=1).mean()
    return loss

def mse_loss(y_true, y_pred):
    loss = ((y_true - y_pred)**2).mean()
    return loss




class Model:
    def __init__(self, blocks, regu = "", train = True, loss_fn=None, pen_fn = None):
        self.blocks = Sequence(blocks)
        self.regu = regu
        self.inTrain = train
        self.train_loss = []
        self.val_loss = []
        # Set default loss function to cross-entropy if not provided
        if loss_fn is None:
            self.loss_fn = cross_entropy_loss
        else:
            self.loss_fn = loss_fn

        if pen_fn is None:
            def emptyPenFn(loss_prev, model, _lambda):
                return loss_prev
            pen_fn = emptyPenFn
        self.pen_fn = pen_fn

    def __call__(self, x):
        x_ = x if isinstance(x, Values) else Values(x)
        return self.blocks(x_)

    def train(self, epochs, x_t, y_t, x_v, y_v, val_run=1, l_rate=0.01, _lambda=0.1, batch_size = None):
        x_trn = x_t if isinstance(x_t, Values) else Values(x_t)
        y_trn = y_t if isinstance(y_t, Values) else Values(y_t)
        x_vl = x_v if isinstance(x_v, Values) else Values(x_v)
        y_vl = y_v if isinstance(y_v, Values) else Values(y_v)
        x_trn.grad_flag = y_trn.grad_flag = x_vl.grad_flag = y_vl.grad_flag = False

        for l in self.blocks.arr:
            if isinstance(l, Dropout):
                l.inTrain = True

        if not batch_size:
            batch_size = len(x_trn.vals)

        batches = 0
        if len(x_trn.vals) % batch_size == 0:
            batches = int(len(x_trn.vals) / batch_size)
        else:
            batches = int(len(x_trn.vals) / batch_size + 1)

        bat = np.array(range(batches))


        loss_strt = len(self.train_loss)
        if loss_strt != 0:
            loss_strt = int(self.train_loss[-1][0] + 1)
        for i in range(epochs):
            if i % val_run == 0:
                for l in self.blocks.arr:
                    if isinstance(l, Dropout):
                        l.inTrain = False
                y_val_hat = self.__call__(x_vl)
                val_loss_value = self.loss_fn(y_vl, y_val_hat).vals
                print(f"epoch: {i} \t loss: {val_loss_value}")
                self.val_loss.append((loss_strt+i,val_loss_value))
                for l in self.blocks.arr:
                    if isinstance(l, Dropout):
                        l.inTrain = True
            np.random.shuffle(bat)
            for b in range(batches):
                print(f"\rep{i}: b{b}/{batches}", end="")
                x_train_batch = x_trn[bat[b]*batch_size:(bat[b]+1)*batch_size]
                y_train_batch = y_trn[bat[b]*batch_size:(bat[b]+1)*batch_size]

                y_hat = self.__call__(x_train_batch)

                # Calculate loss using the specified loss_fn
                current_loss = self.loss_fn(y_train_batch, y_hat)

                self.train_loss.append((loss_strt+i + 1.0*b/batches,current_loss.vals))
                penalized_loss = self.pen_fn(current_loss,self,_lambda)
                penalized_loss.grad = np.ones_like(penalized_loss.vals)
                penalized_loss.backward()
                self.blocks.updateParams(l_rate)
            print("\r", end="")

        for l in self.blocks.arr:
            if isinstance(l, Dropout):
                l.inTrain = False

        loss_strt = len(self.train_loss)
        if loss_strt != 0:
            loss_strt = int(self.train_loss[-1][0] + 1)

        y_val_hat = self.__call__(x_vl)
        val_loss_value = self.loss_fn(y_vl, y_val_hat).vals # Use loss_fn for validation too
        print(f"epoch: {epochs} \t loss: {val_loss_value}") # Generic 'loss' instead of 'cross_entropy loss'
        self.val_loss.append((loss_strt,val_loss_value))

#penalty functions
def l2_reg(loss_prev, model, _lambda):
    l2_pen = Values(0.0)

    for block in model.blocks.arr:
        if isinstance(block, Dense) or isinstance(block, Layer):
            weights, _ = block.params()
            for weis in weights:
                l2_pen = l2_pen + (weis**2).sum()
    return loss_prev + _lambda * l2_pen

def l1_reg(loss_prev, model, _lambda):
    l1_pen = Values(0.0)

    for block in model.blocks.arr:
        if isinstance(block, Dense) or isinstance(block, Layer):
            weights, _ = block.params()
            for weis in weights:
                l1_pen = l1_pen + (weis.abs()).sum()
    return loss_prev + _lambda * l1_pen
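To show how the blocks, loss functions, and penalty functions above fit together, here is a hedged sketch of one manual training step. It is not part of the package diff; it assumes Values, Sequence, Layer, Dense, Dropout, Model, mse_loss, and l2_reg are all in scope together with numpy as np (neural_net.py itself references Values, Sequence, and np without importing them, so how those names are brought together is an assumption).

import numpy as np
# Layer/Dense/Dropout/Model/mse_loss/l2_reg and Values/Sequence are assumed to be in scope.

# 4 inputs -> two relu layers of width 8 -> dropout -> 1 linear output.
model = Model(
    [Dense(2, 4, 8, 8, activ="relu", f_activ="relu"),
     Dropout(8, 0.2),
     Layer(8, 1)],                 # default activ="_" means no activation
    loss_fn=mse_loss,
    pen_fn=l2_reg,
)

x = Values(np.random.rand(16, 4)); x.grad_flag = False   # inputs carry no gradient
y = Values(np.random.rand(16, 1)); y.grad_flag = False

y_hat = model(x)                   # Dropout passes data through unchanged while inTrain is False
step_loss = l2_reg(mse_loss(y, y_hat), model, 0.1)        # data loss plus L2 weight penalty
step_loss.backward()               # accumulate gradients into every weight and bias
model.blocks.updateParams(0.01)    # SGD step; updateParams also zeroes the gradients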
rb_deeplearning_lib/sequence.py
CHANGED
@@ -1,23 +1,23 @@
Every line of the file is marked removed and re-added, but the old and new text is identical. The file reads:

class Sequence:
    def __init__(self, arr):
        self.arr = arr

    def __call__(self, x):
        x_i = x
        for item in self.arr:
            x_i = item(x_i)
        return x_i

    def params(self):
        weis = []
        biases = []
        for l in self.arr:
            w, b = l.params()
            weis.append(w)
            biases.append(b)

        return weis, biases

    def updateParams(self, l_rate):
        for l in self.arr:
            l.updateParams(l_rate)
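A Sequence simply threads its input through each block in order; a tiny sketch (not part of the package diff, assuming Sequence and Values are in scope):

double = lambda v: v * 2      # any callable works for __call__
add_one = lambda v: v + 1

pipe = Sequence([double, add_one])
print(pipe(Values([1.0, 2.0])).vals)   # [3. 5.]

# params() and updateParams() additionally expect each block to provide
# params()/updateParams(), as Layer and Dense do (Dropout only provides updateParams).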
{rb_deeplearning_lib-0.0.1.dist-info → rb_deeplearning_lib-0.0.2.dist-info}/METADATA
CHANGED
@@ -1,37 +1,37 @@
Only the Version and Project-URL lines change; the remaining lines are identical in both versions:

 Metadata-Version: 2.4
 Name: rb-deeplearning-lib
-Version: 0.0.1
+Version: 0.0.2
 Summary: This is a machine learning--more specifically deep learning--library from my independent study on deep learning. This library is both a result of my learning and a tool for AI development.
 License-Expression: MIT
-Project-URL: Homepage, https://github.com/rylan-berry/DeepLearningIndependentStudy/deeplearning_package
+Project-URL: Homepage, https://github.com/rylan-berry/DeepLearningIndependentStudy/tree/main/deeplearning_package
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy>=2.0
 Dynamic: license-file
 
 # Deeplearning Package
 
 ## Overview
 
 This package is designed to be similar to the PyTorch system of a building block system. Providing the functions that can be mixed, matched, and customized as pleased for any given model. This library is bare bones and only includes the few methods and ideas I learned about while studying *Deep Learning* by Ian Goodfellow et. al.. AI was used in the project, but it was used sparingly.
 
 ## Modules
 
 This project has three main modules:
 
 * `autogradient.py`
 * `sequence.py`
 * `neural_net.py`
 
 All of which are automatically part of the initial import of the package.
 
 ## Making and Running a Model
 
 When creating a model, use the Model class, which runs most of the functions included in the package itself. The first argument is a list of layers or blocks, each element is the steps in the network. These steps can be a Dense, Layer, or Dropout blocks (more will be made), a Dense is just multiple layers stacked back to back.
 Training a model is done through: def train(epochs, x\_t, y\_t, x\_v, y\_v, val\_run=1, l\_rate=0.01, \_lambda\=0.1, batch\_size \= None)
 Where epochs is the number of times you train through the data, the \#\_t means training data and \#\_v means validation data, x means input, y means output, val\_run is the epochs between when you want to test the validation data, l\_rate is the learn rate, \_lambda is a hyperparameter that determines the strength of the penalty functions, and batch\_size determines how large batches will be (if the batch size isn’t a multiple of the data size then it will still run, there is just a smaller batch then the others).
 
 ## Dependencies
 
 The auto gradient–which is used for back propagation–relies heavily on **numpy**.
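Following the README's description of train(), here is a hedged sketch of a full training call. It is not part of the package; the data, shapes, and hyperparameter values are illustrative assumptions, and Model, Dense, mse_loss, l2_reg, and Values are assumed to be in scope as defined earlier in this diff.

import numpy as np
# Model/Dense/mse_loss/l2_reg/Values are assumed to be in scope (see neural_net.py above).

x = np.random.rand(200, 4)              # inputs
y = np.random.rand(200, 1)              # targets
x_t, x_v = x[:160], x[160:]             # training / validation split
y_t, y_v = y[:160], y[160:]

# Two-layer network: 4 -> 16 (relu) -> 1 (linear, since f_activ defaults to "_").
model = Model([Dense(2, 4, 16, 1, activ="relu")], loss_fn=mse_loss, pen_fn=l2_reg)

# 20 epochs, validation loss printed every 5 epochs, learn rate 0.01, penalty
# strength _lambda=0.1, mini-batches of 32 rows (160 is a multiple of 32, so
# there is no smaller trailing batch here).
model.train(20, x_t, y_t, x_v, y_v, val_run=5, l_rate=0.01, _lambda=0.1, batch_size=32)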
rb_deeplearning_lib-0.0.2.dist-info/RECORD
ADDED
@@ -0,0 +1,9 @@
+rb_deeplearning_lib/__init__.py,sha256=ocbbPLbgvqPWapKJCbutXdhg2qS2TCeOySmvyLXqJW8,57
+rb_deeplearning_lib/autogradient.py,sha256=ZZdXnyPirx_A3xLexiqQ2p26Iu2zulXTDJyuiNHR3p0,9663
+rb_deeplearning_lib/neural_net.py,sha256=EyPhJaZWV0D_-0gZLY_xo_tGcprdBBc8dzsPByUjx_k,6605
+rb_deeplearning_lib/sequence.py,sha256=qxMiJvadFGXRXF5R3JgnAELzQQUGftpZRIYZ_bYdHt0,440
+rb_deeplearning_lib-0.0.2.dist-info/licenses/LICENSE,sha256=KaPiob6jyYzac2fmuJ_u3xCaXWekJCvppmg95sC76as,1089
+rb_deeplearning_lib-0.0.2.dist-info/METADATA,sha256=wU9M8c80776HGyjZgXXosbEzb02SFAGqcZ2Hv9rQfOw,2311
+rb_deeplearning_lib-0.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+rb_deeplearning_lib-0.0.2.dist-info/top_level.txt,sha256=oK1ClPmIXDzAbwPHl69BE88PFjpsYta_a5vNEa5WJoA,20
+rb_deeplearning_lib-0.0.2.dist-info/RECORD,,
{rb_deeplearning_lib-0.0.1.dist-info → rb_deeplearning_lib-0.0.2.dist-info}/licenses/LICENSE
RENAMED
@@ -1,21 +1,21 @@
Lines 1-20 are marked removed and re-added but are textually identical; line 21 is unchanged context. The file reads:

MIT License

Copyright (c) 2026 Rylan L Berry

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
rb_deeplearning_lib-0.0.1.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
-rb_deeplearning_lib/__init__.py,sha256=3pOhAKKTkdfXcPT1mxouH4ZOHfk419-Wnqv24oss6P4,54
-rb_deeplearning_lib/autogradient.py,sha256=woXGKi1EZ1QobvRbPMtj4WQqdkOCr7Xk_KeG8WE1d6k,9388
-rb_deeplearning_lib/neural_net.py,sha256=tMGHeL1VVVTe3D4MzFMxQaz9pPdMA3-gbQbMqkPQzKs,6411
-rb_deeplearning_lib/sequence.py,sha256=As8FiHVrcN2w0xXV8Vt_eHhNh1vDWDM3oIfrZMUkZ5g,417
-rb_deeplearning_lib-0.0.1.dist-info/licenses/LICENSE,sha256=RGN7cN89q7JPnZj-z1KSZT9M3zcJPwjzb-gEOt6SCCA,1069
-rb_deeplearning_lib-0.0.1.dist-info/METADATA,sha256=LDVDxShZ5P5C_rHucP-lsx196KwVLZa9tX2E7k_7nLo,2264
-rb_deeplearning_lib-0.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-rb_deeplearning_lib-0.0.1.dist-info/top_level.txt,sha256=oK1ClPmIXDzAbwPHl69BE88PFjpsYta_a5vNEa5WJoA,20
-rb_deeplearning_lib-0.0.1.dist-info/RECORD,,

{rb_deeplearning_lib-0.0.1.dist-info → rb_deeplearning_lib-0.0.2.dist-info}/WHEEL
File without changes

{rb_deeplearning_lib-0.0.1.dist-info → rb_deeplearning_lib-0.0.2.dist-info}/top_level.txt
File without changes