neuralnetworknumpy-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,56 @@ neuralnetworknumpy/__init__.py
"""
neuralnetworknumpy
A minimal deep learning framework built using NumPy.
"""

__version__ = "0.1.0"

# Layers
from .layers import (
    Layer,
    Dense,
    Activation,
    ReLu,
    Sigmoid,
    Softmax,
    Linear,
    Tanh,
    BatchNorm,
    Dropout,
)

# Model
from .model import NeuralNetwork

# Utils
from .utils import (
    History,
    Scaler,
    split_train_test,
    split_train_validation,
)

__all__ = [
    # Core
    "NeuralNetwork",

    # Base
    "Layer",

    # Layers
    "Dense",
    "Activation",
    "ReLu",
    "Sigmoid",
    "Softmax",
    "Linear",
    "Tanh",
    "BatchNorm",
    "Dropout",

    # Utilities
    "History",
    "Scaler",
    "split_train_test",
    "split_train_validation",
]
@@ -0,0 +1,342 @@ neuralnetworknumpy/layers.py
import numpy as np


class Layer:
    def __init__(self):
        # Trainable parameters (some layers won't use them)
        self.W = None
        self.b = None

        # Gradients
        self.dW = None
        self.db = None

        # Optimizer states
        self.vW = None
        self.vb = None
        self.mW = None
        self.mb = None

        # Forward pass values
        self.A = None
        self.A_prev = None  # Input to this layer

        # Backprop pass values
        self.Z = None

    # Forward and backward are abstract methods — override in subclasses
    def _forward(self, A_prev, training=None):
        raise NotImplementedError

    def _backward(self, dA):
        raise NotImplementedError

    def _update(self, lambda_, lr, beta1, beta2, _eps, optimizer, t):
        raise NotImplementedError


class Dense(Layer):
    def __init__(self, units: int, inputs: int = 0, kernel_initializer: str = None):
        super().__init__()

        self.units = units
        self.in_size = inputs
        self.out_size = units
        self.kernel_initializer = kernel_initializer

        if kernel_initializer:
            self.initializer = kernel_initializer
        else:
            self._set_default_initializers()

    def _set_default_initializers(self):
        # Default to He initialization; a future version could pick "he" for
        # ReLU layers and "xavier" for sigmoid/tanh/softmax/linear ones.
        self.initializer = "he"

    def build(self, input_size):
        self.in_size = input_size
        self._initialize_weights()

        self.b = np.zeros((self.out_size, 1), dtype=np.float32)

        # Optimizer state
        self.vW = np.zeros_like(self.W)
        self.vb = np.zeros_like(self.b)
        self.mW = np.zeros_like(self.W)
        self.mb = np.zeros_like(self.b)

    def _initialize_weights(self):
        if self.initializer == "he":
            # W ~ N(0, 2/in), i.e. std = √(2/in)
            std = np.sqrt(2.0 / self.in_size)
            self.W = np.random.randn(self.out_size, self.in_size).astype(np.float32) * std

        elif self.initializer == "xavier":
            # W ~ U(-√(6/(in+out)), √(6/(in+out)))
            limit = np.sqrt(6.0 / (self.in_size + self.out_size))
            self.W = np.random.uniform(-limit, limit, (self.out_size, self.in_size)).astype(np.float32)

        else:
            self.W = np.random.randn(self.out_size, self.in_size).astype(np.float32) * 0.01

    def _forward(self, A_prev, training=None):
        if self.W is None:
            self.build(A_prev.shape[0])

        self.A_prev = A_prev
        self.Z = np.dot(self.W, A_prev) + self.b

        return self.Z

    def _backward(self, dA):
        # dW_i = dZ_i · A_{i-1}^T
        # Gradient of the loss w.r.t. weights of layer i
        self.dW = np.dot(dA, self.A_prev.T)

        # db_i = sum(dZ_i) over the batch
        # Gradient of the loss w.r.t. biases of layer i
        self.db = np.sum(dA, axis=1, keepdims=True)

        # Gradient to pass backward
        # dA_prev = W_i^T · dZ_i
        return np.dot(self.W.T, dA)

    def _update(self, lambda_, lr, beta1, beta2, _eps, optimizer, t):
        if optimizer == "adamW":
            dw = self.dW  # pure gradient; weight decay is decoupled below
        else:
            # Batch size from the cached forward input, used to scale regularization
            m = self.A_prev.shape[1]
            dw = self.dW + (lambda_ / m) * self.W  # L2 regularization

        if optimizer == "momentum":
            self.vW = beta1 * self.vW + dw
            self.vb = beta1 * self.vb + self.db

            update_w = self.vW
            update_b = self.vb

        elif optimizer == "adam" or optimizer == "adamW":
            # First and second moment estimates
            self.mW = beta1 * self.mW + (1 - beta1) * dw
            self.mb = beta1 * self.mb + (1 - beta1) * self.db

            self.vW = beta2 * self.vW + (1 - beta2) * (dw ** 2)
            self.vb = beta2 * self.vb + (1 - beta2) * (self.db ** 2)

            # Bias correction
            m_w_hat = self.mW / (1 - beta1 ** t)
            m_b_hat = self.mb / (1 - beta1 ** t)

            v_w_hat = self.vW / (1 - beta2 ** t)
            v_b_hat = self.vb / (1 - beta2 ** t)

            update_w = m_w_hat / (np.sqrt(v_w_hat) + _eps)
            update_b = m_b_hat / (np.sqrt(v_b_hat) + _eps)

        elif optimizer == "rmsprop":
            self.vW = beta2 * self.vW + (1 - beta2) * (dw ** 2)
            self.vb = beta2 * self.vb + (1 - beta2) * (self.db ** 2)

            update_w = dw / (np.sqrt(self.vW) + _eps)
            update_b = self.db / (np.sqrt(self.vb) + _eps)

        else:
            # Classic SGD
            update_w = dw
            update_b = self.db

        # W = W - lr * update
        self.W -= lr * update_w
        self.b -= lr * update_b

        # Decoupled (adamW) weight decay
        if optimizer == "adamW":
            self.W *= (1 - lr * lambda_)


class Activation(Layer):
    def __init__(self):
        super().__init__()

    def _update(self, *args, **kwargs):
        pass  # no parameters to update


class ReLu(Activation):

    def _forward(self, Z, training=None):
        self.Z = Z
        self.A = np.maximum(0, Z)
        return self.A

    def _backward(self, dA):
        return dA * (self.Z > 0)


class Sigmoid(Activation):

    def _forward(self, Z, training=None):
        self.Z = Z
        self.A = 1 / (1 + np.exp(-Z))
        return self.A

    def _backward(self, dA):
        return dA * self.A * (1 - self.A)


class Softmax(Activation):

    def _forward(self, Z, training=None):
        self.Z = Z
        # Shift by the column max for numerical stability
        shifted = Z - np.max(Z, axis=0, keepdims=True)
        exp_vals = np.exp(shifted)
        self.A = exp_vals / np.sum(exp_vals, axis=0, keepdims=True)
        return self.A

    def _backward(self, dA):
        # Jacobian-vector product: dZ = A ⊙ (dA - Σ(dA ⊙ A))
        s = np.sum(dA * self.A, axis=0, keepdims=True)
        return self.A * (dA - s)


class Linear(Activation):

    def _forward(self, Z, training=None):
        self.Z = Z
        self.A = Z
        return self.A

    def _backward(self, dA):
        return dA  # derivative is 1


class Tanh(Activation):

    def _forward(self, Z, training=None):
        self.Z = Z
        self.A = np.tanh(Z)
        return self.A

    def _backward(self, dA):
        return dA * (1 - self.A ** 2)


class BatchNorm(Layer):
    def __init__(self, momentum=0.9):
        super().__init__()
        self.momentum = momentum
        self._eps = 1e-08

        self.gamma = None
        self.beta = None

        self.running_mean = None
        self.running_var = None

    def build(self, input_size):
        self.gamma = np.ones((input_size, 1), dtype=np.float32)
        self.beta = np.zeros((input_size, 1), dtype=np.float32)

        self.running_mean = np.zeros((input_size, 1), dtype=np.float32)
        self.running_var = np.ones((input_size, 1), dtype=np.float32)

    def _forward(self, A_prev, training=True):
        if self.gamma is None:
            self.build(A_prev.shape[0])

        self.A_prev = A_prev

        if training:
            self.mean = np.mean(A_prev, axis=1, keepdims=True)
            self.var = np.var(A_prev, axis=1, keepdims=True)

            # X̂ = (X - μB) / √(σB² + ε)
            self.X_hat = (A_prev - self.mean) / np.sqrt(self.var + self._eps)
            # A = γ * X̂ + β
            self.A = self.gamma * self.X_hat + self.beta

            # Update running stats
            self.running_mean = (
                self.momentum * self.running_mean
                + (1 - self.momentum) * self.mean
            )

            self.running_var = (
                self.momentum * self.running_var
                + (1 - self.momentum) * self.var
            )

        else:
            self.X_hat = (A_prev - self.running_mean) / np.sqrt(self.running_var + self._eps)
            self.A = self.gamma * self.X_hat + self.beta

        return self.A

    def _backward(self, dA):
        m = dA.shape[1]

        dgamma = np.sum(dA * self.X_hat, axis=1, keepdims=True)
        dbeta = np.sum(dA, axis=1, keepdims=True)

        dX_hat = dA * self.gamma

        var_inv = 1. / np.sqrt(self.var + self._eps)

        dvar = np.sum(dX_hat * (self.A_prev - self.mean) * -0.5 * var_inv**3,
                      axis=1, keepdims=True)

        dmean = (
            np.sum(dX_hat * -var_inv, axis=1, keepdims=True)
            + dvar * np.mean(-2. * (self.A_prev - self.mean), axis=1, keepdims=True)
        )

        dX = (
            dX_hat * var_inv
            + dvar * 2 * (self.A_prev - self.mean) / m
            + dmean / m
        )

        # dA already carries the 1/m factor from the loss derivative,
        # so dgamma/dbeta need no extra scaling here
        self.dgamma = dgamma
        self.dbeta = dbeta

        return dX

    def _update(self, lambda_, lr, beta1, beta2, eps, optimizer, t):
        # γ and β are updated with plain SGD regardless of the optimizer choice
        self.gamma -= lr * self.dgamma
        self.beta -= lr * self.dbeta


class Dropout(Layer):
    def __init__(self, rate):
        super().__init__()
        self.rate = rate  # probability of dropping a unit
        self.mask = None

    def _forward(self, A_prev, training=True):
        if not training:
            # No dropout during inference
            self.mask = np.ones_like(A_prev)
            self.A = A_prev
            return self.A

        # Create dropout mask
        self.mask = np.random.rand(A_prev.shape[0], A_prev.shape[1]) > self.rate
        # Apply mask AND scale (inverted dropout)
        self.A = (A_prev * self.mask) / (1 - self.rate)

        return self.A

    def _backward(self, dA):
        # Backprop only through active neurons
        dA_prev = (dA * self.mask) / (1 - self.rate)
        return dA_prev

    def _update(self, lambda_, lr, beta1, beta2, _eps, optimizer, t):
        # Dropout layer has no trainable parameters
        pass
@@ -0,0 +1,506 @@ neuralnetworknumpy/model.py
import numpy as np
from tqdm.auto import tqdm

from .layers import Dropout, Activation, BatchNorm, Dense
from .utils import History


class NeuralNetwork:
    def __init__(self, layers: list):
        self.num_classes = 0
        self.layers = layers

        self._eps = 1e-08  # Avoid dividing by zero
        self.lr = 0.001
        self.lambda_ = 0.0
        self.beta1 = 0.9
        self.beta2 = 0.999
        self.loss_type = "cross_entropy"
        self.optimizer = "adam"

    def save(self, path):
        layer_data = []

        for layer in self.layers:
            entry = {"type": type(layer).__name__}

            if isinstance(layer, Dense):
                entry.update({
                    "W": layer.W,
                    "b": layer.b,
                    "in_size": layer.in_size,
                    "out_size": layer.out_size,
                    "initializer": layer.initializer,
                })
            elif isinstance(layer, BatchNorm):
                entry.update({
                    "gamma": layer.gamma,
                    "beta": layer.beta,
                    "running_mean": layer.running_mean,
                    "running_var": layer.running_var,
                    "momentum": layer.momentum,
                })
            elif isinstance(layer, Dropout):
                entry["rate"] = layer.rate

            # Activation layers (ReLu, Sigmoid, etc.) need no extra data

            layer_data.append(entry)

        np.savez(
            path,
            layers=np.array(layer_data, dtype=object),
            lr=self.lr,
            lambda_=self.lambda_,
            beta1=self.beta1,
            beta2=self.beta2,
            loss_type=self.loss_type,
            optimizer=self.optimizer,
            num_classes=self.num_classes,
        )

    @staticmethod
    def load(path):
        from .layers import Dense, BatchNorm, Dropout, ReLu, Sigmoid, Softmax, Tanh, Linear

        ACTIVATION_MAP = {
            "ReLu": ReLu,
            "Sigmoid": Sigmoid,
            "Softmax": Softmax,
            "Tanh": Tanh,
            "Linear": Linear,
        }

        data = np.load(path, allow_pickle=True)
        layers = []

        for entry in data["layers"]:
            layer_type = entry["type"]

            if layer_type == "Dense":
                layer = Dense(units=entry["out_size"])
                layer.W = entry["W"]
                layer.b = entry["b"]
                layer.in_size = entry["in_size"]
                layer.out_size = entry["out_size"]
                layer.initializer = entry["initializer"]
                # Restore optimizer states as zeros (not serialized)
                layer.vW = np.zeros_like(layer.W)
                layer.vb = np.zeros_like(layer.b)
                layer.mW = np.zeros_like(layer.W)
                layer.mb = np.zeros_like(layer.b)

            elif layer_type == "BatchNorm":
                layer = BatchNorm(momentum=entry["momentum"])
                layer.gamma = entry["gamma"]
                layer.beta = entry["beta"]
                layer.running_mean = entry["running_mean"]
                layer.running_var = entry["running_var"]

            elif layer_type == "Dropout":
                layer = Dropout(rate=entry["rate"])

            elif layer_type in ACTIVATION_MAP:
                layer = ACTIVATION_MAP[layer_type]()

            else:
                raise ValueError(f"Unknown layer type: {layer_type}")

            layers.append(layer)

        model = NeuralNetwork(layers)
        model.lr = data["lr"].item()
        model.lambda_ = data["lambda_"].item()
        model.beta1 = data["beta1"].item()
        model.beta2 = data["beta2"].item()
        model.loss_type = data["loss_type"].item()
        model.optimizer = data["optimizer"].item()
        model.num_classes = data["num_classes"].item()

        return model

    def summary(self):
        print("=" * 55)
        print("Model Summary")
        print("=" * 55)

        total_params = 0

        for i, layer in enumerate(self.layers):
            layer_type = type(layer).__name__

            if isinstance(layer, Dense):
                params = layer.W.size + layer.b.size if layer.W is not None else 0
                total_params += params
                built = f"{layer.in_size} → {layer.out_size}"
                print(f"[{i+1}] Dense {built:<20} params: {params}")

            elif isinstance(layer, BatchNorm):
                params = layer.gamma.size + layer.beta.size if layer.gamma is not None else 0
                total_params += params
                print(f"[{i+1}] BatchNorm momentum={layer.momentum:<13} params: {params}")

            elif isinstance(layer, Dropout):
                print(f"[{i+1}] Dropout rate={layer.rate}")

            elif isinstance(layer, Activation):
                print(f"[{i+1}] {layer_type:<15}")

        print("-" * 55)

        print(f"Total trainable parameters: {total_params}")
        print("=" * 55)

    # One-hot encode y_true into a (num_classes, m) matrix for loss calculations
    def _one_hot_encoding(self, y):
        one_hot_y = np.zeros((y.size, self.num_classes))
        one_hot_y[np.arange(y.size), y] = 1
        return one_hot_y.T

    """ **********************************************************
                        Model Algorithms
    ********************************************************** """

    # Forward function - feed input and get prediction
    def _forward(self, X, training=True):
        # Each layer computes Z = W · A_prev + b and/or an activation A;
        # the output of one layer is the input of the next
        for layer in self.layers:
            X = layer._forward(X, training=training)
        return X

    # Backward function - propagate the loss gradient back through the network
    def _backward(self, y_true):
        m = y_true.size

        # dA = ∂J/∂A_L, the derivative of the loss w.r.t. the network output
        dA = self._loss_derivative(self.layers[-1].A, y_true) / m
        dA = self.layers[-1]._backward(dA)

        # Backpropagation: iterate the remaining layers in reverse order;
        # each layer computes its gradients (dW, db) and returns dA for the layer below
        for layer in reversed(self.layers[:-1]):
            dA = layer._backward(dA)

    # Loss derivative - for the last layer, based on the loss type
    def _loss_derivative(self, y_pred, y_true):
        one_hot = self._one_hot_encoding(y_true)  # y_true formatting
        # Division by m happens in the backward function
        if self.loss_type == "cross_entropy":
            # -y_true / y_pred
            return -(one_hot / (y_pred + self._eps))

        elif self.loss_type == "mse":
            # 2 * (y_pred - y_true)
            return 2 * (y_pred - one_hot)

        else:
            raise Exception("Invalid loss function")

    # Compute loss for logging
    def _compute_loss(self, y_pred, y_true):
        one_hot = self._one_hot_encoding(y_true)  # shape: (num_classes, N)
        m = y_true.size

        if self.loss_type == "cross_entropy":
            # -1/m * Σ (y_true * log(y_pred))
            data_loss = -np.sum(one_hot * np.log(y_pred + self._eps)) / m
        elif self.loss_type == "mse":
            # 1/m * Σ ((y_pred - y_true)²)
            data_loss = np.mean((y_pred - one_hot) ** 2)
        else:
            raise Exception("Invalid loss function")

        # L2 regularization term: (λ / 2m) * Σ ||W||²
        reg_loss = 0.0
        for layer in self.layers:
            if isinstance(layer, Dense) and layer.W is not None:
                reg_loss += np.sum(layer.W ** 2)

        reg_loss = (self.lambda_ / (2 * m)) * reg_loss

        return data_loss + reg_loss

    # Update weights and biases
    # lr - learning rate:
    #   high lr - impacts the model fast, can overshoot
    #   low lr - learns slower, won't overshoot
    # lambda_ (λ) - counters overfitting by punishing big weights;
    #   forces weights to be small but not zero (w = 0 -> no impact on the model)
    # beta1 - momentum factor
    # beta2 - RMSProp factor
    def _update(self, optimizer_t):
        for layer in self.layers:
            layer._update(self.lambda_, self.lr, self.beta1, self.beta2, self._eps, self.optimizer, optimizer_t)

    """ **********************************************************
                            Metrics
    ********************************************************** """

    # Calculate model accuracy
    @staticmethod
    def accuracy(predictions, y):
        return np.sum(predictions == y) / y.size

    # Calculate model precision (macro-averaged over classes)
    @staticmethod
    def precision(predictions, y, num_classes):
        precisions = []

        for c in range(num_classes):
            tp = np.sum((predictions == c) & (y == c))
            fp = np.sum((predictions == c) & (y != c))

            precisions.append(tp / (tp + fp + 1e-8))

        return np.mean(precisions)

    # Calculate model recall (macro-averaged over classes)
    @staticmethod
    def recall(predictions, y, num_classes):
        recalls = []

        for c in range(num_classes):
            tp = np.sum((predictions == c) & (y == c))
            fn = np.sum((predictions != c) & (y == c))

            recalls.append(tp / (tp + fn + 1e-8))

        return np.mean(recalls)

    # Calculate model F1 score from macro precision and recall
    @staticmethod
    def f1(predictions, y, num_classes):
        precision = NeuralNetwork.precision(predictions, y, num_classes)
        recall = NeuralNetwork.recall(predictions, y, num_classes)
        return 2 * (precision * recall) / (precision + recall + 1e-8)

    def calc_metrics(self, history: History, y_pred, y_true, metrics=None):
        if metrics is None:
            metrics = []
        for metric in metrics:
            if metric == "accuracy":
                accuracy = NeuralNetwork.accuracy(y_pred, y_true)
                history.add("accuracy", accuracy)
            if metric == "precision":
                precision = NeuralNetwork.precision(y_pred, y_true, self.num_classes)
                history.add("precision", precision)
            if metric == "recall":
                recall = NeuralNetwork.recall(y_pred, y_true, self.num_classes)
                history.add("recall", recall)
            if metric == "f1":
                f1 = NeuralNetwork.f1(y_pred, y_true, self.num_classes)
                history.add("f1", f1)

        return history

    """ **********************************************************
                        Runtime functions
    ********************************************************** """

    # Convert final-layer activations to predicted class labels
    @staticmethod
    def _decode_output(output):
        if output.shape[0] == 1:  # Binary classification
            return (output > 0.5).astype(int).flatten()
        else:  # Multi-class (softmax)
            return np.argmax(output, axis=0)

    @staticmethod
    def shuffle_data(x, y):
        perm = np.random.permutation(y.size)
        x = x[:, perm]
        y = y[perm]
        return x, y

    @staticmethod
    def set_seed(seed):
        np.random.seed(seed)

    def check_gradient(self, X, y):
        assert X.shape[1] == y.size, f"X has {X.shape[1]} samples but y has {y.size}"

        # Use a small batch to avoid numerical issues
        X = X[:, :8].astype(np.float64)  # <-- float64 is critical for numerical grad
        y = y[:8]

        rel_diff = []
        original_lambda = self.lambda_
        self.lambda_ = 0.0
        epsilon = 1e-5  # smaller epsilon can help

        self._forward(X, training=False)
        self._backward(y)

        # Snapshot ALL analytical gradients before any weight perturbation
        analytical_grads = {}
        for idx, layer in enumerate(self.layers):
            if hasattr(layer, 'dW') and layer.dW is not None:
                analytical_grads[idx] = layer.dW.copy()  # <-- copy before perturbation

        for idx, layer in enumerate(self.layers):
            if hasattr(layer, 'W') and layer.W is not None:
                # Check one randomly chosen weight per layer
                i = np.random.randint(0, layer.W.shape[0])
                j = np.random.randint(0, layer.W.shape[1])

                W_orig = layer.W[i, j]
                grad_analytical = analytical_grads[idx][i, j]

                # Central difference: (J(w + ε) - J(w - ε)) / (2ε)
                layer.W[i, j] = W_orig + epsilon
                y_pred = self._forward(X, training=False)
                loss_plus = self._compute_loss(y_pred, y)

                layer.W[i, j] = W_orig - epsilon
                y_pred = self._forward(X, training=False)
                loss_minus = self._compute_loss(y_pred, y)

                grad_numerical = (loss_plus - loss_minus) / (2 * epsilon)
                layer.W[i, j] = W_orig

                numerator = abs(grad_numerical - grad_analytical)
                denominator = abs(grad_numerical) + abs(grad_analytical) + 1e-10
                rel_diff.append(numerator / denominator)

                print(f"Layer {idx} W[{i},{j}] Numerical: {grad_numerical:.10f} Analytical: {grad_analytical:.10f} Rel diff: {rel_diff[-1]:.2e}")

        self.lambda_ = original_lambda
        return rel_diff

    # Runs mini-batch gradient descent over the training data (called by fit)
    # X - input features, shape (features, samples)
    # y - labels
    # epochs - number of passes over the data
    # batch_size - number of samples per parameter update
    # (lr, loss_type, lambda_ and optimizer are configured via compile())
    def gradient_descent(self, X, y, X_val=None, y_val=None, epochs=10, batch_size=1):
        X = X.astype(np.float32)
        history = History()

        optimizer_t = 0

        # Early-stopping budget: stop after 5 epochs (cumulative, not
        # consecutive) in which the validation loss got worse
        stopping_patience = 5

        for ep in range(epochs):
            predictions = []
            epoch_loss = 0

            x_shuffled, y_shuffled = NeuralNetwork.shuffle_data(X, y)

            # Batches
            # tqdm - progress bar
            for i in tqdm(range(0, x_shuffled.shape[1], batch_size)):

                optimizer_t += 1

                # get batch
                x_batch = x_shuffled[:, i:i+batch_size]
                y_batch = y_shuffled[i:i+batch_size]
                # feed model
                self._forward(x_batch)
                self._backward(y_batch)
                self._update(optimizer_t)

                y_pred = self.layers[-1].A
                # Monitor loss - epoch_loss is the size-weighted average of batch losses
                predictions.append(self._decode_output(y_pred))
                batch_loss = self._compute_loss(y_pred, y_batch)
                epoch_loss += batch_loss * x_batch.shape[1] / X.shape[1]

                # Check gradient - make sure backpropagation works well
                # self.check_gradient(x_batch, y_batch)

            predictions = np.concatenate(predictions)

            history.add("epoch", ep)
            history.add("loss", epoch_loss)
            history = self.calc_metrics(history, predictions, y_shuffled, metrics=["accuracy", "precision", "recall"])

            # Validation
            if X_val is not None and y_val is not None:
                val_pred = self.predict_proba(X_val)
                val_loss = self._compute_loss(val_pred, y_val)
                val_acc = NeuralNetwork.accuracy(self._decode_output(val_pred), y_val)
                history.add("val_loss", val_loss)
                history.add("val_accuracy", val_acc)

                history.progress()

                if len(history.history["val_loss"]) > 1:
                    if history.history["val_loss"][-1] > history.history["val_loss"][-2]:
                        stopping_patience -= 1
                        if stopping_patience == 0:
                            print("Early stopping")
                            break
            else:
                history.progress()

        return history

    """ **********************************************************
                        Model API functions
    ********************************************************** """

    # Configure training hyperparameters and optimization settings
    def compile(self, loss_type="cross_entropy", optimizer="adam", lr=0.001, lambda_=0.0, beta1=0.9, beta2=0.999):
        self.lr = lr
        self.lambda_ = lambda_
        self.beta1 = beta1
        self.beta2 = beta2
        self.loss_type = loss_type
        self.optimizer = optimizer

    # Train the model
    def fit(self, X, y, X_val=None, y_val=None, epochs=10, batch_size=1):
        if X.shape[1] != y.size:
            raise ValueError("Mismatch between samples and labels")

        # The out_size of the last parameterized layer is the number of classes
        for layer in reversed(self.layers):
            if hasattr(layer, "out_size"):
                self.num_classes = layer.out_size
                break
        return self.gradient_descent(X, y, X_val=X_val, y_val=y_val, epochs=epochs, batch_size=batch_size)

    # Add a layer to the model
    def add(self, layer):
        self.layers.append(layer)

    # Return predicted class labels for input data
    def predict(self, X):
        return self._decode_output(self.predict_proba(X))

    # Return raw output activations (probabilities or scores)
    def predict_proba(self, X):
        return self._forward(X, training=False)

    # Evaluate model performance (accuracy) on a given dataset
    def evaluate(self, X, y):
        predictions = self.predict(X)
        return NeuralNetwork.accuracy(predictions, y)
@@ -0,0 +1,87 @@ neuralnetworknumpy/utils.py
import numpy as np


class History:
    def __init__(self):
        self.history = {}

    def add(self, key, value):
        if key not in self.history:
            self.history[key] = []
        self.history[key].append(value)

    def progress(self):
        for key, value in self.history.items():
            print(f"{key}: {value[-1]}")


class Scaler:
    def __init__(self, mode="standard"):
        """
        mode: "standard" for Z-score (mean=0, std=1)
              "minmax" for range scaling (0 to 1)
        """
        self.mode = mode
        self.mean = None
        self.std = None
        self.min = None
        self.max = None
        self.diff = None

    def fit(self, X):
        """Calculates parameters from training data. X shape: (features, samples)"""
        if self.mode == "standard":
            self.mean = np.mean(X, axis=1, keepdims=True)
            self.std = np.std(X, axis=1, keepdims=True)
            self.std[self.std == 0] = 1e-8  # Avoid division by zero

        elif self.mode == "minmax":
            self.min = np.min(X, axis=1, keepdims=True)
            self.max = np.max(X, axis=1, keepdims=True)
            # Avoid division by zero if all values in a feature are the same
            self.diff = self.max - self.min
            self.diff[self.diff == 0] = 1e-8

    def transform(self, X):
        """Applies scaling to data using fitted parameters."""
        if self.mode == "standard":
            return (X - self.mean) / self.std
        elif self.mode == "minmax":
            return (X - self.min) / self.diff
        else:
            raise NotImplementedError

    def fit_transform(self, X):
        self.fit(X)
        return self.transform(X)


def split_train_test(X, y, test_ratio=0.2):
    """Shuffle and split column-wise. X shape: (features, samples)"""
    m = X.shape[1]
    perm = np.random.permutation(m)

    X = X[:, perm]
    y = y[perm]

    test_size = int(m * test_ratio)

    X_test = X[:, :test_size]
    y_test = y[:test_size]

    X_train = X[:, test_size:]
    y_train = y[test_size:]

    return X_train, y_train, X_test, y_test

def split_train_validation(X, y, val_ratio=0.2):
    """Shuffle and split column-wise, matching split_train_test. X shape: (features, samples)"""
    m = X.shape[1]
    perm = np.random.permutation(m)
    X = X[:, perm]
    y = y[perm]

    val_size = int(m * val_ratio)
    X_val = X[:, :val_size]
    y_val = y[:val_size]
    X_train = X[:, val_size:]
    y_train = y[val_size:]

    return X_train, y_train, X_val, y_val
@@ -0,0 +1,150 @@ neuralnetworknumpy-0.1.0.dist-info/METADATA
Metadata-Version: 2.4
Name: neuralnetworknumpy
Version: 0.1.0
Summary: A neural network framework built completely from scratch using NumPy
Author: Itamar Senderovitz
License: MIT
Project-URL: Homepage, https://github.com/Sendy45/NeuralNetworkFromScratch
Keywords: neural network,deep learning,machine learning,numpy
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Requires-Python: >=3.8
Description-Content-Type: text/markdown
Requires-Dist: numpy>=1.21
Requires-Dist: tqdm>=4.60

# NeuralNetworkFromScratch

A lightweight Python library implementing a fully functional neural network **from scratch using NumPy**, without relying on machine learning frameworks such as TensorFlow or PyTorch.

The goal of this project is to provide a clear and educational implementation of neural networks, including forward propagation, backpropagation, normalization, and regularization techniques.

---

## Features

* Fully connected neural network implementation
* Modular layer system
* Forward and backward propagation
* Batch normalization
* Dropout regularization
* ReLU, Sigmoid, Tanh, Softmax and Linear activation functions
* Dataset scaling utilities
* Train / validation split helpers (see the sketch below)

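The scaling and split helpers can be used on their own. A minimal sketch, assuming the package's `(features, samples)` matrix convention and synthetic data (all variable names here are illustrative):

```python
import numpy as np
from neuralnetworknumpy import Scaler, split_train_test

# Toy data: 4 features, 100 samples (columns are samples)
X = np.random.randn(4, 100)
y = np.random.randint(0, 3, size=100)

# Hold out 20% of the columns for testing
X_train, y_train, X_test, y_test = split_train_test(X, y, test_ratio=0.2)

# Fit scaling parameters on the training set only, then apply to both splits
scaler = Scaler(mode="standard")
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
```
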
---

## Installation

Install from PyPI:

```bash
pip install neuralnetworknumpy
```

Or install from source:

```bash
git clone https://github.com/Sendy45/NeuralNetworkFromScratch.git
cd NeuralNetworkFromScratch
pip install .
```

---

## Example Usage

```python
import numpy as np
from keras.datasets import mnist

from neuralnetworknumpy import (
    NeuralNetwork,
    Dense,
    ReLu,
    BatchNorm,
    Dropout,
    Softmax
)

# load dataset
(X_train, y_train), _ = mnist.load_data()

# flatten images to (features, samples) and scale to [0, 1]
X_train = X_train.reshape(-1, 784).T / 255.0

model = NeuralNetwork([
    Dense(64, inputs=784),
    ReLu(),
    BatchNorm(),
    Dropout(0.1),
    Dense(10),
    Softmax()
])

model.compile(
    optimizer="adam",
    loss_type="cross_entropy"
)

model.fit(X_train, y_train, epochs=10, batch_size=32)
```
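
The trained model can then be evaluated and persisted through the same API (a sketch continuing the example above; the file name is illustrative):

```python
# Prepare the test split the same way as the training data
_, (X_test, y_test) = mnist.load_data()
X_test = X_test.reshape(-1, 784).T / 255.0

print("test accuracy:", model.evaluate(X_test, y_test))

# Save the trained model, reload it, and predict class labels
model.save("mnist_model.npz")
restored = NeuralNetwork.load("mnist_model.npz")
print(restored.predict(X_test)[:10])
```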

---

## Project Structure

```
NeuralNetworkFromScratch

├── neuralnetworknumpy
│   ├── __init__.py
│   ├── model.py
│   ├── layers.py
│   └── utils.py

├── tests
├── README.md
└── pyproject.toml
```

---

## Goals of the Project

This project was designed to:

* Demonstrate **how neural networks work internally**
* Provide a **clean NumPy-based implementation**
* Serve as an **educational resource for learning deep learning fundamentals**

Unlike production ML frameworks, this project prioritizes **clarity and learning over performance**.

---

## Dependencies

* numpy
* tqdm

Optional dependencies used in examples:

* matplotlib (for plotting training curves, as sketched below)
* keras (for datasets such as MNIST)

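For instance, `fit` returns a `History` object whose `history` dict holds the per-epoch values logged during training, which matplotlib can plot (a sketch, assuming matplotlib is installed and `model`, `X_train`, `y_train` are defined as in the example above):

```python
import matplotlib.pyplot as plt

history = model.fit(X_train, y_train, epochs=10, batch_size=32)

plt.plot(history.history["loss"], label="train loss")
if "val_loss" in history.history:  # only present when validation data was given
    plt.plot(history.history["val_loss"], label="validation loss")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()
plt.show()
```
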
---

## License

This project is licensed under the MIT License.

---

## Author

Created by **Itamar Senderovitz**.
@@ -0,0 +1,8 @@ neuralnetworknumpy-0.1.0.dist-info/RECORD
neuralnetworknumpy/__init__.py,sha256=XE69QMzt_49nHxfV51oqlWcTYAxln97llc1JId5vVn4,781
neuralnetworknumpy/layers.py,sha256=QUJACrS62u_oyAX1xfo_92OGWnAxemXthJ-4vRFQBFU,9462
neuralnetworknumpy/model.py,sha256=uTASLPdXtIFAaXbWGLWul9vHdX0Q8XpwJsxI_S5d0T0,17343
neuralnetworknumpy/utils.py,sha256=ck9yNw3dymXqGEvXyaexMHaK2lqLF5M34_EkqIrR4n0,2372
neuralnetworknumpy-0.1.0.dist-info/METADATA,sha256=RAXDzFQ4ZPmV6tSK6P5D0UK9JWEQry6GtMZpZdM2GtA,3263
neuralnetworknumpy-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
neuralnetworknumpy-0.1.0.dist-info/top_level.txt,sha256=rAqTiX1ez7pQ-aGPeVCeKekjBXSWxt6t7cEr8OYeXUQ,19
neuralnetworknumpy-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@ neuralnetworknumpy-0.1.0.dist-info/WHEEL
Wheel-Version: 1.0
Generator: setuptools (82.0.1)
Root-Is-Purelib: true
Tag: py3-none-any

@@ -0,0 +1 @@ neuralnetworknumpy-0.1.0.dist-info/top_level.txt
neuralnetworknumpy