phitodeep 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ Apache Software License 2.0
2
+
3
+ Copyright (c) 2026, Ralph Dugue
4
+
5
+ Licensed under the Apache License, Version 2.0 (the "License");
6
+ you may not use this file except in compliance with the License.
7
+ You may obtain a copy of the License at
8
+
9
+ http://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ Unless required by applicable law or agreed to in writing, software
12
+ distributed under the License is distributed on an "AS IS" BASIS,
13
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ See the License for the specific language governing permissions and
15
+ limitations under the License.
16
+
@@ -0,0 +1,82 @@
1
+ Metadata-Version: 2.4
2
+ Name: phitodeep
3
+ Version: 0.1.3
4
+ Summary: Deep learning framework built from scratch with numpy!
5
+ License: Apache-2.0
6
+ License-File: LICENSE
7
+ Author: Ralph Dugue
8
+ Requires-Python: >=3.12,<4.0
9
+ Classifier: License :: OSI Approved :: Apache Software License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Classifier: Programming Language :: Python :: 3.14
14
+ Requires-Dist: numpy (>=2.4.4,<3.0.0)
15
+ Description-Content-Type: text/markdown
16
+
17
+ # phitodeep
18
+
19
+ Deep learning framework built from scratch with numpy!
20
+
21
+ ## Installation
22
+
23
+ ```bash
24
+ $ pip install phitodeep
25
+ ```
26
+
27
+ ## Usage
28
+ MNIST quickstart:
29
+ ```python
30
+ import numpy as np
31
+ from datasets import load_dataset
32
+
33
+ import phitodeep.loss as loss
34
+ import phitodeep.model as m
35
+
36
+ train_dataset = load_dataset("ylecun/mnist", split="train")
37
+ test_dataset = load_dataset("ylecun/mnist", split="test")
38
+
39
+ X_train = train_dataset["image"]
40
+ y_train = train_dataset["label"]
41
+ X_test = test_dataset["image"]
42
+ y_test = test_dataset["label"]
43
+
44
+ X_train = np.array(X_train).astype(np.float32) / 255.0
45
+ y_train = np.array(y_train)
46
+ X_test = np.array(X_test).astype(np.float32) / 255.0
47
+ y_test = np.array(y_test)
48
+ print(X_train.shape, y_train.shape)
49
+
50
+ model = (
51
+ m.SequentialBuilder()
52
+ .flatten()
53
+ .dense(784, 128)
54
+ .relu()
55
+ .dense(128, 10)
56
+ .softmax()
57
+ .optimizer("adam")
58
+ .loss(loss.CategoricalCrossEntropy())
59
+ .alpha(0.001)
60
+ .epochs(300)
61
+ .batch(32)
62
+ .build()
63
+ )
64
+
65
+ model.summary()
66
+
67
+ model.train(X_train, y_train, X_test, y_test)
68
+
69
+ ```
70
+
71
+ ## Contributing
72
+
73
+ Interested in contributing? Check out the contributing guidelines. Please note that this project is released with a Code of Conduct. By contributing to this project, you agree to abide by its terms.
74
+
75
+ ## License
76
+
77
+ `phitodeep` was created by Ralph Dugue. It is licensed under the terms of the Apache License 2.0.
78
+
79
+ ## Credits
80
+
81
+ `phitodeep` was created with [`cookiecutter`](https://cookiecutter.readthedocs.io/en/latest/) and the `py-pkgs-cookiecutter` [template](https://github.com/py-pkgs/py-pkgs-cookiecutter).
82
+
@@ -0,0 +1,65 @@
1
+ # phitodeep
2
+
3
+ Deep learning framework built from scratch with numpy!
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ $ pip install phitodeep
9
+ ```
10
+
11
+ ## Usage
12
+ MNIST quickstart:
13
+ ```python
14
+ import numpy as np
15
+ from datasets import load_dataset
16
+
17
+ import phitodeep.loss as loss
18
+ import phitodeep.model as m
19
+
20
+ train_dataset = load_dataset("ylecun/mnist", split="train")
21
+ test_dataset = load_dataset("ylecun/mnist", split="test")
22
+
23
+ X_train = train_dataset["image"]
24
+ y_train = train_dataset["label"]
25
+ X_test = test_dataset["image"]
26
+ y_test = test_dataset["label"]
27
+
28
+ X_train = np.array(X_train).astype(np.float32) / 255.0
29
+ y_train = np.array(y_train)
30
+ X_test = np.array(X_test).astype(np.float32) / 255.0
31
+ y_test = np.array(y_test)
32
+ print(X_train.shape, y_train.shape)
33
+
34
+ model = (
35
+ m.SequentialBuilder()
36
+ .flatten()
37
+ .dense(784, 128)
38
+ .relu()
39
+ .dense(128, 10)
40
+ .softmax()
41
+ .optimizer("adam")
42
+ .loss(loss.CategoricalCrossEntropy())
43
+ .alpha(0.001)
44
+ .epochs(300)
45
+ .batch(32)
46
+ .build()
47
+ )
48
+
49
+ model.summary()
50
+
51
+ model.train(X_train, y_train, X_test, y_test)
52
+
53
+ ```
54
+
55
+ ## Contributing
56
+
57
+ Interested in contributing? Check out the contributing guidelines. Please note that this project is released with a Code of Conduct. By contributing to this project, you agree to abide by its terms.
58
+
59
+ ## License
60
+
61
+ `phitodeep` was created by Ralph Dugue. It is licensed under the terms of the Apache License 2.0.
62
+
63
+ ## Credits
64
+
65
+ `phitodeep` was created with [`cookiecutter`](https://cookiecutter.readthedocs.io/en/latest/) and the `py-pkgs-cookiecutter` [template](https://github.com/py-pkgs/py-pkgs-cookiecutter).
@@ -0,0 +1,27 @@
1
+ [tool.poetry]
2
+ name = "phitodeep"
3
+ version = "0.1.3"
4
+ description = "Deep learning framework built from scratch with numpy!"
5
+ authors = ["Ralph Dugue"]
6
+ license = "Apache-2.0"
7
+ readme = "README.md"
8
+ packages = [{include = "phitodeep", from = "src"}]
9
+
10
+ [tool.poetry.dependencies]
11
+ python = "^3.12"
12
+ numpy = "^2.4.4"
13
+
14
+
15
+ [build-system]
16
+ requires = ["poetry-core>=1.0.0"]
17
+ build-backend = "poetry.core.masonry.api"
18
+
19
+ [dependency-groups]
20
+ dev = [
21
+ "ipykernel (>=7.2.0,<8.0.0)",
22
+ "myst-nb (>=1.4.0,<2.0.0)",
23
+ "sphinx-autoapi (>=3.8.0,<4.0.0)",
24
+ "sphinx-rtd-theme (>=3.1.0,<4.0.0)",
25
+ "pillow (>=12.2.0,<13.0.0)",
26
+ "datasets (>=4.8.4,<5.0.0)"
27
+ ]
File without changes
File without changes
@@ -0,0 +1,106 @@
1
+ import numpy as np
2
+
3
+ from .base import Layer
4
+
5
+
6
class ReLu(Layer):
    """Rectified Linear Unit activation: max(0, x)."""

    def __init__(self) -> None:
        super().__init__("relu")

    def forward(self, X):
        # Remember the pre-activation input; backward needs its sign pattern.
        self.cache["X"] = X
        return np.maximum(0, X)

    def backward(self, dL_dZ):
        """
        Backpropagate through ReLU activation.
        ReLU derivative: 1 if X > 0, else 0
        """
        mask = (self.cache["X"] > 0).astype(float)
        return dL_dZ * mask
22
+
23
+
24
class Sigmoid(Layer):
    """Logistic sigmoid activation: 1 / (1 + exp(-x))."""

    def __init__(self) -> None:
        super().__init__("sigmoid")

    def forward(self, X):
        """
        Compute the sigmoid in a numerically stable way.

        The naive 1 / (1 + np.exp(-X)) overflows (RuntimeWarning) for
        large negative X.  Using exp(-|X|), which is always <= 1, gives
        the same values without overflow:
            X >= 0: 1 / (1 + exp(-X))
            X <  0: exp(X) / (1 + exp(X))
        """
        self.cache["X"] = X
        e = np.exp(-np.abs(X))
        self.cache["Z"] = np.where(X >= 0, 1 / (1 + e), e / (1 + e))
        return self.cache["Z"]

    def backward(self, dL_dZ):
        """
        Backpropagate through Sigmoid activation.
        Sigmoid derivative: sigmoid(Z) * (1 - sigmoid(Z))
        """
        Z = self.cache["Z"]
        dL_dX = dL_dZ * Z * (1 - Z)
        return dL_dX
41
+
42
+
43
class Tanh(Layer):
    """Hyperbolic tangent activation."""

    def __init__(self) -> None:
        super().__init__("tanh")

    def forward(self, X):
        # np.tanh is numerically stable; the previous hand-rolled
        # (e^x - e^-x) / (e^x + e^-x) overflows to inf/inf = nan once
        # |X| exceeds ~710 (float64 exp overflow).
        self.cache["X"] = X
        self.cache["Z"] = np.tanh(X)
        return self.cache["Z"]

    def backward(self, dL_dZ):
        """
        Backpropagate through Tanh activation.
        Tanh derivative: 1 - tanh(Z)^2
        """
        Z = self.cache["Z"]
        return dL_dZ * (1 - Z**2)
62
+
63
+
64
class Softmax(Layer):
    """Softmax activation producing a probability distribution per row."""

    def __init__(self) -> None:
        super().__init__("softmax")

    def forward(self, X):
        self.cache["X"] = X
        # Row-wise for batched 2-D input, over the whole array otherwise.
        axis = None if X.ndim < 2 else 1
        # Subtract the per-row max for numerical stability (softmax is
        # invariant to a constant shift).
        shifted = X - np.max(X, axis=axis, keepdims=True)
        # Compute the exponentials ONCE and reuse them for the normalizer
        # (the original evaluated np.exp(X - max_a) twice).
        exp_shifted = np.exp(shifted)
        self.cache["Z"] = exp_shifted / np.sum(exp_shifted, axis=axis, keepdims=True)
        return self.cache["Z"]

    def backward(self, dL_dZ):
        """
        Backpropagate through Softmax activation.
        When paired with CategoricalCrossEntropy, the combined gradient
        (y_pred - one_hot(y_true)) / N is computed entirely in the loss,
        so this layer is a straight pass-through.
        """
        return dL_dZ
87
+
88
+
89
class ELU(Layer):
    """Exponential Linear Unit: x if x > 0 else alpha * (exp(x) - 1)."""

    def __init__(self, alpha=1.0) -> None:
        super().__init__("elu")
        # Named alpha_activation to avoid confusion with the learning rate.
        self.alpha_activation = alpha

    def forward(self, X):
        self.cache["X"] = X
        # np.where evaluates BOTH branches, so clamp the exponent at 0 to
        # avoid overflow warnings for large positive X (the clamped values
        # are discarded by the X > 0 branch anyway).  np.expm1 is also more
        # accurate than np.exp(X) - 1 for X near 0.
        neg_branch = self.alpha_activation * np.expm1(np.minimum(X, 0))
        self.cache["Z"] = np.where(X > 0, X, neg_branch)
        return self.cache["Z"]

    def backward(self, dL_dZ):
        """
        Backpropagate through ELU activation.
        ELU derivative: 1 if X > 0, else alpha * exp(X)
        """
        X = self.cache["X"]
        deriv = np.where(X > 0, 1.0, self.alpha_activation * np.exp(np.minimum(X, 0)))
        return dL_dZ * deriv
@@ -0,0 +1,85 @@
1
+ import numpy as np
2
+
3
+
4
class Layer:
    """Abstract base class for all network blocks.

    Subclasses implement ``forward``/``backward``, may stash intermediate
    values in ``cache`` during the forward pass, and record parameter
    gradients in ``grads`` during the backward pass.
    """

    def __init__(self, name) -> None:
        self.name = name    # human-readable block identifier
        self.cache = {}     # values saved in forward() for backward()
        self.grads = {}     # parameter gradients filled in by backward()

    def forward(self, X):
        """Forward pass; must be overridden by subclasses."""
        raise NotImplementedError(f"Block '{self.name}' must implement forward method")

    def backward(self, dL_dZ):
        """
        Backward pass through the block.

        Args:
            dL_dZ: gradient of loss w.r.t. output of this block

        Returns:
            dL_dX: gradient of loss w.r.t. input (to pass to previous layer)
        """
        raise NotImplementedError(f"Block '{self.name}' must implement backward method")
24
+
25
+
26
class Flatten(Layer):
    """Collapse every non-batch dimension into a single feature axis."""

    def __init__(self):
        super().__init__("flatten")

    def forward(self, X):
        """
        X: (batch_size, d1, d2, ...) -> (batch_size, d1*d2*...)
        """
        self.cache["X"] = X
        batch = X.shape[0]
        return X.reshape(batch, -1)

    def backward(self, dL_dZ):
        """
        Restore the gradient to the original input shape:
        (batch_size, d1*d2*...) -> (batch_size, d1, d2, ...)
        """
        original_shape = self.cache["X"].shape
        return dL_dZ.reshape(original_shape)
43
+
44
+
45
class Dense(Layer):
    """Fully connected layer: Z = X @ W + b."""

    def __init__(self, input_size, output_size):
        super().__init__("dense")
        self.grads = {}
        self.input_size = input_size
        self.output_size = output_size
        # He initialization: scale by sqrt(2 / fan_in), suited to ReLU nets.
        self.W = np.random.randn(input_size, output_size) * np.sqrt(2.0 / input_size)
        self.b = np.zeros(output_size)

    def forward(self, X):
        """
        X: (batch_size, input_size) -> (batch_size, output_size)
        """
        self.cache["X"] = X
        return X @ self.W + self.b

    def backward(self, dL_dZ):
        """
        Backpropagate through Dense layer.

        Args:
            dL_dZ: (batch_size, output_size) - gradient of loss w.r.t. output

        Returns:
            dL_dX: (batch_size, input_size) - gradient to pass to previous layer
        """
        X = self.cache["X"]
        batch = X.shape[0]

        # NOTE(review): the loss gradients in loss.py already divide by the
        # batch size, so dividing by `batch` again here scales parameter
        # gradients by 1/N twice — confirm whether this is intentional.
        self.grads["W"] = (X.T @ dL_dZ) / batch
        self.grads["b"] = dL_dZ.sum(axis=0) / batch

        # Gradient w.r.t. the input, handed to the previous layer.
        return dL_dZ @ self.W.T
@@ -0,0 +1,56 @@
1
+ import numpy as np
2
+
3
+
4
class LossBase:
    """Abstract interface for loss functions."""

    def __init__(self, name) -> None:
        self.name = name  # used in error messages and model.summary()

    def loss_func(self, y_pred, y_true):
        """Return the scalar loss for predictions vs. targets."""
        raise NotImplementedError(f"{self.name} must implement the loss_func method.")

    def loss_gradient(self, y_pred, y_true):
        """Return dL/dy_pred used to seed backpropagation."""
        raise NotImplementedError(
            f"{self.name} must implement the loss_gradient method."
        )
15
+
16
+
17
class MeanSquaredError(LossBase):
    """Mean squared error averaged over ALL elements of the prediction."""

    def __init__(self) -> None:
        super().__init__("MeanSquaredError")

    def loss_func(self, y_pred, y_true):
        return np.mean((y_pred - y_true) ** 2)

    def loss_gradient(self, y_pred, y_true):
        # np.mean in loss_func averages over every element, so the gradient
        # must divide by the total element count too.  The previous
        # len(y_true) only counted the first axis, overstating the gradient
        # by a factor of k for (N, k)-shaped targets; np.size matches the
        # mean exactly and is identical to len() for 1-D targets.
        return 2 * (y_pred - y_true) / np.size(y_true)
27
+
28
+
29
class CategoricalCrossEntropy(LossBase):
    """Cross-entropy for integer class labels against softmax probabilities."""

    def __init__(self) -> None:
        super().__init__("CategoricalCrossEntropy")

    def loss_func(self, y_pred, y_true):
        """Mean negative log-likelihood of the true-class probabilities."""
        batch = len(y_true)
        picked = y_pred[np.arange(batch), y_true]
        # 1e-8 keeps log() finite when a predicted probability hits zero.
        return -np.mean(np.log(picked + 1e-8))

    def loss_gradient(self, y_pred, y_true):
        # Fused Softmax + CCE gradient: (y_pred - one_hot(y_true)) / N
        batch = len(y_true)
        gradient = y_pred.copy()
        gradient[np.arange(batch), y_true] -= 1.0
        return gradient / batch
44
+
45
+
46
class BinaryCrossEntropy(LossBase):
    """Binary cross-entropy for sigmoid outputs in [0, 1]."""

    def __init__(self) -> None:
        super().__init__("BinaryCrossEntropy")

    def loss_func(self, y_pred, y_true):
        # 1e-8 guards both log() calls against exactly-0/1 predictions.
        positive_term = y_true * np.log(y_pred + 1e-8)
        negative_term = (1 - y_true) * np.log(1 - y_pred + 1e-8)
        return -np.mean(positive_term + negative_term)

    def loss_gradient(self, y_pred, y_true):
        denominator = y_pred * (1 - y_pred) + 1e-8
        return (y_pred - y_true) / denominator
@@ -0,0 +1,213 @@
1
+ from . import loss as ls
2
+ from . import optimization
3
+ from .layers import activation as a
4
+ from .layers import base as b
5
+
6
+
7
+ class Sequential:
8
+ def __init__(
9
+ self,
10
+ *layers,
11
+ alpha=0.01,
12
+ optimizer="adam",
13
+ batch_size=1,
14
+ epochs=1000,
15
+ loss_class=ls.MeanSquaredError(),
16
+ ) -> None:
17
+ """
18
+ Initialize with variable number of layers.
19
+
20
+ Usage:
21
+ model = Sequential(
22
+ b.Dense(256, 128),
23
+ a.ReLu(),
24
+ b.Dense(128, 1),
25
+ a.Sigmoid()
26
+ )
27
+ """
28
+ self.layers = list(layers)
29
+ self.alpha = alpha
30
+ self.optimizer = optimizer
31
+ self.batch_size = batch_size
32
+ self.epochs = epochs
33
+ self.loss_class = loss_class
34
+
35
+ def add(self, layer) -> None:
36
+ """Add a layer to the network."""
37
+ self.layers.append(layer)
38
+
39
+ def setoptimizer(self, name):
40
+ self.optimizer = name
41
+
42
+ def setbatchsize(self, num):
43
+ self.batch_size = num
44
+
45
+ def setloss(self, loss_class):
46
+ self.loss_class = loss_class
47
+
48
+ def train(self, X, y, X_test, y_test):
49
+ match self.optimizer:
50
+ case "sgd":
51
+ optimizer = optimization.SGD(alpha=self.alpha)
52
+ case "adam":
53
+ optimizer = optimization.Adam(alpha=self.alpha)
54
+ case _:
55
+ raise ValueError(f"{self.optimizer} is not a valid optimizer.")
56
+
57
+ losses = optimization.train_loop(
58
+ model=self,
59
+ X=X,
60
+ y=y,
61
+ X_test=X_test,
62
+ y_test=y_test,
63
+ optimizer=optimizer,
64
+ loss_class=self.loss_class,
65
+ batch_size=self.batch_size,
66
+ epochs=self.epochs,
67
+ )
68
+
69
+ print("Training complete.")
70
+ print("-" * 60)
71
+ print(f"Starting Training Loss: {losses[0][0]:.4f} | Starting Test Loss: {losses[0][1]:.4f}")
72
+ print(f"Final Training Loss: {losses[-1][0]:.4f} | Final Test Loss: {losses[-1][1]:.4f}")
73
+ print(f"Training Loss Improvement: {losses[0][0] - losses[-1][0]:.4f} | Test Loss Improvement: {losses[0][1] - losses[-1][1]:.4f}")
74
+ print("-" * 60)
75
+ return losses
76
+
77
+ def predict(self, X):
78
+ """
79
+ Forward pass through all layers.
80
+
81
+ Args:
82
+ X: input array
83
+
84
+ Returns:
85
+ output after passing through all layers
86
+ """
87
+ output = X
88
+ for layer in self.layers:
89
+ output = layer.forward(output)
90
+ return output
91
+
92
+ def backward(self, gradient):
93
+ """
94
+ Backward pass through all layers.
95
+
96
+ Args:
97
+ gradient: dL/dY from loss function (shape: batch_size x output_size)
98
+
99
+ Propagates gradient backwards through all layers in reverse order.
100
+ Each layer computes its parameter gradients, updates parameters,
101
+ and returns the gradient for the previous layer.
102
+ """
103
+ # Start with gradient from loss and propagate backwards
104
+ current_gradient = gradient
105
+
106
+ # Iterate through layers in reverse order
107
+ for layer in reversed(self.layers):
108
+ # Pass gradient through layer and get gradient for previous layer
109
+ current_gradient = layer.backward(current_gradient)
110
+
111
+ def __call__(self, X):
112
+ """Allow model(X) syntax."""
113
+ return self.predict(X)
114
+
115
+ def summary(self):
116
+ """Print model architecture."""
117
+ print("Model Summary:")
118
+ print("-" * 60)
119
+ print(
120
+ f"Optimizer: {self.optimizer} | Learning Rate: {self.alpha} | Batch Size: {self.batch_size} \nEpochs: {self.epochs} | Loss: {self.loss_class.name}"
121
+ )
122
+ print("-" * 60)
123
+ for i, layer in enumerate(self.layers):
124
+ if isinstance(layer, b.Dense):
125
+ print(
126
+ f"Layer {i}: {layer.name.upper():<10} | Input: {layer.input_size:<5} Output: {layer.output_size:<5}"
127
+ )
128
+ else:
129
+ print(f"Layer {i}: {layer.name.upper():<10}")
130
+ print("-" * 60)
131
+
132
+
133
class SequentialBuilder:
    """Fluent API for building Sequential models."""

    def __init__(self):
        # Builder state, consumed by build().
        self._layers = []
        self._alpha = 1
        self._optimizer = "sgd"
        self._batch_size = 1
        self._epochs = 1000
        self._loss = ls.MeanSquaredError()

    def _append(self, layer):
        """Record *layer* and return self for chaining."""
        self._layers.append(layer)
        return self

    def flatten(self):
        """Add a Flatten layer."""
        return self._append(b.Flatten())

    def dense(self, input_size, output_size):
        """Add a Dense layer."""
        return self._append(b.Dense(input_size, output_size))

    def relu(self):
        """Add a ReLU activation."""
        return self._append(a.ReLu())

    def sigmoid(self):
        """Add a Sigmoid activation."""
        return self._append(a.Sigmoid())

    def tanh(self):
        """Add a Tanh activation."""
        return self._append(a.Tanh())

    def softmax(self):
        """Add a Softmax activation."""
        return self._append(a.Softmax())

    def elu(self, alpha_activation=1.0):
        """Add an ELU activation."""
        return self._append(a.ELU(alpha_activation))

    def optimizer(self, name):
        """Set the optimizer."""
        self._optimizer = name
        return self

    def batch(self, num):
        """Set the batch size."""
        self._batch_size = num
        return self

    def alpha(self, num):
        """Set the learning rate."""
        self._alpha = num
        return self

    def epochs(self, num):
        """Set the number of epochs."""
        self._epochs = num
        return self

    def loss(self, loss_class):
        """Set the loss function."""
        self._loss = loss_class
        return self

    def build(self):
        """Build and return the Sequential model."""
        return Sequential(
            *self._layers,
            alpha=self._alpha,
            optimizer=self._optimizer,
            batch_size=self._batch_size,
            epochs=self._epochs,
            loss_class=self._loss,
        )
@@ -0,0 +1,84 @@
1
+ import numpy as np
2
+
3
+
4
class Optimizer:
    """Interface for parameter-update strategies."""

    def step(self, layers):
        """Apply one update to every layer that has gradients."""
        raise NotImplementedError
7
+
8
+
9
class SGD(Optimizer):
    """Vanilla stochastic gradient descent: param -= alpha * grad."""

    def __init__(self, alpha=0.01):
        self.alpha = alpha  # learning rate

    def step(self, layers):
        for layer in layers:
            # Skip parameter-free layers (activations, Flatten, ...).
            if layer.grads:
                # Iterate whatever gradients the layer produced instead of
                # hard-coding W and b — consistent with Adam.step, and robust
                # to layers whose parameters have other names.
                for param_name, g in layer.grads.items():
                    param = getattr(layer, param_name)
                    # In-place update mutates the layer's array directly.
                    param -= self.alpha * g
18
+
19
+
20
class Adam(Optimizer):
    """Adam optimizer: per-parameter adaptive steps from bias-corrected
    first and second moment estimates of the gradients."""

    def __init__(self, alpha=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.alpha = alpha      # learning rate
        self.beta1 = beta1      # first-moment decay rate
        self.beta2 = beta2      # second-moment decay rate
        self.epsilon = epsilon  # divide-by-zero guard
        self.t = 0              # global step counter for bias correction
        self.m = {}             # first moments, keyed by (layer id, param name)
        self.v = {}             # second moments, same keys

    def step(self, layers):
        self.t += 1
        # Bias-correction denominators are the same for every parameter
        # at a given step, so compute them once.
        correction1 = 1 - self.beta1**self.t
        correction2 = 1 - self.beta2**self.t
        for layer in layers:
            if not layer.grads:
                continue  # parameter-free layer
            for param_name, g in layer.grads.items():
                key = (id(layer), param_name)

                if key not in self.m:
                    # Lazily allocate moment buffers on first sighting.
                    self.m[key] = np.zeros_like(g)
                    self.v[key] = np.zeros_like(g)

                self.m[key] = self.beta1 * self.m[key] + (1 - self.beta1) * g
                self.v[key] = self.beta2 * self.v[key] + (1 - self.beta2) * g**2

                m_hat = self.m[key] / correction1
                v_hat = self.v[key] / correction2

                # In-place update mutates the layer's parameter array.
                param = getattr(layer, param_name)
                param -= self.alpha * m_hat / (np.sqrt(v_hat) + self.epsilon)
49
+
50
+
51
def train_loop(
    model, X, y, X_test, y_test, loss_class, optimizer, epochs=1000, batch_size=1
):
    """Mini-batch training loop.

    For each epoch, runs len(X) // batch_size update steps on batches
    sampled uniformly WITH replacement, then records the loss on the full
    train and test sets.

    Args:
        model: object exposing predict(X), backward(grad), and .layers.
        X, y: training inputs and targets (indexable by an integer array).
        X_test, y_test: held-out inputs and targets.
        loss_class: object exposing loss_func and loss_gradient.
        optimizer: object exposing step(layers).
        epochs: number of passes over the data.
        batch_size: samples per update step.

    Returns:
        list of (train_loss, test_loss) tuples, one per epoch.
    """
    losses = []

    for epoch in range(epochs):
        for _ in range(len(X) // batch_size):
            # Sample a batch with replacement.
            indices = np.random.randint(0, len(X), batch_size)
            X_batch = X[indices]
            y_batch = y[indices]

            # Forward pass.  (The original also computed the per-batch loss
            # here, but the value was never used — only the gradient is
            # needed for the update, so that dead work is dropped.)
            y_pred = model.predict(X_batch)

            # Compute gradient of loss w.r.t. predictions (dy)
            dy = loss_class.loss_gradient(y_pred, y_batch)

            # Backward pass with gradient of loss
            model.backward(dy)
            optimizer.step(model.layers)

        # Epoch-level bookkeeping on the full datasets.
        y_pred = model.predict(X)
        loss = loss_class.loss_func(y_pred, y)

        y_pred_test = model.predict(X_test)
        loss_test = loss_class.loss_func(y_pred_test, y_test)

        losses.append((loss, loss_test))

        if epoch % 10 == 0:
            print(f"Epoch {epoch}, Loss: {loss:.4f}, Test Loss: {loss_test:.4f}")

    return losses