phitodeep 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- phitodeep-0.1.3/LICENSE +16 -0
- phitodeep-0.1.3/PKG-INFO +82 -0
- phitodeep-0.1.3/README.md +65 -0
- phitodeep-0.1.3/pyproject.toml +27 -0
- phitodeep-0.1.3/src/phitodeep/__init__.py +0 -0
- phitodeep-0.1.3/src/phitodeep/layers/__init__.py +0 -0
- phitodeep-0.1.3/src/phitodeep/layers/activation.py +106 -0
- phitodeep-0.1.3/src/phitodeep/layers/base.py +85 -0
- phitodeep-0.1.3/src/phitodeep/loss.py +56 -0
- phitodeep-0.1.3/src/phitodeep/model.py +213 -0
- phitodeep-0.1.3/src/phitodeep/optimization.py +84 -0
phitodeep-0.1.3/LICENSE
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Apache Software License 2.0
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026, Ralph Dugue
|
|
4
|
+
|
|
5
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
you may not use this file except in compliance with the License.
|
|
7
|
+
You may obtain a copy of the License at
|
|
8
|
+
|
|
9
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
|
|
11
|
+
Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
See the License for the specific language governing permissions and
|
|
15
|
+
limitations under the License.
|
|
16
|
+
|
phitodeep-0.1.3/PKG-INFO
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: phitodeep
|
|
3
|
+
Version: 0.1.3
|
|
4
|
+
Summary: Deep learning framework built from scratch with numpy!
|
|
5
|
+
License: Apache-2.0
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Author: Ralph Dugue
|
|
8
|
+
Requires-Python: >=3.12,<4.0
|
|
9
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
14
|
+
Requires-Dist: numpy (>=2.4.4,<3.0.0)
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
|
|
17
|
+
# phitodeep
|
|
18
|
+
|
|
19
|
+
Deep learning framework built from scratch with numpy!
|
|
20
|
+
|
|
21
|
+
## Installation
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
$ pip install phitodeep
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Usage
|
|
28
|
+
MNIST quickstart:
|
|
29
|
+
```python
|
|
30
|
+
import numpy as np
|
|
31
|
+
from datasets import load_dataset
|
|
32
|
+
|
|
33
|
+
import phitodeep.loss as loss
|
|
34
|
+
import phitodeep.model as m
|
|
35
|
+
|
|
36
|
+
train_dataset = load_dataset("ylecun/mnist", split="train")
|
|
37
|
+
test_dataset = load_dataset("ylecun/mnist", split="test")
|
|
38
|
+
|
|
39
|
+
X_train = train_dataset["image"]
|
|
40
|
+
y_train = train_dataset["label"]
|
|
41
|
+
X_test = test_dataset["image"]
|
|
42
|
+
y_test = test_dataset["label"]
|
|
43
|
+
|
|
44
|
+
X_train = np.array(X_train).astype(np.float32) / 255.0
|
|
45
|
+
y_train = np.array(y_train)
|
|
46
|
+
X_test = np.array(X_test).astype(np.float32) / 255.0
|
|
47
|
+
y_test = np.array(y_test)
|
|
48
|
+
print(X_train.shape, y_train.shape)
|
|
49
|
+
|
|
50
|
+
model = (
|
|
51
|
+
m.SequentialBuilder()
|
|
52
|
+
.flatten()
|
|
53
|
+
.dense(784, 128)
|
|
54
|
+
.relu()
|
|
55
|
+
.dense(128, 10)
|
|
56
|
+
.softmax()
|
|
57
|
+
.optimizer("adam")
|
|
58
|
+
.loss(loss.CategoricalCrossEntropy())
|
|
59
|
+
.alpha(0.001)
|
|
60
|
+
.epochs(300)
|
|
61
|
+
.batch(32)
|
|
62
|
+
.build()
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
model.summary()
|
|
66
|
+
|
|
67
|
+
model.train(X_train, y_train, X_test, y_test)
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Contributing
|
|
72
|
+
|
|
73
|
+
Interested in contributing? Check out the contributing guidelines. Please note that this project is released with a Code of Conduct. By contributing to this project, you agree to abide by its terms.
|
|
74
|
+
|
|
75
|
+
## License
|
|
76
|
+
|
|
77
|
+
`phitodeep` was created by Ralph Dugue. It is licensed under the terms of the Apache License 2.0.
|
|
78
|
+
|
|
79
|
+
## Credits
|
|
80
|
+
|
|
81
|
+
`phitodeep` was created with [`cookiecutter`](https://cookiecutter.readthedocs.io/en/latest/) and the `py-pkgs-cookiecutter` [template](https://github.com/py-pkgs/py-pkgs-cookiecutter).
|
|
82
|
+
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# phitodeep
|
|
2
|
+
|
|
3
|
+
Deep learning framework built from scratch with numpy!
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
$ pip install phitodeep
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
MNIST quickstart:
|
|
13
|
+
```python
|
|
14
|
+
import numpy as np
|
|
15
|
+
from datasets import load_dataset
|
|
16
|
+
|
|
17
|
+
import phitodeep.loss as loss
|
|
18
|
+
import phitodeep.model as m
|
|
19
|
+
|
|
20
|
+
train_dataset = load_dataset("ylecun/mnist", split="train")
|
|
21
|
+
test_dataset = load_dataset("ylecun/mnist", split="test")
|
|
22
|
+
|
|
23
|
+
X_train = train_dataset["image"]
|
|
24
|
+
y_train = train_dataset["label"]
|
|
25
|
+
X_test = test_dataset["image"]
|
|
26
|
+
y_test = test_dataset["label"]
|
|
27
|
+
|
|
28
|
+
X_train = np.array(X_train).astype(np.float32) / 255.0
|
|
29
|
+
y_train = np.array(y_train)
|
|
30
|
+
X_test = np.array(X_test).astype(np.float32) / 255.0
|
|
31
|
+
y_test = np.array(y_test)
|
|
32
|
+
print(X_train.shape, y_train.shape)
|
|
33
|
+
|
|
34
|
+
model = (
|
|
35
|
+
m.SequentialBuilder()
|
|
36
|
+
.flatten()
|
|
37
|
+
.dense(784, 128)
|
|
38
|
+
.relu()
|
|
39
|
+
.dense(128, 10)
|
|
40
|
+
.softmax()
|
|
41
|
+
.optimizer("adam")
|
|
42
|
+
.loss(loss.CategoricalCrossEntropy())
|
|
43
|
+
.alpha(0.001)
|
|
44
|
+
.epochs(300)
|
|
45
|
+
.batch(32)
|
|
46
|
+
.build()
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
model.summary()
|
|
50
|
+
|
|
51
|
+
model.train(X_train, y_train, X_test, y_test)
|
|
52
|
+
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Contributing
|
|
56
|
+
|
|
57
|
+
Interested in contributing? Check out the contributing guidelines. Please note that this project is released with a Code of Conduct. By contributing to this project, you agree to abide by its terms.
|
|
58
|
+
|
|
59
|
+
## License
|
|
60
|
+
|
|
61
|
+
`phitodeep` was created by Ralph Dugue. It is licensed under the terms of the Apache License 2.0.
|
|
62
|
+
|
|
63
|
+
## Credits
|
|
64
|
+
|
|
65
|
+
`phitodeep` was created with [`cookiecutter`](https://cookiecutter.readthedocs.io/en/latest/) and the `py-pkgs-cookiecutter` [template](https://github.com/py-pkgs/py-pkgs-cookiecutter).
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "phitodeep"
|
|
3
|
+
version = "0.1.3"
|
|
4
|
+
description = "Deep learning framework built from scratch with numpy!"
|
|
5
|
+
authors = ["Ralph Dugue"]
|
|
6
|
+
license = "Apache License 2.0"
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
packages = [{include = "phitodeep", from = "src"}]
|
|
9
|
+
|
|
10
|
+
[tool.poetry.dependencies]
|
|
11
|
+
python = "^3.12"
|
|
12
|
+
numpy = "^2.4.4"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
[build-system]
|
|
16
|
+
requires = ["poetry-core>=1.0.0"]
|
|
17
|
+
build-backend = "poetry.core.masonry.api"
|
|
18
|
+
|
|
19
|
+
[dependency-groups]
|
|
20
|
+
dev = [
|
|
21
|
+
"ipykernel (>=7.2.0,<8.0.0)",
|
|
22
|
+
"myst-nb (>=1.4.0,<2.0.0)",
|
|
23
|
+
"sphinx-autoapi (>=3.8.0,<4.0.0)",
|
|
24
|
+
"sphinx-rtd-theme (>=3.1.0,<4.0.0)",
|
|
25
|
+
"pillow (>=12.2.0,<13.0.0)",
|
|
26
|
+
"datasets (>=4.8.4,<5.0.0)"
|
|
27
|
+
]
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from .base import Layer
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ReLu(Layer):
    """Rectified linear unit: max(0, x), applied elementwise."""

    def __init__(self) -> None:
        super().__init__("relu")

    def forward(self, X):
        # Keep the raw input so backward can recover the active mask.
        self.cache["X"] = X
        return np.maximum(0, X)

    def backward(self, dL_dZ):
        """
        Backpropagate through ReLU activation.
        ReLU derivative: 1 if X > 0, else 0
        """
        active = (self.cache["X"] > 0).astype(float)
        return dL_dZ * active
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Sigmoid(Layer):
    """Logistic sigmoid activation: 1 / (1 + exp(-X))."""

    def __init__(self) -> None:
        super().__init__("sigmoid")

    def forward(self, X):
        """
        Numerically stable sigmoid.

        The naive form 1 / (1 + np.exp(-X)) overflows (RuntimeWarning, inf
        intermediate) for large negative X.  Since exp(-|X|) is always <= 1,
        both branches below are safe for inputs of any magnitude:
          X >= 0:  1 / (1 + e^-X)
          X <  0:  e^X / (1 + e^X)
        """
        self.cache["X"] = X
        e = np.exp(-np.abs(X))
        self.cache["Z"] = np.where(X >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
        return self.cache["Z"]

    def backward(self, dL_dZ):
        """
        Backpropagate through Sigmoid activation.
        Sigmoid derivative: sigmoid(Z) * (1 - sigmoid(Z))
        """
        Z = self.cache["Z"]
        dL_dX = dL_dZ * Z * (1 - Z)
        return dL_dX
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class Tanh(Layer):
    """Hyperbolic tangent activation."""

    def __init__(self) -> None:
        super().__init__("tanh")

    def forward(self, X):
        """
        Compute tanh(X) via ``np.tanh``.

        The previous form (e^X - e^-X) / (e^X + e^-X) overflows for
        |X| >~ 710 and yields inf/inf = nan; np.tanh saturates cleanly to
        +/-1 instead and is identical elsewhere.
        """
        self.cache["X"] = X
        self.cache["Z"] = np.tanh(X)
        return self.cache["Z"]

    def backward(self, dL_dZ):
        """
        Backpropagate through Tanh activation.
        Tanh derivative: 1 - tanh(Z)^2
        """
        Z = self.cache["Z"]
        dL_dX = dL_dZ * (1 - Z**2)
        return dL_dX
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class Softmax(Layer):
    """Softmax activation with max-subtraction for numerical stability."""

    def __init__(self) -> None:
        super().__init__("softmax")

    def forward(self, X):
        self.cache["X"] = X
        # Batched input normalizes along axis 1; a flat vector uses the
        # whole array.
        reduce_axis = 1 if X.ndim >= 2 else None

        shifted = X - np.max(X, axis=reduce_axis, keepdims=True)
        exps = np.exp(shifted)
        probs = exps / np.sum(exps, axis=reduce_axis, keepdims=True)

        self.cache["Z"] = probs
        return probs

    def backward(self, dL_dZ):
        """
        Backpropagate through Softmax activation.
        When paired with CategoricalCrossEntropy, the combined gradient
        (y_pred - one_hot(y_true)) / N is computed entirely in the loss,
        so this layer is a straight pass-through.
        """
        return dL_dZ
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class ELU(Layer):
    """Exponential Linear Unit: X for X > 0, alpha * (exp(X) - 1) otherwise."""

    def __init__(self, alpha=1.0) -> None:
        super().__init__("elu")
        self.alpha_activation = alpha

    def forward(self, X):
        self.cache["X"] = X
        negative_branch = self.alpha_activation * (np.exp(X) - 1)
        out = np.where(X > 0, X, negative_branch)
        self.cache["Z"] = out
        return out

    def backward(self, dL_dZ):
        """
        Backpropagate through ELU activation.
        ELU derivative: 1 if X > 0, else alpha * exp(X)
        """
        X = self.cache["X"]
        slope = np.where(X > 0, 1.0, self.alpha_activation * np.exp(X))
        return dL_dZ * slope
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Layer:
    """
    Abstract base class for network layers.

    Subclasses override ``forward`` and ``backward``.  ``cache`` is a
    scratch dict for values the backward pass needs; ``grads`` exposes
    parameter gradients for the optimizer to consume.
    """

    def __init__(self, name) -> None:
        self.name = name
        self.cache = {}  # forward-pass values kept for backprop
        self.grads = {}  # parameter gradients filled in by backward

    def forward(self, X):
        """Forward pass; subclasses must override."""
        raise NotImplementedError(f"Block '{self.name}' must implement forward method")

    def backward(self, dL_dZ):
        """
        Backward pass through the block.

        Args:
            dL_dZ: gradient of loss w.r.t. output of this block

        Returns:
            dL_dX: gradient of loss w.r.t. input (to pass to previous layer)
        """
        raise NotImplementedError(f"Block '{self.name}' must implement backward method")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class Flatten(Layer):
    """Collapse every non-batch dimension into a single axis."""

    def __init__(self):
        super().__init__("flatten")

    def forward(self, X):
        """
        X: (batch_size, *dims) -> (batch_size, prod(dims)).
        The input is cached so backward can restore its shape.
        """
        self.cache["X"] = X
        batch = X.shape[0]
        return X.reshape(batch, -1)

    def backward(self, dL_dZ):
        """
        Undo the flattening: reshape the incoming gradient back to the
        cached input's shape.
        """
        original_shape = self.cache["X"].shape
        return dL_dZ.reshape(original_shape)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class Dense(Layer):
    """Fully connected layer: Z = X @ W + b, with He-initialized weights."""

    def __init__(self, input_size, output_size):
        super().__init__("dense")
        self.grads = {}
        self.input_size = input_size
        self.output_size = output_size
        # He initialization: scale the standard normal by sqrt(2 / fan_in).
        self.W = np.random.randn(input_size, output_size) * np.sqrt(2.0 / input_size)
        self.b = np.zeros(output_size)

    def forward(self, X):
        """
        X: (batch_size, input_size) -> (batch_size, output_size)
        """
        self.cache["X"] = X
        return X @ self.W + self.b

    def backward(self, dL_dZ):
        """
        Backpropagate through Dense layer.

        Args:
            dL_dZ: (batch_size, output_size) - gradient of loss w.r.t. output

        Returns:
            dL_dX: (batch_size, input_size) - gradient to pass to previous layer

        NOTE(review): the gradients are divided by the batch size here even
        though the loss gradients in loss.py already average over the batch
        (e.g. CategoricalCrossEntropy returns grad / N).  Confirm this double
        scaling is intended before changing either side.
        """
        X = self.cache["X"]
        batch = X.shape[0]

        # dL/dW = (1/m) * X^T @ dL_dZ
        self.grads["W"] = X.T @ dL_dZ / batch
        # dL/db = (1/m) * column-sum of dL_dZ
        self.grads["b"] = dL_dZ.sum(axis=0) / batch

        # dL/dX = dL_dZ @ W^T
        return dL_dZ @ self.W.T
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class LossBase:
    """Common interface for losses: a scalar value and its gradient."""

    def __init__(self, name) -> None:
        self.name = name

    def loss_func(self, y_pred, y_true):
        """Return the scalar loss; subclasses must override."""
        raise NotImplementedError(f"{self.name} must implement the loss_func method.")

    def loss_gradient(self, y_pred, y_true):
        """Return dL/dy_pred; subclasses must override."""
        raise NotImplementedError(
            f"{self.name} must implement the loss_gradient method."
        )
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class MeanSquaredError(LossBase):
    """Mean squared error: mean((y_pred - y_true)^2)."""

    def __init__(self) -> None:
        super().__init__("MeanSquaredError")

    def loss_func(self, y_pred, y_true):
        diff = y_pred - y_true
        return np.mean(diff**2)

    def loss_gradient(self, y_pred, y_true):
        # d/dy_pred of mean((y_pred - y_true)^2) = 2 * (y_pred - y_true) / m
        batch = len(y_true)
        return 2 * (y_pred - y_true) / batch
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class CategoricalCrossEntropy(LossBase):
    """Cross-entropy for integer class labels against softmax probabilities."""

    def __init__(self) -> None:
        super().__init__("CategoricalCrossEntropy")

    def loss_func(self, y_pred, y_true):
        # Average negative log-probability of each sample's correct class;
        # 1e-8 guards log(0).
        rows = np.arange(len(y_true))
        picked = y_pred[rows, y_true]
        return -np.mean(np.log(picked + 1e-8))

    def loss_gradient(self, y_pred, y_true):
        # Fused Softmax + CCE gradient: (y_pred - one_hot(y_true)) / N
        rows = np.arange(len(y_true))
        grad = np.array(y_pred, copy=True)
        grad[rows, y_true] -= 1.0
        return grad / len(y_true)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class BinaryCrossEntropy(LossBase):
    """Binary cross-entropy on probabilities (1e-8 guards against log(0))."""

    def __init__(self) -> None:
        super().__init__("BinaryCrossEntropy")

    def loss_func(self, y_pred, y_true):
        positive_term = y_true * np.log(y_pred + 1e-8)
        negative_term = (1 - y_true) * np.log(1 - y_pred + 1e-8)
        return -np.mean(positive_term + negative_term)

    def loss_gradient(self, y_pred, y_true):
        # Analytic BCE gradient; the epsilon keeps the denominator nonzero
        # at y_pred == 0 or 1.
        denominator = y_pred * (1 - y_pred) + 1e-8
        return (y_pred - y_true) / denominator
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
from . import loss as ls
|
|
2
|
+
from . import optimization
|
|
3
|
+
from .layers import activation as a
|
|
4
|
+
from .layers import base as b
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Sequential:
|
|
8
|
+
def __init__(
|
|
9
|
+
self,
|
|
10
|
+
*layers,
|
|
11
|
+
alpha=0.01,
|
|
12
|
+
optimizer="adam",
|
|
13
|
+
batch_size=1,
|
|
14
|
+
epochs=1000,
|
|
15
|
+
loss_class=ls.MeanSquaredError(),
|
|
16
|
+
) -> None:
|
|
17
|
+
"""
|
|
18
|
+
Initialize with variable number of layers.
|
|
19
|
+
|
|
20
|
+
Usage:
|
|
21
|
+
model = Sequential(
|
|
22
|
+
b.Dense(256, 128),
|
|
23
|
+
a.ReLu(),
|
|
24
|
+
b.Dense(128, 1),
|
|
25
|
+
a.Sigmoid()
|
|
26
|
+
)
|
|
27
|
+
"""
|
|
28
|
+
self.layers = list(layers)
|
|
29
|
+
self.alpha = alpha
|
|
30
|
+
self.optimizer = optimizer
|
|
31
|
+
self.batch_size = batch_size
|
|
32
|
+
self.epochs = epochs
|
|
33
|
+
self.loss_class = loss_class
|
|
34
|
+
|
|
35
|
+
def add(self, layer) -> None:
|
|
36
|
+
"""Add a layer to the network."""
|
|
37
|
+
self.layers.append(layer)
|
|
38
|
+
|
|
39
|
+
def setoptimizer(self, name):
|
|
40
|
+
self.optimizer = name
|
|
41
|
+
|
|
42
|
+
def setbatchsize(self, num):
|
|
43
|
+
self.batch_size = num
|
|
44
|
+
|
|
45
|
+
def setloss(self, loss_class):
|
|
46
|
+
self.loss_class = loss_class
|
|
47
|
+
|
|
48
|
+
def train(self, X, y, X_test, y_test):
|
|
49
|
+
match self.optimizer:
|
|
50
|
+
case "sgd":
|
|
51
|
+
optimizer = optimization.SGD(alpha=self.alpha)
|
|
52
|
+
case "adam":
|
|
53
|
+
optimizer = optimization.Adam(alpha=self.alpha)
|
|
54
|
+
case _:
|
|
55
|
+
raise ValueError(f"{self.optimizer} is not a valid optimizer.")
|
|
56
|
+
|
|
57
|
+
losses = optimization.train_loop(
|
|
58
|
+
model=self,
|
|
59
|
+
X=X,
|
|
60
|
+
y=y,
|
|
61
|
+
X_test=X_test,
|
|
62
|
+
y_test=y_test,
|
|
63
|
+
optimizer=optimizer,
|
|
64
|
+
loss_class=self.loss_class,
|
|
65
|
+
batch_size=self.batch_size,
|
|
66
|
+
epochs=self.epochs,
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
print("Training complete.")
|
|
70
|
+
print("-" * 60)
|
|
71
|
+
print(f"Starting Training Loss: {losses[0][0]:.4f} | Starting Test Loss: {losses[0][1]:.4f}")
|
|
72
|
+
print(f"Final Training Loss: {losses[-1][0]:.4f} | Final Test Loss: {losses[-1][1]:.4f}")
|
|
73
|
+
print(f"Training Loss Improvement: {losses[0][0] - losses[-1][0]:.4f} | Test Loss Improvement: {losses[0][1] - losses[-1][1]:.4f}")
|
|
74
|
+
print("-" * 60)
|
|
75
|
+
return losses
|
|
76
|
+
|
|
77
|
+
def predict(self, X):
|
|
78
|
+
"""
|
|
79
|
+
Forward pass through all layers.
|
|
80
|
+
|
|
81
|
+
Args:
|
|
82
|
+
X: input array
|
|
83
|
+
|
|
84
|
+
Returns:
|
|
85
|
+
output after passing through all layers
|
|
86
|
+
"""
|
|
87
|
+
output = X
|
|
88
|
+
for layer in self.layers:
|
|
89
|
+
output = layer.forward(output)
|
|
90
|
+
return output
|
|
91
|
+
|
|
92
|
+
def backward(self, gradient):
|
|
93
|
+
"""
|
|
94
|
+
Backward pass through all layers.
|
|
95
|
+
|
|
96
|
+
Args:
|
|
97
|
+
gradient: dL/dY from loss function (shape: batch_size x output_size)
|
|
98
|
+
|
|
99
|
+
Propagates gradient backwards through all layers in reverse order.
|
|
100
|
+
Each layer computes its parameter gradients, updates parameters,
|
|
101
|
+
and returns the gradient for the previous layer.
|
|
102
|
+
"""
|
|
103
|
+
# Start with gradient from loss and propagate backwards
|
|
104
|
+
current_gradient = gradient
|
|
105
|
+
|
|
106
|
+
# Iterate through layers in reverse order
|
|
107
|
+
for layer in reversed(self.layers):
|
|
108
|
+
# Pass gradient through layer and get gradient for previous layer
|
|
109
|
+
current_gradient = layer.backward(current_gradient)
|
|
110
|
+
|
|
111
|
+
def __call__(self, X):
|
|
112
|
+
"""Allow model(X) syntax."""
|
|
113
|
+
return self.predict(X)
|
|
114
|
+
|
|
115
|
+
def summary(self):
|
|
116
|
+
"""Print model architecture."""
|
|
117
|
+
print("Model Summary:")
|
|
118
|
+
print("-" * 60)
|
|
119
|
+
print(
|
|
120
|
+
f"Optimizer: {self.optimizer} | Learning Rate: {self.alpha} | Batch Size: {self.batch_size} \nEpochs: {self.epochs} | Loss: {self.loss_class.name}"
|
|
121
|
+
)
|
|
122
|
+
print("-" * 60)
|
|
123
|
+
for i, layer in enumerate(self.layers):
|
|
124
|
+
if isinstance(layer, b.Dense):
|
|
125
|
+
print(
|
|
126
|
+
f"Layer {i}: {layer.name.upper():<10} | Input: {layer.input_size:<5} Output: {layer.output_size:<5}"
|
|
127
|
+
)
|
|
128
|
+
else:
|
|
129
|
+
print(f"Layer {i}: {layer.name.upper():<10}")
|
|
130
|
+
print("-" * 60)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class SequentialBuilder:
    """Fluent API for building Sequential models."""

    def __init__(self):
        self.layers = []
        # Default learning rate matches Sequential's own default (0.01).
        # The previous default of 1 was inconsistent with Sequential and far
        # too large for typical training.
        self.alpha_value = 0.01
        self.optimizer_name = "sgd"
        self.batch_size = 1
        self.epochs_value = 1000
        self.loss_class = ls.MeanSquaredError()

    def flatten(self):
        """Add a Flatten layer."""
        self.layers.append(b.Flatten())
        return self

    def dense(self, input_size, output_size):
        """Add a Dense layer."""
        self.layers.append(b.Dense(input_size, output_size))
        return self

    def relu(self):
        """Add a ReLU activation."""
        self.layers.append(a.ReLu())
        return self

    def sigmoid(self):
        """Add a Sigmoid activation."""
        self.layers.append(a.Sigmoid())
        return self

    def tanh(self):
        """Add a Tanh activation."""
        self.layers.append(a.Tanh())
        return self

    def softmax(self):
        """Add a Softmax activation."""
        self.layers.append(a.Softmax())
        return self

    def elu(self, alpha_activation=1.0):
        """Add an ELU activation."""
        self.layers.append(a.ELU(alpha_activation))
        return self

    def optimizer(self, name):
        """Set the optimizer ("sgd" or "adam")."""
        self.optimizer_name = name
        return self

    def batch(self, num):
        """Set the batch size."""
        self.batch_size = num
        return self

    def alpha(self, num):
        """Set the learning rate."""
        self.alpha_value = num
        return self

    def epochs(self, num):
        """Set the number of epochs."""
        self.epochs_value = num
        return self

    def loss(self, loss_class):
        """Set the loss function."""
        self.loss_class = loss_class
        return self

    def build(self):
        """Build and return the Sequential model."""
        return Sequential(
            *self.layers,
            alpha=self.alpha_value,
            optimizer=self.optimizer_name,
            batch_size=self.batch_size,
            epochs=self.epochs_value,
            loss_class=self.loss_class,
        )
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Optimizer:
    """
    Base class for parameter-update strategies.

    Subclasses receive the model's layer list and apply one update per call
    using each layer's ``grads`` dictionary.
    """

    def step(self, layers):
        """Apply one update to every trainable layer; must be overridden."""
        raise NotImplementedError
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class SGD(Optimizer):
    """Vanilla stochastic gradient descent: param -= alpha * grad."""

    def __init__(self, alpha=0.01):
        self.alpha = alpha

    def step(self, layers):
        # Layers with an empty grads dict (activations, flatten) are skipped.
        for trainable in (lyr for lyr in layers if lyr.grads):
            trainable.W -= self.alpha * trainable.grads["W"]
            trainable.b -= self.alpha * trainable.grads["b"]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Adam(Optimizer):
    """Adam optimizer with bias-corrected first/second moment estimates."""

    def __init__(self, alpha=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.alpha = alpha
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.t = 0   # step counter, shared by all parameters
        self.m = {}  # first-moment estimates keyed by (id(layer), param name)
        self.v = {}  # second-moment estimates, same keys

    def step(self, layers):
        self.t += 1
        # Bias-correction denominators are the same for every parameter at
        # a given step, so compute them once.
        correction1 = 1 - self.beta1**self.t
        correction2 = 1 - self.beta2**self.t

        for layer in layers:
            if not layer.grads:
                continue
            for param_name, g in layer.grads.items():
                key = (id(layer), param_name)

                if key not in self.m:
                    # First time we see this parameter: zero-initialize moments.
                    self.m[key] = np.zeros_like(g)
                    self.v[key] = np.zeros_like(g)

                self.m[key] = self.beta1 * self.m[key] + (1 - self.beta1) * g
                self.v[key] = self.beta2 * self.v[key] + (1 - self.beta2) * g**2

                m_hat = self.m[key] / correction1
                v_hat = self.v[key] / correction2

                # In-place update so the layer's array object is preserved.
                param = getattr(layer, param_name)
                param -= self.alpha * m_hat / (np.sqrt(v_hat) + self.epsilon)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def train_loop(
    model, X, y, X_test, y_test, loss_class, optimizer, epochs=1000, batch_size=1
):
    """
    Run mini-batch training and record per-epoch train/test losses.

    Args:
        model: object exposing ``predict``, ``backward`` and ``layers``.
        X, y: training inputs and labels (indexable by integer arrays).
        X_test, y_test: held-out inputs and labels.
        loss_class: loss object with ``loss_func`` and ``loss_gradient``.
        optimizer: optimizer whose ``step(model.layers)`` updates parameters.
        epochs: number of epochs; each runs len(X) // batch_size steps.
        batch_size: mini-batch size.

    Returns:
        list of (train_loss, test_loss) tuples, one per epoch.
    """
    losses = []
    steps_per_epoch = len(X) // batch_size  # hoisted: invariant across epochs

    for epoch in range(epochs):
        for _ in range(steps_per_epoch):
            # Batches are sampled with replacement from the global NumPy RNG,
            # so an "epoch" here is a step count, not a guaranteed full pass.
            indices = np.random.randint(0, len(X), batch_size)
            X_batch = X[indices]
            y_batch = y[indices]

            # Forward pass, then backprop the loss gradient and update.
            # (The per-batch loss value used to be computed here and then
            # discarded — dead work, now removed.)
            y_pred = model.predict(X_batch)
            dy = loss_class.loss_gradient(y_pred, y_batch)
            model.backward(dy)
            optimizer.step(model.layers)

        # Full-dataset losses for monitoring.
        y_pred = model.predict(X)
        loss = loss_class.loss_func(y_pred, y)

        y_pred_test = model.predict(X_test)
        loss_test = loss_class.loss_func(y_pred_test, y_test)

        losses.append((loss, loss_test))

        if epoch % 10 == 0:
            print(f"Epoch {epoch}, Loss: {loss:.4f}, Test Loss: {loss_test:.4f}")

    return losses
|