matrixflowers 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
+ Metadata-Version: 2.4
2
+ Name: matrixflowers
3
+ Version: 0.1.0
4
+ Summary: A lightweight deep learning framework built from scratch
5
+ License: MIT
6
+ Requires-Python: >=3.10
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: numpy>=1.24
9
+ Provides-Extra: dev
10
+ Requires-Dist: pytest>=7; extra == "dev"
11
+
12
+ # matrixflowers
13
+
14
+ A lightweight deep learning framework built from scratch.
@@ -0,0 +1,3 @@
1
+ # matrixflowers
2
+
3
+ A lightweight deep learning framework built from scratch.
@@ -0,0 +1,14 @@
1
+ Metadata-Version: 2.4
2
+ Name: matrixflowers
3
+ Version: 0.1.0
4
+ Summary: A lightweight deep learning framework built from scratch
5
+ License: MIT
6
+ Requires-Python: >=3.10
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: numpy>=1.24
9
+ Provides-Extra: dev
10
+ Requires-Dist: pytest>=7; extra == "dev"
11
+
12
+ # matrixflowers
13
+
14
+ A lightweight deep learning framework built from scratch.
@@ -0,0 +1,24 @@
1
+ README.md
2
+ pyproject.toml
3
+ matrixflowers.egg-info/PKG-INFO
4
+ matrixflowers.egg-info/SOURCES.txt
5
+ matrixflowers.egg-info/dependency_links.txt
6
+ matrixflowers.egg-info/requires.txt
7
+ matrixflowers.egg-info/top_level.txt
8
+ model/__init__.py
9
+ model/linears.py
10
+ model/loss.py
11
+ model/optimizers.py
12
+ model/parameter.py
13
+ nn/__init__.py
14
+ nn/activations.py
15
+ nn/layers.py
16
+ tools/activation.py
17
+ train/__init__.py
18
+ train/losses.py
19
+ train/optimizers.py
20
+ train/tape.py
21
+ train/trainer.py
22
+ type/matrix/py_implementation/bench.py
23
+ type/matrix/py_implementation/matrix.py
24
+ type/matrix/py_implementation/test.py
@@ -0,0 +1,4 @@
1
+ numpy>=1.24
2
+
3
+ [dev]
4
+ pytest>=7
@@ -0,0 +1,5 @@
1
+ model
2
+ nn
3
+ tools
4
+ train
5
+ type
@@ -0,0 +1,4 @@
1
+ from .parameter import Parameter
2
+ from .optimizers import SGD, Adam
3
+ from .loss import mse, mse_grad, cross_entropy, cross_entropy_grad, binary_cross_entropy, binary_cross_entropy_grad
4
+ from .linears import Linear
@@ -0,0 +1,54 @@
1
+ import numpy as np
2
+ from .parameter import Parameter
3
+ from type.matrix.py_implementation.matrix import Matrix
4
+
5
+
6
class Linear:
    """
    Dense affine layer computing ``y = x @ W + b``.

    Owns a weight and a bias `Parameter` and implements both the forward
    pass and the hand-written backward pass used for gradient-based training.
    """

    def __init__(self, in_features: int, out_features: int):
        """
        Create the layer's parameters.

        Args:
            in_features: Number of input columns (rows of W).
            out_features: Number of output columns (columns of W).
        """
        # He initialization: standard-normal draws scaled by sqrt(2 / fan_in).
        he_std = np.sqrt(2.0 / in_features)
        initial_weights = np.random.randn(in_features, out_features).astype(np.float32) * he_std
        self.W = Parameter(initial_weights)
        self.b = Parameter(np.zeros((1, out_features), dtype=np.float32))

        # Input of the most recent forward() call; backward() needs it for dL/dW.
        self._x_cache: np.ndarray | None = None

    def forward(self, x: Matrix) -> Matrix:
        """Apply the affine map to ``x.data`` and remember the input for backward()."""
        inputs = x.data
        self._x_cache = inputs
        return Matrix(inputs @ self.W.data + self.b.data)

    def backward(self, grad_output: Matrix) -> Matrix:
        """
        Propagate a gradient through the layer.

        Accumulates (adds) into ``W.grad`` and ``b.grad``; it does not reset them.

        Args:
            grad_output: Gradient w.r.t. this layer's output (dL/dy),
                expected to be laid out as (batch, out_features).

        Returns:
            Gradient w.r.t. this layer's input (dL/dx).

        Raises:
            RuntimeError: If no forward() call has cached an input yet.
        """
        cached_input = self._x_cache
        if cached_input is None:
            raise RuntimeError("backward() called before forward(). Run forward() first.")

        upstream = grad_output.data

        # dL/dW = x^T @ dL/dy ; dL/db = column-wise sum of dL/dy over the batch.
        self.W.grad += cached_input.T @ upstream
        self.b.grad += upstream.sum(axis=0, keepdims=True)

        # dL/dx = dL/dy @ W^T
        return Matrix(upstream @ self.W.data.T)

    def parameters(self) -> list[Parameter]:
        """Return the trainable parameters as ``[W, b]``."""
        return [self.W, self.b]

    def __repr__(self) -> str:
        weight_shape = self.W.data.shape
        return f"Linear(in_features={weight_shape[0]}, out_features={weight_shape[1]})"
@@ -0,0 +1,76 @@
1
+ import numpy as np
2
+ from type.matrix.py_implementation.matrix import Matrix
3
+
4
+
5
def _validate_shapes(pred: Matrix, target: Matrix) -> None:
    """
    Ensure predictions and targets have identical array shapes.

    Raises:
        ValueError: If ``pred.data.shape`` differs from ``target.data.shape``.
    """
    if pred.data.shape == target.data.shape:
        return
    raise ValueError(
        f"Shape mismatch: pred={pred.data.shape}, target={target.data.shape}"
    )
10
+
11
+
12
+ # ── MSE ──────────────────────────────────────────────────────────────────────
13
+
14
def mse(pred: Matrix, target: Matrix) -> float:
    """
    Mean Squared Error loss.

    Returns the mean, over every element, of the squared difference between
    ``pred.data`` and ``target.data``.

    Raises:
        ValueError: If the two shapes differ.
    """
    _validate_shapes(pred, target)
    residual = pred.data - target.data
    return float(np.square(residual).mean())
19
+
20
+
21
def mse_grad(pred: Matrix, target: Matrix) -> Matrix:
    """
    Gradient of MSE w.r.t. the predictions: ``2 * (pred - target) / N``.

    ``N`` is the total number of elements in ``pred.data``. The result is
    returned as a float32 `Matrix`.

    Raises:
        ValueError: If the two shapes differ.
    """
    _validate_shapes(pred, target)
    residual = pred.data - target.data
    scale = 2.0 / pred.data.size
    return Matrix((scale * residual).astype(np.float32))
27
+
28
+
29
+ # ── Cross Entropy ─────────────────────────────────────────────────────────────
30
+
31
def cross_entropy(pred: Matrix, target: Matrix) -> float:
    """
    Categorical cross-entropy loss, averaged over the first axis.

    Expects `pred` to hold softmax probabilities and `target` to be one-hot
    encoded. Probabilities are clipped away from 0 before the logarithm.

    Raises:
        ValueError: If the two shapes differ.
    """
    _validate_shapes(pred, target)
    eps = 1e-9
    probs = np.clip(pred.data, eps, 1.0 - eps)
    log_likelihood = np.sum(target.data * np.log(probs))
    n_samples = pred.data.shape[0]
    return float(-log_likelihood / n_samples)
41
+
42
+
43
def cross_entropy_grad(pred: Matrix, target: Matrix) -> Matrix:
    """
    Combined softmax + cross-entropy gradient w.r.t. the logits.

    Computes ``(pred - target) / batch_size`` where ``batch_size`` is the
    length of the first axis, and returns it as a float32 `Matrix`.

    Raises:
        ValueError: If the two shapes differ.
    """
    _validate_shapes(pred, target)
    n_samples = pred.data.shape[0]
    delta = pred.data - target.data
    return Matrix((delta / n_samples).astype(np.float32))
52
+
53
+
54
+ # ── Binary Cross Entropy ──────────────────────────────────────────────────────
55
+
56
def binary_cross_entropy(pred: Matrix, target: Matrix) -> float:
    """
    Binary cross-entropy loss for sigmoid outputs, averaged over all elements.

    Expects prediction values in (0, 1). Predictions are clipped to
    ``[eps, 1 - eps]`` before taking logarithms so the result stays finite
    even when a prediction has saturated to exactly 0 or 1.

    Raises:
        ValueError: If the two shapes differ.
    """
    _validate_shapes(pred, target)
    eps = 1e-9
    # Clip in float64: in float32, 1.0 - 1e-9 rounds to exactly 1.0, so a
    # saturated prediction would survive the clip, log(1 - p) would be -inf
    # and 0 * -inf would turn the whole loss into NaN.
    p = np.clip(np.asarray(pred.data, dtype=np.float64), eps, 1.0 - eps)
    t = target.data
    return float(-np.mean(t * np.log(p) + (1.0 - t) * np.log(1.0 - p)))
66
+
67
+
68
def binary_cross_entropy_grad(pred: Matrix, target: Matrix) -> Matrix:
    """
    Gradient of binary cross-entropy w.r.t. the predictions.

    Computes ``(-(t / p) + (1 - t) / (1 - p)) / N`` where ``N`` is the total
    number of elements, with ``p`` clipped to ``[eps, 1 - eps]``. The result
    is returned as a float32 `Matrix`.

    Raises:
        ValueError: If the two shapes differ.
    """
    _validate_shapes(pred, target)
    eps = 1e-9
    # Clip in float64: in float32, 1.0 - 1e-9 rounds to exactly 1.0, which
    # would leave 1 - p == 0 for saturated predictions and produce inf/NaN
    # in the (1 - t) / (1 - p) term.
    p = np.clip(np.asarray(pred.data, dtype=np.float64), eps, 1.0 - eps)
    t = target.data
    n = pred.data.size
    grad = (-(t / p) + (1.0 - t) / (1.0 - p)) / n
    return Matrix(grad.astype(np.float32))
@@ -0,0 +1,102 @@
1
+ import numpy as np
2
+ from .parameter import Parameter
3
+
4
+
5
class SGD:
    """
    Stochastic Gradient Descent, optionally with momentum.

    Without momentum each parameter is updated as::

        p = p - lr * grad

    With a non-zero momentum a per-parameter velocity buffer is kept::

        v = momentum * v - lr * grad
        p = p + v
    """

    def __init__(self, parameters: list[Parameter], lr: float = 0.01, momentum: float = 0.0):
        self.parameters = parameters
        self.lr = lr
        self.momentum = momentum
        # One zero-initialized velocity buffer per parameter, same shape as its data.
        self._velocity = [np.zeros_like(param.data) for param in parameters]

    def step(self) -> None:
        """Apply one in-place update to every parameter using its current gradient."""
        paired = zip(self.parameters, self._velocity)

        if not self.momentum:
            for param, _ in paired:
                param.data -= self.lr * param.grad
            return

        for param, velocity in paired:
            # Slice assignment updates the stored buffer in place.
            velocity[:] = self.momentum * velocity - self.lr * param.grad
            param.data += velocity

    def zero_grad(self) -> None:
        """Reset the gradient of every tracked parameter."""
        for param in self.parameters:
            param.zero_grad()

    def __repr__(self) -> str:
        return f"SGD(lr={self.lr}, momentum={self.momentum})"
37
+
38
+
39
class Adam:
    """
    Adam optimizer (Adaptive Moment Estimation).

    Reference: Kingma & Ba, 2015 - https://arxiv.org/abs/1412.6980

    Per step ``t`` and per parameter::

        m     = beta1 * m + (1 - beta1) * grad        # first moment
        v     = beta2 * v + (1 - beta2) * grad ** 2   # second moment
        m_hat = m / (1 - beta1 ** t)                  # bias correction
        v_hat = v / (1 - beta2 ** t)
        p     = p - lr * m_hat / (sqrt(v_hat) + eps)

    When ``weight_decay`` is non-zero, ``weight_decay * p`` is added to the
    gradient before the moment updates.
    """

    def __init__(
        self,
        parameters: list[Parameter],
        lr: float = 1e-3,
        beta1: float = 0.9,
        beta2: float = 0.999,
        eps: float = 1e-8,
        weight_decay: float = 0.0,
    ):
        self.parameters = parameters
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.eps = eps
        self.weight_decay = weight_decay

        # Step counter plus zero-initialized first/second moment buffers.
        self._t = 0
        self._m = [np.zeros_like(param.data) for param in parameters]
        self._v = [np.zeros_like(param.data) for param in parameters]

    def step(self) -> None:
        """Advance the step counter and update every parameter in place."""
        self._t += 1
        step_count = self._t
        beta1, beta2 = self.beta1, self.beta2

        # Bias-correction denominators are shared by all parameters this step.
        correction1 = 1.0 - beta1 ** step_count
        correction2 = 1.0 - beta2 ** step_count

        for param, first, second in zip(self.parameters, self._m, self._v):
            grad = param.grad
            if self.weight_decay:
                # Builds a new array, so param.grad itself is left untouched.
                grad = grad + self.weight_decay * param.data

            # Slice assignment keeps the moment buffers updated in place.
            first[:] = beta1 * first + (1.0 - beta1) * grad
            second[:] = beta2 * second + (1.0 - beta2) * (grad ** 2)

            first_hat = first / correction1
            second_hat = second / correction2
            param.data -= self.lr * first_hat / (np.sqrt(second_hat) + self.eps)

    def zero_grad(self) -> None:
        """Reset the gradient of every tracked parameter."""
        for param in self.parameters:
            param.zero_grad()

    def __repr__(self) -> str:
        return (
            f"Adam(lr={self.lr}, beta1={self.beta1}, "
            f"beta2={self.beta2}, eps={self.eps})"
        )
@@ -0,0 +1,17 @@
1
+ import numpy as np
2
+
3
+
4
class Parameter:
    """
    Trainable value: a float32 numpy array paired with a gradient buffer.

    ``data`` holds a float32 copy of the values passed in; ``grad`` is a
    zero-filled array of the same shape and dtype.
    """

    def __init__(self, data):
        values = np.array(data, dtype=np.float32)
        self.data = values
        self.grad = np.zeros_like(values)

    def zero_grad(self):
        """Overwrite the existing gradient buffer with zeros (in place)."""
        self.grad[...] = 0.0

    def __repr__(self):
        return f"Parameter(shape={self.data.shape}, dtype={self.data.dtype})"
@@ -0,0 +1,2 @@
1
+ from .layers import Dense, Dropout, Sequential
2
+ from .activations import relu, sigmoid, tanh, softmax, leaky_relu, elu, linear
@@ -0,0 +1,159 @@
1
+ """
2
+ matrixflowers.nn.activations
3
+ ======================
4
+ Funções de ativação que funcionam com Tensor (autograd) E com Matrix (legado).
5
+
6
+ As funções originais de tools/activation.py são preservadas como
7
+ *_matrix(x) para compatibilidade. As versões padrão aqui aceitam Tensor
8
+ e propagam gradientes automaticamente.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import numpy as np
14
+ from tensor import Tensor
15
+ from type.matrix.py_implementation.matrix import Matrix
16
+
17
+
18
+ # ------------------------------------------------------------------
19
+ # Helpers
20
+ # ------------------------------------------------------------------
21
+
22
def _to_tensor(x) -> Tensor:
    """
    Coerce the input to a `Tensor`.

    A `Tensor` is returned unchanged, a `Matrix` is wrapped with
    ``requires_grad=False``, and anything else is first converted to a
    float32 numpy array and then wrapped.
    """
    if not isinstance(x, (Tensor, Matrix)):
        return Tensor(np.array(x, dtype=np.float32))
    if isinstance(x, Tensor):
        return x
    return Tensor(x, requires_grad=False)
28
+
29
+
30
+ # ------------------------------------------------------------------
31
+ # Ativações com autograd
32
+ # ------------------------------------------------------------------
33
+
34
def relu(x: Tensor | Matrix) -> Tensor:
    """
    ReLU: ``max(0, x)`` element-wise.

    The backward function passes the upstream gradient through only where
    the input was strictly positive.
    """
    t = _to_tensor(x)
    mask = (t.data > 0).astype(np.float32)
    # np.maximum instead of data * mask: multiplying by a 0/1 mask turns
    # -inf into NaN (-inf * 0), whereas ReLU of any negative input must be 0.
    # NaN inputs still propagate as NaN, as before.
    out = Tensor(np.maximum(t.data, 0), requires_grad=t.requires_grad, _parents=(t,))

    def _bwd(g):
        t._accumulate_grad(g * mask)

    out._backward_fn = _bwd
    return out
45
+
46
def manu(x: Tensor | Matrix) -> Tensor:
    """
    Activation ``f(x) = x * sin(x)``.

    The backward function applies ``f'(x) = sin(x) + x * cos(x)`` to the
    upstream gradient.
    """
    t = _to_tensor(x)
    out = Tensor(
        t.data * np.sin(t.data),
        requires_grad=t.requires_grad,
        _parents=(t,),
    )

    def _bwd(g):
        # Product rule on x * sin(x).
        slope = np.sin(t.data) + t.data * np.cos(t.data)
        t._accumulate_grad(g * slope)

    out._backward_fn = _bwd
    return out
61
+
62
def sigmoid(x: Tensor | Matrix) -> Tensor:
    """
    Sigmoid: ``1 / (1 + e^-x)`` element-wise.

    The backward function multiplies the upstream gradient by ``s * (1 - s)``,
    where ``s`` is the forward output.
    """
    t = _to_tensor(x)
    # Only exp(-|x|) is ever evaluated, so large-magnitude negative inputs
    # cannot overflow the exponential (plain exp(-x) does, with a warning).
    #   x >= 0: 1 / (1 + e^-x)
    #   x <  0: e^x / (1 + e^x)      (algebraically the same value)
    z = np.exp(-np.abs(t.data))
    s = np.where(t.data >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
    out = Tensor(s, requires_grad=t.requires_grad, _parents=(t,))

    def _bwd(g):
        t._accumulate_grad(g * s * (1.0 - s))

    out._backward_fn = _bwd
    return out
73
+
74
+
75
def tanh(x: Tensor | Matrix) -> Tensor:
    """
    Hyperbolic tangent, mapping values into [-1, 1].

    The backward function multiplies the upstream gradient by
    ``1 - tanh(x)^2``, reusing the forward output.
    """
    t = _to_tensor(x)
    activated = np.tanh(t.data)
    out = Tensor(activated, requires_grad=t.requires_grad, _parents=(t,))

    def _bwd(g):
        local_slope = 1.0 - np.square(activated)
        t._accumulate_grad(g * local_slope)

    out._backward_fn = _bwd
    return out
86
+
87
+
88
def softmax(x: Tensor | Matrix) -> Tensor:
    """
    Numerically stable softmax along axis 1.

    No backward function is attached here; it is meant to be used together
    with cross-entropy.
    """
    t = _to_tensor(x)
    logits = t.data
    # Subtracting the per-row maximum keeps the exponentials from overflowing.
    exps = np.exp(logits - logits.max(axis=1, keepdims=True))
    probs = exps / exps.sum(axis=1, keepdims=True)
    # Note: a standalone softmax backward is O(n^2); in practice it is paired
    # with cross_entropy, whose combined gradient is (pred - target) / batch.
    return Tensor(probs, requires_grad=t.requires_grad, _parents=(t,))
97
+
98
+
99
def leaky_relu(x: Tensor | Matrix, alpha: float = 0.01) -> Tensor:
    """
    Leaky ReLU: ``x`` where ``x > 0``, otherwise ``alpha * x``.

    Args:
        x: Input tensor (or a value convertible to one).
        alpha: Slope applied to non-positive inputs.

    The backward function scales the upstream gradient by 1 where the input
    was strictly positive and by ``alpha`` elsewhere.
    """
    t = _to_tensor(x)
    out_data = np.where(t.data > 0, t.data, alpha * t.data)
    out = Tensor(out_data, requires_grad=t.requires_grad, _parents=(t,))

    def _bwd(g):
        # np.where over two Python floats yields float64; cast the slope to
        # the forward output's dtype so the propagated gradient is not
        # silently upcast (relu's float32 mask behaves the same way).
        slope = np.where(t.data > 0, 1.0, alpha).astype(out_data.dtype, copy=False)
        t._accumulate_grad(g * slope)

    out._backward_fn = _bwd
    return out
110
+
111
+
112
def elu(x: Tensor | Matrix, alpha: float = 1.0) -> Tensor:
    """
    ELU (Exponential Linear Unit): ``x`` where ``x > 0``, otherwise
    ``alpha * (e^x - 1)``.

    Args:
        x: Input tensor (or a value convertible to one).
        alpha: Scale of the exponential branch.

    The backward function scales the upstream gradient by 1 where the input
    was strictly positive and by ``alpha * e^x`` elsewhere.
    """
    t = _to_tensor(x)
    # np.where evaluates both branches for every element, so the exponent is
    # clamped to <= 0: exp() of a large positive input (whose result would be
    # discarded anyway) can no longer overflow. Values for x <= 0 are unchanged.
    out_data = np.where(
        t.data > 0,
        t.data,
        alpha * (np.exp(np.minimum(t.data, 0)) - 1.0),
    )
    out = Tensor(out_data, requires_grad=t.requires_grad, _parents=(t,))

    def _bwd(g):
        # Same clamp as in the forward pass, for the same reason.
        d = np.where(t.data > 0, 1.0, alpha * np.exp(np.minimum(t.data, 0)))
        t._accumulate_grad(g * d)

    out._backward_fn = _bwd
    return out
124
+
125
+
126
def linear(x: Tensor | Matrix) -> Tensor:
    """
    Identity activation (no transformation).

    The output wraps a copy of the input data; the backward function passes
    the upstream gradient through unchanged.
    """
    source = _to_tensor(x)
    out = Tensor(
        source.data.copy(),
        requires_grad=source.requires_grad,
        _parents=(source,),
    )

    def _bwd(upstream):
        source._accumulate_grad(upstream)

    out._backward_fn = _bwd
    return out
136
+
137
+
138
+ # ------------------------------------------------------------------
139
+ # Mapa de nomes → função (útil para Sequential/Dense)
140
+ # ------------------------------------------------------------------
141
+
142
# Registry mapping activation names to functions; "none" is an alias for
# the identity activation.
ACTIVATIONS: dict[str, callable] = {
    "relu": relu,
    "sigmoid": sigmoid,
    "tanh": tanh,
    "softmax": softmax,
    "leaky_relu": leaky_relu,
    "elu": elu,
    "linear": linear,
    "none": linear,
    "manu": manu,
}


def get(name: str) -> callable:
    """
    Return the activation function registered under ``name``.

    Raises:
        ValueError: If ``name`` is not a key of `ACTIVATIONS`.
    """
    if name in ACTIVATIONS:
        return ACTIVATIONS[name]
    raise ValueError(f"Ativação desconhecida: '{name}'. Disponíveis: {list(ACTIVATIONS)}")