matrixflowers 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrixflowers-0.1.0/PKG-INFO +14 -0
- matrixflowers-0.1.0/README.md +3 -0
- matrixflowers-0.1.0/matrixflowers.egg-info/PKG-INFO +14 -0
- matrixflowers-0.1.0/matrixflowers.egg-info/SOURCES.txt +24 -0
- matrixflowers-0.1.0/matrixflowers.egg-info/dependency_links.txt +1 -0
- matrixflowers-0.1.0/matrixflowers.egg-info/requires.txt +4 -0
- matrixflowers-0.1.0/matrixflowers.egg-info/top_level.txt +5 -0
- matrixflowers-0.1.0/model/__init__.py +4 -0
- matrixflowers-0.1.0/model/linears.py +54 -0
- matrixflowers-0.1.0/model/loss.py +76 -0
- matrixflowers-0.1.0/model/optimizers.py +102 -0
- matrixflowers-0.1.0/model/parameter.py +17 -0
- matrixflowers-0.1.0/nn/__init__.py +2 -0
- matrixflowers-0.1.0/nn/activations.py +159 -0
- matrixflowers-0.1.0/nn/layers.py +267 -0
- matrixflowers-0.1.0/pyproject.toml +26 -0
- matrixflowers-0.1.0/setup.cfg +4 -0
- matrixflowers-0.1.0/tools/activation.py +72 -0
- matrixflowers-0.1.0/train/__init__.py +4 -0
- matrixflowers-0.1.0/train/losses.py +111 -0
- matrixflowers-0.1.0/train/optimizers.py +104 -0
- matrixflowers-0.1.0/train/tape.py +81 -0
- matrixflowers-0.1.0/train/trainer.py +225 -0
- matrixflowers-0.1.0/type/matrix/py_implementation/bench.py +95 -0
- matrixflowers-0.1.0/type/matrix/py_implementation/matrix.py +194 -0
- matrixflowers-0.1.0/type/matrix/py_implementation/test.py +14 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: matrixflowers
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A lightweight deep learning framework built from scratch
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: numpy>=1.24
|
|
9
|
+
Provides-Extra: dev
|
|
10
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
11
|
+
|
|
12
|
+
# matrixflowers
|
|
13
|
+
|
|
14
|
+
A lightweight deep learning framework built from scratch.
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: matrixflowers
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A lightweight deep learning framework built from scratch
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: numpy>=1.24
|
|
9
|
+
Provides-Extra: dev
|
|
10
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
11
|
+
|
|
12
|
+
# matrixflowers
|
|
13
|
+
|
|
14
|
+
A lightweight deep learning framework built from scratch.
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
matrixflowers.egg-info/PKG-INFO
|
|
4
|
+
matrixflowers.egg-info/SOURCES.txt
|
|
5
|
+
matrixflowers.egg-info/dependency_links.txt
|
|
6
|
+
matrixflowers.egg-info/requires.txt
|
|
7
|
+
matrixflowers.egg-info/top_level.txt
|
|
8
|
+
model/__init__.py
|
|
9
|
+
model/linears.py
|
|
10
|
+
model/loss.py
|
|
11
|
+
model/optimizers.py
|
|
12
|
+
model/parameter.py
|
|
13
|
+
nn/__init__.py
|
|
14
|
+
nn/activations.py
|
|
15
|
+
nn/layers.py
|
|
16
|
+
tools/activation.py
|
|
17
|
+
train/__init__.py
|
|
18
|
+
train/losses.py
|
|
19
|
+
train/optimizers.py
|
|
20
|
+
train/tape.py
|
|
21
|
+
train/trainer.py
|
|
22
|
+
type/matrix/py_implementation/bench.py
|
|
23
|
+
type/matrix/py_implementation/matrix.py
|
|
24
|
+
type/matrix/py_implementation/test.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from .parameter import Parameter
|
|
3
|
+
from type.matrix.py_implementation.matrix import Matrix
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Linear:
    """
    Dense (fully connected) layer computing ``y = x @ W + b``.

    ``W`` is a Parameter of shape (in_features, out_features) and ``b`` a
    Parameter of shape (1, out_features). The layer remembers the input of
    the most recent forward pass so that backward() can compute the weight
    gradient.
    """

    def __init__(self, in_features: int, out_features: int):
        # He initialization: standard deviation sqrt(2 / fan_in).
        he_std = np.sqrt(2.0 / in_features)
        initial_weights = np.random.randn(in_features, out_features).astype(np.float32) * he_std
        self.W = Parameter(initial_weights)
        self.b = Parameter(np.zeros((1, out_features), dtype=np.float32))

        # Input of the last forward() call; None until forward() has run.
        self._x_cache: np.ndarray | None = None

    def forward(self, x: Matrix) -> Matrix:
        """Apply the affine map to ``x`` and cache ``x.data`` for backward()."""
        inputs = x.data
        self._x_cache = inputs
        return Matrix(inputs @ self.W.data + self.b.data)

    def backward(self, grad_output: Matrix) -> Matrix:
        """
        Propagate a gradient through the layer.

        The gradients of W and b are *added* to their existing ``grad``
        buffers, so they keep accumulating until the buffers are zeroed.

        Args:
            grad_output: dL/dy, gradient w.r.t. this layer's output.

        Returns:
            dL/dx, gradient w.r.t. this layer's input.

        Raises:
            RuntimeError: if forward() has not been called yet.
        """
        cached_input = self._x_cache
        if cached_input is None:
            raise RuntimeError("backward() called before forward(). Run forward() first.")

        upstream = grad_output.data  # (batch, out_features)

        self.W.grad += cached_input.T @ upstream  # (in_features, out_features)
        self.b.grad += upstream.sum(axis=0, keepdims=True)  # (1, out_features)

        # dL/dx = dL/dy @ W^T -> (batch, in_features)
        return Matrix(upstream @ self.W.data.T)

    def parameters(self) -> list[Parameter]:
        """Return the trainable parameters as ``[W, b]``."""
        return [self.W, self.b]

    def __repr__(self) -> str:
        n_in, n_out = self.W.data.shape[0], self.W.data.shape[1]
        return f"Linear(in_features={n_in}, out_features={n_out})"
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from type.matrix.py_implementation.matrix import Matrix
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def _validate_shapes(pred: Matrix, target: Matrix) -> None:
    """Raise ValueError unless ``pred.data`` and ``target.data`` share a shape."""
    pred_shape = pred.data.shape
    target_shape = target.data.shape
    if pred_shape == target_shape:
        return
    raise ValueError(
        f"Shape mismatch: pred={pred_shape}, target={target_shape}"
    )
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# ── MSE ──────────────────────────────────────────────────────────────────────
|
|
13
|
+
|
|
14
|
+
def mse(pred: Matrix, target: Matrix) -> float:
    """
    Mean squared error between predictions and targets.

    Averages the squared element-wise difference over every element and
    returns it as a Python float. Raises ValueError on a shape mismatch.
    """
    _validate_shapes(pred, target)
    residual = pred.data - target.data
    squared_error = np.square(residual)
    return float(np.mean(squared_error))
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def mse_grad(pred: Matrix, target: Matrix) -> Matrix:
    """
    Gradient of the MSE loss w.r.t. the predictions.

    dL/dpred = 2 * (pred - target) / N, where N is the total number of
    elements in ``pred``. The result is returned as a float32 Matrix.
    Raises ValueError on a shape mismatch.
    """
    _validate_shapes(pred, target)
    element_count = pred.data.size
    residual = pred.data - target.data
    scaled = (2.0 / element_count) * residual
    return Matrix(scaled.astype(np.float32))
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# ── Cross Entropy ─────────────────────────────────────────────────────────────
|
|
30
|
+
|
|
31
|
+
def cross_entropy(pred: Matrix, target: Matrix) -> float:
    """
    Categorical cross-entropy loss, averaged over the first axis.

    Expects `pred` to hold softmax probabilities and `target` to be one-hot
    encoded. Probabilities are clipped to [1e-9, 1 - 1e-9] before the log so
    that a zero probability does not produce ``log(0)``.
    Raises ValueError on a shape mismatch.
    """
    _validate_shapes(pred, target)
    eps = 1e-9
    n_samples = pred.data.shape[0]
    safe_probs = np.clip(pred.data, eps, 1.0 - eps)
    log_likelihood = np.sum(target.data * np.log(safe_probs))
    return float(-log_likelihood / n_samples)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def cross_entropy_grad(pred: Matrix, target: Matrix) -> Matrix:
    """
    Combined softmax + cross-entropy gradient.

    Computes dL/dlogits = (pred - target) / batch_size, i.e. the gradient
    w.r.t. the pre-softmax inputs, where batch_size is the size of the
    first axis of ``pred``. The result is returned as a float32 Matrix.
    Raises ValueError on a shape mismatch.
    """
    _validate_shapes(pred, target)
    n_samples = pred.data.shape[0]
    delta = pred.data - target.data
    return Matrix((delta / n_samples).astype(np.float32))
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# ── Binary Cross Entropy ──────────────────────────────────────────────────────
|
|
55
|
+
|
|
56
|
+
def binary_cross_entropy(pred: Matrix, target: Matrix) -> float:
    """
    Binary cross-entropy loss for sigmoid outputs.

    Expects prediction values in (0, 1). Predictions are clipped to
    [1e-9, 1 - 1e-9] before taking logarithms, and the loss is the mean over
    all elements. Raises ValueError on a shape mismatch.
    """
    _validate_shapes(pred, target)
    eps = 1e-9
    # Clip in float64. If the predictions are stored as float32, the upper
    # bound 1.0 - 1e-9 rounds to exactly 1.0 in that dtype, so a saturated
    # prediction would survive the clip and log(1 - p) would become log(0).
    p = np.clip(np.asarray(pred.data, dtype=np.float64), eps, 1.0 - eps)
    t = target.data
    return float(-np.mean(t * np.log(p) + (1.0 - t) * np.log(1.0 - p)))
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def binary_cross_entropy_grad(pred: Matrix, target: Matrix) -> Matrix:
    """
    Gradient of the binary cross-entropy loss w.r.t. the predictions.

    dL/dp = (-(t / p) + (1 - t) / (1 - p)) / N, where N is the total number
    of elements in ``pred``. Predictions are clipped to [1e-9, 1 - 1e-9]
    first. The result is returned as a float32 Matrix.
    Raises ValueError on a shape mismatch.
    """
    _validate_shapes(pred, target)
    eps = 1e-9
    # Clip in float64. If the predictions are stored as float32, the upper
    # bound 1.0 - 1e-9 rounds to exactly 1.0 in that dtype, which would leave
    # (1 - p) == 0 and turn the division below into inf / NaN.
    p = np.clip(np.asarray(pred.data, dtype=np.float64), eps, 1.0 - eps)
    t = target.data
    n = pred.data.size
    grad = (-(t / p) + (1.0 - t) / (1.0 - p)) / n
    return Matrix(grad.astype(np.float32))
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from .parameter import Parameter
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class SGD:
    """
    Stochastic Gradient Descent with optional momentum.

    Update rule (no momentum):
        p = p - lr * grad

    Update rule (with momentum):
        v = momentum * v - lr * grad
        p = p + v

    Parameters are updated in place through their ``data`` arrays; gradients
    are read from ``grad`` and are never modified by step().
    """

    def __init__(self, parameters: list[Parameter], lr: float = 0.01, momentum: float = 0.0):
        """
        Args:
            parameters: Parameters to optimize. Any iterable is accepted; it
                is copied into a list.
            lr: Learning rate.
            momentum: Momentum coefficient; 0 disables momentum.
        """
        # Materialize first: a one-shot iterable (e.g. a generator) would
        # otherwise be consumed while building the velocity buffers, leaving
        # step() with nothing to update.
        self.parameters = list(parameters)
        self.lr = lr
        self.momentum = momentum
        # One velocity buffer per parameter, same shape and dtype as its data.
        self._velocity = [np.zeros_like(p.data) for p in self.parameters]

    def step(self) -> None:
        """Apply one update to every parameter using its current gradient."""
        for p, v in zip(self.parameters, self._velocity):
            if self.momentum:
                # Update the velocity buffer in place so it persists between steps.
                v[:] = self.momentum * v - self.lr * p.grad
                p.data += v
            else:
                p.data -= self.lr * p.grad

    def zero_grad(self) -> None:
        """Reset the gradient of every parameter by calling its zero_grad()."""
        for p in self.parameters:
            p.zero_grad()

    def __repr__(self) -> str:
        return f"SGD(lr={self.lr}, momentum={self.momentum})"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class Adam:
    """
    Adam optimizer (Adaptive Moment Estimation).

    Reference: Kingma & Ba, 2015 — https://arxiv.org/abs/1412.6980

    Update rule:
        m  = beta1 * m + (1 - beta1) * grad        # 1st moment
        v  = beta2 * v + (1 - beta2) * grad²       # 2nd moment
        m̂ = m / (1 - beta1^t)                     # bias correction
        v̂ = v / (1 - beta2^t)
        p  = p - lr * m̂ / (sqrt(v̂) + eps)

    When ``weight_decay`` is non-zero, ``weight_decay * p`` is added to the
    gradient before the moment updates.
    """

    def __init__(
        self,
        parameters: list[Parameter],
        lr: float = 1e-3,
        beta1: float = 0.9,
        beta2: float = 0.999,
        eps: float = 1e-8,
        weight_decay: float = 0.0,
    ):
        """
        Args:
            parameters: Parameters to optimize. Any iterable is accepted; it
                is copied into a list.
            lr: Learning rate.
            beta1: Decay rate of the first-moment estimate.
            beta2: Decay rate of the second-moment estimate.
            eps: Constant added to the denominator of the update.
            weight_decay: Coefficient of the ``weight_decay * p`` term added
                to the gradient; 0 disables it.
        """
        # Materialize first: a one-shot iterable (e.g. a generator) would
        # otherwise be consumed while building the moment buffers, leaving
        # step() with nothing to update.
        self.parameters = list(parameters)
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.eps = eps
        self.weight_decay = weight_decay

        self._t = 0  # number of step() calls so far
        self._m = [np.zeros_like(p.data) for p in self.parameters]
        self._v = [np.zeros_like(p.data) for p in self.parameters]

    def step(self) -> None:
        """Apply one Adam update to every parameter using its current gradient."""
        self._t += 1
        b1, b2, t = self.beta1, self.beta2, self._t

        bc1 = 1.0 - b1 ** t  # bias correction factor 1
        bc2 = 1.0 - b2 ** t  # bias correction factor 2

        for p, m, v in zip(self.parameters, self._m, self._v):
            g = p.grad

            if self.weight_decay:
                # Builds a new array, so p.grad itself is left untouched.
                g = g + self.weight_decay * p.data

            # Update the moment buffers in place so they persist between steps.
            m[:] = b1 * m + (1.0 - b1) * g
            v[:] = b2 * v + (1.0 - b2) * (g ** 2)

            m_hat = m / bc1
            v_hat = v / bc2

            p.data -= self.lr * m_hat / (np.sqrt(v_hat) + self.eps)

    def zero_grad(self) -> None:
        """Reset the gradient of every parameter by calling its zero_grad()."""
        for p in self.parameters:
            p.zero_grad()

    def __repr__(self) -> str:
        return (
            f"Adam(lr={self.lr}, beta1={self.beta1}, "
            f"beta2={self.beta2}, eps={self.eps})"
        )
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Parameter:
    """
    Trainable value: a float32 numpy array plus a gradient buffer.

    ``data`` holds a float32 copy of the value passed in; ``grad`` is a
    zero-initialised array with the same shape and dtype.
    """

    def __init__(self, data):
        values = np.array(data, dtype=np.float32)
        self.data = values
        self.grad = np.zeros_like(values)

    def zero_grad(self):
        """Reset the gradient buffer to zeros in place."""
        self.grad[...] = 0.0

    def __repr__(self):
        shape = self.data.shape
        dtype = self.data.dtype
        return f"Parameter(shape={shape}, dtype={dtype})"
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""
|
|
2
|
+
matrixflowers.nn.activations
|
|
3
|
+
======================
|
|
4
|
+
Funções de ativação que funcionam com Tensor (autograd) E com Matrix (legado).
|
|
5
|
+
|
|
6
|
+
As funções originais de tools/activation.py são preservadas como
|
|
7
|
+
*_matrix(x) para compatibilidade. As versões padrão aqui aceitam Tensor
|
|
8
|
+
e propagam gradientes automaticamente.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
from tensor import Tensor
|
|
15
|
+
from type.matrix.py_implementation.matrix import Matrix
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# ------------------------------------------------------------------
|
|
19
|
+
# Helpers
|
|
20
|
+
# ------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
def _to_tensor(x) -> Tensor:
    """
    Coerce ``x`` to a Tensor.

    A Tensor is returned unchanged. A Matrix is wrapped in a Tensor with
    ``requires_grad=False``. Anything else is converted to a float32 numpy
    array first and then wrapped.
    """
    if isinstance(x, Tensor):
        return x
    if not isinstance(x, Matrix):
        as_array = np.array(x, dtype=np.float32)
        return Tensor(as_array)
    # NOTE(review): the Matrix object itself (not x.data) is handed to Tensor;
    # presumably Tensor knows how to unwrap a Matrix — confirm.
    return Tensor(x, requires_grad=False)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# ------------------------------------------------------------------
|
|
31
|
+
# Ativações com autograd
|
|
32
|
+
# ------------------------------------------------------------------
|
|
33
|
+
|
|
34
|
+
def relu(x: Tensor | Matrix) -> Tensor:
    """
    ReLU: max(0, x).

    The backward function multiplies the incoming gradient by the 0/1 mask
    of strictly positive inputs computed during the forward pass.
    """
    source = _to_tensor(x)
    positive = (source.data > 0).astype(np.float32)
    result = Tensor(
        source.data * positive,
        requires_grad=source.requires_grad,
        _parents=(source,),
    )

    def _bwd(upstream):
        source._accumulate_grad(upstream * positive)

    result._backward_fn = _bwd
    return result
|
|
45
|
+
|
|
46
|
+
def manu(x: Tensor | Matrix) -> Tensor:
    """
    Activation f(x) = x * sin(x).

    The backward function applies the derivative
    f'(x) = sin(x) + x * cos(x) to the incoming gradient.
    """
    source = _to_tensor(x)
    activated = source.data * np.sin(source.data)
    result = Tensor(
        activated,
        requires_grad=source.requires_grad,
        _parents=(source,),
    )

    def _bwd(upstream):
        values = source.data
        # f'(x) = sin(x) + x*cos(x)
        local_slope = np.sin(values) + values * np.cos(values)
        source._accumulate_grad(upstream * local_slope)

    result._backward_fn = _bwd
    return result
|
|
61
|
+
|
|
62
|
+
def sigmoid(x: Tensor | Matrix) -> Tensor:
    """
    Sigmoid: 1 / (1 + e^-x), evaluated in a numerically stable form.

    The backward function multiplies the incoming gradient by s * (1 - s),
    where s is the forward output.
    """
    t = _to_tensor(x)
    # exp(-|x|) is always in (0, 1], so it cannot overflow the way exp(-x)
    # does for large negative x. The two branches are algebraically the same
    # function: 1 / (1 + e^-x) == e^x / (1 + e^x).
    z = np.exp(-np.abs(t.data))
    s = np.where(t.data >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
    out = Tensor(s, requires_grad=t.requires_grad, _parents=(t,))

    def _bwd(g):
        t._accumulate_grad(g * s * (1.0 - s))

    out._backward_fn = _bwd
    return out
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def tanh(x: Tensor | Matrix) -> Tensor:
    """
    Tanh: maps to [-1, 1].

    The backward function multiplies the incoming gradient by
    1 - tanh(x)^2, reusing the forward output.
    """
    source = _to_tensor(x)
    activated = np.tanh(source.data)
    result = Tensor(
        activated,
        requires_grad=source.requires_grad,
        _parents=(source,),
    )

    def _bwd(upstream):
        local_slope = 1.0 - np.square(activated)
        source._accumulate_grad(upstream * local_slope)

    result._backward_fn = _bwd
    return result
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def softmax(x: Tensor | Matrix) -> Tensor:
    """
    Softmax along axis 1, shifted by the row maximum for numerical stability.

    No backward function of its own — use together with cross_entropy.
    """
    source = _to_tensor(x)
    logits = source.data
    # Subtracting the per-row max leaves the result unchanged mathematically
    # but keeps the exponentials from overflowing.
    exps = np.exp(logits - logits.max(axis=1, keepdims=True))
    probs = exps / exps.sum(axis=1, keepdims=True)
    # Note (from the original author): the softmax backward is deliberately
    # omitted; in practice it is combined with cross_entropy, where the joint
    # gradient is simply (pred - target) / batch.
    # NOTE(review): no _backward_fn is assigned on the returned Tensor, so how
    # gradients flow through this node depends on Tensor's default — confirm.
    return Tensor(probs, requires_grad=source.requires_grad, _parents=(source,))
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def leaky_relu(x: Tensor | Matrix, alpha: float = 0.01) -> Tensor:
    """
    Leaky ReLU: x for x > 0, alpha * x otherwise.

    Allows a small gradient (``alpha``) to flow for non-positive inputs.
    """
    source = _to_tensor(x)
    values = source.data
    activated = np.where(values > 0, values, alpha * values)
    result = Tensor(
        activated,
        requires_grad=source.requires_grad,
        _parents=(source,),
    )

    def _bwd(upstream):
        # Slope is 1 where the input is positive, alpha elsewhere.
        local_slope = np.where(source.data > 0, 1.0, alpha)
        source._accumulate_grad(upstream * local_slope)

    result._backward_fn = _bwd
    return result
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def elu(x: Tensor | Matrix, alpha: float = 1.0) -> Tensor:
    """
    ELU (Exponential Linear Unit): x for x > 0, alpha * (e^x - 1) otherwise.

    The backward function multiplies the incoming gradient by 1 where the
    input is positive and by alpha * e^x elsewhere.
    """
    t = _to_tensor(x)
    # np.where evaluates both branches for every element, so exp() would also
    # run on large positive inputs and overflow even though that result is
    # discarded. Clamping the argument to <= 0 leaves the selected values
    # unchanged and avoids the overflow.
    out_data = np.where(
        t.data > 0,
        t.data,
        alpha * (np.exp(np.minimum(t.data, 0.0)) - 1.0),
    )
    out = Tensor(out_data, requires_grad=t.requires_grad, _parents=(t,))

    def _bwd(g):
        # Same clamp as in the forward pass: exp is only used where x <= 0.
        d = np.where(t.data > 0, 1.0, alpha * np.exp(np.minimum(t.data, 0.0)))
        t._accumulate_grad(g * d)

    out._backward_fn = _bwd
    return out
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def linear(x: Tensor | Matrix) -> Tensor:
    """
    Identity activation (no transformation).

    Returns a new Tensor holding a copy of the input data; the backward
    function passes the incoming gradient through unchanged.
    """
    source = _to_tensor(x)
    duplicate = source.data.copy()
    result = Tensor(
        duplicate,
        requires_grad=source.requires_grad,
        _parents=(source,),
    )

    def _bwd(upstream):
        source._accumulate_grad(upstream)

    result._backward_fn = _bwd
    return result
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# ------------------------------------------------------------------
|
|
139
|
+
# Mapa de nomes → função (útil para Sequential/Dense)
|
|
140
|
+
# ------------------------------------------------------------------
|
|
141
|
+
|
|
142
|
+
# Registry mapping an activation name to the function defined in this module.
# "linear" and "none" both resolve to the identity activation `linear`.
ACTIVATIONS: dict[str, callable] = {
    "relu": relu,
    "sigmoid": sigmoid,
    "tanh": tanh,
    "softmax": softmax,
    "leaky_relu": leaky_relu,
    "elu": elu,
    "linear": linear,
    "none": linear,
    "manu": manu,
}
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def get(name: str) -> callable:
    """
    Return the activation function registered under ``name``.

    Raises:
        ValueError: if ``name`` is not a key of ACTIVATIONS.
    """
    activation = ACTIVATIONS.get(name)
    if activation is None:
        raise ValueError(f"Ativação desconhecida: '{name}'. Disponíveis: {list(ACTIVATIONS)}")
    return activation
|