dnaty 5.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dnaty/__init__.py +21 -0
- dnaty/analysis/__init__.py +1 -0
- dnaty/analysis/cl_metrics.py +40 -0
- dnaty/compress.py +177 -0
- dnaty/core/__init__.py +1 -0
- dnaty/core/arch.py +85 -0
- dnaty/core/arch_cnn.py +140 -0
- dnaty/core/individual.py +34 -0
- dnaty/core/memory.py +88 -0
- dnaty/evolution/__init__.py +1 -0
- dnaty/evolution/evolver.py +285 -0
- dnaty/evolution/selection.py +78 -0
- dnaty/experiments/__init__.py +1 -0
- dnaty/experiments/fast_dataset.py +106 -0
- dnaty/logging_config.py +40 -0
- dnaty/operators/__init__.py +1 -0
- dnaty/operators/mutations.py +256 -0
- dnaty/operators/mutations_cnn.py +259 -0
- dnaty/tracking.py +113 -0
- dnaty/training/__init__.py +1 -0
- dnaty/training/local_train.py +214 -0
- dnaty-5.2.0.dist-info/METADATA +242 -0
- dnaty-5.2.0.dist-info/RECORD +26 -0
- dnaty-5.2.0.dist-info/WHEEL +5 -0
- dnaty-5.2.0.dist-info/licenses/LICENSE +44 -0
- dnaty-5.2.0.dist-info/top_level.txt +1 -0
dnaty/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""
|
|
2
|
+
dNATY — Dynamic Neuro-Adaptive sYstem.
|
|
3
|
+
|
|
4
|
+
Evolutionary Neural Architecture Search with episodic memory.
|
|
5
|
+
Finds compact, efficient models via guided evolution — not random search.
|
|
6
|
+
|
|
7
|
+
Quick start:
|
|
8
|
+
from dnaty import compress
|
|
9
|
+
from dnaty.experiments.fast_dataset import FastDataset
|
|
10
|
+
|
|
11
|
+
ds = FastDataset("MNIST", device="cpu", train_subset=10_000)
|
|
12
|
+
result = compress(your_model, ds, target_flops=0.5)
|
|
13
|
+
print(result.summary())
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
__version__ = "5.2.0"
|
|
17
|
+
|
|
18
|
+
from dnaty.compress import compress, CompressResult
|
|
19
|
+
from dnaty.evolution.evolver import DnatyEvolver
|
|
20
|
+
|
|
21
|
+
__all__ = ["compress", "CompressResult", "DnatyEvolver", "__version__"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Continual-learning metrics — Lopez-Paz et al. (2017).
|
|
3
|
+
BWT, FWT and FM implemented as formalized in section 1.5.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def compute_cl_metrics(R: np.ndarray, baselines: np.ndarray | None = None) -> dict[str, float]:
    """Compute the continual-learning metrics BWT, FWT and FM from an accuracy matrix.

    Args:
        R: Accuracy matrix where ``R[i, j]`` is the accuracy on task ``j`` after
            training sequentially up to task ``i`` (both 0-indexed). The number
            of tasks ``T`` is read from ``R.shape[1]``.
        baselines: Optional per-task single-task accuracies; ``baselines[j]`` is
            the reference subtracted when computing forward transfer. When
            ``None``, FWT is reported as ``0.0``.

    Returns:
        A dict with the keys ``"BWT"``, ``"FWT"`` and ``"FM"``, each rounded to
        four decimal places. A metric with no terms (e.g. a single task) is 0.0.
    """
    n_tasks = R.shape[1]
    last = n_tasks - 1

    def _mean_or_zero(terms: list) -> float:
        # An empty term list (T <= 1, or no baselines) maps to 0.0.
        return float(np.mean(terms)) if terms else 0.0

    # Backward transfer: final accuracy on each earlier task minus the accuracy
    # measured right after that task was learned.
    backward = [R[last, task] - R[task, task] for task in range(last)]

    # Forward transfer: accuracy on a task just before it is trained, relative
    # to its single-task baseline. Only defined when baselines are supplied.
    forward = []
    if baselines is not None:
        forward = [R[task - 1, task] - baselines[task] for task in range(1, n_tasks)]

    # Forgetting measure: drop from the best accuracy seen at any training step
    # (the final step included) down to the final accuracy.
    forgetting = []
    for task in range(last):
        best_seen = max(R[step, task] for step in range(n_tasks))
        forgetting.append(best_seen - R[last, task])

    metrics = {
        "BWT": _mean_or_zero(backward),
        "FWT": _mean_or_zero(forward),
        "FM": _mean_or_zero(forgetting),
    }
    return {name: round(value, 4) for name, value in metrics.items()}
|
dnaty/compress.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"""
|
|
2
|
+
dNATY compress — public API for model compression via evolutionary NAS.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
from dnaty import compress
|
|
6
|
+
|
|
7
|
+
result = compress(model, train_data, target_flops=0.5)
|
|
8
|
+
print(f"Compressed {result.flops_reduction_pct:.1f}% FLOPs, acc={result.accuracy:.4f}")
|
|
9
|
+
result.model # ready-to-use compressed PyTorch model
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from typing import Optional
|
|
14
|
+
|
|
15
|
+
import torch
|
|
16
|
+
import torch.nn as nn
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class CompressResult:
    """Result record returned by ``compress``: the chosen model plus its metrics."""

    # The model selected by the search.
    model: nn.Module
    # FLOPs / parameter counts before and after compression.
    original_flops: int
    compressed_flops: int
    original_params: int
    compressed_params: int
    # Accuracy recorded for the selected model.
    accuracy: float
    # Fraction of FLOPs removed, e.g. 0.465 means 46.5% fewer FLOPs.
    flops_reduction: float
    # Number of generations reported for the run.
    generations: int
    # Hidden layer sizes of the selected architecture.
    arch: list[int] = field(default_factory=list)

    @property
    def flops_reduction_pct(self) -> float:
        """FLOPs reduction expressed as a percentage."""
        return 100 * self.flops_reduction

    @property
    def params_reduction_pct(self) -> float:
        """Parameter reduction as a percentage; 0.0 when the original count is zero."""
        baseline = self.original_params
        if baseline == 0:
            return 0.0
        kept_fraction = self.compressed_params / baseline
        return (1.0 - kept_fraction) * 100

    def summary(self) -> str:
        """Return a one-line, human-readable description of the result."""
        sections = [
            "CompressResult",
            f"arch={self.arch}",
            f"FLOPs -{self.flops_reduction_pct:.1f}% "
            f"({self.original_flops:,} -> {self.compressed_flops:,})",
            f"params -{self.params_reduction_pct:.1f}% "
            f"({self.original_params:,} -> {self.compressed_params:,})",
            f"acc={self.accuracy:.4f}",
        ]
        return " | ".join(sections)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def compress(
    model: nn.Module,
    train_data,
    *,
    target_flops: float = 0.5,
    n_generations: int = 30,
    n_pop: int = 15,
    device: Optional[str] = None,
    verbose: bool = True,
    seed: Optional[int] = None,
) -> CompressResult:
    """Search for a smaller architecture for the same task using evolutionary NAS.

    The layer sizes of ``model`` are inferred from its ``nn.Linear`` layers and
    used as the starting point of a ``DnatyEvolver`` run. After the run, the
    individual with the fewest FLOPs among those whose accuracy is at least
    0.95 is returned; if none reaches that floor, the individual with the
    fewest FLOPs in the whole population is returned.

    Args:
        model: A PyTorch ``nn.Module`` containing ``nn.Linear`` layers. It does
            not need to expose ``count_flops`` / ``count_params``; when those
            methods are missing the baseline is computed from its Linear layers
            and parameters.
        train_data: DataLoader or FastDataset, passed to the evolver as both the
            training and the evaluation data.
        target_flops: Target FLOPs as a fraction of the original.
            NOTE(review): this value is currently not read by this function;
            the FLOPs pressure (``lambda2``) is a fixed constant below.
        n_generations: Number of evolutionary generations to run.
        n_pop: Population size.
        device: ``'cpu'`` or ``'cuda'``. Auto-detected when ``None``.
        verbose: Forwarded to the evolver.
        seed: When given, seeds Python's ``random``, NumPy and torch.

    Returns:
        CompressResult with the selected model and the compression metrics.

    Raises:
        ValueError: If ``model`` contains no ``nn.Linear`` layer.

    Example:
        >>> from dnaty import compress
        >>> from dnaty.experiments.fast_dataset import FastDataset
        >>> ds = FastDataset("MNIST", device="cpu", train_subset=10_000)
        >>> model = ...  # your trained PyTorch model
        >>> result = compress(model, ds, target_flops=0.5, n_generations=30)
        >>> print(result.summary())
    """
    import random

    import numpy as np
    from dnaty.evolution.evolver import DnatyEvolver

    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    if seed is not None:
        # Seed every global RNG the search may draw from, including the
        # stdlib one, so a fixed seed is not undermined by `random` calls.
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)

    layer_sizes = _infer_layer_sizes(model)
    input_size = layer_sizes[0]
    n_classes = layer_sizes[-1]
    init_hidden = layer_sizes[1:-1]

    # Fixed FLOPs-penalty weight handed to the evolver.
    lambda2 = 3e-6

    evolver = DnatyEvolver(
        n_pop=n_pop,
        n_generations=n_generations,
        t_local=3,
        input_size=input_size,
        n_classes=n_classes,
        init_hidden=init_hidden,
        device=device,
        verbose=verbose,
        lambda2=lambda2,
    )

    # Baseline: measure the original model before the search. A plain
    # nn.Module has no count_flops()/count_params(), so fall back to counting
    # 2 * in * out per Linear layer and summing parameter sizes.
    flops_fn = getattr(model, "count_flops", None)
    if callable(flops_fn):
        orig_flops = flops_fn()
    else:
        orig_flops = sum(
            2 * m.in_features * m.out_features
            for m in model.modules()
            if isinstance(m, nn.Linear)
        )
    params_fn = getattr(model, "count_params", None)
    if callable(params_fn):
        orig_params = params_fn()
    else:
        orig_params = sum(p.numel() for p in model.parameters())

    # Patience equal to the generation count effectively disables early
    # stopping, so every generation runs.
    evolver.run(train_data, train_data, early_stop_patience=n_generations)

    # Pick the cheapest individual that still meets the accuracy floor.
    # NOTE(review): when nobody meets the floor, the cheapest individual is
    # returned regardless of its accuracy.
    acc_floor = 0.95
    candidates = [ind for ind in evolver.population if ind.acc >= acc_floor]
    if not candidates:
        candidates = evolver.population
    best = min(candidates, key=lambda ind: ind.count_flops())

    compressed_flops = best.count_flops()
    compressed_params = best.count_params()

    # `layer_sizes` on the model includes the input size first; `arch` drops it
    # so only the hidden sizes are reported.
    full_sizes = list(getattr(best.model, "layer_sizes", [input_size] + init_hidden))
    arch = full_sizes[1:]

    return CompressResult(
        model=best.model,
        original_flops=orig_flops,
        compressed_flops=compressed_flops,
        original_params=orig_params,
        compressed_params=compressed_params,
        accuracy=best.acc,
        # Clamp at 0 so a larger selected model never reports a negative reduction.
        flops_reduction=max(0.0, 1.0 - compressed_flops / max(orig_flops, 1)),
        generations=n_generations,
        arch=arch,
    )
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _infer_layer_sizes(model: nn.Module) -> list[int]:
|
|
166
|
+
"""Extract [in, h1, h2, ..., out] from a Linear-based model."""
|
|
167
|
+
sizes: list[int] = []
|
|
168
|
+
for m in model.modules():
|
|
169
|
+
if isinstance(m, nn.Linear):
|
|
170
|
+
if not sizes:
|
|
171
|
+
sizes.append(m.in_features)
|
|
172
|
+
sizes.append(m.out_features)
|
|
173
|
+
if len(sizes) < 2:
|
|
174
|
+
raise ValueError(
|
|
175
|
+
"Cannot infer architecture: model must contain at least one nn.Linear layer."
|
|
176
|
+
)
|
|
177
|
+
return sizes
|
dnaty/core/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
dnaty/core/arch.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Architecture representation as a directed acyclic graph (DAG).
|
|
3
|
+
A_i = (V_i, E_i, φ_i, Ω_i)
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
import torch
|
|
7
|
+
import torch.nn as nn
|
|
8
|
+
import numpy as np
|
|
9
|
+
from copy import deepcopy
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
ACTIVATIONS = {
|
|
13
|
+
"relu": nn.ReLU,
|
|
14
|
+
"tanh": nn.Tanh,
|
|
15
|
+
"gelu": nn.GELU,
|
|
16
|
+
"sigmoid": nn.Sigmoid,
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
# Module-wide counter backing next_innovation(); starts at 0 so the first id is 1.
_innovation_counter = 0


def next_innovation() -> int:
    """Advance the module-level innovation counter and return its new value."""
    global _innovation_counter
    fresh_id = _innovation_counter + 1
    _innovation_counter = fresh_id
    return fresh_id
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class DynamicMLP(nn.Module):
    """MLP with a mutable architecture, stored as a list of layer widths.

    ``layer_sizes`` holds the input width followed by the hidden widths; a final
    classifier Linear maps the last width to ``n_classes``. Optional skip
    connections are kept in ``skip_connections`` and applied in ``forward``.
    """

    def __init__(self, layer_sizes: list[int], activations: list[str] | None = None, n_classes: int = 10):
        """Build the network.

        Args:
            layer_sizes: ``[input, h1, h2, ...]`` widths (copied into a new list).
            activations: One activation name per hidden layer; defaults to
                ``"relu"`` for every layer when omitted or empty.
            n_classes: Width of the output layer.
        """
        super().__init__()
        self.layer_sizes = list(layer_sizes)
        self.n_classes = n_classes
        self.activations = activations or ["relu"] * (len(layer_sizes) - 1)
        # One fresh innovation id per hidden layer.
        self.innovation_ids = [next_innovation() for _ in range(len(layer_sizes) - 1)]
        self._build()

    def _build(self) -> None:
        """(Re)create ``self.net`` from ``layer_sizes`` and reset skip connections."""
        layers = []
        for i in range(len(self.layer_sizes) - 1):
            layers.append(nn.Linear(self.layer_sizes[i], self.layer_sizes[i + 1]))
            # BatchNorm sits between the Linear layer and its activation.
            layers.append(nn.BatchNorm1d(self.layer_sizes[i + 1]))
            # Missing or unknown activation names fall back to ReLU.
            act = self.activations[i] if i < len(self.activations) else "relu"
            layers.append(ACTIVATIONS.get(act, nn.ReLU)())
        layers.append(nn.Linear(self.layer_sizes[-1], self.n_classes))
        self.net = nn.Sequential(*layers)
        # Entries are (src, dst, projection-or-None).
        # NOTE(review): projections stored in this plain list are not registered
        # as submodules, so they are absent from parameters()/state_dict() and
        # are not moved by .to(device) — confirm this is intended.
        self.skip_connections: list[tuple[int, int, nn.Linear]] = []

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Flatten ``x`` to ``(batch, -1)`` and run it through the network."""
        # reshape (not view) so non-contiguous inputs, e.g. permuted or
        # transposed tensors, are accepted instead of raising.
        x = x.reshape(x.size(0), -1)
        # layer_outputs[0] is the input; layer_outputs[k] is the output of block k.
        layer_outputs = [x]
        idx = 0
        for i in range(len(self.layer_sizes) - 1):
            linear = self.net[idx]
            bn = self.net[idx + 1]
            act = self.net[idx + 2]
            out = act(bn(linear(layer_outputs[-1])))
            for src, dst, proj in self.skip_connections:
                if dst == i + 1 and src < len(layer_outputs):
                    skip_in = layer_outputs[src]
                    if proj is not None:
                        skip_in = proj(skip_in)
                    # A skip whose shape does not match is silently ignored.
                    if skip_in.shape == out.shape:
                        out = out + skip_in
            layer_outputs.append(out)
            idx += 3  # Linear + BN + Activation
        # After the loop, idx points at the final classifier Linear.
        return self.net[idx](layer_outputs[-1])

    def count_params(self) -> int:
        """Return the total number of elements across registered parameters."""
        return sum(p.numel() for p in self.parameters())

    def count_flops(self) -> int:
        """Estimate FLOPs as ``2 * in * out`` per Linear layer, classifier included.

        BatchNorm, activations and skip projections are not counted.
        """
        flops = 0
        for i in range(len(self.layer_sizes) - 1):
            flops += 2 * self.layer_sizes[i] * self.layer_sizes[i + 1]
        flops += 2 * self.layer_sizes[-1] * self.n_classes
        return flops

    def is_valid(self) -> bool:
        """Return True when there are at least two widths and all are positive."""
        return all(s > 0 for s in self.layer_sizes) and len(self.layer_sizes) >= 2
|
dnaty/core/arch_cnn.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DynamicCNN — mutable CNN architecture for CIFAR-10.
|
|
3
|
+
Supports Conv2D+BN+ReLU blocks and depthwise-separable blocks.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
import torch
|
|
7
|
+
import torch.nn as nn
|
|
8
|
+
import numpy as np
|
|
9
|
+
from copy import deepcopy
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ConvBlock(nn.Module):
    """Standard block: Conv2d followed by BatchNorm2d and ReLU."""

    def __init__(self, in_ch: int, out_ch: int, kernel: int = 3, stride: int = 1):
        """Create the block; padding is ``kernel // 2`` and the conv has no bias."""
        super().__init__()
        conv = nn.Conv2d(
            in_ch,
            out_ch,
            kernel,
            stride=stride,
            padding=kernel // 2,
            bias=False,
        )
        norm = nn.BatchNorm2d(out_ch)
        relu = nn.ReLU(inplace=True)
        self.block = nn.Sequential(conv, norm, relu)
        # Kept as plain attributes so callers can inspect the block's geometry.
        self.in_ch, self.out_ch, self.stride = in_ch, out_ch, stride

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply conv, batch norm and ReLU to ``x``."""
        return self.block(x)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class DepthwiseSepBlock(nn.Module):
    """Depthwise-separable block: 3x3 depthwise conv, then 1x1 pointwise conv.

    Each convolution is followed by BatchNorm2d and ReLU.
    """

    def __init__(self, in_ch: int, out_ch: int, stride: int = 1):
        """Create the block; ``stride`` applies to the depthwise convolution only."""
        super().__init__()
        # Depthwise stage: one 3x3 filter per input channel (groups == in_ch).
        depthwise = [
            nn.Conv2d(in_ch, in_ch, 3, stride=stride, padding=1, groups=in_ch, bias=False),
            nn.BatchNorm2d(in_ch),
            nn.ReLU(inplace=True),
        ]
        # Pointwise stage: 1x1 convolution that changes the channel count.
        pointwise = [
            nn.Conv2d(in_ch, out_ch, 1, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        ]
        self.block = nn.Sequential(*depthwise, *pointwise)
        self.in_ch, self.out_ch, self.stride = in_ch, out_ch, stride

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the depthwise and pointwise stages to ``x``."""
        return self.block(x)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class DynamicCNN(nn.Module):
    """CNN with a mutable architecture.

    Structure: conv blocks -> global average pool -> FC layers -> classifier.
    Each entry of ``conv_configs`` is a dict with the keys ``"type"``
    (``"depthwise"`` or anything else for a standard block), ``"in_ch"``,
    ``"out_ch"`` and, optionally, ``"stride"`` and ``"kernel"``.
    """

    def __init__(
        self,
        conv_configs: list[dict] | None = None,
        fc_sizes: list[int] | None = None,
        n_classes: int = 10,
        in_channels: int = 3,
    ):
        """Build the network.

        Args:
            conv_configs: Conv block descriptions; defaults to three standard
                blocks (``in_channels`` -> 32 -> 64 -> 64, strides 1, 2, 2).
            fc_sizes: Hidden FC widths after pooling; defaults to ``[128]``.
            n_classes: Width of the classifier output.
            in_channels: Number of input channels.
        """
        super().__init__()
        self.n_classes = n_classes
        self.in_channels = in_channels

        # Default config: three progressively wider conv blocks. The first
        # block follows `in_channels` so non-RGB inputs work with the defaults.
        if conv_configs is None:
            conv_configs = [
                {"type": "conv", "in_ch": in_channels, "out_ch": 32, "stride": 1},
                {"type": "conv", "in_ch": 32, "out_ch": 64, "stride": 2},
                {"type": "conv", "in_ch": 64, "out_ch": 64, "stride": 2},
            ]
        if fc_sizes is None:
            fc_sizes = [128]

        self.conv_configs = list(conv_configs)
        self.fc_sizes = list(fc_sizes)
        self._build()

    def _build(self) -> None:
        """(Re)create all layers from ``conv_configs`` and ``fc_sizes``."""
        # Convolutional blocks
        conv_layers = []
        for cfg in self.conv_configs:
            if cfg["type"] == "depthwise":
                conv_layers.append(DepthwiseSepBlock(cfg["in_ch"], cfg["out_ch"], cfg.get("stride", 1)))
            else:
                conv_layers.append(ConvBlock(cfg["in_ch"], cfg["out_ch"], cfg.get("kernel", 3), cfg.get("stride", 1)))
        self.conv_layers = nn.ModuleList(conv_layers)
        self.pool = nn.AdaptiveAvgPool2d(1)  # -> (B, C, 1, 1)

        # FC layers; their input width is the last conv block's channel count.
        last_ch = self.conv_configs[-1]["out_ch"] if self.conv_configs else self.in_channels
        fc_layers = []
        prev = last_ch
        for h in self.fc_sizes:
            fc_layers += [nn.Linear(prev, h), nn.ReLU(inplace=True)]
            prev = h
        self.fc = nn.Sequential(*fc_layers)
        self.classifier = nn.Linear(prev, self.n_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run conv blocks, global average pooling, FC layers and the classifier."""
        for layer in self.conv_layers:
            x = layer(x)
        x = self.pool(x)
        x = x.reshape(x.size(0), -1)
        x = self.fc(x)
        return self.classifier(x)

    def count_params(self) -> int:
        """Return the total number of elements across registered parameters."""
        return sum(p.numel() for p in self.parameters())

    def count_flops(self, input_hw: tuple[int, int] = (32, 32)) -> int:
        """Estimate FLOPs for one input of spatial size ``input_hw``.

        Args:
            input_hw: ``(height, width)`` of the input; defaults to 32x32.

        Returns:
            The estimate. Conv terms count one operation per multiply-accumulate
            while FC terms count two, as in the original formula. The spatial
            size after each block is approximated as ``size // stride``.
        """
        flops = 0
        h, w = input_hw
        for cfg in self.conv_configs:
            s = cfg.get("stride", 1)
            out_h, out_w = h // s, w // s
            if cfg["type"] == "depthwise":
                # DepthwiseSepBlock always uses a 3x3 depthwise kernel, so any
                # "kernel" entry in the config is ignored here as well.
                flops += 3 * 3 * cfg["in_ch"] * out_h * out_w
                # Pointwise 1x1 convolution.
                flops += cfg["in_ch"] * cfg["out_ch"] * out_h * out_w
            else:
                k = cfg.get("kernel", 3)
                flops += k * k * cfg["in_ch"] * cfg["out_ch"] * out_h * out_w
            h, w = out_h, out_w
        last_ch = self.conv_configs[-1]["out_ch"] if self.conv_configs else self.in_channels
        prev = last_ch
        for sz in self.fc_sizes:
            flops += 2 * prev * sz
            prev = sz
        flops += 2 * prev * self.n_classes
        return flops

    def is_valid(self) -> bool:
        """Return True when there is at least one conv block and all channel counts are positive."""
        return len(self.conv_configs) >= 1 and all(
            c["in_ch"] > 0 and c["out_ch"] > 0 for c in self.conv_configs
        )
|
dnaty/core/individual.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""
|
|
2
|
+
dNaty individual: M_i = (θ_i, A_i, 𝓜_i)
|
|
3
|
+
"""
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
from copy import deepcopy
|
|
6
|
+
import torch
|
|
7
|
+
from dnaty.core.arch import DynamicMLP
|
|
8
|
+
from dnaty.core.memory import EpisodicMemory
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Individual:
    """One member of the population: a model, its episodic memory and bookkeeping."""

    def __init__(self, model: DynamicMLP, memory: EpisodicMemory | None = None):
        """Wrap ``model``; a new ``EpisodicMemory`` is created when ``memory`` is falsy."""
        self.model = model
        self.memory = memory if memory else EpisodicMemory()
        # Name of the last operator applied; "init" until one is recorded.
        self.last_op: str = "init"
        # (acc, -cost, -sharp)
        self.fitness: tuple[float, float, float] = (0.0, 0.0, 0.0)
        self.acc: float = 0.0
        self.last_grad_norm: float = 0.0
        self.last_delta_loss: float = 0.0

    def clone(self) -> "Individual":
        """Return a copy with deep-copied model and memory.

        ``last_op``, ``fitness`` and ``acc`` are carried over;
        ``last_grad_norm`` and ``last_delta_loss`` start again at 0.0.
        """
        twin = Individual(deepcopy(self.model), deepcopy(self.memory))
        twin.last_op, twin.fitness, twin.acc = self.last_op, self.fitness, self.acc
        return twin

    def count_params(self) -> int:
        """Delegate to ``model.count_params()``."""
        net = self.model
        return net.count_params()

    def count_flops(self) -> int:
        """Delegate to ``model.count_flops()``."""
        net = self.model
        return net.count_flops()
|
dnaty/core/memory.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""
|
|
2
|
+
EpisodicMemory — central component of dNaty.
|
|
3
|
+
Implements eq. 1.4: accumulation with temporal decay γ.
|
|
4
|
+
Optimized: scores are accumulated incrementally (O(1) per update vs. O(n) before).
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class Experience:
    """Record of one operator application and its measured effect."""

    # Name of the operator that was applied.
    operator: str
    # Change in loss; negative values count as an improvement.
    delta_loss: float
    # Gradient norm recorded with the experience.
    gradient_norm: float
    # Generation in which the experience was recorded.
    generation: int
    weight: float = 1.0
    # Overwritten by EpisodicMemory.update with the memory's step counter.
    timestamp: int = 0

    @property
    def impact(self) -> float:
        """Return ``|delta_loss| * gradient_norm`` for improvements, else 0.0.

        Only experiences whose loss decreased (``delta_loss < 0``) have impact.
        """
        no_improvement = self.delta_loss >= 0
        return 0.0 if no_improvement else abs(self.delta_loss) * self.gradient_norm
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class EpisodicMemory:
    """Episodic memory of operator experiences with temporal decay ``gamma``.

    Per-operator scores are maintained incrementally: each ``update`` decays
    every stored score by ``gamma`` and adds the new experience's impact. When
    the memory exceeds ``max_size`` it is pruned and the scores are rebuilt
    from the surviving experiences.
    """

    def __init__(self, max_size: int = 500, decay_gamma: float = 0.99):
        """Create an empty memory.

        Args:
            max_size: Maximum number of experiences kept after pruning.
            decay_gamma: Multiplicative decay applied to scores on every update.
        """
        self.experiences: list[Experience] = []
        self.max_size = max_size
        self.gamma = decay_gamma
        # Number of updates seen so far; also the next timestamp to assign.
        self._step = 0
        # Accumulated score per operator, updated incrementally.
        self._scores: dict[str, float] = {}

    def update(self, exp: Experience) -> None:
        """Record ``exp``, decaying existing scores and adding its impact.

        ``exp.timestamp`` is overwritten with the current step counter.
        """
        # Global decay of the accumulated scores: one pass over operators,
        # not over stored experiences.
        for op in self._scores:
            self._scores[op] *= self.gamma

        imp = exp.impact
        if imp > 0:
            self._scores[exp.operator] = self._scores.get(exp.operator, 0.0) + imp

        exp.timestamp = self._step
        self._step += 1
        self.experiences.append(exp)

        if len(self.experiences) > self.max_size:
            self._prune()

    def _decay_factor(self, exp: Experience) -> float:
        """Return ``gamma ** k`` where ``k`` is the number of updates since ``exp``.

        The newest experience has ``timestamp == _step - 1`` and was added
        undecayed, so its exponent must be 0 to agree with ``update``.
        """
        return self.gamma ** max(0, self._step - 1 - exp.timestamp)

    def _prune(self) -> None:
        """Keep the ``max_size`` experiences with the highest decayed impact.

        The experience list is reordered by decayed impact (highest first) and
        the per-operator scores are rebuilt from the survivors.
        """
        self.experiences.sort(
            key=lambda e: e.impact * self._decay_factor(e),
            reverse=True,
        )
        self.experiences = self.experiences[:self.max_size]
        # Rebuild the scores from scratch so pruned experiences stop contributing.
        self._scores = {}
        for e in self.experiences:
            imp = e.impact
            if imp > 0:
                self._scores[e.operator] = (
                    self._scores.get(e.operator, 0.0) + imp * self._decay_factor(e)
                )

    def query_mutation_probs(self, operators: list[str], tau: float = 1.0) -> dict[str, float]:
        """Return a softmax over the accumulated scores of ``operators``.

        Args:
            operators: Operator names to score; unseen operators score 0.0.
            tau: Softmax temperature, clamped below at 1e-8.

        Returns:
            Mapping from operator name to probability; empty for no operators.
        """
        if len(operators) == 0:
            # Nothing to normalize; avoid calling max() on an empty array.
            return {}
        vals = np.array(
            [self._scores.get(op, 0.0) for op in operators], dtype=np.float64
        ) / max(tau, 1e-8)
        # Subtract the maximum before exponentiating for numerical stability.
        vals -= vals.max()
        exp_vals = np.exp(vals)
        probs = exp_vals / exp_vals.sum()
        return {op: float(p) for op, p in zip(operators, probs)}

    def operator_counts(self, operators: list[str]) -> dict[str, int]:
        """Count stored experiences with positive impact for each listed operator."""
        counts: dict[str, int] = {op: 0 for op in operators}
        for e in self.experiences:
            if e.operator in counts and e.impact > 0:
                counts[e.operator] += 1
        return counts
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|