scratchkit 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlscratch/__init__.py +56 -0
- mlscratch/__main__.py +118 -0
- mlscratch/bayesian/__init__.py +53 -0
- mlscratch/bayesian/bayesian_linear_regression.py +171 -0
- mlscratch/bayesian/bayesian_network.py +248 -0
- mlscratch/bayesian/bayesian_nn.py +315 -0
- mlscratch/bayesian/gaussian_process.py +207 -0
- mlscratch/bayesian/hmm.py +277 -0
- mlscratch/bayesian/init.py +52 -0
- mlscratch/bayesian/kalman_filter.py +182 -0
- mlscratch/bayesian/naive_bayes.py +209 -0
- mlscratch/metrics/__init__.py +59 -0
- mlscratch/metrics/classification.py +365 -0
- mlscratch/metrics/regression.py +79 -0
- mlscratch/neural/__init__.py +121 -0
- mlscratch/neural/attention.py +420 -0
- mlscratch/neural/autoencoder.py +543 -0
- mlscratch/neural/boltzmann.py +231 -0
- mlscratch/neural/cnn.py +593 -0
- mlscratch/neural/cvnn.py +322 -0
- mlscratch/neural/gan.py +364 -0
- mlscratch/neural/hopfield.py +193 -0
- mlscratch/neural/perceptron.py +398 -0
- mlscratch/neural/rbf_network.py +230 -0
- mlscratch/neural/recurrent.py +569 -0
- mlscratch/preprocessing/__init__.py +38 -0
- mlscratch/preprocessing/encoders.py +140 -0
- mlscratch/preprocessing/model_selection.py +119 -0
- mlscratch/preprocessing/polynomial.py +105 -0
- mlscratch/preprocessing/scalers.py +220 -0
- mlscratch/py.typed +0 -0
- mlscratch/reinforcement/__init__.py +59 -0
- mlscratch/reinforcement/ddpg.py +363 -0
- mlscratch/reinforcement/dqn.py +319 -0
- mlscratch/reinforcement/ppo.py +452 -0
- mlscratch/reinforcement/q_learning.py +352 -0
- mlscratch/reinforcement/sac.py +382 -0
- mlscratch/reinforcement/utils.py +594 -0
- mlscratch/supervised/__init__.py +76 -0
- mlscratch/supervised/_validation.py +50 -0
- mlscratch/supervised/adaboost.py +255 -0
- mlscratch/supervised/decision_tree.py +495 -0
- mlscratch/supervised/gradient_boosting.py +354 -0
- mlscratch/supervised/knn.py +234 -0
- mlscratch/supervised/lasso_regression.py +125 -0
- mlscratch/supervised/linear_models.py +459 -0
- mlscratch/supervised/linear_regression.py +197 -0
- mlscratch/supervised/logistic_regression.py +119 -0
- mlscratch/supervised/naive_bayes.py +113 -0
- mlscratch/supervised/random_forest.py +321 -0
- mlscratch/supervised/ridge_regression.py +93 -0
- mlscratch/supervised/svm.py +356 -0
- mlscratch/unsupervised/__init__.py +39 -0
- mlscratch/unsupervised/apriori.py +178 -0
- mlscratch/unsupervised/dbscan.py +141 -0
- mlscratch/unsupervised/gmm.py +204 -0
- mlscratch/unsupervised/hierarchical_clustering.py +137 -0
- mlscratch/unsupervised/ica.py +167 -0
- mlscratch/unsupervised/kmeans.py +135 -0
- mlscratch/unsupervised/kmedoids.py +133 -0
- mlscratch/unsupervised/pca.py +103 -0
- mlscratch/unsupervised/tsne.py +200 -0
- scratchkit-0.2.0.dist-info/METADATA +241 -0
- scratchkit-0.2.0.dist-info/RECORD +68 -0
- scratchkit-0.2.0.dist-info/WHEEL +5 -0
- scratchkit-0.2.0.dist-info/entry_points.txt +2 -0
- scratchkit-0.2.0.dist-info/licenses/LICENSE +201 -0
- scratchkit-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,543 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Autoencoder Variants
|
|
3
|
+
=====================
|
|
4
|
+
Unsupervised representation-learning networks that compress input data
|
|
5
|
+
through a bottleneck and reconstruct it:
|
|
6
|
+
|
|
7
|
+
Encoder: X → h = f(WₑX + bₑ) (latent code)
|
|
8
|
+
Decoder:  h → X̂ = g(Wd h + bd)     (reconstruction from the latent code h)
|
|
9
|
+
Loss: MSE(X, X̂)
|
|
10
|
+
|
|
11
|
+
Three variants are implemented
|
|
12
|
+
--------------------------------
|
|
13
|
+
Autoencoder
|
|
14
|
+
Vanilla tied-weight autoencoder. Encoder and decoder share transposed
|
|
15
|
+
weights for parameter efficiency and implicit regularisation.
|
|
16
|
+
|
|
17
|
+
DenoisingAutoencoder
|
|
18
|
+
Corrupts inputs with additive Gaussian or Bernoulli dropout noise
|
|
19
|
+
before encoding, forcing the network to learn robust features.
|
|
20
|
+
Otherwise identical API to ``Autoencoder``.
|
|
21
|
+
|
|
22
|
+
VariationalAutoencoder
|
|
23
|
+
Learns a *distribution* over the latent space rather than a point
|
|
24
|
+
estimate. The encoder outputs (μ, log σ²) and samples via the
|
|
25
|
+
reparameterisation trick:
|
|
26
|
+
z = μ + σ ε, ε ~ N(0,I)
|
|
27
|
+
Loss: Reconstruction (MSE) + β · KL[N(μ,σ²) ‖ N(0,I)]
|
|
28
|
+
KL = −½ Σ(1 + log σ² − μ² − σ²)
|
|
29
|
+
|
|
30
|
+
References
|
|
31
|
+
----------
|
|
32
|
+
Hinton & Salakhutdinov (2006). Reducing the dimensionality of data with
|
|
33
|
+
neural networks. Science, 313(5786), 504-507.
|
|
34
|
+
|
|
35
|
+
Vincent et al. (2008). Extracting and composing robust features with
|
|
36
|
+
denoising autoencoders. ICML.
|
|
37
|
+
|
|
38
|
+
Kingma & Welling (2013). Auto-encoding variational Bayes. ICLR 2014.
|
|
39
|
+
|
|
40
|
+
Only numpy is used.
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
from __future__ import annotations
|
|
44
|
+
|
|
45
|
+
import numpy as np
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# ============================================================
|
|
49
|
+
# Activations
|
|
50
|
+
# ============================================================
|
|
51
|
+
|
|
52
|
+
def _sigmoid(x: np.ndarray) -> np.ndarray:
    """Element-wise logistic function 1 / (1 + exp(-x)).

    The argument is clipped to [-500, 500] before exponentiation so that
    ``np.exp`` cannot overflow for large-magnitude inputs.
    """
    bounded = np.clip(x, -500, 500)
    denominator = 1.0 + np.exp(-bounded)
    return 1.0 / denominator
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _relu(x: np.ndarray) -> np.ndarray:
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    floor = 0.0
    rectified = np.maximum(floor, x)
    return rectified
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _relu_grad(x: np.ndarray) -> np.ndarray:
    """Derivative of ReLU: 1.0 where ``x`` is strictly positive, else 0.0."""
    is_active = x > 0
    return is_active.astype(float)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# ============================================================
|
|
65
|
+
# Vanilla Autoencoder
|
|
66
|
+
# ============================================================
|
|
67
|
+
|
|
68
|
+
class Autoencoder:
    """
    Vanilla Autoencoder with tied encoder / decoder weights.

    Architecture:  X → [Linear → ReLU] × n_hidden_layers → code
                   code → [Linear → ReLU] × n_hidden_layers → X̂

    Decoder layer ``i`` multiplies by the transpose of encoder weight ``i``;
    only the biases are separate.  The output layer is also a ReLU, so
    reconstructions are always non-negative.

    Parameters
    ----------
    input_size : int
        Number of input features.
    hidden_sizes : list[int] or None
        Sizes of hidden layers in the encoder.  The decoder mirrors them
        in reverse.  The last element is the code (bottleneck) dimension.
        ``None`` (or an empty list) selects ``[64, 32]``.
    learning_rate : float
        Step size of the plain gradient-descent update.
    epochs : int
        Number of passes over the training data.
    batch_size : int or None
        Mini-batch size; ``None`` uses the whole data set as one batch.
    random_state : int or None
        Seed for weight initialisation and shuffling.

    Attributes
    ----------
    losses_ : list[float]
        Mean mini-batch MSE of every epoch, filled by ``fit``.
    """

    def __init__(
        self,
        input_size: int,
        hidden_sizes: list[int] | None = None,
        learning_rate: float = 1e-3,
        epochs: int = 100,
        batch_size: int | None = 64,
        random_state: int | None = None,
    ) -> None:
        self.input_size = input_size
        self.hidden_sizes = hidden_sizes or [64, 32]
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self._rng = np.random.default_rng(random_state)

        # Built during fit
        self._enc_W: list[np.ndarray] = []
        self._enc_b: list[np.ndarray] = []
        self._dec_b: list[np.ndarray] = []
        self.losses_: list[float] = []

    # ------------------------------------------------------------------
    # Build
    # ------------------------------------------------------------------

    def _build(self) -> None:
        """(Re)initialise the shared weights and both sets of biases."""
        sizes = [self.input_size, *self.hidden_sizes]
        self._enc_W = []
        self._enc_b = []
        self._dec_b = []
        for fan_in, fan_out in zip(sizes[:-1], sizes[1:]):
            scale = np.sqrt(2.0 / fan_in)  # He-normal scale for ReLU layers
            self._enc_W.append(self._rng.normal(0, scale, (fan_in, fan_out)))
            self._enc_b.append(np.zeros(fan_out))
            # The decoder layer tied to this weight maps fan_out -> fan_in.
            self._dec_b.append(np.zeros(fan_in))

    # ------------------------------------------------------------------
    # Forward
    # ------------------------------------------------------------------

    def _encode(self, X: np.ndarray) -> tuple[list, list]:
        """Return (pre_acts, activations) for encoder.

        ``activations[0]`` is ``X`` itself and ``activations[-1]`` the code.
        """
        pre_acts, acts = [], [X]
        a = X
        for W, b in zip(self._enc_W, self._enc_b):
            z = a @ W + b
            pre_acts.append(z)
            a = _relu(z)
            acts.append(a)
        return pre_acts, acts

    def _decode(self, code: np.ndarray) -> tuple[list, list]:
        """Return (pre_acts, activations) for decoder (tied weights).

        Layers are applied in reverse encoder order, so ``pre_acts[-1]``
        belongs to encoder weight 0 and ``activations[-1]`` is X̂.
        """
        pre_acts, acts = [], [code]
        a = code
        for W, b in zip(reversed(self._enc_W), reversed(self._dec_b)):
            z = a @ W.T + b
            pre_acts.append(z)
            a = _relu(z)
            acts.append(a)
        return pre_acts, acts

    # ------------------------------------------------------------------
    # Backward
    # ------------------------------------------------------------------

    def _backward(
        self,
        target: np.ndarray,
        enc_pre: list,
        enc_acts: list,
        dec_pre: list,
        dec_acts: list,
    ) -> tuple[list, list, list]:
        """Back-propagate one mini-batch without touching the parameters.

        Returns ``(grad_W, grad_b_enc, grad_b_dec)``, each a list indexed
        like ``self._enc_W``.  Because the weights are tied, every
        ``grad_W[i]`` is the sum of the decoder-side and the encoder-side
        contribution.
        """
        grad_W = [np.zeros_like(W) for W in self._enc_W]
        grad_b_enc = [np.zeros_like(b) for b in self._enc_b]
        grad_b_dec = [np.zeros_like(b) for b in self._dec_b]

        # Gradient of the batch-averaged squared error w.r.t. X̂.
        delta = 2.0 * (dec_acts[-1] - target) / len(target)

        # Decoder, output layer first.  The last decoder layer uses encoder
        # weight 0, so walking the decoder backwards enumerates 0, 1, ...
        for layer_idx, (dec_z, dec_a_prev) in enumerate(
            zip(reversed(dec_pre), reversed(dec_acts[:-1]))
        ):
            delta = delta * _relu_grad(dec_z)
            grad_W[layer_idx] += delta.T @ dec_a_prev  # == W.shape
            # delta is already divided by the batch size, hence sum not mean.
            grad_b_dec[layer_idx] = delta.sum(axis=0)
            delta = delta @ self._enc_W[layer_idx]  # propagate back

        # Encoder, code layer first.
        for layer_idx in reversed(range(len(self._enc_W))):
            delta = delta * _relu_grad(enc_pre[layer_idx])
            grad_W[layer_idx] += enc_acts[layer_idx].T @ delta
            grad_b_enc[layer_idx] = delta.sum(axis=0)
            if layer_idx > 0:
                delta = delta @ self._enc_W[layer_idx].T

        return grad_W, grad_b_enc, grad_b_dec

    def _apply_gradients(
        self, grad_W: list, grad_b_enc: list, grad_b_dec: list
    ) -> None:
        """Take one plain gradient-descent step on all parameters."""
        lr = self.learning_rate
        for i, (g_W, g_be, g_bd) in enumerate(
            zip(grad_W, grad_b_enc, grad_b_dec)
        ):
            self._enc_W[i] -= lr * g_W
            self._enc_b[i] -= lr * g_be
            self._dec_b[i] -= lr * g_bd

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def fit(self, X: np.ndarray) -> "Autoencoder":
        """
        Train autoencoder to reconstruct X.

        Parameters are re-initialised on every call.

        Parameters
        ----------
        X : ndarray of shape (n_samples, input_size)

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or its column count differs from
            ``input_size``.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2 or X.shape[1] != self.input_size:
            raise ValueError(
                f"X must be a 2-D array with {self.input_size} columns; "
                f"got shape {X.shape}."
            )

        self._build()
        n_samples = len(X)
        # max(1, ...) keeps range() valid for empty input with batch_size=None.
        bs = self.batch_size or max(1, n_samples)
        self.losses_ = []

        for _ in range(self.epochs):
            idx = self._rng.permutation(n_samples)
            batch_losses: list[float] = []

            for start in range(0, n_samples, bs):
                Xb = X[idx[start:start + bs]]

                # Forward
                enc_pre, enc_acts = self._encode(Xb)
                dec_pre, dec_acts = self._decode(enc_acts[-1])

                # Loss
                batch_losses.append(float(np.mean((dec_acts[-1] - Xb) ** 2)))

                # Backward: compute every gradient first, then update, so
                # propagation never sees half-updated weights.
                grads = self._backward(Xb, enc_pre, enc_acts, dec_pre, dec_acts)
                self._apply_gradients(*grads)

            # Average over the batches actually processed (a trailing
            # partial batch counts as a batch).
            self.losses_.append(sum(batch_losses) / max(1, len(batch_losses)))

        return self

    def encode(self, X: np.ndarray) -> np.ndarray:
        """Compress X to latent codes."""
        _, acts = self._encode(X)
        return acts[-1]

    def decode(self, code: np.ndarray) -> np.ndarray:
        """Reconstruct from latent codes."""
        _, acts = self._decode(code)
        return acts[-1]

    def reconstruct(self, X: np.ndarray) -> np.ndarray:
        """Encode then decode X."""
        return self.decode(self.encode(X))

    def reconstruction_error(self, X: np.ndarray) -> np.ndarray:
        """
        Per-sample MSE reconstruction error.  Useful for anomaly detection:
        high error ≈ anomalous sample.

        Returns
        -------
        ndarray of shape (n_samples,)
        """
        X_hat = self.reconstruct(X)
        return np.mean((X - X_hat) ** 2, axis=1)
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
# ============================================================
|
|
245
|
+
# Denoising Autoencoder
|
|
246
|
+
# ============================================================
|
|
247
|
+
|
|
248
|
+
class DenoisingAutoencoder(Autoencoder):
    """
    Denoising Autoencoder — adds noise to inputs before encoding.

    The network sees a corrupted mini-batch but is trained to reproduce the
    clean one.  ``encode`` / ``decode`` / ``reconstruct`` are inherited and
    do not add noise.

    Parameters
    ----------
    noise_type : str
        ``'gaussian'`` — adds N(0, noise_level²) noise.
        ``'dropout'``  — randomly zeros out inputs with probability noise_level.
    noise_level : float
        Std dev for Gaussian noise, or drop probability for dropout noise.
    All other parameters : see ``Autoencoder``.

    Raises
    ------
    ValueError
        If ``noise_type`` is unknown, ``noise_level`` is negative, or
        ``noise_level`` exceeds 1 for dropout noise.
    """

    def __init__(
        self,
        input_size: int,
        hidden_sizes: list[int] | None = None,
        noise_type: str = "gaussian",
        noise_level: float = 0.1,
        learning_rate: float = 1e-3,
        epochs: int = 100,
        batch_size: int | None = 64,
        random_state: int | None = None,
    ) -> None:
        super().__init__(input_size, hidden_sizes, learning_rate,
                         epochs, batch_size, random_state)
        if noise_type not in {"gaussian", "dropout"}:
            raise ValueError("noise_type must be 'gaussian' or 'dropout'.")
        if noise_level < 0:
            raise ValueError("noise_level must be non-negative.")
        if noise_type == "dropout" and noise_level > 1:
            raise ValueError("noise_level must be in [0, 1] for dropout noise.")
        self.noise_type = noise_type
        self.noise_level = noise_level

    def _corrupt(self, X: np.ndarray) -> np.ndarray:
        """Return a noisy copy of ``X`` according to ``noise_type``."""
        if self.noise_type == "gaussian":
            return X + self._rng.normal(0, self.noise_level, X.shape)
        # dropout: keep an entry when its uniform draw exceeds noise_level
        mask = self._rng.random(X.shape) > self.noise_level
        return X * mask

    def _clean_target_gradients(
        self,
        target: np.ndarray,
        enc_pre: list,
        enc_acts: list,
        dec_pre: list,
        dec_acts: list,
    ) -> tuple[list, list, list]:
        """Back-propagate one mini-batch against the clean ``target``.

        Returns ``(grad_W, grad_b_enc, grad_b_dec)``, each indexed like
        ``self._enc_W``.  The weights are tied, so each ``grad_W[i]`` sums
        the decoder-side and encoder-side contributions.  No parameter is
        modified here.
        """
        grad_W = [np.zeros_like(W) for W in self._enc_W]
        grad_b_enc = [np.zeros_like(b) for b in self._enc_b]
        grad_b_dec = [np.zeros_like(b) for b in self._dec_b]

        # Gradient of the batch-averaged squared error w.r.t. X̂.
        delta = 2.0 * (dec_acts[-1] - target) / len(target)

        # Decoder, output layer first; it is tied to encoder weight 0.
        for layer_idx, (dec_z, dec_a_prev) in enumerate(
            zip(reversed(dec_pre), reversed(dec_acts[:-1]))
        ):
            delta = delta * _relu_grad(dec_z)
            grad_W[layer_idx] += delta.T @ dec_a_prev  # == W.shape
            # delta is already divided by the batch size, hence sum not mean.
            grad_b_dec[layer_idx] = delta.sum(axis=0)
            delta = delta @ self._enc_W[layer_idx]

        # Encoder, code layer first.  enc_acts[0] is the *noisy* input.
        for layer_idx in reversed(range(len(self._enc_W))):
            delta = delta * _relu_grad(enc_pre[layer_idx])
            grad_W[layer_idx] += enc_acts[layer_idx].T @ delta
            grad_b_enc[layer_idx] = delta.sum(axis=0)
            if layer_idx > 0:
                delta = delta @ self._enc_W[layer_idx].T

        return grad_W, grad_b_enc, grad_b_dec

    def fit(self, X: np.ndarray) -> "DenoisingAutoencoder":
        """
        Train on corrupted inputs, reconstruct clean targets.

        Parameters are re-initialised on every call.

        Parameters
        ----------
        X : ndarray of shape (n_samples, input_size)

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or its column count differs from
            ``input_size``.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2 or X.shape[1] != self.input_size:
            raise ValueError(
                f"X must be a 2-D array with {self.input_size} columns; "
                f"got shape {X.shape}."
            )

        self._build()
        n_samples = len(X)
        # max(1, ...) keeps range() valid for empty input with batch_size=None.
        bs = self.batch_size or max(1, n_samples)
        lr = self.learning_rate
        self.losses_ = []

        for _ in range(self.epochs):
            idx = self._rng.permutation(n_samples)
            batch_losses: list[float] = []

            for start in range(0, n_samples, bs):
                Xb_clean = X[idx[start:start + bs]]
                Xb_noisy = self._corrupt(Xb_clean)

                enc_pre, enc_acts = self._encode(Xb_noisy)
                dec_pre, dec_acts = self._decode(enc_acts[-1])

                batch_losses.append(
                    float(np.mean((dec_acts[-1] - Xb_clean) ** 2))
                )

                # Compute every gradient before updating so propagation
                # never sees half-updated weights.
                grad_W, grad_b_enc, grad_b_dec = self._clean_target_gradients(
                    Xb_clean, enc_pre, enc_acts, dec_pre, dec_acts
                )
                for i, (g_W, g_be, g_bd) in enumerate(
                    zip(grad_W, grad_b_enc, grad_b_dec)
                ):
                    self._enc_W[i] -= lr * g_W
                    self._enc_b[i] -= lr * g_be
                    self._dec_b[i] -= lr * g_bd

            # Average over the batches actually processed (a trailing
            # partial batch counts as a batch).
            self.losses_.append(sum(batch_losses) / max(1, len(batch_losses)))

        return self
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
# ============================================================
|
|
338
|
+
# Variational Autoencoder
|
|
339
|
+
# ============================================================
|
|
340
|
+
|
|
341
|
+
class VariationalAutoencoder:
    """
    Variational Autoencoder (VAE).

    Encoder outputs μ and log σ² for a Gaussian latent distribution.
    Samples via reparameterisation: z = μ + σ ε,  ε ~ N(0,I).
    Decoder reconstructs X̂ from z (linear output layer).
    Loss = Reconstruction (MSE) + β * KL[N(μ,σ²) ‖ N(0,I)]

    Parameters
    ----------
    input_size : int
        Number of input features.
    hidden_size : int
        Size of the single hidden layer in both encoder and decoder.
    latent_dim : int
        Dimensionality of the latent space.
    beta : float
        KL weight (β-VAE: β > 1 encourages disentanglement).
    learning_rate : float
        Step size of the plain gradient-descent update.
    epochs : int
        Number of passes over the training data per ``fit`` call.
    batch_size : int or None
        Mini-batch size; ``None`` uses the whole data set as one batch.
    random_state : int or None
        Seed for initialisation, shuffling and latent sampling.

    Attributes
    ----------
    losses_ : list[float]
        Mean mini-batch loss of every epoch of the latest ``fit`` call.

    Notes
    -----
    Parameters are initialised once in the constructor; ``fit`` does not
    reset them, so calling it again continues from the current weights.
    """

    def __init__(
        self,
        input_size: int,
        hidden_size: int = 64,
        latent_dim: int = 8,
        beta: float = 1.0,
        learning_rate: float = 1e-3,
        epochs: int = 100,
        batch_size: int | None = 64,
        random_state: int | None = None,
    ) -> None:
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.latent_dim = latent_dim
        self.beta = beta
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self._rng = np.random.default_rng(random_state)

        self._init_params()
        self.losses_: list[float] = []

    # ------------------------------------------------------------------
    # Parameter initialisation
    # ------------------------------------------------------------------

    def _init_params(self) -> None:
        """Draw all weight matrices and zero all biases."""
        D, H, L = self.input_size, self.hidden_size, self.latent_dim

        def he_std(fan_in: int) -> float:
            # He-normal standard deviation; depends on fan-in only.
            return np.sqrt(2.0 / fan_in)

        # Encoder: input → hidden
        self._We1 = self._rng.normal(0, he_std(D), (D, H))
        self._be1 = np.zeros(H)
        # Encoder: hidden → μ
        self._Wmu = self._rng.normal(0, he_std(H), (H, L))
        self._bmu = np.zeros(L)
        # Encoder: hidden → log σ²
        self._Wlv = self._rng.normal(0, he_std(H), (H, L))
        self._blv = np.zeros(L)

        # Decoder: z → hidden
        self._Wd1 = self._rng.normal(0, he_std(L), (L, H))
        self._bd1 = np.zeros(H)
        # Decoder: hidden → X̂
        self._Wd2 = self._rng.normal(0, he_std(H), (H, D))
        self._bd2 = np.zeros(D)

    # ------------------------------------------------------------------
    # Encoder / Decoder
    # ------------------------------------------------------------------

    def _encode(self, X: np.ndarray) -> tuple:
        """Return ``(h_enc, mu, log_var)``; ``log_var`` is clipped to ±10."""
        h_enc = _relu(X @ self._We1 + self._be1)
        mu = h_enc @ self._Wmu + self._bmu
        log_var = np.clip(h_enc @ self._Wlv + self._blv, -10, 10)
        return h_enc, mu, log_var

    def _reparameterise(self, mu: np.ndarray, log_var: np.ndarray) -> np.ndarray:
        """Sample z = μ + σ ε with ε ~ N(0, I) and σ = exp(½ log σ²)."""
        eps = self._rng.standard_normal(mu.shape)
        return mu + np.exp(0.5 * log_var) * eps

    def _decode(self, z: np.ndarray) -> tuple:
        """Return ``(h_dec, X_hat)`` for latent codes ``z``."""
        h_dec = _relu(z @ self._Wd1 + self._bd1)
        X_hat = h_dec @ self._Wd2 + self._bd2
        return h_dec, X_hat

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def fit(self, X: np.ndarray) -> "VariationalAutoencoder":
        """
        Train VAE on X.

        Parameters
        ----------
        X : ndarray of shape (n_samples, input_size)

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or its column count differs from
            ``input_size``.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2 or X.shape[1] != self.input_size:
            raise ValueError(
                f"X must be a 2-D array with {self.input_size} columns; "
                f"got shape {X.shape}."
            )

        n_samples = len(X)
        # max(1, ...) keeps range() valid for empty input with batch_size=None.
        bs = self.batch_size or max(1, n_samples)
        lr = self.learning_rate
        self.losses_ = []

        for _ in range(self.epochs):
            idx = self._rng.permutation(n_samples)
            batch_losses: list[float] = []

            for start in range(0, n_samples, bs):
                Xb = X[idx[start:start + bs]]
                n = len(Xb)

                # ── Forward ─────────────────────────────────────────
                h_enc, mu, log_var = self._encode(Xb)
                z = self._reparameterise(mu, log_var)
                h_dec, X_hat = self._decode(z)

                # ── Loss ─────────────────────────────────────────────
                # Reported per element (mean over batch and features /
                # latent dims); the gradients below are of the per-sample
                # sums averaged over the batch.
                rec_loss = float(np.mean((X_hat - Xb) ** 2))
                kl_loss = float(-0.5 * np.mean(
                    1 + log_var - mu ** 2 - np.exp(log_var)
                ))
                batch_losses.append(rec_loss + self.beta * kl_loss)

                # ── Backward decoder ─────────────────────────────────
                # Every delta already carries the 1/n factor, so bias
                # gradients are sums over the batch axis, not means.
                d_Xhat = 2.0 * (X_hat - Xb) / n
                d_Wd2 = h_dec.T @ d_Xhat
                d_bd2 = d_Xhat.sum(axis=0)
                # relu(a) > 0 exactly where a > 0, so the stored activation
                # yields the same mask as the pre-activation.
                d_hdec = (d_Xhat @ self._Wd2.T) * _relu_grad(h_dec)
                d_Wd1 = z.T @ d_hdec
                d_bd1 = d_hdec.sum(axis=0)
                d_z = d_hdec @ self._Wd1.T  # gradient w.r.t. z

                # ── Backward through reparameterise ──────────────────
                # z = μ + σ ε  ⇒  ∂z/∂μ = 1 and ∂z/∂log σ² = ½ σ ε.
                # σ ε is recovered as (z − μ), so ε need not be stored.
                d_mu = d_z + self.beta * mu / n
                d_lv = (0.5 * d_z * (z - mu)
                        + self.beta * 0.5 * (np.exp(log_var) - 1.0) / n)
                # The ±10 clip on log σ² is treated as identity here, so
                # saturated units still receive a gradient.

                # ── Backward encoder ─────────────────────────────────
                d_Wmu = h_enc.T @ d_mu
                d_bmu = d_mu.sum(axis=0)
                d_Wlv = h_enc.T @ d_lv
                d_blv = d_lv.sum(axis=0)
                d_henc = ((d_mu @ self._Wmu.T + d_lv @ self._Wlv.T)
                          * _relu_grad(h_enc))
                d_We1 = Xb.T @ d_henc
                d_be1 = d_henc.sum(axis=0)

                # ── Gradient clipping (prevents overflow) ─────────────
                _clip = 5.0
                for _g in (d_We1, d_be1, d_Wmu, d_bmu, d_Wlv, d_blv,
                           d_Wd1, d_bd1, d_Wd2, d_bd2):
                    np.clip(_g, -_clip, _clip, out=_g)

                # ── Gradient descent ─────────────────────────────────
                self._We1 -= lr * d_We1
                self._be1 -= lr * d_be1
                self._Wmu -= lr * d_Wmu
                self._bmu -= lr * d_bmu
                self._Wlv -= lr * d_Wlv
                self._blv -= lr * d_blv
                self._Wd1 -= lr * d_Wd1
                self._bd1 -= lr * d_bd1
                self._Wd2 -= lr * d_Wd2
                self._bd2 -= lr * d_bd2

            # Average over the batches actually processed (a trailing
            # partial batch counts as a batch).
            self.losses_.append(sum(batch_losses) / max(1, len(batch_losses)))

        return self

    def encode(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Return (μ, log σ²) for each sample."""
        _, mu, log_var = self._encode(X)
        return mu, log_var

    def decode(self, z: np.ndarray) -> np.ndarray:
        """Decode latent codes to reconstruction."""
        _, X_hat = self._decode(z)
        return X_hat

    def reconstruct(self, X: np.ndarray) -> np.ndarray:
        """Encode (use μ, no sampling) and decode."""
        _, mu, _ = self._encode(X)
        _, X_hat = self._decode(mu)
        return X_hat

    def sample(self, n_samples: int) -> np.ndarray:
        """
        Generate new samples by sampling z ~ N(0,I) and decoding.

        Returns
        -------
        ndarray of shape (n_samples, input_size)
        """
        z = self._rng.standard_normal((n_samples, self.latent_dim))
        return self.decode(z)
|