scratchkit 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlscratch/__init__.py +56 -0
- mlscratch/__main__.py +118 -0
- mlscratch/bayesian/__init__.py +53 -0
- mlscratch/bayesian/bayesian_linear_regression.py +171 -0
- mlscratch/bayesian/bayesian_network.py +248 -0
- mlscratch/bayesian/bayesian_nn.py +315 -0
- mlscratch/bayesian/gaussian_process.py +207 -0
- mlscratch/bayesian/hmm.py +277 -0
- mlscratch/bayesian/init.py +52 -0
- mlscratch/bayesian/kalman_filter.py +182 -0
- mlscratch/bayesian/naive_bayes.py +209 -0
- mlscratch/metrics/__init__.py +59 -0
- mlscratch/metrics/classification.py +365 -0
- mlscratch/metrics/regression.py +79 -0
- mlscratch/neural/__init__.py +121 -0
- mlscratch/neural/attention.py +420 -0
- mlscratch/neural/autoencoder.py +543 -0
- mlscratch/neural/boltzmann.py +231 -0
- mlscratch/neural/cnn.py +593 -0
- mlscratch/neural/cvnn.py +322 -0
- mlscratch/neural/gan.py +364 -0
- mlscratch/neural/hopfield.py +193 -0
- mlscratch/neural/perceptron.py +398 -0
- mlscratch/neural/rbf_network.py +230 -0
- mlscratch/neural/recurrent.py +569 -0
- mlscratch/preprocessing/__init__.py +38 -0
- mlscratch/preprocessing/encoders.py +140 -0
- mlscratch/preprocessing/model_selection.py +119 -0
- mlscratch/preprocessing/polynomial.py +105 -0
- mlscratch/preprocessing/scalers.py +220 -0
- mlscratch/py.typed +0 -0
- mlscratch/reinforcement/__init__.py +59 -0
- mlscratch/reinforcement/ddpg.py +363 -0
- mlscratch/reinforcement/dqn.py +319 -0
- mlscratch/reinforcement/ppo.py +452 -0
- mlscratch/reinforcement/q_learning.py +352 -0
- mlscratch/reinforcement/sac.py +382 -0
- mlscratch/reinforcement/utils.py +594 -0
- mlscratch/supervised/__init__.py +76 -0
- mlscratch/supervised/_validation.py +50 -0
- mlscratch/supervised/adaboost.py +255 -0
- mlscratch/supervised/decision_tree.py +495 -0
- mlscratch/supervised/gradient_boosting.py +354 -0
- mlscratch/supervised/knn.py +234 -0
- mlscratch/supervised/lasso_regression.py +125 -0
- mlscratch/supervised/linear_models.py +459 -0
- mlscratch/supervised/linear_regression.py +197 -0
- mlscratch/supervised/logistic_regression.py +119 -0
- mlscratch/supervised/naive_bayes.py +113 -0
- mlscratch/supervised/random_forest.py +321 -0
- mlscratch/supervised/ridge_regression.py +93 -0
- mlscratch/supervised/svm.py +356 -0
- mlscratch/unsupervised/__init__.py +39 -0
- mlscratch/unsupervised/apriori.py +178 -0
- mlscratch/unsupervised/dbscan.py +141 -0
- mlscratch/unsupervised/gmm.py +204 -0
- mlscratch/unsupervised/hierarchical_clustering.py +137 -0
- mlscratch/unsupervised/ica.py +167 -0
- mlscratch/unsupervised/kmeans.py +135 -0
- mlscratch/unsupervised/kmedoids.py +133 -0
- mlscratch/unsupervised/pca.py +103 -0
- mlscratch/unsupervised/tsne.py +200 -0
- scratchkit-0.2.0.dist-info/METADATA +241 -0
- scratchkit-0.2.0.dist-info/RECORD +68 -0
- scratchkit-0.2.0.dist-info/WHEEL +5 -0
- scratchkit-0.2.0.dist-info/entry_points.txt +2 -0
- scratchkit-0.2.0.dist-info/licenses/LICENSE +201 -0
- scratchkit-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Bayesian Neural Network (BNN) — Mean-Field Variational Inference
|
|
3
|
+
================================================================
|
|
4
|
+
Replaces deterministic weights with distributions:
|
|
5
|
+
|
|
6
|
+
w ~ N(μ, σ²)
|
|
7
|
+
|
|
8
|
+
The variational posterior q(w | θ) = N(μ, σ²) is optimised to maximise
|
|
9
|
+
the Evidence Lower BOund (ELBO):
|
|
10
|
+
|
|
11
|
+
ELBO = E_q[log p(y | x, w)] − KL[q(w | θ) || p(w)]
|
|
12
|
+
|
|
13
|
+
The KL term acts as weight regularisation; the likelihood term is the
|
|
14
|
+
negative cross-entropy for classification or negative Gaussian log-likelihood
|
|
15
|
+
for regression.
|
|
16
|
+
|
|
17
|
+
Training uses the reparameterisation trick (weights are sampled directly):
|
|
18
|
+
w = μ + σ * ε, ε ~ N(0, 1)
|
|
19
|
+
so gradients flow through μ and σ (via log σ).
|
|
20
|
+
|
|
21
|
+
Only numpy and Python stdlib are used.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
import numpy as np
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# ============================================================
|
|
28
|
+
# Activations
|
|
29
|
+
# ============================================================
|
|
30
|
+
|
|
31
|
+
def _relu(x: np.ndarray) -> np.ndarray:
    """Apply the rectified linear unit element-wise, i.e. ``max(0, x)``."""
    floor = 0.0
    return np.maximum(floor, x)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _relu_grad(x: np.ndarray) -> np.ndarray:
    """Return the ReLU derivative: 1.0 where ``x`` is strictly positive, else 0.0."""
    is_active = np.greater(x, 0)
    return is_active.astype(float)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _sigmoid(x: np.ndarray) -> np.ndarray:
    """Logistic sigmoid; the input is clipped to [-500, 500] before ``exp``."""
    bounded = np.clip(x, -500, 500)
    return 1.0 / (1.0 + np.exp(-bounded))
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _softmax(x: np.ndarray) -> np.ndarray:
    """Softmax over the last axis, shifted by the row maximum before ``exp``."""
    shifted = x - x.max(axis=-1, keepdims=True)
    weights = np.exp(shifted)
    normaliser = weights.sum(axis=-1, keepdims=True)
    return weights / normaliser
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# ============================================================
|
|
49
|
+
# BNN Layer (variational weights)
|
|
50
|
+
# ============================================================
|
|
51
|
+
|
|
52
|
+
class BayesianLayer:
|
|
53
|
+
"""
|
|
54
|
+
A single fully-connected layer with variational weight distribution.
|
|
55
|
+
|
|
56
|
+
Parameters
|
|
57
|
+
----------
|
|
58
|
+
in_features : int
|
|
59
|
+
out_features : int
|
|
60
|
+
prior_std : float
|
|
61
|
+
Std of isotropic Gaussian prior N(0, prior_std²).
|
|
62
|
+
"""
|
|
63
|
+
|
|
64
|
+
def __init__(
|
|
65
|
+
self,
|
|
66
|
+
in_features: int,
|
|
67
|
+
out_features: int,
|
|
68
|
+
prior_std: float = 1.0,
|
|
69
|
+
rng: np.random.Generator = None,
|
|
70
|
+
):
|
|
71
|
+
self.in_features = in_features
|
|
72
|
+
self.out_features = out_features
|
|
73
|
+
self.prior_std = prior_std
|
|
74
|
+
self._rng = rng or np.random.default_rng()
|
|
75
|
+
|
|
76
|
+
# Variational parameters: mean and log-std for W and b
|
|
77
|
+
scale = 1.0 / np.sqrt(in_features)
|
|
78
|
+
self.mu_W = self._rng.normal(0, scale, (in_features, out_features))
|
|
79
|
+
self.log_sigma_W = np.full((in_features, out_features), -3.0)
|
|
80
|
+
self.mu_b = np.zeros(out_features)
|
|
81
|
+
self.log_sigma_b = np.full(out_features, -3.0)
|
|
82
|
+
|
|
83
|
+
# Sampled weights (set during forward pass)
|
|
84
|
+
self.W_sample = None
|
|
85
|
+
self.b_sample = None
|
|
86
|
+
self._eps_W = None
|
|
87
|
+
self._eps_b = None
|
|
88
|
+
|
|
89
|
+
@property
|
|
90
|
+
def sigma_W(self):
|
|
91
|
+
return np.exp(self.log_sigma_W)
|
|
92
|
+
|
|
93
|
+
@property
|
|
94
|
+
def sigma_b(self):
|
|
95
|
+
return np.exp(self.log_sigma_b)
|
|
96
|
+
|
|
97
|
+
def forward(self, x: np.ndarray) -> np.ndarray:
|
|
98
|
+
"""Sample weights and compute linear transform."""
|
|
99
|
+
self._eps_W = self._rng.standard_normal(self.mu_W.shape)
|
|
100
|
+
self._eps_b = self._rng.standard_normal(self.mu_b.shape)
|
|
101
|
+
self.W_sample = self.mu_W + self.sigma_W * self._eps_W
|
|
102
|
+
self.b_sample = self.mu_b + self.sigma_b * self._eps_b
|
|
103
|
+
self._input = x
|
|
104
|
+
return x @ self.W_sample + self.b_sample
|
|
105
|
+
|
|
106
|
+
def kl_divergence(self) -> float:
|
|
107
|
+
"""
|
|
108
|
+
Closed-form KL[N(μ,σ²) || N(0, prior_std²)] for all weights.
|
|
109
|
+
|
|
110
|
+
KL = 0.5 * [σ²/prior_std² + μ²/prior_std² - 1 + 2 log(prior_std/σ)]
|
|
111
|
+
"""
|
|
112
|
+
prior_var = self.prior_std ** 2
|
|
113
|
+
kl_W = 0.5 * np.sum(
|
|
114
|
+
self.sigma_W ** 2 / prior_var
|
|
115
|
+
+ self.mu_W ** 2 / prior_var
|
|
116
|
+
- 1.0
|
|
117
|
+
+ 2.0 * (np.log(self.prior_std) - self.log_sigma_W)
|
|
118
|
+
)
|
|
119
|
+
kl_b = 0.5 * np.sum(
|
|
120
|
+
self.sigma_b ** 2 / prior_var
|
|
121
|
+
+ self.mu_b ** 2 / prior_var
|
|
122
|
+
- 1.0
|
|
123
|
+
+ 2.0 * (np.log(self.prior_std) - self.log_sigma_b)
|
|
124
|
+
)
|
|
125
|
+
return float(kl_W + kl_b)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# ============================================================
|
|
129
|
+
# Bayesian Neural Network
|
|
130
|
+
# ============================================================
|
|
131
|
+
|
|
132
|
+
class BayesianNeuralNetwork:
|
|
133
|
+
"""
|
|
134
|
+
Bayesian Neural Network trained via mean-field variational inference.
|
|
135
|
+
|
|
136
|
+
Supports binary classification (sigmoid output) and multi-class
|
|
137
|
+
classification (softmax output). A single hidden layer is used by
|
|
138
|
+
default; pass a list to `hidden_sizes` for deeper networks.
|
|
139
|
+
|
|
140
|
+
Parameters
|
|
141
|
+
----------
|
|
142
|
+
hidden_sizes : list of int
|
|
143
|
+
Sizes of hidden layers.
|
|
144
|
+
task : str
|
|
145
|
+
'binary' or 'multiclass'.
|
|
146
|
+
n_classes : int
|
|
147
|
+
Number of output classes (ignored for binary).
|
|
148
|
+
prior_std : float
|
|
149
|
+
Std of the Gaussian prior on weights.
|
|
150
|
+
lr : float
|
|
151
|
+
Learning rate for gradient updates.
|
|
152
|
+
n_samples : int
|
|
153
|
+
Number of MC samples per gradient estimate.
|
|
154
|
+
n_epochs : int
|
|
155
|
+
Training epochs.
|
|
156
|
+
batch_size : int or None
|
|
157
|
+
Mini-batch size. None = full-batch.
|
|
158
|
+
kl_weight : float
|
|
159
|
+
Scaling factor for the KL term (1/N is a common choice).
|
|
160
|
+
random_state : int or None
|
|
161
|
+
"""
|
|
162
|
+
|
|
163
|
+
def __init__(
|
|
164
|
+
self,
|
|
165
|
+
hidden_sizes: list | None = None,
|
|
166
|
+
task: str = "binary",
|
|
167
|
+
n_classes: int = 2,
|
|
168
|
+
prior_std: float = 1.0,
|
|
169
|
+
lr: float = 0.01,
|
|
170
|
+
n_samples: int = 1,
|
|
171
|
+
n_epochs: int = 100,
|
|
172
|
+
batch_size: int | None = 32,
|
|
173
|
+
kl_weight: float = 1.0,
|
|
174
|
+
random_state: int | None = None,
|
|
175
|
+
):
|
|
176
|
+
self.hidden_sizes = hidden_sizes or [64]
|
|
177
|
+
self.task = task
|
|
178
|
+
self.n_classes = n_classes
|
|
179
|
+
self.prior_std = prior_std
|
|
180
|
+
self.lr = lr
|
|
181
|
+
self.n_samples = n_samples
|
|
182
|
+
self.n_epochs = n_epochs
|
|
183
|
+
self.batch_size = batch_size
|
|
184
|
+
self.kl_weight = kl_weight
|
|
185
|
+
self.random_state = random_state
|
|
186
|
+
self.layers_: list[BayesianLayer] = []
|
|
187
|
+
self.losses_: list[float] = []
|
|
188
|
+
|
|
189
|
+
# ------------------------------------------------------------------
|
|
190
|
+
# Build
|
|
191
|
+
# ------------------------------------------------------------------
|
|
192
|
+
|
|
193
|
+
def _build(self, n_input: int) -> None:
|
|
194
|
+
rng = np.random.default_rng(self.random_state)
|
|
195
|
+
sizes = [n_input] + list(self.hidden_sizes)
|
|
196
|
+
n_out = 1 if self.task == "binary" else self.n_classes
|
|
197
|
+
|
|
198
|
+
self.layers_ = []
|
|
199
|
+
for i in range(len(sizes) - 1):
|
|
200
|
+
self.layers_.append(
|
|
201
|
+
BayesianLayer(sizes[i], sizes[i + 1], self.prior_std, rng)
|
|
202
|
+
)
|
|
203
|
+
self.layers_.append(
|
|
204
|
+
BayesianLayer(sizes[-1], n_out, self.prior_std, rng)
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
# ------------------------------------------------------------------
|
|
208
|
+
# Forward
|
|
209
|
+
# ------------------------------------------------------------------
|
|
210
|
+
|
|
211
|
+
def _forward(self, X: np.ndarray) -> np.ndarray:
|
|
212
|
+
"""Single forward pass with sampled weights."""
|
|
213
|
+
h = X
|
|
214
|
+
for layer in self.layers_[:-1]:
|
|
215
|
+
h = _relu(layer.forward(h))
|
|
216
|
+
logit = self.layers_[-1].forward(h)
|
|
217
|
+
if self.task == "binary":
|
|
218
|
+
return _sigmoid(logit).squeeze(-1)
|
|
219
|
+
return _softmax(logit)
|
|
220
|
+
|
|
221
|
+
# ------------------------------------------------------------------
|
|
222
|
+
# ELBO / loss
|
|
223
|
+
# ------------------------------------------------------------------
|
|
224
|
+
|
|
225
|
+
def _elbo(self, X: np.ndarray, y: np.ndarray) -> float:
|
|
226
|
+
"""Compute -ELBO (loss to minimise) averaged over MC samples."""
|
|
227
|
+
n = len(X)
|
|
228
|
+
total_nll = 0.0
|
|
229
|
+
for _ in range(self.n_samples):
|
|
230
|
+
pred = self._forward(X)
|
|
231
|
+
if self.task == "binary":
|
|
232
|
+
pred = np.clip(pred, 1e-7, 1 - 1e-7)
|
|
233
|
+
nll = -np.mean(y * np.log(pred) + (1 - y) * np.log(1 - pred))
|
|
234
|
+
else:
|
|
235
|
+
pred = np.clip(pred, 1e-7, 1.0)
|
|
236
|
+
nll = -np.mean(np.log(pred[np.arange(n), y.astype(int)]))
|
|
237
|
+
total_nll += nll
|
|
238
|
+
|
|
239
|
+
avg_nll = total_nll / self.n_samples
|
|
240
|
+
kl = sum(layer.kl_divergence() for layer in self.layers_)
|
|
241
|
+
return avg_nll + self.kl_weight * kl / n
|
|
242
|
+
|
|
243
|
+
# ------------------------------------------------------------------
|
|
244
|
+
# Gradient step (finite differences for simplicity)
|
|
245
|
+
# ------------------------------------------------------------------
|
|
246
|
+
|
|
247
|
+
def _update_params(self, X: np.ndarray, y: np.ndarray) -> None:
|
|
248
|
+
"""Gradient update using a stochastic estimate of the ELBO gradient."""
|
|
249
|
+
eps = 1e-5
|
|
250
|
+
for layer in self.layers_:
|
|
251
|
+
for param_name in ("mu_W", "log_sigma_W", "mu_b", "log_sigma_b"):
|
|
252
|
+
param = getattr(layer, param_name)
|
|
253
|
+
grad = np.zeros_like(param)
|
|
254
|
+
flat = param.ravel()
|
|
255
|
+
for idx in range(len(flat)):
|
|
256
|
+
orig = flat[idx]
|
|
257
|
+
flat[idx] = orig + eps
|
|
258
|
+
loss_p = self._elbo(X, y)
|
|
259
|
+
flat[idx] = orig - eps
|
|
260
|
+
loss_m = self._elbo(X, y)
|
|
261
|
+
flat[idx] = orig
|
|
262
|
+
grad.ravel()[idx] = (loss_p - loss_m) / (2 * eps)
|
|
263
|
+
param -= self.lr * grad
|
|
264
|
+
|
|
265
|
+
# ------------------------------------------------------------------
|
|
266
|
+
# Public API
|
|
267
|
+
# ------------------------------------------------------------------
|
|
268
|
+
|
|
269
|
+
def fit(self, X: np.ndarray, y: np.ndarray) -> "BayesianNeuralNetwork":
|
|
270
|
+
"""
|
|
271
|
+
Train the BNN.
|
|
272
|
+
|
|
273
|
+
Parameters
|
|
274
|
+
----------
|
|
275
|
+
X : ndarray (n_samples, n_features)
|
|
276
|
+
y : ndarray (n_samples,) — integer class labels
|
|
277
|
+
"""
|
|
278
|
+
n_samples = len(X)
|
|
279
|
+
self._build(X.shape[1])
|
|
280
|
+
rng = np.random.default_rng(self.random_state)
|
|
281
|
+
bs = self.batch_size or n_samples
|
|
282
|
+
|
|
283
|
+
for epoch in range(self.n_epochs):
|
|
284
|
+
idx = rng.permutation(n_samples)
|
|
285
|
+
epoch_loss = 0.0
|
|
286
|
+
n_batches = 0
|
|
287
|
+
for start in range(0, n_samples, bs):
|
|
288
|
+
batch_idx = idx[start:start + bs]
|
|
289
|
+
Xb, yb = X[batch_idx], y[batch_idx]
|
|
290
|
+
self._update_params(Xb, yb)
|
|
291
|
+
epoch_loss += self._elbo(Xb, yb)
|
|
292
|
+
n_batches += 1
|
|
293
|
+
self.losses_.append(epoch_loss / n_batches)
|
|
294
|
+
|
|
295
|
+
return self
|
|
296
|
+
|
|
297
|
+
def predict_proba(self, X: np.ndarray, n_samples: int = 50) -> np.ndarray:
|
|
298
|
+
"""
|
|
299
|
+
Monte-Carlo predictive probabilities (averaged over weight samples).
|
|
300
|
+
|
|
301
|
+
Returns
|
|
302
|
+
-------
|
|
303
|
+
proba : ndarray of shape (n_samples, n_classes)
|
|
304
|
+
or (n_samples,) for binary task
|
|
305
|
+
"""
|
|
306
|
+
preds = []
|
|
307
|
+
for _ in range(n_samples):
|
|
308
|
+
preds.append(self._forward(X))
|
|
309
|
+
return np.stack(preds).mean(axis=0)
|
|
310
|
+
|
|
311
|
+
def predict(self, X: np.ndarray, n_samples: int = 50) -> np.ndarray:
|
|
312
|
+
proba = self.predict_proba(X, n_samples)
|
|
313
|
+
if self.task == "binary":
|
|
314
|
+
return (proba >= 0.5).astype(int)
|
|
315
|
+
return np.argmax(proba, axis=1)
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Gaussian Process Regression (GPR)
|
|
3
|
+
===================================
|
|
4
|
+
Non-parametric Bayesian regression. A Gaussian Process defines a
|
|
5
|
+
distribution over functions; conditioning on observed data gives a
|
|
6
|
+
posterior GP whose mean is used for prediction and whose variance
|
|
7
|
+
quantifies uncertainty.
|
|
8
|
+
|
|
9
|
+
f ~ GP(0, k(x, x'))
|
|
10
|
+
y = f(x) + ε, ε ~ N(0, σ_n²)
|
|
11
|
+
|
|
12
|
+
Posterior predictive:
|
|
13
|
+
μ* = K(X*, X) [K(X,X) + σ_n² I]⁻¹ y
|
|
14
|
+
Σ* = K(X*, X*) - K(X*, X) [K(X,X) + σ_n² I]⁻¹ K(X, X*)
|
|
15
|
+
|
|
16
|
+
Kernels implemented
|
|
17
|
+
--------------------
|
|
18
|
+
- RBF (Squared Exponential): k(x,x') = σ_f² exp(-||x-x'||²/(2l²))
|
|
19
|
+
- Matern52 : k(x,x') = σ_f²(1+√5 r/l + 5r²/(3l²)) exp(-√5 r/l)
|
|
20
|
+
- Linear : k(x,x') = σ_f² x·x'
|
|
21
|
+
- Periodic : k(x,x') = σ_f² exp(-2 sin²(π|x-x'|/p)/l²)
|
|
22
|
+
|
|
23
|
+
Only numpy is used.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
import numpy as np
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# ============================================================
|
|
30
|
+
# Kernels
|
|
31
|
+
# ============================================================
|
|
32
|
+
|
|
33
|
+
class RBFKernel:
    """
    Radial Basis Function (Squared Exponential) kernel.

    Evaluates ``signal_variance**2 * exp(-||x - x'||² / (2 * length_scale**2))``.
    Note that ``signal_variance`` is squared in this expression, so it enters
    as an amplitude multiplier rather than directly as a variance.
    """

    def __init__(self, length_scale: float = 1.0, signal_variance: float = 1.0):
        self.signal_variance = signal_variance
        self.length_scale = length_scale

    def __call__(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray:
        """Return the Gram matrix with one row per X1 point and one column per X2 point."""
        rows = np.atleast_2d(X1)
        cols = np.atleast_2d(X2)
        # Pairwise differences via broadcasting: (n1, 1, d) - (1, n2, d).
        diff = rows[:, np.newaxis, :] - cols[np.newaxis, :, :]
        sq_dist = np.sum(diff ** 2, axis=2)
        decay = np.exp(-0.5 * sq_dist / self.length_scale ** 2)
        return self.signal_variance ** 2 * decay
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class Matern52Kernel:
    """
    Matérn 5/2 kernel — rougher than RBF, common for real-world data.

    With ``s = sqrt(5) * ||x - x'|| / length_scale`` the value is
    ``signal_variance**2 * (1 + s + s²/3) * exp(-s)``.
    """

    def __init__(self, length_scale: float = 1.0, signal_variance: float = 1.0):
        self.signal_variance = signal_variance
        self.length_scale = length_scale

    def __call__(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray:
        """Return the Gram matrix with one row per X1 point and one column per X2 point."""
        rows = np.atleast_2d(X1)
        cols = np.atleast_2d(X2)
        diff = rows[:, np.newaxis, :] - cols[np.newaxis, :, :]
        # Euclidean distance between every pair of points.
        r = np.sqrt(np.sum(diff ** 2, axis=2))
        scaled = np.sqrt(5.0) * r / self.length_scale
        polynomial = 1.0 + scaled + scaled ** 2 / 3.0
        return self.signal_variance ** 2 * polynomial * np.exp(-scaled)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class LinearKernel:
    """
    Linear (dot-product) kernel.

    Evaluates ``signal_variance**2 * (x · x')``; ``signal_variance`` is
    squared in the expression.
    """

    def __init__(self, signal_variance: float = 1.0):
        self.signal_variance = signal_variance

    def __call__(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray:
        """Return the Gram matrix of scaled inner products between X1 and X2 rows."""
        rows = np.atleast_2d(X1)
        cols = np.atleast_2d(X2)
        inner = rows @ cols.T
        return self.signal_variance ** 2 * inner
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class PeriodicKernel:
    """
    Periodic kernel for modelling repeating patterns.

    Evaluates
    ``signal_variance**2 * exp(-2 * sin²(pi * ||x - x'|| / period) / length_scale**2)``.
    """

    def __init__(
        self,
        length_scale: float = 1.0,
        period: float = 1.0,
        signal_variance: float = 1.0,
    ):
        self.signal_variance = signal_variance
        self.period = period
        self.length_scale = length_scale

    def __call__(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray:
        """Return the Gram matrix with one row per X1 point and one column per X2 point."""
        rows = np.atleast_2d(X1)
        cols = np.atleast_2d(X2)
        # Works for 1-D; the Euclidean norm is used for multi-D inputs.
        diff = rows[:, np.newaxis, :] - cols[np.newaxis, :, :]
        dist = np.sqrt(np.sum(diff ** 2, axis=2))
        wave = np.sin(np.pi * dist / self.period)
        exponent = -2.0 * wave ** 2 / self.length_scale ** 2
        return self.signal_variance ** 2 * np.exp(exponent)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# ============================================================
|
|
102
|
+
# Gaussian Process Regressor
|
|
103
|
+
# ============================================================
|
|
104
|
+
|
|
105
|
+
class GaussianProcessRegressor:
|
|
106
|
+
"""
|
|
107
|
+
Gaussian Process Regression.
|
|
108
|
+
|
|
109
|
+
Parameters
|
|
110
|
+
----------
|
|
111
|
+
kernel : callable
|
|
112
|
+
Covariance kernel k(X1, X2). Defaults to RBFKernel().
|
|
113
|
+
noise_variance : float
|
|
114
|
+
Observation noise σ_n².
|
|
115
|
+
"""
|
|
116
|
+
|
|
117
|
+
def __init__(self, kernel=None, noise_variance: float = 1e-6):
|
|
118
|
+
self.kernel = kernel if kernel is not None else RBFKernel()
|
|
119
|
+
self.noise_variance = noise_variance
|
|
120
|
+
self.X_train_ = None
|
|
121
|
+
self.alpha_ = None # (K + σ²I)^{-1} y
|
|
122
|
+
self.L_ = None # Cholesky factor of (K + σ²I)
|
|
123
|
+
|
|
124
|
+
# ------------------------------------------------------------------
|
|
125
|
+
# Internal
|
|
126
|
+
# ------------------------------------------------------------------
|
|
127
|
+
|
|
128
|
+
def _cholesky_solve(self, A: np.ndarray, b: np.ndarray) -> np.ndarray:
|
|
129
|
+
"""Solve A x = b via Cholesky decomposition of A."""
|
|
130
|
+
L = np.linalg.cholesky(A)
|
|
131
|
+
# Forward substitution: L z = b
|
|
132
|
+
z = np.linalg.solve(L, b)
|
|
133
|
+
# Back substitution: L^T x = z
|
|
134
|
+
return np.linalg.solve(L.T, z), L
|
|
135
|
+
|
|
136
|
+
# ------------------------------------------------------------------
|
|
137
|
+
# Public API
|
|
138
|
+
# ------------------------------------------------------------------
|
|
139
|
+
|
|
140
|
+
def fit(self, X: np.ndarray, y: np.ndarray) -> "GaussianProcessRegressor":
|
|
141
|
+
"""
|
|
142
|
+
Fit GP to training data.
|
|
143
|
+
|
|
144
|
+
Parameters
|
|
145
|
+
----------
|
|
146
|
+
X : ndarray of shape (n_samples, n_features) or (n_samples,)
|
|
147
|
+
y : ndarray of shape (n_samples,)
|
|
148
|
+
"""
|
|
149
|
+
self.X_train_ = np.atleast_2d(X) if X.ndim == 1 else X
|
|
150
|
+
self.y_train_ = y.copy()
|
|
151
|
+
|
|
152
|
+
K = self.kernel(self.X_train_, self.X_train_)
|
|
153
|
+
K_noisy = K + self.noise_variance * np.eye(len(y))
|
|
154
|
+
|
|
155
|
+
self.alpha_, self.L_ = self._cholesky_solve(K_noisy, y)
|
|
156
|
+
return self
|
|
157
|
+
|
|
158
|
+
def predict(
|
|
159
|
+
self, X: np.ndarray, return_std: bool = False
|
|
160
|
+
):
|
|
161
|
+
"""
|
|
162
|
+
Predictive mean and (optionally) standard deviation.
|
|
163
|
+
|
|
164
|
+
Parameters
|
|
165
|
+
----------
|
|
166
|
+
X : ndarray of shape (n_samples, n_features) or (n_samples,)
|
|
167
|
+
return_std : bool
|
|
168
|
+
|
|
169
|
+
Returns
|
|
170
|
+
-------
|
|
171
|
+
y_mean : ndarray of shape (n_samples,)
|
|
172
|
+
y_std : ndarray of shape (n_samples,) [only if return_std]
|
|
173
|
+
"""
|
|
174
|
+
X_ = np.atleast_2d(X) if X.ndim == 1 else X
|
|
175
|
+
K_star = self.kernel(X_, self.X_train_) # (n_test, n_train)
|
|
176
|
+
y_mean = K_star @ self.alpha_
|
|
177
|
+
|
|
178
|
+
if not return_std:
|
|
179
|
+
return y_mean
|
|
180
|
+
|
|
181
|
+
# Predictive variance: diag(K** - K*^T (K+σ²I)^{-1} K*)
|
|
182
|
+
v = np.linalg.solve(self.L_, K_star.T) # (n_train, n_test)
|
|
183
|
+
K_ss = self.kernel(X_, X_)
|
|
184
|
+
var = np.diag(K_ss) - np.sum(v ** 2, axis=0)
|
|
185
|
+
return y_mean, np.sqrt(np.maximum(var, 0.0))
|
|
186
|
+
|
|
187
|
+
def sample_posterior(
|
|
188
|
+
self, X: np.ndarray, n_samples: int = 1, random_state=None
|
|
189
|
+
) -> np.ndarray:
|
|
190
|
+
"""
|
|
191
|
+
Draw samples from the posterior distribution.
|
|
192
|
+
|
|
193
|
+
Returns
|
|
194
|
+
-------
|
|
195
|
+
samples : ndarray of shape (n_samples, n_test_points)
|
|
196
|
+
"""
|
|
197
|
+
rng = np.random.default_rng(random_state)
|
|
198
|
+
X_ = np.atleast_2d(X) if X.ndim == 1 else X
|
|
199
|
+
K_star = self.kernel(X_, self.X_train_)
|
|
200
|
+
K_ss = self.kernel(X_, X_)
|
|
201
|
+
|
|
202
|
+
mu = K_star @ self.alpha_
|
|
203
|
+
v = np.linalg.solve(self.L_, K_star.T)
|
|
204
|
+
cov = K_ss - v.T @ v
|
|
205
|
+
# Regularise
|
|
206
|
+
cov += 1e-10 * np.eye(len(mu))
|
|
207
|
+
return rng.multivariate_normal(mu, cov, size=n_samples)
|