scratchkit 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. mlscratch/__init__.py +56 -0
  2. mlscratch/__main__.py +118 -0
  3. mlscratch/bayesian/__init__.py +53 -0
  4. mlscratch/bayesian/bayesian_linear_regression.py +171 -0
  5. mlscratch/bayesian/bayesian_network.py +248 -0
  6. mlscratch/bayesian/bayesian_nn.py +315 -0
  7. mlscratch/bayesian/gaussian_process.py +207 -0
  8. mlscratch/bayesian/hmm.py +277 -0
  9. mlscratch/bayesian/init.py +52 -0
  10. mlscratch/bayesian/kalman_filter.py +182 -0
  11. mlscratch/bayesian/naive_bayes.py +209 -0
  12. mlscratch/metrics/__init__.py +59 -0
  13. mlscratch/metrics/classification.py +365 -0
  14. mlscratch/metrics/regression.py +79 -0
  15. mlscratch/neural/__init__.py +121 -0
  16. mlscratch/neural/attention.py +420 -0
  17. mlscratch/neural/autoencoder.py +543 -0
  18. mlscratch/neural/boltzmann.py +231 -0
  19. mlscratch/neural/cnn.py +593 -0
  20. mlscratch/neural/cvnn.py +322 -0
  21. mlscratch/neural/gan.py +364 -0
  22. mlscratch/neural/hopfield.py +193 -0
  23. mlscratch/neural/perceptron.py +398 -0
  24. mlscratch/neural/rbf_network.py +230 -0
  25. mlscratch/neural/recurrent.py +569 -0
  26. mlscratch/preprocessing/__init__.py +38 -0
  27. mlscratch/preprocessing/encoders.py +140 -0
  28. mlscratch/preprocessing/model_selection.py +119 -0
  29. mlscratch/preprocessing/polynomial.py +105 -0
  30. mlscratch/preprocessing/scalers.py +220 -0
  31. mlscratch/py.typed +0 -0
  32. mlscratch/reinforcement/__init__.py +59 -0
  33. mlscratch/reinforcement/ddpg.py +363 -0
  34. mlscratch/reinforcement/dqn.py +319 -0
  35. mlscratch/reinforcement/ppo.py +452 -0
  36. mlscratch/reinforcement/q_learning.py +352 -0
  37. mlscratch/reinforcement/sac.py +382 -0
  38. mlscratch/reinforcement/utils.py +594 -0
  39. mlscratch/supervised/__init__.py +76 -0
  40. mlscratch/supervised/_validation.py +50 -0
  41. mlscratch/supervised/adaboost.py +255 -0
  42. mlscratch/supervised/decision_tree.py +495 -0
  43. mlscratch/supervised/gradient_boosting.py +354 -0
  44. mlscratch/supervised/knn.py +234 -0
  45. mlscratch/supervised/lasso_regression.py +125 -0
  46. mlscratch/supervised/linear_models.py +459 -0
  47. mlscratch/supervised/linear_regression.py +197 -0
  48. mlscratch/supervised/logistic_regression.py +119 -0
  49. mlscratch/supervised/naive_bayes.py +113 -0
  50. mlscratch/supervised/random_forest.py +321 -0
  51. mlscratch/supervised/ridge_regression.py +93 -0
  52. mlscratch/supervised/svm.py +356 -0
  53. mlscratch/unsupervised/__init__.py +39 -0
  54. mlscratch/unsupervised/apriori.py +178 -0
  55. mlscratch/unsupervised/dbscan.py +141 -0
  56. mlscratch/unsupervised/gmm.py +204 -0
  57. mlscratch/unsupervised/hierarchical_clustering.py +137 -0
  58. mlscratch/unsupervised/ica.py +167 -0
  59. mlscratch/unsupervised/kmeans.py +135 -0
  60. mlscratch/unsupervised/kmedoids.py +133 -0
  61. mlscratch/unsupervised/pca.py +103 -0
  62. mlscratch/unsupervised/tsne.py +200 -0
  63. scratchkit-0.2.0.dist-info/METADATA +241 -0
  64. scratchkit-0.2.0.dist-info/RECORD +68 -0
  65. scratchkit-0.2.0.dist-info/WHEEL +5 -0
  66. scratchkit-0.2.0.dist-info/entry_points.txt +2 -0
  67. scratchkit-0.2.0.dist-info/licenses/LICENSE +201 -0
  68. scratchkit-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,315 @@
1
+ """
2
+ Bayesian Neural Network (BNN) — Mean-Field Variational Inference
3
+ ================================================================
4
+ Replaces deterministic weights with distributions:
5
+
6
+ w ~ N(μ, σ²)
7
+
8
+ The variational posterior q(w | θ) = N(μ, σ²) is optimised to maximise
9
+ the Evidence Lower BOund (ELBO):
10
+
11
+ ELBO = E_q[log p(y | x, w)] − KL[q(w | θ) || p(w)]
12
+
13
+ The KL term acts as weight regularisation; the likelihood term is the
14
+ negative cross-entropy for classification or negative Gaussian log-likelihood
15
+ for regression.
16
+
17
+ Training uses the reparameterisation trick (weights are sampled directly):
18
+ w = μ + σ * ε, ε ~ N(0, 1)
19
+ so gradients flow through μ and σ (via log σ).
20
+
21
+ Only numpy and Python stdlib are used.
22
+ """
23
+
24
+ import numpy as np
25
+
26
+
27
+ # ============================================================
28
+ # Activations
29
+ # ============================================================
30
+
31
def _relu(x: np.ndarray) -> np.ndarray:
    """Rectified linear unit: element-wise ``max(0, x)``."""
    floor = 0.0
    return np.maximum(floor, x)
33
+
34
+
35
def _relu_grad(x: np.ndarray) -> np.ndarray:
    """Derivative of ReLU: 1.0 where ``x > 0`` and 0.0 elsewhere (including at 0)."""
    positive_mask = x > 0
    return positive_mask.astype(float)
37
+
38
+
39
def _sigmoid(x: np.ndarray) -> np.ndarray:
    """Logistic function ``1 / (1 + exp(-x))``.

    The input is clipped to [-500, 500] before exponentiation so that
    ``np.exp`` cannot overflow.
    """
    clipped = np.clip(x, -500, 500)
    denominator = 1.0 + np.exp(-clipped)
    return 1.0 / denominator
41
+
42
+
43
def _softmax(x: np.ndarray) -> np.ndarray:
    """Softmax over the last axis.

    The per-row maximum is subtracted first; this leaves the result
    unchanged mathematically but keeps ``np.exp`` from overflowing.
    """
    shifted = x - x.max(axis=-1, keepdims=True)
    weights = np.exp(shifted)
    return weights / weights.sum(axis=-1, keepdims=True)
46
+
47
+
48
+ # ============================================================
49
+ # BNN Layer (variational weights)
50
+ # ============================================================
51
+
52
+ class BayesianLayer:
53
+ """
54
+ A single fully-connected layer with variational weight distribution.
55
+
56
+ Parameters
57
+ ----------
58
+ in_features : int
59
+ out_features : int
60
+ prior_std : float
61
+ Std of isotropic Gaussian prior N(0, prior_std²).
62
+ """
63
+
64
+ def __init__(
65
+ self,
66
+ in_features: int,
67
+ out_features: int,
68
+ prior_std: float = 1.0,
69
+ rng: np.random.Generator = None,
70
+ ):
71
+ self.in_features = in_features
72
+ self.out_features = out_features
73
+ self.prior_std = prior_std
74
+ self._rng = rng or np.random.default_rng()
75
+
76
+ # Variational parameters: mean and log-std for W and b
77
+ scale = 1.0 / np.sqrt(in_features)
78
+ self.mu_W = self._rng.normal(0, scale, (in_features, out_features))
79
+ self.log_sigma_W = np.full((in_features, out_features), -3.0)
80
+ self.mu_b = np.zeros(out_features)
81
+ self.log_sigma_b = np.full(out_features, -3.0)
82
+
83
+ # Sampled weights (set during forward pass)
84
+ self.W_sample = None
85
+ self.b_sample = None
86
+ self._eps_W = None
87
+ self._eps_b = None
88
+
89
+ @property
90
+ def sigma_W(self):
91
+ return np.exp(self.log_sigma_W)
92
+
93
+ @property
94
+ def sigma_b(self):
95
+ return np.exp(self.log_sigma_b)
96
+
97
+ def forward(self, x: np.ndarray) -> np.ndarray:
98
+ """Sample weights and compute linear transform."""
99
+ self._eps_W = self._rng.standard_normal(self.mu_W.shape)
100
+ self._eps_b = self._rng.standard_normal(self.mu_b.shape)
101
+ self.W_sample = self.mu_W + self.sigma_W * self._eps_W
102
+ self.b_sample = self.mu_b + self.sigma_b * self._eps_b
103
+ self._input = x
104
+ return x @ self.W_sample + self.b_sample
105
+
106
+ def kl_divergence(self) -> float:
107
+ """
108
+ Closed-form KL[N(μ,σ²) || N(0, prior_std²)] for all weights.
109
+
110
+ KL = 0.5 * [σ²/prior_std² + μ²/prior_std² - 1 + 2 log(prior_std/σ)]
111
+ """
112
+ prior_var = self.prior_std ** 2
113
+ kl_W = 0.5 * np.sum(
114
+ self.sigma_W ** 2 / prior_var
115
+ + self.mu_W ** 2 / prior_var
116
+ - 1.0
117
+ + 2.0 * (np.log(self.prior_std) - self.log_sigma_W)
118
+ )
119
+ kl_b = 0.5 * np.sum(
120
+ self.sigma_b ** 2 / prior_var
121
+ + self.mu_b ** 2 / prior_var
122
+ - 1.0
123
+ + 2.0 * (np.log(self.prior_std) - self.log_sigma_b)
124
+ )
125
+ return float(kl_W + kl_b)
126
+
127
+
128
+ # ============================================================
129
+ # Bayesian Neural Network
130
+ # ============================================================
131
+
132
class BayesianNeuralNetwork:
    """
    Bayesian Neural Network trained via mean-field variational inference.

    Supports binary classification (sigmoid output) and multi-class
    classification (softmax output). A single hidden layer is used by
    default; pass a list to `hidden_sizes` for deeper networks.

    Gradients are estimated with central finite differences, which costs
    two loss evaluations per variational parameter per batch. This is
    only practical for very small networks.

    Parameters
    ----------
    hidden_sizes : list of int
        Sizes of hidden layers. Defaults to ``[64]``.
    task : str
        'binary' or 'multiclass'.
    n_classes : int
        Number of output classes (ignored for binary).
    prior_std : float
        Std of the Gaussian prior on weights.
    lr : float
        Learning rate for gradient updates.
    n_samples : int
        Number of MC samples per gradient estimate.
    n_epochs : int
        Training epochs.
    batch_size : int or None
        Mini-batch size. None = full-batch.
    kl_weight : float
        Scaling factor for the KL term. The KL is additionally divided
        by the number of rows in the batch being evaluated.
    random_state : int or None
        Seed for weight initialisation, weight sampling and shuffling.
    """

    def __init__(
        self,
        hidden_sizes: list | None = None,
        task: str = "binary",
        n_classes: int = 2,
        prior_std: float = 1.0,
        lr: float = 0.01,
        n_samples: int = 1,
        n_epochs: int = 100,
        batch_size: int | None = 32,
        kl_weight: float = 1.0,
        random_state: int | None = None,
    ):
        self.hidden_sizes = hidden_sizes or [64]
        self.task = task
        self.n_classes = n_classes
        self.prior_std = prior_std
        self.lr = lr
        self.n_samples = n_samples
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        self.kl_weight = kl_weight
        self.random_state = random_state
        self.layers_: list[BayesianLayer] = []
        self.losses_: list[float] = []

    # ------------------------------------------------------------------
    # Build
    # ------------------------------------------------------------------

    def _build(self, n_input: int) -> None:
        """Create the layer stack; all layers share one seeded generator."""
        rng = np.random.default_rng(self.random_state)
        sizes = [n_input] + list(self.hidden_sizes)
        n_out = 1 if self.task == "binary" else self.n_classes

        self.layers_ = [
            BayesianLayer(fan_in, fan_out, self.prior_std, rng)
            for fan_in, fan_out in zip(sizes[:-1], sizes[1:])
        ]
        self.layers_.append(
            BayesianLayer(sizes[-1], n_out, self.prior_std, rng)
        )

    # ------------------------------------------------------------------
    # Forward
    # ------------------------------------------------------------------

    def _forward(self, X: np.ndarray) -> np.ndarray:
        """Single forward pass with freshly sampled weights.

        Returns probabilities: shape (n,) for the binary task, otherwise
        (n, n_classes).
        """
        h = X
        for layer in self.layers_[:-1]:
            h = _relu(layer.forward(h))
        logit = self.layers_[-1].forward(h)
        if self.task == "binary":
            return _sigmoid(logit).squeeze(-1)
        return _softmax(logit)

    # ------------------------------------------------------------------
    # ELBO / loss
    # ------------------------------------------------------------------

    def _elbo(self, X: np.ndarray, y: np.ndarray) -> float:
        """Compute -ELBO (loss to minimise) averaged over MC samples.

        The loss is the mean negative log-likelihood over the batch plus
        ``kl_weight * KL / len(X)``. Every call draws new weight noise,
        so two calls with identical arguments generally differ.
        """
        n = len(X)
        total_nll = 0.0
        for _ in range(self.n_samples):
            pred = self._forward(X)
            if self.task == "binary":
                # Clip so that log() never sees exactly 0 or 1.
                pred = np.clip(pred, 1e-7, 1 - 1e-7)
                nll = -np.mean(y * np.log(pred) + (1 - y) * np.log(1 - pred))
            else:
                pred = np.clip(pred, 1e-7, 1.0)
                nll = -np.mean(np.log(pred[np.arange(n), y.astype(int)]))
            total_nll += nll

        avg_nll = total_nll / self.n_samples
        kl = sum(layer.kl_divergence() for layer in self.layers_)
        return avg_nll + self.kl_weight * kl / n

    # ------------------------------------------------------------------
    # Gradient step (finite differences for simplicity)
    # ------------------------------------------------------------------

    def _update_params(self, X: np.ndarray, y: np.ndarray) -> None:
        """Take one gradient-descent step on -ELBO using central differences.

        Both sides of every finite difference are evaluated with the
        *same* weight noise (common random numbers). Without this, the
        difference between two independently sampled losses is dominated
        by sampling noise and, after division by ``2 * eps``, swamps the
        true gradient.
        """
        eps = 1e-5

        # Snapshot the generator state(s) so each loss evaluation below
        # replays exactly the same noise draws.
        rng_states = [layer._rng.bit_generator.state for layer in self.layers_]

        def frozen_loss() -> float:
            for layer, state in zip(self.layers_, rng_states):
                layer._rng.bit_generator.state = state
            return self._elbo(X, y)

        # Compute every gradient at the current parameter values first,
        # then apply all updates together.
        updates = []
        for layer in self.layers_:
            for param_name in ("mu_W", "log_sigma_W", "mu_b", "log_sigma_b"):
                param = getattr(layer, param_name)
                grad = np.zeros_like(param)
                # Index the array directly: a ravel()-based view would
                # silently become a copy for non-contiguous arrays.
                for idx in np.ndindex(param.shape):
                    orig = param[idx]
                    param[idx] = orig + eps
                    loss_p = frozen_loss()
                    param[idx] = orig - eps
                    loss_m = frozen_loss()
                    param[idx] = orig
                    grad[idx] = (loss_p - loss_m) / (2 * eps)
                updates.append((param, grad))

        for param, grad in updates:
            param -= self.lr * grad
        # The generator is left one loss evaluation past the snapshot, so
        # the next step uses different noise.

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def fit(self, X: np.ndarray, y: np.ndarray) -> "BayesianNeuralNetwork":
        """
        Train the BNN.

        Rebuilds the network and clears ``losses_``, so repeated calls
        start from scratch.

        Parameters
        ----------
        X : ndarray (n_samples, n_features)
        y : ndarray (n_samples,) — integer class labels

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If X is not 2-D, is empty, or X and y differ in length.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must have shape (n_samples, n_features)")
        n_samples = len(X)
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if len(y) != n_samples:
            raise ValueError("X and y must have the same number of samples")

        self._build(X.shape[1])
        self.losses_ = []
        rng = np.random.default_rng(self.random_state)
        bs = self.batch_size or n_samples

        for _ in range(self.n_epochs):
            order = rng.permutation(n_samples)
            epoch_loss = 0.0
            n_batches = 0
            for start in range(0, n_samples, bs):
                batch_idx = order[start:start + bs]
                Xb, yb = X[batch_idx], y[batch_idx]
                self._update_params(Xb, yb)
                # Logged loss is one stochastic evaluation after the update.
                epoch_loss += self._elbo(Xb, yb)
                n_batches += 1
            self.losses_.append(epoch_loss / n_batches)

        return self

    def predict_proba(self, X: np.ndarray, n_samples: int = 50) -> np.ndarray:
        """
        Monte-Carlo predictive probabilities (averaged over weight samples).

        Parameters
        ----------
        X : ndarray (n_points, n_features)
        n_samples : int
            Number of stochastic forward passes to average.

        Returns
        -------
        proba : ndarray of shape (n_points, n_classes)
            or (n_points,) for binary task

        Raises
        ------
        RuntimeError
            If the model has not been fitted.
        ValueError
            If ``n_samples`` is smaller than 1.
        """
        if not self.layers_:
            raise RuntimeError("BayesianNeuralNetwork is not fitted; call fit() first")
        if n_samples < 1:
            raise ValueError("n_samples must be at least 1")
        X = np.asarray(X)
        preds = [self._forward(X) for _ in range(n_samples)]
        return np.stack(preds).mean(axis=0)

    def predict(self, X: np.ndarray, n_samples: int = 50) -> np.ndarray:
        """Predict class labels from the MC-averaged probabilities.

        Binary task: 1 where the probability is >= 0.5, else 0.
        Multi-class task: index of the most probable class.
        """
        proba = self.predict_proba(X, n_samples)
        if self.task == "binary":
            return (proba >= 0.5).astype(int)
        return np.argmax(proba, axis=1)
@@ -0,0 +1,207 @@
1
+ """
2
+ Gaussian Process Regression (GPR)
3
+ ===================================
4
+ Non-parametric Bayesian regression. A Gaussian Process defines a
5
+ distribution over functions; conditioning on observed data gives a
6
+ posterior GP whose mean is used for prediction and whose variance
7
+ quantifies uncertainty.
8
+
9
+ f ~ GP(0, k(x, x'))
10
+ y = f(x) + ε, ε ~ N(0, σ_n²)
11
+
12
+ Posterior predictive:
13
+ μ* = K(X*, X) [K(X,X) + σ_n² I]⁻¹ y
14
+ Σ* = K(X*, X*) - K(X*, X) [K(X,X) + σ_n² I]⁻¹ K(X, X*)
15
+
16
+ Kernels implemented
17
+ --------------------
18
+ - RBF (Squared Exponential): k(x,x') = σ_f² exp(-||x-x'||²/(2l²))
19
+ - Matern52 : k(x,x') = σ_f²(1+√5 r/l + 5r²/(3l²)) exp(-√5 r/l)
20
+ - Linear : k(x,x') = σ_f² x·x'
21
+ - Periodic : k(x,x') = σ_f² exp(-2 sin²(π|x-x'|/p)/l²)
22
+
23
+ Only numpy is used.
24
+ """
25
+
26
+ import numpy as np
27
+
28
+
29
+ # ============================================================
30
+ # Kernels
31
+ # ============================================================
32
+
33
class RBFKernel:
    """Radial Basis Function (Squared Exponential) kernel.

    k(x, x') = s² · exp(-‖x − x'‖² / (2 l²)) with l = ``length_scale`` and
    s = ``signal_variance``. Note that ``signal_variance`` is squared in
    the computation, so it acts as the amplitude σ_f rather than σ_f².
    """

    def __init__(self, length_scale: float = 1.0, signal_variance: float = 1.0):
        self.length_scale = length_scale
        self.signal_variance = signal_variance

    def __call__(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray:
        """Return the (len(X1), len(X2)) kernel matrix; rows are points."""
        A = np.atleast_2d(X1)
        B = np.atleast_2d(X2)
        # Pairwise differences via broadcasting: (n1, 1, d) - (1, n2, d).
        delta = A[:, np.newaxis, :] - B[np.newaxis, :, :]
        sq_dist = np.sum(delta ** 2, axis=2)
        amplitude = self.signal_variance ** 2
        return amplitude * np.exp(-0.5 * sq_dist / self.length_scale ** 2)
46
+
47
+
48
class Matern52Kernel:
    """Matérn 5/2 kernel — rougher than RBF, common for real-world data.

    k(x, x') = s² (1 + √5 r/l + 5 r²/(3 l²)) exp(-√5 r/l), where r is the
    Euclidean distance, l = ``length_scale`` and s = ``signal_variance``
    (squared in the computation).
    """

    def __init__(self, length_scale: float = 1.0, signal_variance: float = 1.0):
        self.length_scale = length_scale
        self.signal_variance = signal_variance

    def __call__(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray:
        """Return the (len(X1), len(X2)) kernel matrix; rows are points."""
        A = np.atleast_2d(X1)
        B = np.atleast_2d(X2)
        delta = A[:, np.newaxis, :] - B[np.newaxis, :, :]
        r = np.sqrt(np.sum(delta ** 2, axis=2))
        scaled = np.sqrt(5.0) * r / self.length_scale
        polynomial = 1.0 + scaled + scaled ** 2 / 3.0
        return self.signal_variance ** 2 * polynomial * np.exp(-scaled)
64
+
65
+
66
class LinearKernel:
    """Linear (dot-product) kernel: k(x, x') = s² · (x · x').

    ``signal_variance`` (s) is squared in the computation.
    """

    def __init__(self, signal_variance: float = 1.0):
        self.signal_variance = signal_variance

    def __call__(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray:
        """Return the (len(X1), len(X2)) matrix of scaled inner products."""
        A = np.atleast_2d(X1)
        B = np.atleast_2d(X2)
        amplitude = self.signal_variance ** 2
        return amplitude * (A @ B.T)
75
+
76
+
77
class PeriodicKernel:
    """Periodic kernel for modelling repeating patterns.

    k(x, x') = s² exp(-2 sin²(π d / p) / l²), where d is the Euclidean
    distance, p = ``period``, l = ``length_scale`` and
    s = ``signal_variance`` (squared in the computation).
    """

    def __init__(
        self,
        length_scale: float = 1.0,
        period: float = 1.0,
        signal_variance: float = 1.0,
    ):
        self.length_scale = length_scale
        self.period = period
        self.signal_variance = signal_variance

    def __call__(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray:
        """Return the (len(X1), len(X2)) kernel matrix; rows are points."""
        A = np.atleast_2d(X1)
        B = np.atleast_2d(X2)
        # Euclidean norm of the difference; equals |x - x'| for 1-D points.
        delta = A[:, np.newaxis, :] - B[np.newaxis, :, :]
        dist = np.sqrt(np.sum(delta ** 2, axis=2))
        sin_sq = np.sin(np.pi * dist / self.period) ** 2
        exponent = -2.0 * sin_sq / self.length_scale ** 2
        return self.signal_variance ** 2 * np.exp(exponent)
99
+
100
+
101
+ # ============================================================
102
+ # Gaussian Process Regressor
103
+ # ============================================================
104
+
105
class GaussianProcessRegressor:
    """
    Gaussian Process Regression.

    Parameters
    ----------
    kernel : callable
        Covariance kernel k(X1, X2) taking two 2-D arrays (rows are
        points) and returning the cross-covariance matrix.
        Defaults to RBFKernel().
    noise_variance : float
        Observation noise σ_n², added to the diagonal of K(X, X).
    """

    def __init__(self, kernel=None, noise_variance: float = 1e-6):
        self.kernel = kernel if kernel is not None else RBFKernel()
        self.noise_variance = noise_variance
        self.X_train_ = None
        self.y_train_ = None
        self.alpha_ = None   # (K + σ²I)^{-1} y
        self.L_ = None       # Cholesky factor of (K + σ²I)

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------

    @staticmethod
    def _as_2d(X) -> np.ndarray:
        """Return X as (n_samples, n_features).

        A 1-D (or scalar) input is treated as n_samples one-dimensional
        points and becomes a column. ``np.atleast_2d`` would instead
        produce a single row, i.e. one sample with n features.
        """
        X = np.asarray(X)
        return X.reshape(-1, 1) if X.ndim <= 1 else X

    def _check_fitted(self) -> None:
        """Raise RuntimeError unless ``fit`` has been called."""
        if self.alpha_ is None or self.L_ is None:
            raise RuntimeError(
                "GaussianProcessRegressor is not fitted; call fit() first"
            )

    def _cholesky_solve(
        self, A: np.ndarray, b: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray]:
        """Solve A x = b via Cholesky decomposition of A.

        Returns
        -------
        (x, L) : the solution and the lower Cholesky factor of A.

        Raises
        ------
        numpy.linalg.LinAlgError
            If A is not positive definite.
        """
        L = np.linalg.cholesky(A)
        # Forward substitution: L z = b
        z = np.linalg.solve(L, b)
        # Back substitution: L^T x = z
        return np.linalg.solve(L.T, z), L

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def fit(self, X: np.ndarray, y: np.ndarray) -> "GaussianProcessRegressor":
        """
        Fit GP to training data.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features) or (n_samples,)
        y : ndarray of shape (n_samples,)

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If X and y contain a different number of samples.
        """
        X_train = self._as_2d(X)
        y = np.asarray(y)
        if len(X_train) != len(y):
            raise ValueError(
                f"X has {len(X_train)} samples but y has {len(y)}"
            )

        self.X_train_ = X_train
        self.y_train_ = y.copy()

        K = self.kernel(self.X_train_, self.X_train_)
        K_noisy = K + self.noise_variance * np.eye(K.shape[0])

        self.alpha_, self.L_ = self._cholesky_solve(K_noisy, y)
        return self

    def predict(
        self, X: np.ndarray, return_std: bool = False
    ):
        """
        Predictive mean and (optionally) standard deviation.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features) or (n_samples,)
        return_std : bool

        Returns
        -------
        y_mean : ndarray of shape (n_samples,)
        y_std : ndarray of shape (n_samples,) [only if return_std]

        Raises
        ------
        RuntimeError
            If the model has not been fitted.
        """
        self._check_fitted()
        X_ = self._as_2d(X)
        K_star = self.kernel(X_, self.X_train_)  # (n_test, n_train)
        y_mean = K_star @ self.alpha_

        if not return_std:
            return y_mean

        # Predictive variance: diag(K** - K*^T (K+σ²I)^{-1} K*).
        # With v = L^{-1} K*^T the quadratic form is the column-wise
        # sum of squares of v.
        v = np.linalg.solve(self.L_, K_star.T)  # (n_train, n_test)
        K_ss = self.kernel(X_, X_)
        var = np.diag(K_ss) - np.sum(v ** 2, axis=0)
        # Round-off can push tiny variances below zero; clamp before sqrt.
        return y_mean, np.sqrt(np.maximum(var, 0.0))

    def sample_posterior(
        self, X: np.ndarray, n_samples: int = 1, random_state=None
    ) -> np.ndarray:
        """
        Draw samples from the posterior distribution.

        Parameters
        ----------
        X : ndarray of shape (n_test_points, n_features) or (n_test_points,)
        n_samples : int
            Number of functions to draw.
        random_state : seed or None
            Passed to ``np.random.default_rng``.

        Returns
        -------
        samples : ndarray of shape (n_samples, n_test_points)

        Raises
        ------
        RuntimeError
            If the model has not been fitted.
        """
        self._check_fitted()
        rng = np.random.default_rng(random_state)
        X_ = self._as_2d(X)
        K_star = self.kernel(X_, self.X_train_)
        K_ss = self.kernel(X_, X_)

        mu = K_star @ self.alpha_
        v = np.linalg.solve(self.L_, K_star.T)
        cov = K_ss - v.T @ v
        # Regularise: small jitter keeps the covariance numerically PSD.
        cov += 1e-10 * np.eye(len(mu))
        return rng.multivariate_normal(mu, cov, size=n_samples)