scratchkit 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. mlscratch/__init__.py +56 -0
  2. mlscratch/__main__.py +118 -0
  3. mlscratch/bayesian/__init__.py +53 -0
  4. mlscratch/bayesian/bayesian_linear_regression.py +171 -0
  5. mlscratch/bayesian/bayesian_network.py +248 -0
  6. mlscratch/bayesian/bayesian_nn.py +315 -0
  7. mlscratch/bayesian/gaussian_process.py +207 -0
  8. mlscratch/bayesian/hmm.py +277 -0
  9. mlscratch/bayesian/init.py +52 -0
  10. mlscratch/bayesian/kalman_filter.py +182 -0
  11. mlscratch/bayesian/naive_bayes.py +209 -0
  12. mlscratch/metrics/__init__.py +59 -0
  13. mlscratch/metrics/classification.py +365 -0
  14. mlscratch/metrics/regression.py +79 -0
  15. mlscratch/neural/__init__.py +121 -0
  16. mlscratch/neural/attention.py +420 -0
  17. mlscratch/neural/autoencoder.py +543 -0
  18. mlscratch/neural/boltzmann.py +231 -0
  19. mlscratch/neural/cnn.py +593 -0
  20. mlscratch/neural/cvnn.py +322 -0
  21. mlscratch/neural/gan.py +364 -0
  22. mlscratch/neural/hopfield.py +193 -0
  23. mlscratch/neural/perceptron.py +398 -0
  24. mlscratch/neural/rbf_network.py +230 -0
  25. mlscratch/neural/recurrent.py +569 -0
  26. mlscratch/preprocessing/__init__.py +38 -0
  27. mlscratch/preprocessing/encoders.py +140 -0
  28. mlscratch/preprocessing/model_selection.py +119 -0
  29. mlscratch/preprocessing/polynomial.py +105 -0
  30. mlscratch/preprocessing/scalers.py +220 -0
  31. mlscratch/py.typed +0 -0
  32. mlscratch/reinforcement/__init__.py +59 -0
  33. mlscratch/reinforcement/ddpg.py +363 -0
  34. mlscratch/reinforcement/dqn.py +319 -0
  35. mlscratch/reinforcement/ppo.py +452 -0
  36. mlscratch/reinforcement/q_learning.py +352 -0
  37. mlscratch/reinforcement/sac.py +382 -0
  38. mlscratch/reinforcement/utils.py +594 -0
  39. mlscratch/supervised/__init__.py +76 -0
  40. mlscratch/supervised/_validation.py +50 -0
  41. mlscratch/supervised/adaboost.py +255 -0
  42. mlscratch/supervised/decision_tree.py +495 -0
  43. mlscratch/supervised/gradient_boosting.py +354 -0
  44. mlscratch/supervised/knn.py +234 -0
  45. mlscratch/supervised/lasso_regression.py +125 -0
  46. mlscratch/supervised/linear_models.py +459 -0
  47. mlscratch/supervised/linear_regression.py +197 -0
  48. mlscratch/supervised/logistic_regression.py +119 -0
  49. mlscratch/supervised/naive_bayes.py +113 -0
  50. mlscratch/supervised/random_forest.py +321 -0
  51. mlscratch/supervised/ridge_regression.py +93 -0
  52. mlscratch/supervised/svm.py +356 -0
  53. mlscratch/unsupervised/__init__.py +39 -0
  54. mlscratch/unsupervised/apriori.py +178 -0
  55. mlscratch/unsupervised/dbscan.py +141 -0
  56. mlscratch/unsupervised/gmm.py +204 -0
  57. mlscratch/unsupervised/hierarchical_clustering.py +137 -0
  58. mlscratch/unsupervised/ica.py +167 -0
  59. mlscratch/unsupervised/kmeans.py +135 -0
  60. mlscratch/unsupervised/kmedoids.py +133 -0
  61. mlscratch/unsupervised/pca.py +103 -0
  62. mlscratch/unsupervised/tsne.py +200 -0
  63. scratchkit-0.2.0.dist-info/METADATA +241 -0
  64. scratchkit-0.2.0.dist-info/RECORD +68 -0
  65. scratchkit-0.2.0.dist-info/WHEEL +5 -0
  66. scratchkit-0.2.0.dist-info/entry_points.txt +2 -0
  67. scratchkit-0.2.0.dist-info/licenses/LICENSE +201 -0
  68. scratchkit-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,193 @@
1
+ """
2
+ Hopfield Network
3
+ =================
4
+ A recurrent, fully-connected network that stores patterns as attractors
5
+ of an energy landscape (Hopfield, 1982). Used as a content-addressable
6
+ associative memory.
7
+
8
+ Storage (Hebbian learning rule)
9
+ ---------------------------------
10
+ For patterns p^(1), ..., p^(M) ∈ {-1, +1}^N:
11
+
12
+ W_ij = (1/N) Σ_μ p_i^(μ) p_j^(μ), W_ii = 0
13
+
14
+ Energy function
15
+ -----------------
16
+ E(s) = -½ Σ_ij W_ij s_i s_j
17
+
18
+ Recall dynamics
19
+ -----------------
20
+ Asynchronous update (one neuron at a time, random order) or synchronous
21
+ update (all neurons at once):
22
+
23
+ s_i ← sign(Σ_j W_ij s_j)
24
+
25
+ Under asynchronous updates the network converges to a local minimum of E —
26
+ ideally the nearest stored pattern, enabling error correction / pattern completion. Synchronous updates may instead end in a 2-cycle.
27
+
28
+ Capacity
29
+ ---------
30
+ Approximately 0.138 N patterns can be stored reliably (Amit et al., 1985).
31
+
32
+ Reference
33
+ ----------
34
+ Hopfield, J. J. (1982). Neural networks and physical systems with emergent
35
+ collective computational abilities. PNAS, 79(8), 2554-2558.
36
+
37
+ Only numpy is used.
38
+ """
39
+
40
+ from __future__ import annotations
41
+
42
+ import numpy as np
43
+
44
+
45
class HopfieldNetwork:
    """
    Discrete Hopfield Network with bipolar {-1, +1} states.

    Every public method accepts any array-like state (lists, tuples,
    ndarrays). States are binarised with a single rule everywhere:
    positive -> +1, negative -> -1, and exactly zero -> +1.

    Parameters
    ----------
    n_units : int
        Number of neurons (= dimensionality of stored patterns).
        Must be at least 1.
    random_state : int or None
        Seed for asynchronous update ordering.

    Attributes
    ----------
    weights : ndarray of shape (n_units, n_units)
        Symmetric Hebbian weight matrix with a zero diagonal.
    n_patterns_stored_ : int
        Number of patterns passed to the most recent ``fit`` call.

    Raises
    ------
    ValueError
        If ``n_units`` is smaller than 1.
    """

    def __init__(self, n_units: int, random_state: int | None = None) -> None:
        if n_units < 1:
            raise ValueError("n_units must be a positive integer.")
        self.n_units = n_units
        self.weights = np.zeros((n_units, n_units))
        self._rng = np.random.default_rng(random_state)
        self.n_patterns_stored_ = 0

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _to_bipolar(state) -> np.ndarray:
        """Return a fresh float array of signs, with zeros mapped to +1."""
        # np.sign always allocates a new array, so the caller's data is
        # never modified by the in-place tie-break below.
        signs = np.sign(np.asarray(state, dtype=float))
        signs[signs == 0] = 1.0
        return signs

    # ------------------------------------------------------------------
    # Storage
    # ------------------------------------------------------------------

    def fit(self, patterns: np.ndarray) -> "HopfieldNetwork":
        """
        Store patterns via the Hebbian outer-product rule.

        Any previously stored patterns are replaced, not accumulated.

        Parameters
        ----------
        patterns : array-like of shape (n_patterns, n_units)
            Each row is a bipolar pattern with values in {-1, +1}.
            A single 1-D pattern is treated as one row.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If the patterns are not 2-D after promotion or their second
            dimension differs from ``n_units``.
        """
        patterns = np.atleast_2d(np.asarray(patterns, dtype=float))
        if patterns.ndim != 2:
            raise ValueError("patterns must be a 1-D or 2-D array.")
        if patterns.shape[1] != self.n_units:
            raise ValueError(
                f"Pattern dimension {patterns.shape[1]} != n_units {self.n_units}."
            )

        # W = (1/N) * sum_mu p^(mu) p^(mu)^T, then remove self-connections.
        self.weights = (patterns.T @ patterns) / self.n_units
        np.fill_diagonal(self.weights, 0.0)
        self.n_patterns_stored_ = len(patterns)
        return self

    # ------------------------------------------------------------------
    # Energy
    # ------------------------------------------------------------------

    def energy(self, state: np.ndarray) -> float:
        """
        Compute E(s) = -½ sᵗ W s.

        The state is used as given (it is not binarised).

        Parameters
        ----------
        state : array-like of shape (n_units,)

        Returns
        -------
        float
        """
        s = np.asarray(state, dtype=float)
        return float(-0.5 * s @ self.weights @ s)

    # ------------------------------------------------------------------
    # Recall
    # ------------------------------------------------------------------

    def recall(
        self,
        state: np.ndarray,
        mode: str = "async",
        max_iter: int = 100,
    ) -> np.ndarray:
        """
        Run network dynamics from an initial state until nothing changes
        or ``max_iter`` sweeps have been performed.

        Parameters
        ----------
        state : array-like of shape (n_units,)
            Initial state, values in {-1, +1} (or any sign-able reals).
            The input is not modified.
        mode : str
            ``'async'`` — update neurons one at a time in a fresh random
                          order each sweep.
            ``'sync'``  — update all neurons simultaneously each step.
                          The loop simply stops after ``max_iter`` steps
                          if the state keeps changing.
        max_iter : int
            Maximum number of update sweeps.

        Returns
        -------
        ndarray of shape (n_units,) — final state with values in {-1, +1}

        Raises
        ------
        ValueError
            If ``mode`` is unknown or ``state`` does not have shape
            ``(n_units,)``.
        """
        if mode not in {"async", "sync"}:
            raise ValueError("mode must be 'async' or 'sync'.")

        s = self._to_bipolar(state)
        if s.shape != (self.n_units,):
            raise ValueError(
                f"State shape {s.shape} != expected ({self.n_units},)."
            )

        if mode == "sync":
            for _ in range(max_iter):
                s_new = self._to_bipolar(self.weights @ s)
                if np.array_equal(s_new, s):
                    break
                s = s_new
            return s

        # Asynchronous updates: each neuron sees the latest values of the
        # neurons updated before it in the same sweep.
        for _ in range(max_iter):
            changed = False
            for i in self._rng.permutation(self.n_units):
                # Same tie-break as the sync branch: zero activation -> +1.
                new_val = 1.0 if self.weights[i] @ s >= 0 else -1.0
                if new_val != s[i]:
                    s[i] = new_val
                    changed = True
            if not changed:
                break

        return s

    # ------------------------------------------------------------------
    # Evaluation helpers
    # ------------------------------------------------------------------

    def is_stable(self, pattern: np.ndarray) -> bool:
        """
        Check whether ``pattern`` is a fixed point of the dynamics, i.e.
        one simultaneous update of all neurons leaves the binarised
        pattern unchanged.

        Returns
        -------
        bool
        """
        s = self._to_bipolar(pattern)
        return bool(np.array_equal(self._to_bipolar(self.weights @ s), s))

    def hamming_distance(self, a: np.ndarray, b: np.ndarray) -> int:
        """Number of differing bipolar units between two states."""
        return int(np.sum(self._to_bipolar(a) != self._to_bipolar(b)))

    def overlap(self, a: np.ndarray, b: np.ndarray) -> float:
        """
        Normalised overlap (similarity) between two bipolar states,
        in [-1, 1]. +1 = identical, -1 = exact inverse, 0 = orthogonal.

        The dot product is divided by ``n_units``.
        """
        return float(
            np.dot(self._to_bipolar(a), self._to_bipolar(b)) / self.n_units
        )
@@ -0,0 +1,398 @@
1
+ """
2
+ Perceptrons — Single-Layer and Multi-Layer
3
+ ===========================================
4
+ The foundational building blocks of neural networks.
5
+
6
+ SingleLayerPerceptron
7
+ ---------------------
8
+ A single layer of neurons with a configurable activation function.
9
+ Supports binary classification (sigmoid + binary cross-entropy) and
10
+ regression (linear + MSE), making the original two scripts a single
11
+ clean class with a ``task`` switch.
12
+
13
+ z = X W + b
14
+ ŷ = σ(z) # classification
15
+ ŷ = z # regression
16
+
17
+ Multi-Layer Perceptron
18
+ -----------------------
19
+ Fully-connected feedforward network with:
20
+ - Arbitrary depth via ``hidden_sizes``
21
+ - ReLU hidden activations
22
+ - Softmax output for multi-class classification
23
+ - Linear output for regression
24
+ - Mini-batch SGD with momentum
25
+ - He weight initialisation
26
+
27
+ References
28
+ ----------
29
+ Rosenblatt, F. (1958). The perceptron: a probabilistic model for information
30
+ storage and organization in the brain. Psychological Review, 65(6), 386–408.
31
+
32
+ Rumelhart, D. E., Hinton, G. E., & Williams, R. J. (1986). Learning
33
+ representations by back-propagating errors. Nature, 323, 533–536.
34
+
35
+ Only numpy is used.
36
+ """
37
+
38
+ from __future__ import annotations
39
+
40
+ import numpy as np
41
+
42
+
43
+ # ============================================================
44
+ # Activations (module-level helpers)
45
+ # ============================================================
46
+
47
+ def _sigmoid(x: np.ndarray) -> np.ndarray:
48
+ return 1.0 / (1.0 + np.exp(-np.clip(x, -500, 500)))
49
+
50
+
51
+ def _relu(x: np.ndarray) -> np.ndarray:
52
+ return np.maximum(0.0, x)
53
+
54
+
55
+ def _relu_grad(x: np.ndarray) -> np.ndarray:
56
+ return (x > 0.0).astype(float)
57
+
58
+
59
+ def _softmax(x: np.ndarray) -> np.ndarray:
60
+ e = np.exp(x - x.max(axis=-1, keepdims=True))
61
+ return e / e.sum(axis=-1, keepdims=True)
62
+
63
+
64
+ # ============================================================
65
+ # Single-Layer Perceptron
66
+ # ============================================================
67
+
68
class SingleLayerPerceptron:
    """
    Single-Layer Perceptron for binary classification or regression,
    trained with full-batch gradient descent.

    Parameters
    ----------
    input_size : int
        Number of input features.
    task : str
        ``'classification'`` (sigmoid + binary cross-entropy) or
        ``'regression'`` (linear + MSE).
    learning_rate : float
        Gradient-descent step size.
    epochs : int
        Number of full passes over the training data.
    random_state : int or None
        Seed for reproducible weight initialisation.

    Attributes
    ----------
    weights_ : ndarray of shape (input_size,) or None
        Learned weights; ``None`` until ``fit`` is called.
    bias_ : float or None
        Learned bias; ``None`` until ``fit`` is called.
    losses_ : list of float
        Training loss recorded at the start of every epoch.

    Raises
    ------
    ValueError
        If ``task`` is not one of the two supported values.
    """

    def __init__(
        self,
        input_size: int,
        task: str = "classification",
        learning_rate: float = 0.01,
        epochs: int = 1000,
        random_state: int | None = None,
    ) -> None:
        if task not in {"classification", "regression"}:
            raise ValueError("task must be 'classification' or 'regression'.")
        self.input_size = input_size
        self.task = task
        self.learning_rate = learning_rate
        self.epochs = epochs
        self._rng = np.random.default_rng(random_state)

        # Parameters (initialised in fit)
        self.weights_: np.ndarray | None = None
        self.bias_: float | None = None
        self.losses_: list[float] = []

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _logistic(z: np.ndarray) -> np.ndarray:
        """Logistic sigmoid; ``z`` is clipped to [-500, 500] first."""
        return 1.0 / (1.0 + np.exp(-np.clip(z, -500, 500)))

    def _activate(self, z: np.ndarray) -> np.ndarray:
        """Sigmoid for classification, identity for regression."""
        return self._logistic(z) if self.task == "classification" else z

    def _loss(self, y: np.ndarray, y_hat: np.ndarray, eps: float = 1e-8) -> float:
        """Binary cross-entropy (classification) or MSE (regression)."""
        if self.task == "classification":
            # eps keeps log() finite when a prediction saturates at 0 or 1.
            return float(-np.mean(
                y * np.log(y_hat + eps) + (1 - y) * np.log(1 - y_hat + eps)
            ))
        return float(np.mean((y_hat - y) ** 2))

    def _as_features(self, X) -> np.ndarray:
        """Coerce ``X`` to a float 2-D array and check its feature count."""
        X = np.asarray(X, dtype=float)
        if X.ndim != 2 or X.shape[1] != self.input_size:
            raise ValueError(
                f"X must have shape (n_samples, {self.input_size}); got {X.shape}."
            )
        return X

    def _require_fitted(self) -> None:
        """Raise a clear error when the model has no parameters yet."""
        if self.weights_ is None or self.bias_ is None:
            raise RuntimeError(
                "This SingleLayerPerceptron is not fitted yet; call fit() first."
            )

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def fit(self, X: np.ndarray, y: np.ndarray) -> "SingleLayerPerceptron":
        """
        Train the perceptron on (X, y). Previous parameters and loss
        history are discarded.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Targets are flattened to 1-D so that a column vector cannot
            broadcast against the 1-D predictions.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` has the wrong shape, the data is empty, or ``X`` and
            ``y`` disagree on the number of samples.
        """
        X = self._as_features(X)
        y = np.asarray(y, dtype=float).ravel()
        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("Cannot fit on an empty dataset.")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]}."
            )

        scale = np.sqrt(2.0 / self.input_size)
        self.weights_ = self._rng.normal(0, scale, self.input_size)
        self.bias_ = 0.0
        self.losses_ = []

        for _ in range(self.epochs):
            z = X @ self.weights_ + self.bias_
            y_hat = self._activate(z)

            self.losses_.append(self._loss(y, y_hat))

            # The same (y_hat - y) form is used for both tasks.
            error = y_hat - y
            dw = X.T @ error / n_samples
            db = float(error.mean())

            self.weights_ -= self.learning_rate * dw
            self.bias_ -= self.learning_rate * db

        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Predict class labels (classification) or values (regression).

        Classification thresholds the sigmoid output at 0.5.

        Returns
        -------
        ndarray of shape (n_samples,)

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If ``X`` does not have ``input_size`` columns.
        """
        self._require_fitted()
        z = self._as_features(X) @ self.weights_ + self.bias_
        y_hat = self._activate(z)
        if self.task == "classification":
            return (y_hat >= 0.5).astype(int)
        return y_hat

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """
        Return sigmoid probabilities (classification only).

        Returns
        -------
        ndarray of shape (n_samples,)

        Raises
        ------
        ValueError
            If the task is regression or ``X`` has the wrong shape.
        RuntimeError
            If called before ``fit``.
        """
        if self.task != "classification":
            raise ValueError("predict_proba is only available for classification.")
        self._require_fitted()
        return self._logistic(self._as_features(X) @ self.weights_ + self.bias_)
186
+
187
+
188
+ # ============================================================
189
+ # Multi-Layer Perceptron
190
+ # ============================================================
191
+
192
class MultiLayerPerceptron:
    """
    Multi-Layer Perceptron (fully-connected feedforward network) with
    ReLU hidden layers, trained by mini-batch SGD with momentum.

    Parameters
    ----------
    hidden_sizes : list[int] or None
        Sizes of the hidden layers (e.g. [64, 64]). ``None`` selects the
        default ``[64, 64]``; an explicit empty list means no hidden
        layer (a single linear/softmax layer).
    task : str
        ``'classification'`` (softmax + cross-entropy) or
        ``'regression'`` (linear + MSE).
    n_classes : int
        Number of output classes (ignored for regression).
    learning_rate : float
    momentum : float
        Momentum coefficient for SGD (0 = vanilla SGD).
    epochs : int
    batch_size : int or None
        Mini-batch size. None (or 0) = full-batch.
    random_state : int or None

    Attributes
    ----------
    weights_ : list of ndarray
        One ``(fan_in, fan_out)`` matrix per layer; empty until ``fit``.
    biases_ : list of ndarray
        One bias vector per layer; empty until ``fit``.
    losses_ : list of float
        Mean mini-batch loss for every epoch.

    Raises
    ------
    ValueError
        If ``task`` is not one of the two supported values.
    """

    def __init__(
        self,
        hidden_sizes: list[int] | None = None,
        task: str = "classification",
        n_classes: int = 2,
        learning_rate: float = 0.01,
        momentum: float = 0.9,
        epochs: int = 200,
        batch_size: int | None = 32,
        random_state: int | None = None,
    ) -> None:
        if task not in {"classification", "regression"}:
            raise ValueError("task must be 'classification' or 'regression'.")
        # Test against None explicitly: `hidden_sizes or [...]` would also
        # replace a deliberately empty list.
        self.hidden_sizes = [64, 64] if hidden_sizes is None else list(hidden_sizes)
        self.task = task
        self.n_classes = n_classes
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.epochs = epochs
        self.batch_size = batch_size
        self._rng = np.random.default_rng(random_state)

        # Built in fit()
        self.weights_: list[np.ndarray] = []
        self.biases_: list[np.ndarray] = []
        self.losses_: list[float] = []

    # ------------------------------------------------------------------
    # Build
    # ------------------------------------------------------------------

    def _build(self, n_features: int) -> None:
        """Create freshly initialised weights and zero biases."""
        n_out = 1 if self.task == "regression" else self.n_classes
        sizes = [n_features] + list(self.hidden_sizes) + [n_out]

        self.weights_ = []
        self.biases_ = []
        for fan_in, fan_out in zip(sizes[:-1], sizes[1:]):
            scale = np.sqrt(2.0 / fan_in)  # He initialisation
            self.weights_.append(self._rng.normal(0, scale, (fan_in, fan_out)))
            self.biases_.append(np.zeros(fan_out))

    # ------------------------------------------------------------------
    # Forward
    # ------------------------------------------------------------------

    def _forward(self, X: np.ndarray) -> tuple[list, list]:
        """
        Return (pre_activations, activations) for backprop.

        ``activations[0]`` is the input itself, so ``activations`` has
        one more entry than ``pre_activations``.
        """
        pre_acts, acts = [], [X]
        a = X
        last = len(self.weights_) - 1
        for i, (W, b) in enumerate(zip(self.weights_, self.biases_)):
            z = a @ W + b
            pre_acts.append(z)
            if i < last:
                a = np.maximum(0.0, z)  # ReLU
            elif self.task == "classification":
                # Softmax; subtracting the row max keeps exp() finite.
                e = np.exp(z - z.max(axis=-1, keepdims=True))
                a = e / e.sum(axis=-1, keepdims=True)
            else:
                a = z
            acts.append(a)
        return pre_acts, acts

    # ------------------------------------------------------------------
    # Loss
    # ------------------------------------------------------------------

    def _loss(self, y_hot: np.ndarray, y_hat: np.ndarray, eps: float = 1e-8) -> float:
        """Categorical cross-entropy (classification) or MSE (regression)."""
        if self.task == "classification":
            return float(-np.mean(np.sum(y_hot * np.log(y_hat + eps), axis=1)))
        return float(np.mean((y_hat.ravel() - y_hot.ravel()) ** 2))

    # ------------------------------------------------------------------
    # Backward
    # ------------------------------------------------------------------

    def _backward(
        self,
        pre_acts: list,
        acts: list,
        y_hot: np.ndarray,
        vel_w: list,
        vel_b: list,
    ) -> None:
        """
        Back-propagate one mini-batch and apply a momentum SGD step.

        ``vel_w`` / ``vel_b`` are the per-layer velocity buffers; their
        entries are replaced in place so state carries across calls.
        """
        n = len(y_hot)
        y_hat = acts[-1]

        # Output delta
        if self.task == "classification":
            delta = (y_hat - y_hot) / n
        else:
            delta = 2.0 * (y_hat - y_hot) / n

        for i in reversed(range(len(self.weights_))):
            dW = acts[i].T @ delta
            db = delta.sum(axis=0)

            # Propagate the error through the weights that produced this
            # forward pass, i.e. BEFORE layer i is updated. Using the
            # updated weights would give earlier layers a gradient that
            # belongs to a different network.
            delta_prev = None
            if i > 0:
                relu_grad = (pre_acts[i - 1] > 0.0).astype(float)
                delta_prev = (delta @ self.weights_[i].T) * relu_grad

            # Momentum update
            vel_w[i] = self.momentum * vel_w[i] + self.learning_rate * dW
            vel_b[i] = self.momentum * vel_b[i] + self.learning_rate * db

            self.weights_[i] -= vel_w[i]
            self.biases_[i] -= vel_b[i]

            if delta_prev is not None:
                delta = delta_prev

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def _require_fitted(self) -> None:
        """Raise when no layers exist (``fit`` has not been called)."""
        # With no layers _forward would hand back the raw input, and
        # predict() would silently return argmax over input features.
        if not self.weights_:
            raise RuntimeError(
                "This MultiLayerPerceptron is not fitted yet; call fit() first."
            )

    def fit(self, X: np.ndarray, y: np.ndarray) -> "MultiLayerPerceptron":
        """
        Train the MLP. The network is rebuilt from scratch on every call.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,) or (n_samples, 1) —
            integer class labels in ``[0, n_classes)`` or floats

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, the data is empty, ``X`` and ``y``
            disagree on the number of samples, or a class label lies
            outside ``[0, n_classes)``.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D; got shape {X.shape}.")
        y = np.asarray(y).ravel()
        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("Cannot fit on an empty dataset.")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]}."
            )

        # One-hot encode targets for classification
        if self.task == "classification":
            labels = y.astype(int)
            # Negative labels would otherwise wrap around via NumPy
            # indexing and silently mark the wrong class.
            if labels.min() < 0 or labels.max() >= self.n_classes:
                raise ValueError(
                    f"Class labels must lie in [0, {self.n_classes})."
                )
            y_hot = np.zeros((n_samples, self.n_classes))
            y_hot[np.arange(n_samples), labels] = 1.0
        else:
            y_hot = y.astype(float).reshape(-1, 1)

        self._build(X.shape[1])

        # Velocity buffers for momentum
        vel_w = [np.zeros_like(w) for w in self.weights_]
        vel_b = [np.zeros_like(b) for b in self.biases_]

        bs = self.batch_size or n_samples
        self.losses_ = []

        for _ in range(self.epochs):
            idx = self._rng.permutation(n_samples)
            epoch_loss = 0.0
            n_batches = 0

            for start in range(0, n_samples, bs):
                mb = idx[start:start + bs]
                Xb = X[mb]
                yb = y_hot[mb]

                pre_acts, acts = self._forward(Xb)
                epoch_loss += self._loss(yb, acts[-1])
                n_batches += 1

                self._backward(pre_acts, acts, yb, vel_w, vel_b)

            self.losses_.append(epoch_loss / n_batches)

        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Predict class labels (classification) or values (regression).

        Returns
        -------
        ndarray of shape (n_samples,)

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        self._require_fitted()
        _, acts = self._forward(np.asarray(X, dtype=float))
        y_hat = acts[-1]
        if self.task == "classification":
            return np.argmax(y_hat, axis=1)
        return y_hat.ravel()

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """
        Return softmax probabilities (classification only).

        Returns
        -------
        ndarray of shape (n_samples, n_classes)

        Raises
        ------
        ValueError
            If the task is regression.
        RuntimeError
            If called before ``fit``.
        """
        if self.task != "classification":
            raise ValueError("predict_proba is only available for classification.")
        self._require_fitted()
        _, acts = self._forward(np.asarray(X, dtype=float))
        return acts[-1]