scratchkit 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. mlscratch/__init__.py +56 -0
  2. mlscratch/__main__.py +118 -0
  3. mlscratch/bayesian/__init__.py +53 -0
  4. mlscratch/bayesian/bayesian_linear_regression.py +171 -0
  5. mlscratch/bayesian/bayesian_network.py +248 -0
  6. mlscratch/bayesian/bayesian_nn.py +315 -0
  7. mlscratch/bayesian/gaussian_process.py +207 -0
  8. mlscratch/bayesian/hmm.py +277 -0
  9. mlscratch/bayesian/init.py +52 -0
  10. mlscratch/bayesian/kalman_filter.py +182 -0
  11. mlscratch/bayesian/naive_bayes.py +209 -0
  12. mlscratch/metrics/__init__.py +59 -0
  13. mlscratch/metrics/classification.py +365 -0
  14. mlscratch/metrics/regression.py +79 -0
  15. mlscratch/neural/__init__.py +121 -0
  16. mlscratch/neural/attention.py +420 -0
  17. mlscratch/neural/autoencoder.py +543 -0
  18. mlscratch/neural/boltzmann.py +231 -0
  19. mlscratch/neural/cnn.py +593 -0
  20. mlscratch/neural/cvnn.py +322 -0
  21. mlscratch/neural/gan.py +364 -0
  22. mlscratch/neural/hopfield.py +193 -0
  23. mlscratch/neural/perceptron.py +398 -0
  24. mlscratch/neural/rbf_network.py +230 -0
  25. mlscratch/neural/recurrent.py +569 -0
  26. mlscratch/preprocessing/__init__.py +38 -0
  27. mlscratch/preprocessing/encoders.py +140 -0
  28. mlscratch/preprocessing/model_selection.py +119 -0
  29. mlscratch/preprocessing/polynomial.py +105 -0
  30. mlscratch/preprocessing/scalers.py +220 -0
  31. mlscratch/py.typed +0 -0
  32. mlscratch/reinforcement/__init__.py +59 -0
  33. mlscratch/reinforcement/ddpg.py +363 -0
  34. mlscratch/reinforcement/dqn.py +319 -0
  35. mlscratch/reinforcement/ppo.py +452 -0
  36. mlscratch/reinforcement/q_learning.py +352 -0
  37. mlscratch/reinforcement/sac.py +382 -0
  38. mlscratch/reinforcement/utils.py +594 -0
  39. mlscratch/supervised/__init__.py +76 -0
  40. mlscratch/supervised/_validation.py +50 -0
  41. mlscratch/supervised/adaboost.py +255 -0
  42. mlscratch/supervised/decision_tree.py +495 -0
  43. mlscratch/supervised/gradient_boosting.py +354 -0
  44. mlscratch/supervised/knn.py +234 -0
  45. mlscratch/supervised/lasso_regression.py +125 -0
  46. mlscratch/supervised/linear_models.py +459 -0
  47. mlscratch/supervised/linear_regression.py +197 -0
  48. mlscratch/supervised/logistic_regression.py +119 -0
  49. mlscratch/supervised/naive_bayes.py +113 -0
  50. mlscratch/supervised/random_forest.py +321 -0
  51. mlscratch/supervised/ridge_regression.py +93 -0
  52. mlscratch/supervised/svm.py +356 -0
  53. mlscratch/unsupervised/__init__.py +39 -0
  54. mlscratch/unsupervised/apriori.py +178 -0
  55. mlscratch/unsupervised/dbscan.py +141 -0
  56. mlscratch/unsupervised/gmm.py +204 -0
  57. mlscratch/unsupervised/hierarchical_clustering.py +137 -0
  58. mlscratch/unsupervised/ica.py +167 -0
  59. mlscratch/unsupervised/kmeans.py +135 -0
  60. mlscratch/unsupervised/kmedoids.py +133 -0
  61. mlscratch/unsupervised/pca.py +103 -0
  62. mlscratch/unsupervised/tsne.py +200 -0
  63. scratchkit-0.2.0.dist-info/METADATA +241 -0
  64. scratchkit-0.2.0.dist-info/RECORD +68 -0
  65. scratchkit-0.2.0.dist-info/WHEEL +5 -0
  66. scratchkit-0.2.0.dist-info/entry_points.txt +2 -0
  67. scratchkit-0.2.0.dist-info/licenses/LICENSE +201 -0
  68. scratchkit-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,125 @@
1
+ """
2
+ Lasso Regression
3
+ ================
4
+
5
+ Lasso regression using coordinate descent and an explicit intercept.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import numpy as np
11
+ from numpy.typing import ArrayLike, NDArray
12
+
13
+ FloatArray = NDArray[np.float64]
14
+
15
+
16
+ def _validate_regression_inputs(
17
+ X: ArrayLike, y: ArrayLike,
18
+ ) -> tuple[FloatArray, FloatArray]:
19
+ X_arr = np.asarray(X, dtype=float)
20
+ y_arr = np.asarray(y, dtype=float).flatten()
21
+ if X_arr.ndim != 2:
22
+ raise ValueError("X must be a 2D array of shape (n_samples, n_features).")
23
+ if X_arr.shape[0] != y_arr.shape[0]:
24
+ raise ValueError(
25
+ f"X has {X_arr.shape[0]} samples but y has {y_arr.shape[0]}."
26
+ )
27
+ return X_arr, y_arr
28
+
29
+
30
class LassoRegression:
    """L1-penalised linear regression fitted by cyclic coordinate descent.

    Features and targets are mean-centred before the descent, and the
    intercept is recovered afterwards from the stored means.

    Parameters
    ----------
    alpha : float, default=1.0
        Weight of the L1 penalty.
    max_iter : int, default=1000
        Upper bound on the number of full sweeps over the features.
    tol : float, default=1e-4
        A sweep whose largest absolute coefficient change is below this
        value ends the fit.

    Attributes
    ----------
    coef_ : FloatArray
        One fitted coefficient per feature.
    intercept_ : float
        Fitted intercept.
    loss_history_ : list[float]
        Objective value recorded after every sweep.
    feature_means_ : FloatArray
        Column means of the training features.
    y_mean_ : float
        Mean of the training targets.
    """

    def __init__(
        self,
        alpha: float = 1.0,
        max_iter: int = 1000,
        tol: float = 1e-4,
    ) -> None:
        # Hyper-parameters are coerced eagerly so bad values fail at construction.
        self.alpha = float(alpha)
        self.max_iter = int(max_iter)
        self.tol = float(tol)
        # Fitted state; populated by fit().
        self.coef_: FloatArray | None = None
        self.intercept_: float | None = None
        self.loss_history_: list[float] = []
        self.feature_means_: FloatArray | None = None
        self.y_mean_: float | None = None

    def fit(self, X: ArrayLike, y: ArrayLike) -> "LassoRegression":
        """Estimate coefficients and intercept from training data; returns ``self``."""
        features, targets = _validate_regression_inputs(X, y)
        n_samples, n_features = features.shape

        self.feature_means_ = np.mean(features, axis=0)
        self.y_mean_ = np.mean(targets)
        centred_X = features - self.feature_means_
        centred_y = targets - self.y_mean_

        weights = np.zeros(n_features, dtype=np.float64)
        self.coef_ = weights  # updated in place below
        self.intercept_ = 0.0
        self.loss_history_ = []

        # Per-feature mean squared value; constant columns get a scale of 1
        # so the division in the update stays finite.
        col_scale = np.sum(centred_X**2, axis=0) / n_samples
        col_scale[col_scale == 0.0] = 1.0

        for _ in range(self.max_iter):
            previous = weights.copy()

            for j in range(n_features):
                column = centred_X[:, j]
                # Prediction of every feature except j, then its residual.
                fit_without_j = centred_X @ weights - column * weights[j]
                rho = (column @ (centred_y - fit_without_j)) / n_samples
                # Soft-thresholding of rho at alpha.
                if rho < -self.alpha:
                    updated = (rho + self.alpha) / col_scale[j]
                elif rho > self.alpha:
                    updated = (rho - self.alpha) / col_scale[j]
                else:
                    updated = 0.0
                weights[j] = updated

            largest_step = np.max(np.abs(weights - previous))
            self.intercept_ = self.y_mean_ - float(self.feature_means_ @ weights)
            self.loss_history_.append(float(self._objective(features, targets)))
            if largest_step < self.tol:
                break

        return self

    def predict(self, X: ArrayLike) -> FloatArray:
        """Return predictions for the rows of ``X``.

        Raises ``RuntimeError`` when the model is unfitted and ``ValueError``
        when ``X`` is not two-dimensional.
        """
        if self.coef_ is None or self.intercept_ is None:
            raise RuntimeError("Call fit() before predict().")
        features = np.asarray(X, dtype=float)
        if features.ndim != 2:
            raise ValueError("X must be a 2D array.")
        predictions = features @ self.coef_ + self.intercept_
        return predictions.astype(np.float64)

    def score(self, X: ArrayLike, y: ArrayLike) -> float:
        """Return the coefficient of determination R² on ``(X, y)``.

        Returns 0.0 when the targets have zero total variation.
        """
        features, targets = _validate_regression_inputs(X, y)
        predictions = self.predict(features)
        ss_res = np.sum((targets - predictions) ** 2)
        ss_tot = np.sum((targets - np.mean(targets)) ** 2)
        if ss_tot > 0:
            return float(1.0 - ss_res / ss_tot)
        return 0.0

    def _objective(self, X: FloatArray, y: FloatArray) -> float:
        """Half mean squared error plus ``alpha`` times the L1 norm of the coefficients."""
        errors = y - (X @ self.coef_ + self.intercept_)
        half_mse = np.mean(errors ** 2) / 2.0
        l1_norm = np.sum(np.abs(self.coef_))
        return float(half_mse + self.alpha * l1_norm)
@@ -0,0 +1,459 @@
1
+ """
2
+ Linear Models
3
+ =============
4
+ Five linear models (ordinary least squares plus regularised variants) implemented from scratch.
5
+
6
+ LinearRegression
7
+ -----------------
8
+ Ordinary Least Squares via the normal equations and optional gradient descent.
9
+ L(w) = ‖y - Xw‖²
10
+
11
+ RidgeRegression (L2)
12
+ ----------------------
13
+ Adds an L2 penalty on weights:
14
+ L(w) = ‖y - Xw‖² + α‖w‖²
15
+ Closed-form: w = (XᵀX + αI)⁻¹ Xᵀy
16
+
17
+ LassoRegression (L1)
18
+ ----------------------
19
+ Adds an L1 penalty; solved via coordinate descent:
20
+ L(w) = ‖y - Xw‖² + α Σ|wⱼ|
21
+
22
+ ElasticNet (L1 + L2)
23
+ ----------------------
24
+ L(w) = (1/2n)‖y - Xw‖² + α·ρ‖w‖₁ + α·(1-ρ)/2·‖w‖²
25
+ Coordinate descent, same convergence guarantee as Lasso.
26
+
27
+ LogisticRegression
28
+ -------------------
29
+ Binary or multi-class (OvR) with L2 regularisation:
30
+ Binary: p = σ(Xw + b)
31
+ Multi: P = softmax(XW + b)
32
+ Trained by mini-batch gradient descent.
33
+
34
+ Only numpy and Python stdlib are used.
35
+ """
36
+
37
+ from __future__ import annotations
38
+ import numpy as np
39
+
40
+
41
+ # ────────────────────────────────────────────────────────────────
42
+ # Helpers
43
+ # ────────────────────────────────────────────────────────────────
44
+
45
+ def _sigmoid(x: np.ndarray) -> np.ndarray:
46
+ return 1.0 / (1.0 + np.exp(-np.clip(x, -500, 500)))
47
+
48
+ def _softmax(x: np.ndarray) -> np.ndarray:
49
+ e = np.exp(x - x.max(axis=-1, keepdims=True))
50
+ return e / e.sum(axis=-1, keepdims=True)
51
+
52
+ def _add_bias(X: np.ndarray) -> np.ndarray:
53
+ return np.column_stack([np.ones(len(X)), X])
54
+
55
+
56
+ # ────────────────────────────────────────────────────────────────
57
+ # LinearRegression
58
+ # ────────────────────────────────────────────────────────────────
59
+
60
class LinearRegression:
    """
    Ordinary Least Squares Linear Regression.

    Solver ``'exact'``: closed-form normal equations w = (XᵀX)⁻¹ Xᵀy, with a
    1e-12 ridge added to the diagonal for numerical stability.
    Solver ``'sgd'``: mini-batch stochastic gradient descent on the mean
    squared error.

    Parameters
    ----------
    fit_intercept : bool    prepend a column of ones and fit an intercept
    solver        : str     'exact' | 'sgd'
    learning_rate : float   (sgd only)
    epochs        : int     (sgd only)
    batch_size    : int | None   (sgd only) falsy values mean full-batch
    random_state  : int | None   seed for the shuffling generator

    Attributes
    ----------
    coef_      : np.ndarray   fitted feature weights
    intercept_ : float        fitted intercept (0.0 without intercept)
    losses_    : list[float]  (sgd only) mean squared residual per epoch

    Raises
    ------
    ValueError
        If ``solver`` is not ``'exact'`` or ``'sgd'``.
    """

    def __init__(
        self,
        fit_intercept: bool = True,
        solver: str = "exact",
        learning_rate: float = 0.01,
        epochs: int = 1000,
        batch_size: int | None = 32,
        random_state: int | None = None,
    ) -> None:
        if solver not in {"exact", "sgd"}:
            raise ValueError("solver must be 'exact' or 'sgd'.")
        self.fit_intercept = fit_intercept
        self.solver = solver
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self._rng = np.random.default_rng(random_state)

        self.coef_: np.ndarray | None = None
        self.intercept_: float = 0.0
        self.losses_: list[float] = []

    def _prepare(self, X: np.ndarray) -> np.ndarray:
        """Return the design matrix, with a bias column when an intercept is fitted."""
        return _add_bias(X) if self.fit_intercept else X

    def _fit_sgd(self, Xp: np.ndarray, y: np.ndarray) -> np.ndarray:
        """Run mini-batch gradient descent and return the weight vector.

        Appends one entry per epoch to ``losses_``: the squared residuals of
        every sample (measured just before its batch update) averaged over
        all samples, so a short final batch is weighted correctly.
        """
        n, p = Xp.shape
        w = np.zeros(p)
        # A falsy batch size means full-batch; max() keeps range() valid for n == 0.
        bs = self.batch_size or max(n, 1)
        for _ in range(self.epochs):
            order = self._rng.permutation(n)
            sq_err = 0.0
            for start in range(0, n, bs):
                mb = order[start:start + bs]
                Xb, yb = Xp[mb], y[mb]
                resid = Xb @ w - yb
                sq_err += float(np.sum(resid ** 2))
                # Gradient of the batch mean squared error is 2·Xᵀr / |batch|.
                w -= (2.0 * self.learning_rate / len(mb)) * (Xb.T @ resid)
            self.losses_.append(sq_err / n if n else 0.0)
        return w

    def fit(self, X: np.ndarray, y: np.ndarray) -> "LinearRegression":
        """Fit the model to ``(X, y)`` with the configured solver; returns ``self``."""
        # Accept any array-like (lists included) by converting once up front.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        Xp = self._prepare(X)
        p = Xp.shape[1]
        self.losses_ = []

        if self.solver == "exact":
            # w = (XᵀX)⁻¹ Xᵀy — regularised with tiny ridge for stability
            A = Xp.T @ Xp + 1e-12 * np.eye(p)
            w = np.linalg.solve(A, Xp.T @ y)
        else:
            w = self._fit_sgd(Xp, y)

        if self.fit_intercept:
            # Column 0 of the design matrix is the bias column.
            self.intercept_ = float(w[0])
            self.coef_ = w[1:]
        else:
            self.intercept_ = 0.0
            self.coef_ = w
        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return ``X @ coef_ + intercept_``."""
        return np.asarray(X, dtype=float) @ self.coef_ + self.intercept_

    @property
    def r2_score(self) -> float | None:
        """Placeholder kept for API completeness; always ``None``."""
        return None  # computed externally; stub for API completeness
139
+
140
+
141
+ # ────────────────────────────────────────────────────────────────
142
+ # RidgeRegression
143
+ # ────────────────────────────────────────────────────────────────
144
+
145
class RidgeRegression:
    """
    Ridge regression: least squares with an L2 penalty, solved in closed form.

    Parameters
    ----------
    alpha         : float   regularisation strength
    fit_intercept : bool    fit an (unpenalised) intercept term
    """

    def __init__(
        self,
        alpha: float = 1.0,
        fit_intercept: bool = True,
    ) -> None:
        self.alpha = alpha
        self.fit_intercept = fit_intercept
        self.coef_: np.ndarray | None = None
        self.intercept_: float = 0.0

    def fit(self, X: np.ndarray, y: np.ndarray) -> "RidgeRegression":
        """Solve (XᵀX + penalty) w = Xᵀy and store the result; returns ``self``."""
        design = _add_bias(X) if self.fit_intercept else X
        _, n_cols = design.shape

        penalty = self.alpha * np.eye(n_cols)
        if self.fit_intercept:
            # The first column is the bias column; leave it unpenalised.
            penalty[0, 0] = 0.0

        gram = design.T @ design
        moment = design.T @ y
        weights = np.linalg.solve(gram + penalty, moment)

        if not self.fit_intercept:
            self.intercept_ = 0.0
            self.coef_ = weights
            return self

        self.intercept_ = float(weights[0])
        self.coef_ = weights[1:]
        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return ``X @ coef_ + intercept_``."""
        linear_part = X @ self.coef_
        return linear_part + self.intercept_
183
+
184
+
185
+ # ────────────────────────────────────────────────────────────────
186
+ # LassoRegression
187
+ # ────────────────────────────────────────────────────────────────
188
+
189
class LassoRegression:
    """
    L1-regularised linear regression (Lasso) via coordinate descent.

    Minimises ``‖y - Xw‖² + α Σ|wⱼ|``. When ``fit_intercept`` is true the
    data are mean-centred first, so the intercept is not penalised.

    Parameters
    ----------
    alpha         : float   regularisation strength
    fit_intercept : bool
    max_iter      : int     maximum number of full sweeps over the features
    tol           : float   coordinate descent convergence tolerance (largest
                            absolute coefficient change in one sweep)

    Attributes
    ----------
    coef_      : np.ndarray   fitted feature weights
    intercept_ : float        fitted intercept (0.0 without intercept)
    n_iter_    : int          number of sweeps performed by the last fit
    """

    def __init__(
        self,
        alpha: float = 1.0,
        fit_intercept: bool = True,
        max_iter: int = 1000,
        tol: float = 1e-4,
    ) -> None:
        self.alpha = alpha
        self.fit_intercept = fit_intercept
        self.max_iter = max_iter
        self.tol = tol
        self.coef_: np.ndarray | None = None
        self.intercept_: float = 0.0
        self.n_iter_: int = 0

    @staticmethod
    def _soft_threshold(rho: float, alpha: float) -> float:
        """Coordinate descent closed-form update: S(ρ, α)."""
        if rho > alpha:
            return rho - alpha
        if rho < -alpha:
            return rho + alpha
        return 0.0

    def fit(self, X: np.ndarray, y: np.ndarray) -> "LassoRegression":
        """Fit the coefficients by cyclic coordinate descent; returns ``self``."""
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        n, p = X.shape
        # Centring (intercept handled by shifting)
        if self.fit_intercept:
            X_mean = X.mean(axis=0)
            y_mean = float(y.mean())
            Xc = X - X_mean
            yc = y - y_mean
        else:
            Xc, yc, X_mean, y_mean = X, y, np.zeros(p), 0.0

        # Squared norm of every (centred) column; fixed for the whole fit.
        col_sq = np.sum(Xc * Xc, axis=0)
        # Setting the derivative of ‖r - xⱼwⱼ‖² + α|wⱼ| to zero gives
        # wⱼ = S(xⱼ·r, α/2) / (xⱼ·xⱼ): the threshold applies to the raw
        # correlation and the column norm only rescales the result.
        threshold = self.alpha / 2.0

        w = np.zeros(p)
        # Full residual yc - Xc @ w, kept up to date incrementally (w starts at 0).
        residual = np.array(yc, dtype=float)
        self.n_iter_ = 0

        for it in range(self.max_iter):
            w_old = w.copy()
            for j in range(p):
                z_j = col_sq[j]
                if z_j == 0.0:
                    # A constant (all-zero after centring) column carries no
                    # information; its weight stays at zero.
                    continue
                x_j = Xc[:, j]
                # Correlation of column j with the residual that excludes feature j.
                rho = float(x_j @ residual) + z_j * w[j]
                w_j = self._soft_threshold(rho, threshold) / z_j
                if w_j != w[j]:
                    residual -= x_j * (w_j - w[j])
                    w[j] = w_j
            self.n_iter_ = it + 1
            # initial=0.0 keeps the check well-defined when there are no features.
            if np.max(np.abs(w - w_old), initial=0.0) < self.tol:
                break

        self.coef_ = w
        if self.fit_intercept:
            self.intercept_ = float(y_mean - X_mean @ w)
        else:
            self.intercept_ = 0.0
        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return ``X @ coef_ + intercept_``."""
        return np.asarray(X, dtype=float) @ self.coef_ + self.intercept_
258
+
259
+
260
+ # ────────────────────────────────────────────────────────────────
261
+ # ElasticNet
262
+ # ────────────────────────────────────────────────────────────────
263
+
264
class ElasticNet:
    """
    Elastic-Net regression: combined L1 and L2 penalties, fitted by
    cyclic coordinate descent.

    Parameters
    ----------
    alpha         : float   total regularisation strength
    l1_ratio      : float   ρ ∈ [0,1]; share of ``alpha`` given to the L1 term
    fit_intercept : bool    centre the data and recover an intercept
    max_iter      : int     maximum number of sweeps over the features
    tol           : float   stop when the largest coefficient change in a
                            sweep falls below this value
    """

    def __init__(
        self,
        alpha: float = 1.0,
        l1_ratio: float = 0.5,
        fit_intercept: bool = True,
        max_iter: int = 1000,
        tol: float = 1e-4,
    ) -> None:
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.fit_intercept = fit_intercept
        self.max_iter = max_iter
        self.tol = tol
        self.coef_: np.ndarray | None = None
        self.intercept_: float = 0.0
        self.n_iter_: int = 0

    def fit(self, X: np.ndarray, y: np.ndarray) -> "ElasticNet":
        """Fit the coefficients by coordinate descent; returns ``self``."""
        n, p = X.shape
        if not self.fit_intercept:
            col_means, target_mean = np.zeros(p), 0.0
            centred_X, centred_y = X, y
        else:
            col_means = X.mean(axis=0)
            target_mean = float(y.mean())
            centred_X = X - col_means
            centred_y = y - target_mean

        weights = np.zeros(p)
        l1_penalty = self.alpha * self.l1_ratio
        l2_penalty = self.alpha * (1 - self.l1_ratio)

        for sweep in range(self.max_iter):
            previous = weights.copy()
            for j in range(p):
                column = centred_X[:, j]
                # Residual with feature j's own contribution added back.
                partial = centred_y - centred_X @ weights + column * weights[j]
                rho = float(column @ partial) / n
                denom = float(column @ column) / n + l2_penalty
                # Soft-threshold rho at the L1 penalty, then divide by the
                # L2-augmented denominator.
                if rho > l1_penalty:
                    shrunk = rho - l1_penalty
                elif rho < -l1_penalty:
                    shrunk = rho + l1_penalty
                else:
                    weights[j] = 0.0
                    continue
                weights[j] = shrunk / denom
            self.n_iter_ = sweep + 1
            largest_step = np.max(np.abs(weights - previous))
            if largest_step < self.tol:
                break

        self.coef_ = weights
        self.intercept_ = (
            float(target_mean - col_means @ weights) if self.fit_intercept else 0.0
        )
        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return ``X @ coef_ + intercept_``."""
        linear_part = X @ self.coef_
        return linear_part + self.intercept_
332
+
333
+
334
+ # ────────────────────────────────────────────────────────────────
335
+ # LogisticRegression
336
+ # ────────────────────────────────────────────────────────────────
337
+
338
class LogisticRegression:
    """
    Logistic Regression (binary and multi-class OvR).

    Trained by mini-batch gradient descent on the log-loss with an L2
    penalty on the weights (the intercept is not penalised).

    Parameters
    ----------
    C             : float   inverse regularisation strength (larger = less reg)
    fit_intercept : bool    stored on the instance; the training code shown
                            here always fits an intercept
    multi_class   : str     'binary' forces a single classifier for
                            ``classes_[1]`` vs the rest; any other value uses
                            one-vs-rest when more than two classes are present
    learning_rate : float
    epochs        : int
    batch_size    : int | None   falsy values mean full-batch
    tol           : float   early-stop on epoch loss change (binary fit only)
    random_state  : int | None   seed for the shuffling generator

    Attributes
    ----------
    classes_   : np.ndarray   sorted unique labels seen in ``fit``
    coef_      : np.ndarray   (n_features,) for binary, (n_classes, n_features) for OvR
    intercept_ : np.ndarray   (1,) for binary, (n_classes,) for OvR
    losses_    : list[float]  per-epoch mean batch log-loss (binary fit only;
                              empty after an OvR fit)
    """

    def __init__(
        self,
        C: float = 1.0,
        fit_intercept: bool = True,
        multi_class: str = "auto",
        learning_rate: float = 0.1,
        epochs: int = 200,
        batch_size: int | None = 32,
        tol: float = 1e-4,
        random_state: int | None = None,
    ) -> None:
        self.C = C
        self.fit_intercept = fit_intercept
        self.multi_class = multi_class
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.tol = tol
        self._rng = np.random.default_rng(random_state)

        self.classes_: np.ndarray | None = None
        self.coef_: np.ndarray | None = None       # (n_classes, n_features) or (n_features,)
        self.intercept_: np.ndarray | None = None  # (n_classes,) or (1,)
        self.losses_: list[float] = []

    def fit(self, X: np.ndarray, y: np.ndarray) -> "LogisticRegression":
        """Fit on ``(X, y)``; returns ``self``.

        Raises
        ------
        ValueError
            If ``y`` contains fewer than two distinct classes.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        self.classes_ = np.unique(y)
        K = len(self.classes_)
        if K < 2:
            # Without this guard the binary path fails on classes_[1].
            raise ValueError(
                f"LogisticRegression needs at least 2 classes; got {K}."
            )
        n, p = X.shape

        if K == 2 or self.multi_class == "binary":
            self._fit_binary(X, (y == self.classes_[1]).astype(float), n, p)
        else:
            self._fit_ovr(X, y, n, p, K)
        return self

    def _train_binary(self, X, y, n, p, track_loss):
        """Train one binary classifier on 0/1 targets ``y``.

        With ``track_loss`` true the mean batch log-loss of every epoch is
        recorded and training stops once it changes by less than ``tol``;
        otherwise all ``epochs`` are run. Returns ``(w, b, losses)``.
        """
        w = np.zeros(p)
        b = 0.0
        bs = self.batch_size or n
        reg_denom = self.C * n  # L2 gradient term is w / (C·n)
        eps = 1e-8              # keeps log() away from zero
        losses: list[float] = []
        prev_loss = np.inf

        for _ in range(self.epochs):
            idx = self._rng.permutation(n)
            ep_loss = 0.0
            n_batches = 0
            for start in range(0, n, bs):
                mb = idx[start:start + bs]
                Xb, yb = X[mb], y[mb]
                p_hat = _sigmoid(Xb @ w + b)
                err = p_hat - yb
                if track_loss:
                    ep_loss += -np.mean(yb * np.log(p_hat + eps) +
                                        (1 - yb) * np.log(1 - p_hat + eps))
                    n_batches += 1
                # Gradient with L2 regularisation (C = 1/lambda)
                w -= self.learning_rate * (Xb.T @ err / len(mb) + w / reg_denom)
                b -= self.learning_rate * err.mean()
            if track_loss:
                ep_loss = float(ep_loss / max(1, n_batches))
                losses.append(ep_loss)
                if abs(prev_loss - ep_loss) < self.tol:
                    break
                prev_loss = ep_loss

        return w, b, losses

    def _fit_binary(self, X, y, n, p):
        """Fit a single classifier with loss tracking and early stopping."""
        w, b, losses = self._train_binary(X, y, n, p, track_loss=True)
        self.losses_ = losses
        self.coef_ = w
        self.intercept_ = np.array([b])

    def _fit_ovr(self, X, y, n, p, K):
        """One-vs-Rest: train K binary classifiers."""
        self.coef_ = np.zeros((K, p))
        self.intercept_ = np.zeros(K)
        # No loss is tracked here; clear any history left by an earlier fit.
        self.losses_ = []
        for k, cls in enumerate(self.classes_):
            y_bin = (y == cls).astype(float)
            w, b, _ = self._train_binary(X, y_bin, n, p, track_loss=False)
            self.coef_[k] = w
            self.intercept_[k] = b

    def decision_function(self, X: np.ndarray) -> np.ndarray:
        """Return raw linear scores: 1-D for binary, one column per class for OvR."""
        if self.coef_.ndim == 1:
            return X @ self.coef_ + self.intercept_[0]
        return X @ self.coef_.T + self.intercept_

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """Return class probabilities, one column per entry of ``classes_``.

        Binary models apply the sigmoid to the score; OvR models apply a
        softmax across the per-class scores.
        """
        scores = self.decision_function(X)
        if self.coef_.ndim == 1:
            p1 = _sigmoid(scores)
            return np.column_stack([1 - p1, p1])
        return _softmax(scores)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return the label with the highest predicted probability for each row."""
        proba = self.predict_proba(X)
        return self.classes_[np.argmax(proba, axis=1)]