scratchkit 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlscratch/__init__.py +56 -0
- mlscratch/__main__.py +118 -0
- mlscratch/bayesian/__init__.py +53 -0
- mlscratch/bayesian/bayesian_linear_regression.py +171 -0
- mlscratch/bayesian/bayesian_network.py +248 -0
- mlscratch/bayesian/bayesian_nn.py +315 -0
- mlscratch/bayesian/gaussian_process.py +207 -0
- mlscratch/bayesian/hmm.py +277 -0
- mlscratch/bayesian/init.py +52 -0
- mlscratch/bayesian/kalman_filter.py +182 -0
- mlscratch/bayesian/naive_bayes.py +209 -0
- mlscratch/metrics/__init__.py +59 -0
- mlscratch/metrics/classification.py +365 -0
- mlscratch/metrics/regression.py +79 -0
- mlscratch/neural/__init__.py +121 -0
- mlscratch/neural/attention.py +420 -0
- mlscratch/neural/autoencoder.py +543 -0
- mlscratch/neural/boltzmann.py +231 -0
- mlscratch/neural/cnn.py +593 -0
- mlscratch/neural/cvnn.py +322 -0
- mlscratch/neural/gan.py +364 -0
- mlscratch/neural/hopfield.py +193 -0
- mlscratch/neural/perceptron.py +398 -0
- mlscratch/neural/rbf_network.py +230 -0
- mlscratch/neural/recurrent.py +569 -0
- mlscratch/preprocessing/__init__.py +38 -0
- mlscratch/preprocessing/encoders.py +140 -0
- mlscratch/preprocessing/model_selection.py +119 -0
- mlscratch/preprocessing/polynomial.py +105 -0
- mlscratch/preprocessing/scalers.py +220 -0
- mlscratch/py.typed +0 -0
- mlscratch/reinforcement/__init__.py +59 -0
- mlscratch/reinforcement/ddpg.py +363 -0
- mlscratch/reinforcement/dqn.py +319 -0
- mlscratch/reinforcement/ppo.py +452 -0
- mlscratch/reinforcement/q_learning.py +352 -0
- mlscratch/reinforcement/sac.py +382 -0
- mlscratch/reinforcement/utils.py +594 -0
- mlscratch/supervised/__init__.py +76 -0
- mlscratch/supervised/_validation.py +50 -0
- mlscratch/supervised/adaboost.py +255 -0
- mlscratch/supervised/decision_tree.py +495 -0
- mlscratch/supervised/gradient_boosting.py +354 -0
- mlscratch/supervised/knn.py +234 -0
- mlscratch/supervised/lasso_regression.py +125 -0
- mlscratch/supervised/linear_models.py +459 -0
- mlscratch/supervised/linear_regression.py +197 -0
- mlscratch/supervised/logistic_regression.py +119 -0
- mlscratch/supervised/naive_bayes.py +113 -0
- mlscratch/supervised/random_forest.py +321 -0
- mlscratch/supervised/ridge_regression.py +93 -0
- mlscratch/supervised/svm.py +356 -0
- mlscratch/unsupervised/__init__.py +39 -0
- mlscratch/unsupervised/apriori.py +178 -0
- mlscratch/unsupervised/dbscan.py +141 -0
- mlscratch/unsupervised/gmm.py +204 -0
- mlscratch/unsupervised/hierarchical_clustering.py +137 -0
- mlscratch/unsupervised/ica.py +167 -0
- mlscratch/unsupervised/kmeans.py +135 -0
- mlscratch/unsupervised/kmedoids.py +133 -0
- mlscratch/unsupervised/pca.py +103 -0
- mlscratch/unsupervised/tsne.py +200 -0
- scratchkit-0.2.0.dist-info/METADATA +241 -0
- scratchkit-0.2.0.dist-info/RECORD +68 -0
- scratchkit-0.2.0.dist-info/WHEEL +5 -0
- scratchkit-0.2.0.dist-info/entry_points.txt +2 -0
- scratchkit-0.2.0.dist-info/licenses/LICENSE +201 -0
- scratchkit-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Lasso Regression
|
|
3
|
+
================
|
|
4
|
+
|
|
5
|
+
Lasso regression using coordinate descent and an explicit intercept.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
from numpy.typing import ArrayLike, NDArray
|
|
12
|
+
|
|
13
|
+
FloatArray = NDArray[np.float64]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _validate_regression_inputs(
    X: ArrayLike, y: ArrayLike,
) -> tuple[FloatArray, FloatArray]:
    """Coerce ``X`` and ``y`` to float arrays and check that they line up.

    Parameters
    ----------
    X : ArrayLike
        Feature matrix; must convert to a 2D float array.
    y : ArrayLike
        Targets; converted to float and flattened to 1D.

    Returns
    -------
    tuple[FloatArray, FloatArray]
        The converted ``(X, y)`` pair.

    Raises
    ------
    ValueError
        If ``X`` is not 2D, or if the number of rows in ``X`` differs from
        the number of (flattened) target values.
    """
    features = np.asarray(X, dtype=float)
    targets = np.asarray(y, dtype=float).flatten()

    if features.ndim != 2:
        raise ValueError("X must be a 2D array of shape (n_samples, n_features).")

    n_rows, n_targets = features.shape[0], targets.shape[0]
    if n_rows != n_targets:
        raise ValueError(f"X has {n_rows} samples but y has {n_targets}.")

    return features, targets
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class LassoRegression:
    """Lasso regression using coordinate descent.

    Fits ``y ≈ X @ coef_ + intercept_`` by minimising::

        (1 / (2 * n_samples)) * ||y - X @ coef - intercept||² + alpha * ||coef||₁

    The intercept is not penalised: ``X`` and ``y`` are centred before the
    descent and the intercept is recovered from the stored means.

    Parameters
    ----------
    alpha : float, default=1.0
        Regularization strength for the L1 penalty.
    max_iter : int, default=1000
        Maximum number of coordinate descent iterations (full sweeps over
        all features).
    tol : float, default=1e-4
        Convergence threshold: the descent stops once the largest absolute
        coefficient change within a sweep falls below this value.

    Attributes
    ----------
    coef_ : FloatArray
        Estimated coefficients for each feature.
    intercept_ : float
        Estimated intercept term.
    loss_history_ : list[float]
        Training objective after each completed sweep.
    feature_means_ : FloatArray
        Column means of the training features used for centring.
    y_mean_ : float
        Mean of the training targets used for centring.
    """

    def __init__(
        self,
        alpha: float = 1.0,
        max_iter: int = 1000,
        tol: float = 1e-4,
    ) -> None:
        self.alpha = float(alpha)
        self.max_iter = int(max_iter)
        self.tol = float(tol)
        self.coef_: FloatArray | None = None
        self.intercept_: float | None = None
        self.loss_history_: list[float] = []
        self.feature_means_: FloatArray | None = None
        self.y_mean_: float | None = None

    def fit(self, X: ArrayLike, y: ArrayLike) -> "LassoRegression":
        """Fit the Lasso regression model to the training data.

        Parameters
        ----------
        X : ArrayLike
            Training features, shape ``(n_samples, n_features)``.
        y : ArrayLike
            Training targets, flattened to ``(n_samples,)``.

        Returns
        -------
        LassoRegression
            ``self``, to allow call chaining.

        Raises
        ------
        ValueError
            Propagated from input validation when ``X`` is not 2D or the
            sample counts of ``X`` and ``y`` differ.
        """
        X_arr, y_arr = _validate_regression_inputs(X, y)
        n_samples, n_features = X_arr.shape
        self.feature_means_ = np.mean(X_arr, axis=0)
        # Stored as a plain float so the attribute matches its annotation.
        self.y_mean_ = float(np.mean(y_arr))
        X_centered = X_arr - self.feature_means_
        y_centered = y_arr - self.y_mean_

        self.coef_ = np.zeros(n_features, dtype=np.float64)
        # With all-zero coefficients the best intercept is the target mean;
        # starting here keeps the model sensible even when max_iter is 0.
        self.intercept_ = self.y_mean_
        self.loss_history_ = []

        X_norm_sq = np.sum(X_centered**2, axis=0) / n_samples
        # Constant columns have zero norm; substitute 1.0 to avoid dividing
        # by zero (their rho is 0, so the coefficient stays 0 regardless).
        X_norm_sq = np.where(X_norm_sq == 0.0, 1.0, X_norm_sq)

        for _ in range(self.max_iter):
            coef_old = self.coef_.copy()
            # Refresh the full residual once per sweep, then update it
            # incrementally per coordinate instead of redoing X @ coef
            # for every feature.
            residual = y_centered - X_centered @ self.coef_

            for j in range(n_features):
                column = X_centered[:, j]
                # Residual with feature j's current contribution removed.
                partial = residual + column * self.coef_[j]
                rho = (column @ partial) / n_samples
                if rho < -self.alpha:
                    new_coef = (rho + self.alpha) / X_norm_sq[j]
                elif rho > self.alpha:
                    new_coef = (rho - self.alpha) / X_norm_sq[j]
                else:
                    new_coef = 0.0
                residual = partial - column * new_coef
                self.coef_[j] = new_coef

            max_coef_change = np.max(np.abs(self.coef_ - coef_old))
            self.intercept_ = self.y_mean_ - float(self.feature_means_ @ self.coef_)
            loss = self._objective(X_arr, y_arr)
            self.loss_history_.append(float(loss))
            if max_coef_change < self.tol:
                break

        return self

    def predict(self, X: ArrayLike) -> FloatArray:
        """Predict using the fitted Lasso model.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If ``X`` is not 2D.
        """
        if self.coef_ is None or self.intercept_ is None:
            raise RuntimeError("Call fit() before predict().")
        X_arr = np.asarray(X, dtype=float)
        if X_arr.ndim != 2:
            raise ValueError("X must be a 2D array.")
        return (X_arr @ self.coef_ + self.intercept_).astype(np.float64)

    def score(self, X: ArrayLike, y: ArrayLike) -> float:
        """Return R² of the fitted model on the given data.

        Returns 0.0 when the targets have zero variance, where R² is
        undefined.
        """
        X_arr, y_arr = _validate_regression_inputs(X, y)
        y_pred = self.predict(X_arr)
        ss_res = np.sum((y_arr - y_pred) ** 2)
        ss_tot = np.sum((y_arr - np.mean(y_arr)) ** 2)
        return float(1.0 - ss_res / ss_tot) if ss_tot > 0 else 0.0

    def _objective(self, X: FloatArray, y: FloatArray) -> float:
        """Return half the mean squared error plus the L1 penalty."""
        y_pred = X @ self.coef_ + self.intercept_
        mse = np.mean((y - y_pred) ** 2) / 2.0
        return float(mse + self.alpha * np.sum(np.abs(self.coef_)))
|
|
@@ -0,0 +1,459 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Linear Models
|
|
3
|
+
=============
|
|
4
|
+
Five linear models implemented from scratch; all except plain LinearRegression add a regularisation penalty.
|
|
5
|
+
|
|
6
|
+
LinearRegression
|
|
7
|
+
-----------------
|
|
8
|
+
Ordinary Least Squares via the normal equations and optional gradient descent.
|
|
9
|
+
L(w) = ‖y - Xw‖²
|
|
10
|
+
|
|
11
|
+
RidgeRegression (L2)
|
|
12
|
+
----------------------
|
|
13
|
+
Adds an L2 penalty on weights:
|
|
14
|
+
L(w) = ‖y - Xw‖² + α‖w‖²
|
|
15
|
+
Closed-form: w = (XᵀX + αI)⁻¹ Xᵀy
|
|
16
|
+
|
|
17
|
+
LassoRegression (L1)
|
|
18
|
+
----------------------
|
|
19
|
+
Adds an L1 penalty; solved via coordinate descent:
|
|
20
|
+
L(w) = ‖y - Xw‖² + α Σ|wⱼ|
|
|
21
|
+
|
|
22
|
+
ElasticNet (L1 + L2)
|
|
23
|
+
----------------------
|
|
24
|
+
L(w) = 1/(2n)·‖y - Xw‖² + α·ρ‖w‖₁ + α·(1-ρ)/2·‖w‖²
|
|
25
|
+
Coordinate descent, same convergence guarantee as Lasso.
|
|
26
|
+
|
|
27
|
+
LogisticRegression
|
|
28
|
+
-------------------
|
|
29
|
+
Binary or multi-class (OvR) with L2 regularisation:
|
|
30
|
+
Binary: p = σ(Xw + b)
|
|
31
|
+
Multi: P = softmax(XWᵀ + b), where each class's weights are trained as a separate one-vs-rest sigmoid classifier
|
|
32
|
+
Trained by mini-batch gradient descent.
|
|
33
|
+
|
|
34
|
+
Only numpy and Python stdlib are used.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
from __future__ import annotations
|
|
38
|
+
import numpy as np
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ────────────────────────────────────────────────────────────────
|
|
42
|
+
# Helpers
|
|
43
|
+
# ────────────────────────────────────────────────────────────────
|
|
44
|
+
|
|
45
|
+
def _sigmoid(x: np.ndarray) -> np.ndarray:
    """Evaluate the logistic function ``1 / (1 + exp(-x))`` element-wise.

    The argument is clipped to ``[-500, 500]`` before exponentiation so that
    ``np.exp`` cannot overflow.
    """
    bounded = np.clip(x, -500, 500)
    return 1.0 / (1.0 + np.exp(-bounded))
|
|
47
|
+
|
|
48
|
+
def _softmax(x: np.ndarray) -> np.ndarray:
    """Return the softmax of ``x`` along its last axis.

    The per-row maximum is subtracted before exponentiating, which leaves
    the result unchanged but keeps ``np.exp`` from overflowing.
    """
    shifted = x - x.max(axis=-1, keepdims=True)
    exps = np.exp(shifted)
    totals = exps.sum(axis=-1, keepdims=True)
    return exps / totals
|
|
51
|
+
|
|
52
|
+
def _add_bias(X: np.ndarray) -> np.ndarray:
    """Return ``X`` with a leading column of ones (the intercept column)."""
    ones = np.ones(len(X))
    return np.column_stack((ones, X))
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# ────────────────────────────────────────────────────────────────
|
|
57
|
+
# LinearRegression
|
|
58
|
+
# ────────────────────────────────────────────────────────────────
|
|
59
|
+
|
|
60
|
+
class LinearRegression:
    """
    Ordinary Least Squares Linear Regression.

    Solver ``'exact'``: closed-form least-squares solution, computed with
    ``np.linalg.lstsq`` so collinear (rank-deficient) features yield the
    minimum-norm solution instead of an ill-conditioned solve.
    Solver ``'sgd'``: mini-batch stochastic gradient descent on the mean
    squared error.

    Parameters
    ----------
    fit_intercept : bool
        Whether to learn an intercept (a column of ones is prepended to X).
    solver : str   'exact' | 'sgd'
    learning_rate : float  (sgd only)
    epochs : int  (sgd only)
    batch_size : int | None
        Mini-batch size for sgd; ``None`` (or 0) uses the full data set.
    random_state : int | None
        Seed for the generator that shuffles samples each epoch.

    Attributes
    ----------
    coef_ : np.ndarray | None
        Fitted feature weights; ``None`` until ``fit`` is called.
    intercept_ : float
        Fitted intercept (0.0 when ``fit_intercept`` is False).
    losses_ : list[float]
        sgd only: mean of the per-batch MSE values for each epoch.
    """

    def __init__(
        self,
        fit_intercept: bool = True,
        solver: str = "exact",
        learning_rate: float = 0.01,
        epochs: int = 1000,
        batch_size: int | None = 32,
        random_state: int | None = None,
    ) -> None:
        if solver not in {"exact", "sgd"}:
            raise ValueError("solver must be 'exact' or 'sgd'.")
        self.fit_intercept = fit_intercept
        self.solver = solver
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self._rng = np.random.default_rng(random_state)

        self.coef_: np.ndarray | None = None
        self.intercept_: float = 0.0
        self.losses_: list[float] = []

    def _prepare(self, X: np.ndarray) -> np.ndarray:
        """Return the design matrix, with a bias column if one is fitted."""
        return _add_bias(X) if self.fit_intercept else X

    def fit(self, X: np.ndarray, y: np.ndarray) -> "LinearRegression":
        """Fit the model and return ``self``.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
        y : array-like, shape (n_samples,)
        """
        # Accept lists/tuples as well as arrays.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        Xp = self._prepare(X)
        n, p = Xp.shape
        self.losses_ = []

        if self.solver == "exact":
            # SVD-based least squares: same minimiser as the normal
            # equations when XᵀX is invertible, but stable when it is not.
            w, *_ = np.linalg.lstsq(Xp, y, rcond=None)
        else:
            w = np.zeros(p)
            bs = self.batch_size or n
            for _ in range(self.epochs):
                order = self._rng.permutation(n)
                ep_loss = 0.0
                n_batches = 0
                for start in range(0, n, bs):
                    mb = order[start:start + bs]
                    Xb, yb = Xp[mb], y[mb]
                    resid = Xb @ w - yb
                    # Gradient of the batch MSE is 2 * Xbᵀ resid / |batch|.
                    w -= self.learning_rate * 2 * Xb.T @ resid / len(mb)
                    ep_loss += float(np.mean(resid ** 2))
                    n_batches += 1
                # Average over the batches actually processed; n // bs
                # undercounts when the last batch is partial.
                self.losses_.append(ep_loss / max(1, n_batches))

        if self.fit_intercept:
            self.intercept_ = float(w[0])
            self.coef_ = w[1:]
        else:
            self.intercept_ = 0.0
            self.coef_ = w
        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return ``X @ coef_ + intercept_``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.coef_ is None:
            raise RuntimeError("Call fit() before predict().")
        return np.asarray(X, dtype=float) @ self.coef_ + self.intercept_

    @property
    def r2_score(self) -> float | None:
        """Placeholder that always returns ``None``; R² is computed externally."""
        return None  # computed externally; stub for API completeness
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# ────────────────────────────────────────────────────────────────
|
|
142
|
+
# RidgeRegression
|
|
143
|
+
# ────────────────────────────────────────────────────────────────
|
|
144
|
+
|
|
145
|
+
class RidgeRegression:
    """
    Ridge regression: least squares with an L2 penalty on the weights.

    Solved in closed form from the penalised normal equations. When an
    intercept is fitted, a column of ones is prepended to X and its entry
    in the penalty matrix is zeroed, so the intercept is not shrunk.

    Parameters
    ----------
    alpha : float   regularisation strength
    fit_intercept : bool
    """

    def __init__(
        self,
        alpha: float = 1.0,
        fit_intercept: bool = True,
    ) -> None:
        self.alpha = alpha
        self.fit_intercept = fit_intercept
        self.coef_: np.ndarray | None = None
        self.intercept_: float = 0.0

    def fit(self, X: np.ndarray, y: np.ndarray) -> "RidgeRegression":
        """Solve ``(XᵀX + penalty) w = Xᵀy`` and store the result; returns ``self``."""
        with_bias = self.fit_intercept
        design = _add_bias(X) if with_bias else X
        _, n_cols = design.shape

        penalty = np.eye(n_cols) * self.alpha
        if with_bias:
            # The first column is the bias term and must stay unpenalised.
            penalty[0, 0] = 0.0

        gram = design.T @ design
        moment = design.T @ y
        weights = np.linalg.solve(gram + penalty, moment)

        if with_bias:
            self.intercept_, self.coef_ = float(weights[0]), weights[1:]
        else:
            self.intercept_, self.coef_ = 0.0, weights
        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return ``X @ coef_ + intercept_``."""
        linear_part = X @ self.coef_
        return linear_part + self.intercept_
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
# ────────────────────────────────────────────────────────────────
|
|
186
|
+
# LassoRegression
|
|
187
|
+
# ────────────────────────────────────────────────────────────────
|
|
188
|
+
|
|
189
|
+
class LassoRegression:
    """
    L1-regularised linear regression (Lasso) via coordinate descent.

    Minimises ``‖y - Xw - b‖² + alpha * Σ|wⱼ|``. For each coordinate the
    exact minimiser is ``S(xⱼ·rⱼ, alpha / 2) / (xⱼ·xⱼ)``, where ``rⱼ`` is
    the residual with feature j removed and ``S`` is the soft-threshold
    operator. The threshold is scaled by each column's own squared norm, so
    the update is correct for unscaled features as well.

    Parameters
    ----------
    alpha : float   regularisation strength
    fit_intercept : bool
        When True, X and y are centred before the descent and the
        (unpenalised) intercept is recovered from the means.
    max_iter : int
        Maximum number of full sweeps over the features.
    tol : float     coordinate descent convergence tolerance (largest
        absolute coefficient change within one sweep)

    Attributes
    ----------
    coef_ : np.ndarray | None
        Fitted weights; ``None`` until ``fit`` is called.
    intercept_ : float
    n_iter_ : int
        Number of sweeps performed by the last ``fit``.
    """

    def __init__(
        self,
        alpha: float = 1.0,
        fit_intercept: bool = True,
        max_iter: int = 1000,
        tol: float = 1e-4,
    ) -> None:
        self.alpha = alpha
        self.fit_intercept = fit_intercept
        self.max_iter = max_iter
        self.tol = tol
        self.coef_: np.ndarray | None = None
        self.intercept_: float = 0.0
        self.n_iter_: int = 0

    @staticmethod
    def _soft_threshold(rho: float, alpha: float) -> float:
        """Coordinate descent closed-form update: S(ρ, α)."""
        if rho > alpha:
            return rho - alpha
        if rho < -alpha:
            return rho + alpha
        return 0.0

    def fit(self, X: np.ndarray, y: np.ndarray) -> "LassoRegression":
        """Fit the model by cyclic coordinate descent and return ``self``.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
        y : array-like, flattened to shape (n_samples,)
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        n, p = X.shape
        # Centring (intercept handled by shifting)
        if self.fit_intercept:
            X_mean = X.mean(axis=0)
            y_mean = float(y.mean())
            Xc = X - X_mean
            yc = y - y_mean
        else:
            Xc, yc, X_mean, y_mean = X, y, np.zeros(p), 0.0

        # Squared norm of every column; loop-invariant.
        col_sq = np.sum(Xc * Xc, axis=0)
        half_alpha = self.alpha / 2.0
        w = np.zeros(p)

        for it in range(self.max_iter):
            w_old = w.copy()
            # Full residual, refreshed once per sweep and then updated
            # incrementally so each coordinate costs O(n), not O(n·p).
            resid = yc - Xc @ w
            for j in range(p):
                if col_sq[j] == 0.0:
                    # A constant (all-zero after centring) column carries
                    # no information; its weight stays at zero.
                    w[j] = 0.0
                    continue
                xj = Xc[:, j]
                # xⱼ·rⱼ with rⱼ = resid + xⱼ wⱼ (feature j excluded).
                rho = float(xj @ resid) + col_sq[j] * w[j]
                w_new = self._soft_threshold(rho, half_alpha) / col_sq[j]
                resid -= xj * (w_new - w[j])
                w[j] = w_new
            self.n_iter_ = it + 1
            if np.max(np.abs(w - w_old)) < self.tol:
                break

        self.coef_ = w
        if self.fit_intercept:
            self.intercept_ = float(y_mean - X_mean @ w)
        else:
            self.intercept_ = 0.0
        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return ``X @ coef_ + intercept_``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.coef_ is None:
            raise RuntimeError("Call fit() before predict().")
        return np.asarray(X, dtype=float) @ self.coef_ + self.intercept_
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
# ────────────────────────────────────────────────────────────────
|
|
261
|
+
# ElasticNet
|
|
262
|
+
# ────────────────────────────────────────────────────────────────
|
|
263
|
+
|
|
264
|
+
class ElasticNet:
    """
    Elastic-Net regression: combined L1 and L2 penalties, fitted by cyclic
    coordinate descent.

    Each coordinate update soft-thresholds the scaled correlation between
    feature j and the partial residual by ``alpha * l1_ratio``, then divides
    by the feature's mean squared value plus ``alpha * (1 - l1_ratio)``.

    Parameters
    ----------
    alpha : float      total regularisation strength
    l1_ratio : float   ρ ∈ [0,1]; 0 = Ridge, 1 = Lasso
    fit_intercept : bool
    max_iter : int
    tol : float
    """

    def __init__(
        self,
        alpha: float = 1.0,
        l1_ratio: float = 0.5,
        fit_intercept: bool = True,
        max_iter: int = 1000,
        tol: float = 1e-4,
    ) -> None:
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.fit_intercept = fit_intercept
        self.max_iter = max_iter
        self.tol = tol
        self.coef_: np.ndarray | None = None
        self.intercept_: float = 0.0
        self.n_iter_: int = 0

    def fit(self, X: np.ndarray, y: np.ndarray) -> "ElasticNet":
        """Run coordinate descent on (optionally centred) data; returns ``self``."""
        n_samples, n_features = X.shape

        if self.fit_intercept:
            # Centre both sides so the intercept drops out of the descent.
            col_means = X.mean(axis=0)
            target_mean = float(y.mean())
            Xc = X - col_means
            yc = y - target_mean
        else:
            col_means, target_mean = np.zeros(n_features), 0.0
            Xc, yc = X, y

        weights = np.zeros(n_features)
        l1_pen = self.alpha * self.l1_ratio
        l2_pen = self.alpha * (1 - self.l1_ratio)

        for sweep in range(self.max_iter):
            previous = weights.copy()
            for j in range(n_features):
                column = Xc[:, j]
                # Residual with feature j's own contribution added back.
                partial = yc - Xc @ weights + column * weights[j]
                rho = float(column @ partial) / n_samples
                scale = float(column @ column) / n_samples + l2_pen
                # Soft-threshold by the L1 strength, then shrink by the
                # L2-augmented scale.
                if rho > l1_pen:
                    shrunk = rho - l1_pen
                elif rho < -l1_pen:
                    shrunk = rho + l1_pen
                else:
                    weights[j] = 0.0
                    continue
                weights[j] = shrunk / scale
            self.n_iter_ = sweep + 1
            largest_step = np.max(np.abs(weights - previous))
            if largest_step < self.tol:
                break

        self.coef_ = weights
        self.intercept_ = (
            float(target_mean - col_means @ weights) if self.fit_intercept else 0.0
        )
        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return ``X @ coef_ + intercept_``."""
        linear_part = X @ self.coef_
        return linear_part + self.intercept_
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
# ────────────────────────────────────────────────────────────────
|
|
335
|
+
# LogisticRegression
|
|
336
|
+
# ────────────────────────────────────────────────────────────────
|
|
337
|
+
|
|
338
|
+
class LogisticRegression:
    """
    Logistic Regression (binary and multi-class OvR).

    Every classifier is a sigmoid model trained by mini-batch gradient
    descent on the log-loss with an L2 penalty of strength ``1 / (C * n)``
    on the weights (the bias is not penalised). With more than two classes
    one classifier is trained per class (one-vs-rest) and ``predict_proba``
    applies a softmax over the per-class scores.

    Parameters
    ----------
    C : float         inverse regularisation strength (larger = less reg)
    fit_intercept : bool
        Stored for API compatibility; a bias term is always trained.
    multi_class : str  ``'binary'`` forces a single classifier for
        ``classes_[1]`` vs the rest; any other value selects binary when
        exactly two classes are present and OvR otherwise.
    learning_rate : float
    epochs : int
    batch_size : int | None
        ``None`` (or 0) uses the full data set per step.
    tol : float       early-stop on epoch-loss change (binary path only)
    random_state : int | None

    Attributes
    ----------
    classes_ : np.ndarray | None
        Sorted unique labels seen in ``fit``.
    coef_ : np.ndarray | None
        Shape ``(n_features,)`` for binary, ``(n_classes, n_features)`` for OvR.
    intercept_ : np.ndarray | None
        Shape ``(1,)`` for binary, ``(n_classes,)`` for OvR.
    losses_ : list[float]
        Mean per-batch log-loss per epoch for a binary fit; empty after an
        OvR fit.
    """

    def __init__(
        self,
        C: float = 1.0,
        fit_intercept: bool = True,
        multi_class: str = "auto",
        learning_rate: float = 0.1,
        epochs: int = 200,
        batch_size: int | None = 32,
        tol: float = 1e-4,
        random_state: int | None = None,
    ) -> None:
        self.C = C
        self.fit_intercept = fit_intercept
        self.multi_class = multi_class
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.tol = tol
        self._rng = np.random.default_rng(random_state)

        self.classes_: np.ndarray | None = None
        self.coef_: np.ndarray | None = None       # (n_classes, n_features) or (n_features,)
        self.intercept_: np.ndarray | None = None  # (n_classes,) or (1,)
        self.losses_: list[float] = []

    def fit(self, X: np.ndarray, y: np.ndarray) -> "LogisticRegression":
        """Train on ``X`` (n_samples, n_features) and labels ``y``; returns ``self``."""
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        self.classes_ = np.unique(y)
        K = len(self.classes_)
        n, p = X.shape
        # Clear history up front so an OvR fit never exposes the loss
        # curve of an earlier binary fit.
        self.losses_ = []

        if K == 2 or self.multi_class == "binary":
            self._fit_binary(X, (y == self.classes_[1]).astype(float), n, p)
        else:
            self._fit_ovr(X, y, n, p, K)
        return self

    def _sgd_binary(self, X, y, n, p, monitor):
        """Train one sigmoid classifier on 0/1 targets ``y``.

        When ``monitor`` is True the mean per-batch log-loss is recorded for
        each epoch and training stops once it changes by less than ``tol``.

        Returns ``(weights, bias, epoch_losses)``.
        """
        w = np.zeros(p)
        b = 0.0
        bs = self.batch_size or n
        eps = 1e-8  # keeps log() finite when a probability saturates
        prev_loss = np.inf
        losses = []

        for _ in range(self.epochs):
            order = self._rng.permutation(n)
            ep_loss = 0.0
            n_batches = 0
            for start in range(0, n, bs):
                mb = order[start:start + bs]
                Xb, yb = X[mb], y[mb]
                p_hat = _sigmoid(Xb @ w + b)
                err = p_hat - yb
                if monitor:
                    ep_loss += float(-np.mean(
                        yb * np.log(p_hat + eps)
                        + (1 - yb) * np.log(1 - p_hat + eps)
                    ))
                    n_batches += 1
                # Gradient with L2 regularisation (C = 1/lambda)
                dw = Xb.T @ err / len(mb) + w / (self.C * n)
                db = err.mean()
                w -= self.learning_rate * dw
                b -= self.learning_rate * db
            if monitor:
                ep_loss /= max(1, n_batches)
                losses.append(ep_loss)
                if abs(prev_loss - ep_loss) < self.tol:
                    break
                prev_loss = ep_loss

        return w, b, losses

    def _fit_binary(self, X, y, n, p):
        """Fit a single classifier with loss tracking and early stopping."""
        w, b, self.losses_ = self._sgd_binary(X, y, n, p, monitor=True)
        self.coef_ = w
        self.intercept_ = np.array([b])

    def _fit_ovr(self, X, y, n, p, K):
        """One-vs-Rest: train K binary classifiers."""
        self.coef_ = np.zeros((K, p))
        self.intercept_ = np.zeros(K)
        for k, cls in enumerate(self.classes_):
            y_bin = (y == cls).astype(float)
            # Each per-class model runs for the full number of epochs.
            w, b, _ = self._sgd_binary(X, y_bin, n, p, monitor=False)
            self.coef_[k] = w
            self.intercept_[k] = b

    def decision_function(self, X: np.ndarray) -> np.ndarray:
        """Return raw linear scores: ``(n,)`` for binary, ``(n, K)`` for OvR.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.coef_ is None or self.intercept_ is None:
            raise RuntimeError("Call fit() before predict().")
        if self.coef_.ndim == 1:
            return X @ self.coef_ + self.intercept_[0]
        return X @ self.coef_.T + self.intercept_

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """Return class probabilities, one column per entry of ``classes_``.

        Binary models return ``[1 - p, p]`` from the sigmoid; OvR models
        return a softmax over the per-class scores.
        """
        scores = self.decision_function(X)
        if self.coef_.ndim == 1:
            p1 = _sigmoid(scores)
            return np.column_stack([1 - p1, p1])
        return _softmax(scores)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return the most probable class label for each row of ``X``."""
        proba = self.predict_proba(X)
        return self.classes_[np.argmax(proba, axis=1)]
|