scratchkit 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. mlscratch/__init__.py +56 -0
  2. mlscratch/__main__.py +118 -0
  3. mlscratch/bayesian/__init__.py +53 -0
  4. mlscratch/bayesian/bayesian_linear_regression.py +171 -0
  5. mlscratch/bayesian/bayesian_network.py +248 -0
  6. mlscratch/bayesian/bayesian_nn.py +315 -0
  7. mlscratch/bayesian/gaussian_process.py +207 -0
  8. mlscratch/bayesian/hmm.py +277 -0
  9. mlscratch/bayesian/init.py +52 -0
  10. mlscratch/bayesian/kalman_filter.py +182 -0
  11. mlscratch/bayesian/naive_bayes.py +209 -0
  12. mlscratch/metrics/__init__.py +59 -0
  13. mlscratch/metrics/classification.py +365 -0
  14. mlscratch/metrics/regression.py +79 -0
  15. mlscratch/neural/__init__.py +121 -0
  16. mlscratch/neural/attention.py +420 -0
  17. mlscratch/neural/autoencoder.py +543 -0
  18. mlscratch/neural/boltzmann.py +231 -0
  19. mlscratch/neural/cnn.py +593 -0
  20. mlscratch/neural/cvnn.py +322 -0
  21. mlscratch/neural/gan.py +364 -0
  22. mlscratch/neural/hopfield.py +193 -0
  23. mlscratch/neural/perceptron.py +398 -0
  24. mlscratch/neural/rbf_network.py +230 -0
  25. mlscratch/neural/recurrent.py +569 -0
  26. mlscratch/preprocessing/__init__.py +38 -0
  27. mlscratch/preprocessing/encoders.py +140 -0
  28. mlscratch/preprocessing/model_selection.py +119 -0
  29. mlscratch/preprocessing/polynomial.py +105 -0
  30. mlscratch/preprocessing/scalers.py +220 -0
  31. mlscratch/py.typed +0 -0
  32. mlscratch/reinforcement/__init__.py +59 -0
  33. mlscratch/reinforcement/ddpg.py +363 -0
  34. mlscratch/reinforcement/dqn.py +319 -0
  35. mlscratch/reinforcement/ppo.py +452 -0
  36. mlscratch/reinforcement/q_learning.py +352 -0
  37. mlscratch/reinforcement/sac.py +382 -0
  38. mlscratch/reinforcement/utils.py +594 -0
  39. mlscratch/supervised/__init__.py +76 -0
  40. mlscratch/supervised/_validation.py +50 -0
  41. mlscratch/supervised/adaboost.py +255 -0
  42. mlscratch/supervised/decision_tree.py +495 -0
  43. mlscratch/supervised/gradient_boosting.py +354 -0
  44. mlscratch/supervised/knn.py +234 -0
  45. mlscratch/supervised/lasso_regression.py +125 -0
  46. mlscratch/supervised/linear_models.py +459 -0
  47. mlscratch/supervised/linear_regression.py +197 -0
  48. mlscratch/supervised/logistic_regression.py +119 -0
  49. mlscratch/supervised/naive_bayes.py +113 -0
  50. mlscratch/supervised/random_forest.py +321 -0
  51. mlscratch/supervised/ridge_regression.py +93 -0
  52. mlscratch/supervised/svm.py +356 -0
  53. mlscratch/unsupervised/__init__.py +39 -0
  54. mlscratch/unsupervised/apriori.py +178 -0
  55. mlscratch/unsupervised/dbscan.py +141 -0
  56. mlscratch/unsupervised/gmm.py +204 -0
  57. mlscratch/unsupervised/hierarchical_clustering.py +137 -0
  58. mlscratch/unsupervised/ica.py +167 -0
  59. mlscratch/unsupervised/kmeans.py +135 -0
  60. mlscratch/unsupervised/kmedoids.py +133 -0
  61. mlscratch/unsupervised/pca.py +103 -0
  62. mlscratch/unsupervised/tsne.py +200 -0
  63. scratchkit-0.2.0.dist-info/METADATA +241 -0
  64. scratchkit-0.2.0.dist-info/RECORD +68 -0
  65. scratchkit-0.2.0.dist-info/WHEEL +5 -0
  66. scratchkit-0.2.0.dist-info/entry_points.txt +2 -0
  67. scratchkit-0.2.0.dist-info/licenses/LICENSE +201 -0
  68. scratchkit-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,277 @@
1
+ """
2
+ Hidden Markov Model (HMM)
3
+ ==========================
4
+ Discrete-observation HMM with the three fundamental algorithms:
5
+
6
+ 1. Forward algorithm — compute P(observations | model)
7
+ 2. Viterbi algorithm — find the most likely hidden state sequence
8
+ 3. Baum-Welch algorithm — EM to estimate transition, emission, and initial
9
+ state probabilities from observation sequences
10
+
11
+ Notation
12
+ --------
13
+ N : number of hidden states
14
+ M : number of distinct observation symbols
15
+ T : length of an observation sequence
16
+
17
+ Parameters
18
+ ----------
19
+ A : transition matrix (N, N), A[i,j] = P(s_t=j | s_{t-1}=i)
20
+ B : emission matrix (N, M), B[i,k] = P(o_t=k | s_t=i)
21
+ pi : initial state distribution (N,), pi[i] = P(s_1=i)
22
+
23
+ For numerical stability the forward/backward recursions use per-step scaling and Viterbi runs in log-space.
24
+ Only numpy is used.
25
+ """
26
+
27
+ import numpy as np
28
+
29
+
30
+ class HiddenMarkovModel:
31
+ """
32
+ Discrete Hidden Markov Model.
33
+
34
+ Parameters
35
+ ----------
36
+ n_states : int
37
+ Number of hidden states N.
38
+ n_observations : int
39
+ Size of the observation alphabet M.
40
+ random_state : int or None
41
+ Seed for parameter initialisation.
42
+ """
43
+
44
+ def __init__(
45
+ self,
46
+ n_states: int,
47
+ n_observations: int,
48
+ random_state: int | None = None,
49
+ ):
50
+ self.n_states = n_states
51
+ self.n_observations = n_observations
52
+ self.random_state = random_state
53
+
54
+ self.A = None # (N, N)
55
+ self.B = None # (N, M)
56
+ self.pi = None # (N,)
57
+
58
+ # ------------------------------------------------------------------
59
+ # Initialisation
60
+ # ------------------------------------------------------------------
61
+
62
+ def _init_params(self) -> None:
63
+ """Random row-stochastic initialisation."""
64
+ rng = np.random.default_rng(self.random_state)
65
+ N, M = self.n_states, self.n_observations
66
+
67
+ A_raw = rng.random((N, N)) + 0.1
68
+ self.A = A_raw / A_raw.sum(axis=1, keepdims=True)
69
+
70
+ B_raw = rng.random((N, M)) + 0.1
71
+ self.B = B_raw / B_raw.sum(axis=1, keepdims=True)
72
+
73
+ pi_raw = rng.random(N) + 0.1
74
+ self.pi = pi_raw / pi_raw.sum()
75
+
76
+ # ------------------------------------------------------------------
77
+ # Forward algorithm — O(N² T)
78
+ # ------------------------------------------------------------------
79
+
80
+ def forward(self, obs: np.ndarray) -> tuple:
81
+ """
82
+ Compute forward variable α_t(i) = P(o_1…o_t, s_t=i | model)
83
+ scaled for numerical stability.
84
+
85
+ Returns
86
+ -------
87
+ alpha : ndarray (T, N)
88
+ scales : ndarray (T,) — scaling coefficients
89
+ log_likelihood : float
90
+ """
91
+ T = len(obs)
92
+ N = self.n_states
93
+ alpha = np.zeros((T, N))
94
+ scales = np.zeros(T)
95
+
96
+ alpha[0] = self.pi * self.B[:, obs[0]]
97
+ scales[0] = alpha[0].sum()
98
+ alpha[0] /= scales[0] + 1e-300
99
+
100
+ for t in range(1, T):
101
+ alpha[t] = (alpha[t - 1] @ self.A) * self.B[:, obs[t]]
102
+ scales[t] = alpha[t].sum()
103
+ alpha[t] /= scales[t] + 1e-300
104
+
105
+ log_likelihood = np.sum(np.log(scales + 1e-300))
106
+ return alpha, scales, log_likelihood
107
+
108
+ # ------------------------------------------------------------------
109
+ # Backward algorithm
110
+ # ------------------------------------------------------------------
111
+
112
+ def backward(self, obs: np.ndarray, scales: np.ndarray) -> np.ndarray:
113
+ """
114
+ Compute backward variable β_t(i), scaled by the same coefficients.
115
+
116
+ Returns
117
+ -------
118
+ beta : ndarray (T, N)
119
+ """
120
+ T = len(obs)
121
+ N = self.n_states
122
+ beta = np.zeros((T, N))
123
+ beta[T - 1] = 1.0
124
+
125
+ for t in range(T - 2, -1, -1):
126
+ beta[t] = (self.A * self.B[:, obs[t + 1]][np.newaxis, :]) @ beta[t + 1]
127
+ beta[t] /= scales[t + 1] + 1e-300
128
+
129
+ return beta
130
+
131
+ # ------------------------------------------------------------------
132
+ # Viterbi algorithm
133
+ # ------------------------------------------------------------------
134
+
135
+ def viterbi(self, obs: np.ndarray) -> np.ndarray:
136
+ """
137
+ Find the most likely state sequence using the Viterbi algorithm.
138
+
139
+ Parameters
140
+ ----------
141
+ obs : ndarray of shape (T,) with integer observations in [0, M)
142
+
143
+ Returns
144
+ -------
145
+ states : ndarray of shape (T,)
146
+ """
147
+ T = len(obs)
148
+ N = self.n_states
149
+
150
+ log_A = np.log(self.A + 1e-300)
151
+ log_B = np.log(self.B + 1e-300)
152
+ log_pi = np.log(self.pi + 1e-300)
153
+
154
+ delta = np.zeros((T, N))
155
+ psi = np.zeros((T, N), dtype=int)
156
+
157
+ delta[0] = log_pi + log_B[:, obs[0]]
158
+
159
+ for t in range(1, T):
160
+ trans = delta[t - 1][:, np.newaxis] + log_A # (N, N)
161
+ psi[t] = np.argmax(trans, axis=0)
162
+ delta[t] = np.max(trans, axis=0) + log_B[:, obs[t]]
163
+
164
+ # Backtrack
165
+ states = np.zeros(T, dtype=int)
166
+ states[T - 1] = np.argmax(delta[T - 1])
167
+ for t in range(T - 2, -1, -1):
168
+ states[t] = psi[t + 1, states[t + 1]]
169
+
170
+ return states
171
+
172
+ # ------------------------------------------------------------------
173
+ # Baum-Welch (EM)
174
+ # ------------------------------------------------------------------
175
+
176
+ def fit(
177
+ self,
178
+ sequences: list,
179
+ n_iter: int = 100,
180
+ tol: float = 1e-4,
181
+ ) -> "HiddenMarkovModel":
182
+ """
183
+ Estimate HMM parameters from observation sequences using Baum-Welch.
184
+
185
+ Parameters
186
+ ----------
187
+ sequences : list of 1-D integer arrays
188
+ Each element is one observation sequence.
189
+ n_iter : int
190
+ Maximum EM iterations.
191
+ tol : float
192
+ Convergence tolerance on total log-likelihood change.
193
+
194
+ Returns
195
+ -------
196
+ self
197
+ """
198
+ self._init_params()
199
+ N = self.n_states
200
+ M = self.n_observations
201
+ prev_ll = -np.inf
202
+
203
+ for _ in range(n_iter):
204
+ # Accumulators
205
+ A_num = np.zeros((N, N))
206
+ B_num = np.zeros((N, M))
207
+ pi_num = np.zeros(N)
208
+ total_ll = 0.0
209
+
210
+ for obs in sequences:
211
+ T = len(obs)
212
+ alpha, scales, ll = self.forward(obs)
213
+ beta = self.backward(obs, scales)
214
+ total_ll += ll
215
+
216
+ # gamma_t(i) = P(s_t=i | obs, model)
217
+ gamma = alpha * beta
218
+ gamma /= gamma.sum(axis=1, keepdims=True) + 1e-300
219
+
220
+ # xi_t(i,j) = P(s_t=i, s_{t+1}=j | obs, model)
221
+ for t in range(T - 1):
222
+ xi = (
223
+ alpha[t][:, np.newaxis]
224
+ * self.A
225
+ * self.B[:, obs[t + 1]][np.newaxis, :]
226
+ * beta[t + 1][np.newaxis, :]
227
+ )
228
+ xi_sum = xi.sum()
229
+ xi /= xi_sum + 1e-300
230
+ A_num += xi
231
+
232
+ for t in range(T):
233
+ B_num[:, obs[t]] += gamma[t]
234
+
235
+ pi_num += gamma[0]
236
+
237
+ # M-step: normalise
238
+ self.A = A_num / (A_num.sum(axis=1, keepdims=True) + 1e-300)
239
+ self.B = B_num / (B_num.sum(axis=1, keepdims=True) + 1e-300)
240
+ self.pi = pi_num / (pi_num.sum() + 1e-300)
241
+
242
+ if abs(total_ll - prev_ll) < tol:
243
+ break
244
+ prev_ll = total_ll
245
+
246
+ return self
247
+
248
+ # ------------------------------------------------------------------
249
+ # Public helpers
250
+ # ------------------------------------------------------------------
251
+
252
+ def log_likelihood(self, obs: np.ndarray) -> float:
253
+ """Return log P(obs | model)."""
254
+ _, _, ll = self.forward(obs)
255
+ return ll
256
+
257
+ def sample(self, length: int, random_state=None) -> tuple:
258
+ """
259
+ Generate a synthetic observation sequence of the given length.
260
+
261
+ Returns
262
+ -------
263
+ states : ndarray (length,)
264
+ observations : ndarray (length,)
265
+ """
266
+ rng = np.random.default_rng(random_state)
267
+ states = np.zeros(length, dtype=int)
268
+ observations = np.zeros(length, dtype=int)
269
+
270
+ states[0] = rng.choice(self.n_states, p=self.pi)
271
+ observations[0] = rng.choice(self.n_observations, p=self.B[states[0]])
272
+
273
+ for t in range(1, length):
274
+ states[t] = rng.choice(self.n_states, p=self.A[states[t - 1]])
275
+ observations[t] = rng.choice(self.n_observations, p=self.B[states[t]])
276
+
277
+ return states, observations
@@ -0,0 +1,52 @@
1
+ """
2
+ mlscratch.bayesian
3
+ ==================
4
+ From-scratch implementations of Bayesian learning algorithms.
5
+ Drop these files alongside existing code in src/mlscratch/bayesian/.
6
+
7
+ Algorithms
8
+ ----------
9
+ GaussianNB – Gaussian Naive Bayes
10
+ MultinomialNB – Multinomial Naive Bayes
11
+ BernoulliNB – Bernoulli Naive Bayes
12
+ BayesianLinearRegression – Conjugate Gaussian prior regression
13
+ GaussianProcessRegressor – GP Regression (RBF, Matern52, Linear, Periodic)
14
+ RBFKernel – RBF / Squared-Exponential kernel
15
+ Matern52Kernel – Matern 5/2 kernel
16
+ LinearKernel – Linear kernel
17
+ PeriodicKernel – Periodic kernel
18
+ HiddenMarkovModel – Discrete HMM (forward-backward, Viterbi, Baum-Welch)
19
+ BayesianNeuralNetwork – BNN via mean-field variational inference
20
+ BayesianNetwork – Discrete DAG (variable elimination, sampling)
21
+ KalmanFilter – Linear Kalman Filter + RTS Smoother
22
+ """
23
+
24
+ from .naive_bayes import GaussianNB, MultinomialNB, BernoulliNB # noqa: F401
25
+ from .bayesian_linear_regression import BayesianLinearRegression # noqa: F401
26
+ from .gaussian_process import ( # noqa: F401
27
+ GaussianProcessRegressor,
28
+ RBFKernel,
29
+ Matern52Kernel,
30
+ LinearKernel,
31
+ PeriodicKernel,
32
+ )
33
+ from .hmm import HiddenMarkovModel # noqa: F401
34
+ from .bayesian_nn import BayesianNeuralNetwork # noqa: F401
35
+ from .bayesian_network import BayesianNetwork # noqa: F401
36
+ from .kalman_filter import KalmanFilter # noqa: F401
37
+
38
+ __all__ = [
39
+ "GaussianNB",
40
+ "MultinomialNB",
41
+ "BernoulliNB",
42
+ "BayesianLinearRegression",
43
+ "GaussianProcessRegressor",
44
+ "RBFKernel",
45
+ "Matern52Kernel",
46
+ "LinearKernel",
47
+ "PeriodicKernel",
48
+ "HiddenMarkovModel",
49
+ "BayesianNeuralNetwork",
50
+ "BayesianNetwork",
51
+ "KalmanFilter",
52
+ ]
@@ -0,0 +1,182 @@
1
+ """
2
+ Kalman Filter and RTS Smoother
3
+ ================================
4
+ A sequential Bayesian filter for linear-Gaussian state-space models.
5
+
6
+ State-space model
7
+ -----------------
8
+ x_t = F x_{t-1} + q_t, q_t ~ N(0, Q) (transition)
9
+ z_t = H x_t + r_t, r_t ~ N(0, R) (observation)
10
+
11
+ Kalman Filter (forward pass)
12
+ ----------------------------
13
+ Predict:
14
+ x̂_{t|t-1} = F x̂_{t-1|t-1}
15
+ P_{t|t-1} = F P_{t-1|t-1} F^T + Q
16
+
17
+ Update:
18
+ K_t = P_{t|t-1} H^T (H P_{t|t-1} H^T + R)^{-1}
19
+ x̂_{t|t} = x̂_{t|t-1} + K_t (z_t - H x̂_{t|t-1})
20
+ P_{t|t} = (I - K_t H) P_{t|t-1}
21
+
22
+ RTS Smoother (backward pass) — Rauch-Tung-Striebel
23
+ ---------------------------------------------------
24
+ Refines the filtered estimates using all future observations.
25
+
26
+ Only numpy is used.
27
+ """
28
+
29
+ import numpy as np
30
+
31
+
32
+ class KalmanFilter:
33
+ """
34
+ Linear Kalman Filter with optional RTS smoother.
35
+
36
+ Parameters
37
+ ----------
38
+ F : ndarray (state_dim, state_dim)
39
+ State transition matrix.
40
+ H : ndarray (obs_dim, state_dim)
41
+ Observation matrix.
42
+ Q : ndarray (state_dim, state_dim)
43
+ Process noise covariance.
44
+ R : ndarray (obs_dim, obs_dim)
45
+ Observation noise covariance.
46
+ x0 : ndarray (state_dim,) or None
47
+ Initial state estimate. Defaults to zeros.
48
+ P0 : ndarray (state_dim, state_dim) or None
49
+ Initial state covariance. Defaults to identity * 1e6.
50
+ """
51
+
52
+ def __init__(
53
+ self,
54
+ F: np.ndarray,
55
+ H: np.ndarray,
56
+ Q: np.ndarray,
57
+ R: np.ndarray,
58
+ x0: np.ndarray | None = None,
59
+ P0: np.ndarray | None = None,
60
+ ):
61
+ self.F = np.array(F, dtype=float)
62
+ self.H = np.array(H, dtype=float)
63
+ self.Q = np.array(Q, dtype=float)
64
+ self.R = np.array(R, dtype=float)
65
+
66
+ state_dim = self.F.shape[0]
67
+ self.x0 = np.array(x0, dtype=float) if x0 is not None else np.zeros(state_dim)
68
+ self.P0 = np.array(P0, dtype=float) if P0 is not None else np.eye(state_dim) * 1e6
69
+
70
+ # Stored after filter()
71
+ self.x_filt_ = None # (T, state_dim) filtered means
72
+ self.P_filt_ = None # (T, state_dim, state_dim) filtered covs
73
+ self.x_pred_ = None # (T, state_dim) predicted means
74
+ self.P_pred_ = None # (T, state_dim, state_dim) predicted covs
75
+ self.log_likelihood_ = None
76
+
77
+ # ------------------------------------------------------------------
78
+ # Kalman Filter
79
+ # ------------------------------------------------------------------
80
+
81
+ def filter(self, Z: np.ndarray) -> "KalmanFilter":
82
+ """
83
+ Run the Kalman filter over observation sequence Z.
84
+
85
+ Parameters
86
+ ----------
87
+ Z : ndarray of shape (T, obs_dim)
88
+
89
+ Returns
90
+ -------
91
+ self
92
+ """
93
+ T = len(Z)
94
+ state_dim = self.F.shape[0]
95
+ obs_dim = self.H.shape[0]
96
+ I = np.eye(state_dim)
97
+
98
+ x_filt = np.zeros((T, state_dim))
99
+ P_filt = np.zeros((T, state_dim, state_dim))
100
+ x_pred = np.zeros((T, state_dim))
101
+ P_pred = np.zeros((T, state_dim, state_dim))
102
+ log_lik = 0.0
103
+
104
+ x = self.x0.copy()
105
+ P = self.P0.copy()
106
+
107
+ for t in range(T):
108
+ # --- Predict ---
109
+ x_p = self.F @ x
110
+ P_p = self.F @ P @ self.F.T + self.Q
111
+
112
+ x_pred[t] = x_p
113
+ P_pred[t] = P_p
114
+
115
+ # --- Update ---
116
+ S = self.H @ P_p @ self.H.T + self.R # innovation covariance
117
+ K = P_p @ self.H.T @ np.linalg.inv(S) # Kalman gain
118
+
119
+ innovation = Z[t] - self.H @ x_p
120
+ x = x_p + K @ innovation
121
+ P = (I - K @ self.H) @ P_p
122
+
123
+ # Log-likelihood contribution
124
+ sign, log_det = np.linalg.slogdet(S)
125
+ if sign > 0:
126
+ log_lik -= 0.5 * (
127
+ obs_dim * np.log(2 * np.pi) + log_det
128
+ + innovation @ np.linalg.inv(S) @ innovation
129
+ )
130
+
131
+ x_filt[t] = x
132
+ P_filt[t] = P
133
+
134
+ self.x_filt_ = x_filt
135
+ self.P_filt_ = P_filt
136
+ self.x_pred_ = x_pred
137
+ self.P_pred_ = P_pred
138
+ self.log_likelihood_ = log_lik
139
+ return self
140
+
141
+ # ------------------------------------------------------------------
142
+ # RTS Smoother
143
+ # ------------------------------------------------------------------
144
+
145
+ def smooth(self) -> tuple:
146
+ """
147
+ Rauch-Tung-Striebel smoother. Must call filter() first.
148
+
149
+ Returns
150
+ -------
151
+ x_smooth : ndarray (T, state_dim)
152
+ P_smooth : ndarray (T, state_dim, state_dim)
153
+ """
154
+ if self.x_filt_ is None:
155
+ raise RuntimeError("Call filter() before smooth().")
156
+
157
+ T = len(self.x_filt_)
158
+ state_dim = self.F.shape[0]
159
+
160
+ x_smooth = self.x_filt_.copy()
161
+ P_smooth = self.P_filt_.copy()
162
+
163
+ for t in range(T - 2, -1, -1):
164
+ P_pred_inv = np.linalg.inv(self.P_pred_[t + 1])
165
+ G = self.P_filt_[t] @ self.F.T @ P_pred_inv # smoother gain
166
+ x_smooth[t] = self.x_filt_[t] + G @ (
167
+ x_smooth[t + 1] - self.x_pred_[t + 1]
168
+ )
169
+ P_smooth[t] = (
170
+ self.P_filt_[t]
171
+ + G @ (P_smooth[t + 1] - self.P_pred_[t + 1]) @ G.T
172
+ )
173
+
174
+ return x_smooth, P_smooth
175
+
176
+ # ------------------------------------------------------------------
177
+ # Convenience
178
+ # ------------------------------------------------------------------
179
+
180
+ def predict_obs(self, x_state: np.ndarray) -> np.ndarray:
181
+ """Return expected observation given state estimate."""
182
+ return self.H @ x_state