scratchkit 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. mlscratch/__init__.py +56 -0
  2. mlscratch/__main__.py +118 -0
  3. mlscratch/bayesian/__init__.py +53 -0
  4. mlscratch/bayesian/bayesian_linear_regression.py +171 -0
  5. mlscratch/bayesian/bayesian_network.py +248 -0
  6. mlscratch/bayesian/bayesian_nn.py +315 -0
  7. mlscratch/bayesian/gaussian_process.py +207 -0
  8. mlscratch/bayesian/hmm.py +277 -0
  9. mlscratch/bayesian/init.py +52 -0
  10. mlscratch/bayesian/kalman_filter.py +182 -0
  11. mlscratch/bayesian/naive_bayes.py +209 -0
  12. mlscratch/metrics/__init__.py +59 -0
  13. mlscratch/metrics/classification.py +365 -0
  14. mlscratch/metrics/regression.py +79 -0
  15. mlscratch/neural/__init__.py +121 -0
  16. mlscratch/neural/attention.py +420 -0
  17. mlscratch/neural/autoencoder.py +543 -0
  18. mlscratch/neural/boltzmann.py +231 -0
  19. mlscratch/neural/cnn.py +593 -0
  20. mlscratch/neural/cvnn.py +322 -0
  21. mlscratch/neural/gan.py +364 -0
  22. mlscratch/neural/hopfield.py +193 -0
  23. mlscratch/neural/perceptron.py +398 -0
  24. mlscratch/neural/rbf_network.py +230 -0
  25. mlscratch/neural/recurrent.py +569 -0
  26. mlscratch/preprocessing/__init__.py +38 -0
  27. mlscratch/preprocessing/encoders.py +140 -0
  28. mlscratch/preprocessing/model_selection.py +119 -0
  29. mlscratch/preprocessing/polynomial.py +105 -0
  30. mlscratch/preprocessing/scalers.py +220 -0
  31. mlscratch/py.typed +0 -0
  32. mlscratch/reinforcement/__init__.py +59 -0
  33. mlscratch/reinforcement/ddpg.py +363 -0
  34. mlscratch/reinforcement/dqn.py +319 -0
  35. mlscratch/reinforcement/ppo.py +452 -0
  36. mlscratch/reinforcement/q_learning.py +352 -0
  37. mlscratch/reinforcement/sac.py +382 -0
  38. mlscratch/reinforcement/utils.py +594 -0
  39. mlscratch/supervised/__init__.py +76 -0
  40. mlscratch/supervised/_validation.py +50 -0
  41. mlscratch/supervised/adaboost.py +255 -0
  42. mlscratch/supervised/decision_tree.py +495 -0
  43. mlscratch/supervised/gradient_boosting.py +354 -0
  44. mlscratch/supervised/knn.py +234 -0
  45. mlscratch/supervised/lasso_regression.py +125 -0
  46. mlscratch/supervised/linear_models.py +459 -0
  47. mlscratch/supervised/linear_regression.py +197 -0
  48. mlscratch/supervised/logistic_regression.py +119 -0
  49. mlscratch/supervised/naive_bayes.py +113 -0
  50. mlscratch/supervised/random_forest.py +321 -0
  51. mlscratch/supervised/ridge_regression.py +93 -0
  52. mlscratch/supervised/svm.py +356 -0
  53. mlscratch/unsupervised/__init__.py +39 -0
  54. mlscratch/unsupervised/apriori.py +178 -0
  55. mlscratch/unsupervised/dbscan.py +141 -0
  56. mlscratch/unsupervised/gmm.py +204 -0
  57. mlscratch/unsupervised/hierarchical_clustering.py +137 -0
  58. mlscratch/unsupervised/ica.py +167 -0
  59. mlscratch/unsupervised/kmeans.py +135 -0
  60. mlscratch/unsupervised/kmedoids.py +133 -0
  61. mlscratch/unsupervised/pca.py +103 -0
  62. mlscratch/unsupervised/tsne.py +200 -0
  63. scratchkit-0.2.0.dist-info/METADATA +241 -0
  64. scratchkit-0.2.0.dist-info/RECORD +68 -0
  65. scratchkit-0.2.0.dist-info/WHEEL +5 -0
  66. scratchkit-0.2.0.dist-info/entry_points.txt +2 -0
  67. scratchkit-0.2.0.dist-info/licenses/LICENSE +201 -0
  68. scratchkit-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,352 @@
1
+ """
2
+ Q-Learning
3
+ ===========
4
+ Tabular and linear-function-approximation variants of the classic
5
+ off-policy TD control algorithm (Watkins & Dayan, 1992).
6
+
7
+ Tabular Q-Learning
8
+ ------------------
9
+ Maintains a Q-table Q[s, a] and updates via:
10
+
11
+ Q(s,a) ← Q(s,a) + α [r + γ max_a' Q(s',a') - Q(s,a)]
12
+
13
+ Supports ε-greedy exploration with multiplicative (exponential) per-episode decay of ε down to a floor.
14
+
15
+ Linear Q-Learning (Linear Function Approximation)
16
+ --------------------------------------------------
17
+ Represents Q(s,a) = φ(s,a)^T w where φ is a hand-crafted feature
18
+ vector and w are learned weights — useful for larger state spaces.
19
+
20
+ All classes share the same select_action() / update() / train() API and expose
21
+ episode-level training via train_episode().
22
+
23
+ Only numpy and Python stdlib are used.
24
+ """
25
+
26
+ from __future__ import annotations
27
+ import numpy as np
28
+
29
+
30
+ # ============================================================
31
+ # Tabular Q-Learning
32
+ # ============================================================
33
+
34
class QLearning:
    """
    Tabular Q-Learning agent with ε-greedy exploration.

    One action-value estimate is stored per (state, action) pair and
    refined with the one-step off-policy TD rule

        Q(s,a) ← Q(s,a) + α [r + γ max_a' Q(s',a') - Q(s,a)]

    Parameters
    ----------
    n_states : int
        Number of rows of the Q-table.
    n_actions : int
        Number of columns of the Q-table.
    alpha : float
        Learning rate.
    gamma : float
        Discount factor.
    epsilon : float
        Initial exploration probability.
    epsilon_min : float
        Floor applied to epsilon after every decay step.
    epsilon_decay : float
        Factor epsilon is multiplied by at the end of each episode.
    random_state : int | None
        Seed handed to ``np.random.default_rng``.

    Attributes
    ----------
    Q : np.ndarray of shape (n_states, n_actions)
        Action-value table, initialised to zeros.
    episode_rewards_ : list[float]
        Total reward of every episode run through ``train_episode``.
    epsilons_ : list[float]
        Value of epsilon *after* the decay applied at the end of each
        episode.
    """

    def __init__(
        self,
        n_states: int,
        n_actions: int,
        alpha: float = 0.1,
        gamma: float = 0.99,
        epsilon: float = 1.0,
        epsilon_min: float = 0.01,
        epsilon_decay: float = 0.995,
        random_state: int | None = None,
    ):
        # Problem size
        self.n_states, self.n_actions = n_states, n_actions

        # Learning hyper-parameters
        self.alpha, self.gamma = alpha, gamma

        # Exploration schedule
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay

        self._rng = np.random.default_rng(random_state)

        # All action values start at zero
        self.Q: np.ndarray = np.zeros((n_states, n_actions))

        # Per-episode history
        self.episode_rewards_: list[float] = []
        self.epsilons_: list[float] = []

    # ------------------------------------------------------------------
    # Action selection
    # ------------------------------------------------------------------

    def select_action(self, state: int, greedy: bool = False) -> int:
        """
        Pick an action for ``state`` using the ε-greedy rule.

        Parameters
        ----------
        state : int
        greedy : bool
            When True, exploration is skipped entirely (and no random
            number is drawn).

        Returns
        -------
        action : int
        """
        explore = False
        if not greedy:
            # The uniform draw only happens on the non-greedy path.
            explore = self._rng.random() < self.epsilon
        if explore:
            return int(self._rng.integers(self.n_actions))
        return int(np.argmax(self.Q[state]))

    # ------------------------------------------------------------------
    # Single update step
    # ------------------------------------------------------------------

    def update(
        self,
        state: int,
        action: int,
        reward: float,
        next_state: int,
        done: bool,
    ) -> float:
        """
        Apply a single Q-learning backup for one transition.

        On terminal transitions (``done`` true) the target is the reward
        alone and ``next_state`` is not looked up.

        Returns
        -------
        td_error : float
            Target minus the previous estimate Q(state, action).
        """
        if done:
            target = reward
        else:
            target = reward + self.gamma * np.max(self.Q[next_state])
        delta = target - self.Q[state, action]
        self.Q[state, action] += self.alpha * delta
        return float(delta)

    # ------------------------------------------------------------------
    # Episode training
    # ------------------------------------------------------------------

    def train_episode(self, env) -> float:
        """
        Interact with ``env`` until it reports termination.

        Parameters
        ----------
        env : object with .reset() → int and .step(a) → (int, float, bool)

        Returns
        -------
        total_reward : float
            Sum of the rewards collected during the episode.
        """
        current = env.reset()
        episode_return = 0.0
        finished = False

        while not finished:
            chosen = self.select_action(current)
            successor, reward, finished = env.step(chosen)
            self.update(current, chosen, reward, successor, finished)
            episode_return += reward
            current = successor

        # Shrink epsilon once per episode, never below the floor
        decayed = self.epsilon * self.epsilon_decay
        self.epsilon = max(self.epsilon_min, decayed)

        self.episode_rewards_.append(episode_return)
        self.epsilons_.append(self.epsilon)
        return episode_return

    def train(self, env, n_episodes: int) -> "QLearning":
        """Run ``train_episode`` ``n_episodes`` times and return self."""
        for _episode in range(n_episodes):
            self.train_episode(env)
        return self

    # ------------------------------------------------------------------
    # Value / policy helpers
    # ------------------------------------------------------------------

    def value_function(self) -> np.ndarray:
        """State values: V(s) = max_a Q(s,a), one entry per state."""
        return np.max(self.Q, axis=1)

    def policy(self) -> np.ndarray:
        """Greedy action per state: π(s) = argmax_a Q(s,a)."""
        return np.argmax(self.Q, axis=1)
157
+
158
+
159
+ # ============================================================
160
+ # Double Q-Learning
161
+ # ============================================================
162
+
163
class DoubleQLearning:
    """
    Double Q-Learning (van Hasselt, 2010).

    Maintains two independent Q-tables Q_A and Q_B.
    On each step, one is selected at random for the update, using the
    other to evaluate the greedy action — removing maximisation bias.

    Same API as QLearning.

    Parameters
    ----------
    n_states : int
        Number of rows of each Q-table.
    n_actions : int
        Number of columns of each Q-table.
    alpha : float
        Learning rate.
    gamma : float
        Discount factor.
    epsilon : float
        Initial exploration probability.
    epsilon_min : float
        Floor applied to epsilon after every decay step.
    epsilon_decay : float
        Factor epsilon is multiplied by at the end of each episode.
    random_state : int | None
        Seed handed to ``np.random.default_rng``.

    Attributes
    ----------
    Q_A, Q_B : np.ndarray of shape (n_states, n_actions)
        The two action-value tables, initialised to zeros.
    episode_rewards_ : list[float]
        Total reward of every episode run through ``train_episode``.
    epsilons_ : list[float]
        Value of epsilon after the decay applied at the end of each
        episode.
    """

    def __init__(
        self,
        n_states: int,
        n_actions: int,
        alpha: float = 0.1,
        gamma: float = 0.99,
        epsilon: float = 1.0,
        epsilon_min: float = 0.01,
        epsilon_decay: float = 0.995,
        random_state: int | None = None,
    ):
        self.n_states = n_states
        self.n_actions = n_actions
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self._rng = np.random.default_rng(random_state)

        self.Q_A: np.ndarray = np.zeros((n_states, n_actions))
        self.Q_B: np.ndarray = np.zeros((n_states, n_actions))

        self.episode_rewards_: list[float] = []
        self.epsilons_: list[float] = []

    @property
    def Q(self) -> np.ndarray:
        """Combined Q estimate (average of both tables)."""
        return (self.Q_A + self.Q_B) / 2.0

    def select_action(self, state: int, greedy: bool = False) -> int:
        """
        ε-greedy action selection on the averaged Q estimate.

        Parameters
        ----------
        state : int
        greedy : bool
            When True, exploration is skipped (no random number is drawn).

        Returns
        -------
        action : int
        """
        if not greedy and self._rng.random() < self.epsilon:
            return int(self._rng.integers(self.n_actions))
        # Average only the requested row instead of materialising the
        # whole combined table on every step; the values are identical
        # to ``self.Q[state]``.
        combined_row = (self.Q_A[state] + self.Q_B[state]) / 2.0
        return int(np.argmax(combined_row))

    def update(
        self,
        state: int,
        action: int,
        reward: float,
        next_state: int,
        done: bool,
    ) -> float:
        """
        Apply one Double Q-learning update.

        A fair coin picks the table that learns; the greedy next action
        is chosen with that table and evaluated with the other one.
        On terminal transitions (``done`` true) the target is the reward
        alone and ``next_state`` is never looked up, so a terminal state
        outside the table is accepted — matching ``QLearning.update``.

        Returns
        -------
        td_error : float
            Target minus the previous estimate of the updated table.
        """
        if self._rng.random() < 0.5:
            # Update A, evaluate with B
            learner, evaluator = self.Q_A, self.Q_B
        else:
            # Update B, evaluate with A
            learner, evaluator = self.Q_B, self.Q_A

        if done:
            target = reward
        else:
            a_star = int(np.argmax(learner[next_state]))
            target = reward + self.gamma * evaluator[next_state, a_star]

        td_error = target - learner[state, action]
        # ``learner`` aliases Q_A or Q_B, so this writes into the table.
        learner[state, action] += self.alpha * td_error
        return float(td_error)

    def train_episode(self, env) -> float:
        """
        Run one full episode and return total reward.

        Parameters
        ----------
        env : object with .reset() → int and .step(a) → (int, float, bool)
        """
        state = env.reset()
        total_reward = 0.0
        done = False
        while not done:
            action = self.select_action(state)
            next_state, reward, done = env.step(action)
            self.update(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward

        # Decay epsilon once per episode, never below the floor
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
        self.episode_rewards_.append(total_reward)
        self.epsilons_.append(self.epsilon)
        return total_reward

    def train(self, env, n_episodes: int) -> "DoubleQLearning":
        """Train for n_episodes episodes and return self."""
        for _ in range(n_episodes):
            self.train_episode(env)
        return self

    def value_function(self) -> np.ndarray:
        """V(s) = max_a Q(s,a) on the averaged table, for all states."""
        return self.Q.max(axis=1)

    def policy(self) -> np.ndarray:
        """Greedy policy on the averaged table: π(s) = argmax_a Q(s,a)."""
        return self.Q.argmax(axis=1)
257
+
258
+
259
+ # ============================================================
260
+ # Linear Function Approximation Q-Learning
261
+ # ============================================================
262
+
263
class LinearQLearning:
    """
    Q-Learning with linear function approximation.

    Q(s, a) ≈ φ(s, a)^T w

    Feature construction: one-hot state × one-hot action tiling.
    Works with integer state/action spaces.

    Parameters
    ----------
    n_states : int
        Number of discrete states.
    n_actions : int
        Number of discrete actions.
    alpha : float
        Learning rate (step size of the weight update).
    gamma : float
        Discount factor.
    epsilon : float
        Initial exploration probability.
    epsilon_min : float
        Floor applied to epsilon after every decay step.
    epsilon_decay : float
        Factor epsilon is multiplied by at the end of each episode.
    random_state : int | None
        Seed handed to ``np.random.default_rng``.

    Attributes
    ----------
    n_features : int
        Length of the feature / weight vector (n_states * n_actions).
    w : np.ndarray of shape (n_features,)
        Learned weights, initialised to zeros.
    episode_rewards_ : list[float]
        Total reward of every episode run through ``train_episode``.
    epsilons_ : list[float]
        Value of epsilon after the decay applied at the end of each
        episode.
    """

    def __init__(
        self,
        n_states: int,
        n_actions: int,
        alpha: float = 0.01,
        gamma: float = 0.99,
        epsilon: float = 1.0,
        epsilon_min: float = 0.01,
        epsilon_decay: float = 0.995,
        random_state: int | None = None,
    ):
        self.n_states = n_states
        self.n_actions = n_actions
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self._rng = np.random.default_rng(random_state)

        self.n_features = n_states * n_actions
        self.w = np.zeros(self.n_features)

        self.episode_rewards_: list[float] = []
        # Tracked for parity with the tabular agents in this module.
        self.epsilons_: list[float] = []

    def _features(self, state: int, action: int) -> np.ndarray:
        """One-hot feature vector for (state, action) pair."""
        phi = np.zeros(self.n_features)
        phi[state * self.n_actions + action] = 1.0
        return phi

    def _q(self, state: int, action: int) -> float:
        """Approximate action value: dot product of weights and features."""
        return float(self.w @ self._features(state, action))

    def select_action(self, state: int, greedy: bool = False) -> int:
        """
        ε-greedy action selection on the approximated Q values.

        Parameters
        ----------
        state : int
        greedy : bool
            When True, exploration is skipped (no random number is drawn).

        Returns
        -------
        action : int
        """
        if not greedy and self._rng.random() < self.epsilon:
            return int(self._rng.integers(self.n_actions))
        q_vals = [self._q(state, a) for a in range(self.n_actions)]
        return int(np.argmax(q_vals))

    def update(self, state, action, reward, next_state, done) -> float:
        """
        Apply one semi-gradient Q-learning step to the weights.

        On terminal transitions (``done`` true) the target is the reward
        alone and ``next_state`` is never featurised, so a terminal state
        outside the feature range is accepted — matching
        ``QLearning.update``.

        Returns
        -------
        td_error : float
            Target minus the previous estimate Q(state, action).
        """
        if done:
            target = reward
        else:
            q_next = max(self._q(next_state, a) for a in range(self.n_actions))
            target = reward + self.gamma * q_next
        td_error = target - self._q(state, action)
        # For a linear model the gradient of Q w.r.t. w is the feature vector.
        self.w += self.alpha * td_error * self._features(state, action)
        return float(td_error)

    def train_episode(self, env) -> float:
        """
        Run one full episode and return total reward.

        Parameters
        ----------
        env : object with .reset() → int and .step(a) → (int, float, bool)
        """
        state = env.reset()
        total_reward = 0.0
        done = False
        while not done:
            action = self.select_action(state)
            next_state, reward, done = env.step(action)
            self.update(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward

        # Decay epsilon once per episode, never below the floor
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
        self.episode_rewards_.append(total_reward)
        self.epsilons_.append(self.epsilon)
        return total_reward

    def train(self, env, n_episodes: int) -> "LinearQLearning":
        """Train for n_episodes episodes and return self."""
        for _ in range(n_episodes):
            self.train_episode(env)
        return self

    @property
    def Q(self) -> np.ndarray:
        """Recover Q-table from weight vector."""
        Q = np.zeros((self.n_states, self.n_actions))
        for s in range(self.n_states):
            for a in range(self.n_actions):
                Q[s, a] = self._q(s, a)
        return Q

    def value_function(self) -> np.ndarray:
        """V(s) = max_a Q(s,a) for all states."""
        return self.Q.max(axis=1)

    def policy(self) -> np.ndarray:
        """Greedy policy: π(s) = argmax_a Q(s,a)."""
        return self.Q.argmax(axis=1)