scratchkit 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. mlscratch/__init__.py +56 -0
  2. mlscratch/__main__.py +118 -0
  3. mlscratch/bayesian/__init__.py +53 -0
  4. mlscratch/bayesian/bayesian_linear_regression.py +171 -0
  5. mlscratch/bayesian/bayesian_network.py +248 -0
  6. mlscratch/bayesian/bayesian_nn.py +315 -0
  7. mlscratch/bayesian/gaussian_process.py +207 -0
  8. mlscratch/bayesian/hmm.py +277 -0
  9. mlscratch/bayesian/init.py +52 -0
  10. mlscratch/bayesian/kalman_filter.py +182 -0
  11. mlscratch/bayesian/naive_bayes.py +209 -0
  12. mlscratch/metrics/__init__.py +59 -0
  13. mlscratch/metrics/classification.py +365 -0
  14. mlscratch/metrics/regression.py +79 -0
  15. mlscratch/neural/__init__.py +121 -0
  16. mlscratch/neural/attention.py +420 -0
  17. mlscratch/neural/autoencoder.py +543 -0
  18. mlscratch/neural/boltzmann.py +231 -0
  19. mlscratch/neural/cnn.py +593 -0
  20. mlscratch/neural/cvnn.py +322 -0
  21. mlscratch/neural/gan.py +364 -0
  22. mlscratch/neural/hopfield.py +193 -0
  23. mlscratch/neural/perceptron.py +398 -0
  24. mlscratch/neural/rbf_network.py +230 -0
  25. mlscratch/neural/recurrent.py +569 -0
  26. mlscratch/preprocessing/__init__.py +38 -0
  27. mlscratch/preprocessing/encoders.py +140 -0
  28. mlscratch/preprocessing/model_selection.py +119 -0
  29. mlscratch/preprocessing/polynomial.py +105 -0
  30. mlscratch/preprocessing/scalers.py +220 -0
  31. mlscratch/py.typed +0 -0
  32. mlscratch/reinforcement/__init__.py +59 -0
  33. mlscratch/reinforcement/ddpg.py +363 -0
  34. mlscratch/reinforcement/dqn.py +319 -0
  35. mlscratch/reinforcement/ppo.py +452 -0
  36. mlscratch/reinforcement/q_learning.py +352 -0
  37. mlscratch/reinforcement/sac.py +382 -0
  38. mlscratch/reinforcement/utils.py +594 -0
  39. mlscratch/supervised/__init__.py +76 -0
  40. mlscratch/supervised/_validation.py +50 -0
  41. mlscratch/supervised/adaboost.py +255 -0
  42. mlscratch/supervised/decision_tree.py +495 -0
  43. mlscratch/supervised/gradient_boosting.py +354 -0
  44. mlscratch/supervised/knn.py +234 -0
  45. mlscratch/supervised/lasso_regression.py +125 -0
  46. mlscratch/supervised/linear_models.py +459 -0
  47. mlscratch/supervised/linear_regression.py +197 -0
  48. mlscratch/supervised/logistic_regression.py +119 -0
  49. mlscratch/supervised/naive_bayes.py +113 -0
  50. mlscratch/supervised/random_forest.py +321 -0
  51. mlscratch/supervised/ridge_regression.py +93 -0
  52. mlscratch/supervised/svm.py +356 -0
  53. mlscratch/unsupervised/__init__.py +39 -0
  54. mlscratch/unsupervised/apriori.py +178 -0
  55. mlscratch/unsupervised/dbscan.py +141 -0
  56. mlscratch/unsupervised/gmm.py +204 -0
  57. mlscratch/unsupervised/hierarchical_clustering.py +137 -0
  58. mlscratch/unsupervised/ica.py +167 -0
  59. mlscratch/unsupervised/kmeans.py +135 -0
  60. mlscratch/unsupervised/kmedoids.py +133 -0
  61. mlscratch/unsupervised/pca.py +103 -0
  62. mlscratch/unsupervised/tsne.py +200 -0
  63. scratchkit-0.2.0.dist-info/METADATA +241 -0
  64. scratchkit-0.2.0.dist-info/RECORD +68 -0
  65. scratchkit-0.2.0.dist-info/WHEEL +5 -0
  66. scratchkit-0.2.0.dist-info/entry_points.txt +2 -0
  67. scratchkit-0.2.0.dist-info/licenses/LICENSE +201 -0
  68. scratchkit-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,594 @@
1
+ """
2
+ mlscratch.reinforcement.utils
3
+ ==============================
4
+ Shared infrastructure used by all RL algorithms:
5
+
6
+ - GridWorld : discrete 4-action environment (tabular RL)
7
+ - ContinuousEnv : continuous state/action environment for deep RL
8
+ - ReplayBuffer : experience replay with uniform random sampling
9
+ - PrioritizedReplayBuffer : sum-tree prioritised experience replay (DQN+)
10
+ - MLP : multi-layer perceptron (forward + backward)
11
+ - OrnsteinUhlenbeckNoise : temporally-correlated exploration noise (DDPG)
12
+ - GaussianNoise : i.i.d. Gaussian exploration noise
13
+
14
+ Only numpy and Python stdlib are used throughout.
15
+ """
16
+
17
+ from __future__ import annotations
18
+ import numpy as np
19
+ from collections import deque
20
+
21
+
22
+ # ============================================================
23
+ # Environments
24
+ # ============================================================
25
+
26
class GridWorld:
    """
    Deterministic square grid environment for tabular RL.

    Default 4×4 layout::

        S . . .
        . # . .
        . . . .
        . . . G

    The agent starts at (0, 0); the goal sits in the bottom-right corner and
    a single pit sits at ``pit`` (default (1, 1)).

    Actions : 0=up, 1=down, 2=left, 3=right (moves into a wall are clamped).
    Rewards : +10 on reaching the goal, -10 on falling into the pit,
              -0.1 for every other step.
    An episode is flagged as done when the goal or the pit is reached.

    Parameters
    ----------
    size : int    side length of the grid
    pit  : tuple  (row, col) of the pit cell
    """

    ACTIONS = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}
    N_ACTIONS = 4

    def __init__(self, size: int = 4, pit: tuple = (1, 1)):
        corner = size - 1
        self.size = size
        self.goal = (corner, corner)
        self.pit = pit
        # Agent position as (row, col); stays None until reset() is called.
        self._state: tuple | None = None

    # ------------------------------------------------------------------
    @property
    def n_states(self) -> int:
        """Number of cells in the grid."""
        side = self.size
        return side * side

    def _encode(self, pos: tuple) -> int:
        """Map a (row, col) pair to its row-major integer index."""
        return pos[0] * self.size + pos[1]

    def reset(self) -> int:
        """Put the agent back on the start cell and return its state index."""
        self._state = (0, 0)
        return self._encode(self._state)

    def step(self, action: int) -> tuple[int, float, bool]:
        """
        Apply ``action`` and return ``(state_index, reward, done)``.

        The target cell is clamped to the grid, so walking into a wall
        leaves the agent in place (and still costs the step penalty).
        """
        upper = self.size - 1

        def clamp(value):
            return max(0, min(upper, value))

        row, col = self._state
        d_row, d_col = self.ACTIONS[action]
        self._state = (clamp(row + d_row), clamp(col + d_col))
        code = self._encode(self._state)

        # Goal is checked before the pit, so it wins if both coincide.
        if self._state == self.goal:
            reward, done = 10.0, True
        elif self._state == self.pit:
            reward, done = -10.0, True
        else:
            reward, done = -0.1, False
        return code, reward, done

    def render(self) -> str:
        """Return a text picture of the grid (A=agent, G=goal, #=pit)."""

        def glyph(cell):
            # The agent marker takes precedence over goal / pit markers.
            if cell == self._state:
                return "A"
            if cell == self.goal:
                return "G"
            if cell == self.pit:
                return "#"
            return "."

        cells = range(self.size)
        return "\n".join(
            " ".join(glyph((r, c)) for c in cells) for r in cells
        )
93
+
94
+
95
class ContinuousEnv:
    """
    Minimal continuous-control task: a 1-D point mass with drag.

    State  : [position, velocity]
    Action : a force, clipped to [ACTION_LOW, ACTION_HIGH] = [-1, 1]
    Reward : -(position² + 0.1·velocity² + 0.001·force²)

    The objective is to bring the mass to rest at the origin. Episodes are
    flagged done once ``max_steps`` steps have been taken.

    Intended as a dependency-free stand-in for MuJoCo-style environments
    when exercising deep RL algorithms.
    """

    STATE_DIM = 2
    ACTION_DIM = 1
    ACTION_LOW = -1.0
    ACTION_HIGH = 1.0

    def __init__(self, max_steps: int = 200):
        self.max_steps = max_steps
        self._state: np.ndarray | None = None  # set by reset()
        self._t: int = 0                       # steps taken this episode

    def reset(self, rng: np.random.Generator | None = None) -> np.ndarray:
        """
        Start a new episode and return a copy of the initial state.

        Position is drawn uniformly from [-0.5, 0.5) and velocity from
        [-0.2, 0.2). A fresh default generator is used when ``rng`` is
        not supplied.
        """
        if not rng:
            rng = np.random.default_rng()
        self._t = 0
        self._state = rng.uniform([-0.5, -0.2], [0.5, 0.2])
        return self._state.copy()

    def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool]:
        """
        Advance the simulation by one step.

        Only the first element of ``action`` is used. Returns
        ``(next_state_copy, reward, done)``.
        """
        first = np.asarray(action).ravel()[0]
        force = float(np.clip(first, self.ACTION_LOW, self.ACTION_HIGH))

        position, velocity = self._state
        # Euler step with unit mass, drag coefficient 0.1 and dt = 0.05.
        # Position is advanced with the already-updated velocity.
        velocity = velocity + 0.05 * (force - 0.1 * velocity)
        position = position + 0.05 * velocity
        # Keep the state inside the box [-2, 2]².
        position = float(np.clip(position, -2.0, 2.0))
        velocity = float(np.clip(velocity, -2.0, 2.0))

        self._state = np.array([position, velocity])
        self._t += 1

        cost = position**2 + 0.1 * velocity**2 + 0.001 * force**2
        finished = self._t >= self.max_steps
        return self._state.copy(), -cost, finished

    @property
    def state_dim(self) -> int:
        """Dimensionality of the state vector."""
        return self.STATE_DIM

    @property
    def action_dim(self) -> int:
        """Dimensionality of the action vector."""
        return self.ACTION_DIM
148
+
149
+
150
class DiscreteEnv:
    """
    ContinuousEnv exposed through a finite action set (for DQN testing).

    Action index ``k`` is translated to the force ``DISCRETE_ACTIONS[k]``,
    i.e. one of {-1.0, -0.5, 0.0, +0.5, +1.0}, and forwarded to the wrapped
    continuous environment.
    """

    DISCRETE_ACTIONS = np.array([-1.0, -0.5, 0.0, 0.5, 1.0])
    N_ACTIONS = 5
    STATE_DIM = 2

    def __init__(self, max_steps: int = 200):
        # All dynamics are delegated to the wrapped continuous environment.
        self._env = ContinuousEnv(max_steps)

    def reset(self, rng=None) -> np.ndarray:
        """Reset the wrapped environment and return its initial state."""
        initial_state = self._env.reset(rng)
        return initial_state

    def step(self, action_idx: int) -> tuple[np.ndarray, float, bool]:
        """Look up the force for ``action_idx`` and step the wrapped env."""
        force = self.DISCRETE_ACTIONS[action_idx]
        wrapped_action = np.array([force])
        return self._env.step(wrapped_action)

    @property
    def state_dim(self) -> int:
        """Dimensionality of the state vector."""
        return self.STATE_DIM
173
+
174
+
175
+ # ============================================================
176
+ # Replay Buffers
177
+ # ============================================================
178
+
179
class ReplayBuffer:
    """
    Fixed-capacity FIFO store of transitions for off-policy learning.

    Each entry is a ``(state, action, reward, next_state, done)`` tuple.
    Once ``capacity`` entries are held, pushing a new one discards the
    oldest. Mini-batches are drawn uniformly at random, without priorities.
    """

    def __init__(self, capacity: int):
        self.capacity = capacity
        # A bounded deque drops the oldest transition automatically.
        self._buf: deque = deque(maxlen=capacity)

    def push(
        self,
        state: np.ndarray,
        action,
        reward: float,
        next_state: np.ndarray,
        done: bool,
    ) -> None:
        """Store one transition; arrays are copied as float32, scalars as float."""
        transition = (
            np.array(state, dtype=np.float32),
            np.array(action, dtype=np.float32),
            float(reward),
            np.array(next_state, dtype=np.float32),
            float(done),
        )
        self._buf.append(transition)

    def sample(self, batch_size: int, rng: np.random.Generator | None = None
               ) -> tuple:
        """
        Draw ``batch_size`` distinct transitions uniformly at random.

        Returns ``(states, actions, rewards, next_states, dones)`` where
        states / actions / next_states are stacked along a new leading axis
        and rewards / dones are float32 vectors. Sampling is without
        replacement, so ``batch_size`` may not exceed ``len(self)``.
        """
        if not rng:
            rng = np.random.default_rng()
        picks = rng.choice(len(self._buf), size=batch_size, replace=False)

        # Transpose the list of transitions into per-field columns.
        columns = tuple(zip(*(self._buf[i] for i in picks)))
        states, actions, rewards, next_states, dones = columns

        stacked_states = np.stack(states)
        stacked_actions = np.stack(actions)
        reward_vec = np.array(rewards, dtype=np.float32)
        stacked_next = np.stack(next_states)
        done_vec = np.array(dones, dtype=np.float32)
        return stacked_states, stacked_actions, reward_vec, stacked_next, done_vec

    def __len__(self) -> int:
        """Number of transitions currently stored."""
        return len(self._buf)
223
+
224
+
225
+ class _SumTree:
226
+ """Binary sum tree for O(log n) priority sampling."""
227
+
228
+ def __init__(self, capacity: int):
229
+ self.capacity = capacity
230
+ self.tree = np.zeros(2 * capacity)
231
+ self.data: list = [None] * capacity
232
+ self._ptr = 0
233
+ self._size = 0
234
+
235
+ def _propagate(self, idx: int, delta: float) -> None:
236
+ parent = (idx - 1) // 2
237
+ self.tree[parent] += delta
238
+ if parent != 0:
239
+ self._propagate(parent, delta)
240
+
241
+ def update(self, idx: int, priority: float) -> None:
242
+ leaf = idx + self.capacity - 1
243
+ delta = priority - self.tree[leaf]
244
+ self.tree[leaf] = priority
245
+ self._propagate(leaf, delta)
246
+
247
+ def add(self, priority: float, data) -> None:
248
+ self.data[self._ptr] = data
249
+ self.update(self._ptr, priority)
250
+ self._ptr = (self._ptr + 1) % self.capacity
251
+ self._size = min(self._size + 1, self.capacity)
252
+
253
+ def _retrieve(self, idx: int, s: float) -> int:
254
+ left = 2 * idx + 1
255
+ right = 2 * idx + 2
256
+ if left >= len(self.tree):
257
+ return idx
258
+ if s <= self.tree[left]:
259
+ return self._retrieve(left, s)
260
+ return self._retrieve(right, s - self.tree[left])
261
+
262
+ def get(self, s: float) -> tuple[int, float, object]:
263
+ leaf = self._retrieve(0, s)
264
+ data_idx = leaf - self.capacity + 1
265
+ return data_idx, self.tree[leaf], self.data[data_idx]
266
+
267
+ @property
268
+ def total(self) -> float:
269
+ return float(self.tree[0])
270
+
271
+ def __len__(self) -> int:
272
+ return self._size
273
+
274
+
275
class PrioritizedReplayBuffer:
    """
    Proportional Prioritised Experience Replay (Schaul et al., 2015).

    Transitions are kept in a sum tree and drawn with probability
    proportional to their stored priority. Importance-sampling (IS) weights
    are returned alongside each batch so the learner can correct for the
    non-uniform sampling.

    Parameters
    ----------
    capacity       : int    maximum number of stored transitions
    alpha          : float  priority exponent (0 = uniform, 1 = full priority)
    beta           : float  IS-weight exponent (0 = no correction, 1 = full)
    beta_increment : float  amount added to beta (capped at 1) per sample call
    eps            : float  small constant added to |TD-error| for stability
    """

    def __init__(
        self,
        capacity: int,
        alpha: float = 0.6,
        beta: float = 0.4,
        beta_increment: float = 1e-4,
        eps: float = 1e-5,
    ):
        self.capacity = capacity
        self.alpha = alpha
        self.beta = beta
        self.beta_increment = beta_increment
        self.eps = eps
        self._tree = _SumTree(capacity)

    def push(self, state, action, reward, next_state, done) -> None:
        """
        Store one transition with the largest priority currently in the tree.

        Giving new transitions the current maximum priority (1.0 when the
        buffer is empty or all priorities are zero) makes them likely to be
        replayed before a TD-error has been recorded for them.
        """
        max_p = 1.0
        if self._tree._size > 0:
            # Leaves of occupied slots form a contiguous run in the tree array.
            leaf_start = self._tree.capacity - 1
            leaf_end = leaf_start + self._tree._size
            max_p = float(self._tree.tree[leaf_start:leaf_end].max())
            if max_p == 0:
                max_p = 1.0
        self._tree.add(max_p, (
            np.array(state, dtype=np.float32),
            np.array(action, dtype=np.float32),
            float(reward),
            np.array(next_state, dtype=np.float32),
            float(done),
        ))

    def sample(self, batch_size: int, rng: np.random.Generator | None = None
               ) -> tuple:
        """
        Draw a prioritised mini-batch.

        The total priority mass is cut into ``batch_size`` equal segments and
        one value is drawn uniformly from each, so a batch spans the whole
        priority range.

        Returns
        -------
        (states, actions, rewards, next_states, dones, weights, idxs)
            ``weights`` are IS weights normalised by their maximum;
            ``idxs`` are the slot indices to pass to ``update_priorities``.

        Raises
        ------
        ValueError   if the buffer is empty or ``batch_size`` is not positive
        """
        n = len(self._tree)
        # Fail loudly instead of dividing by zero / producing NaN weights.
        if n == 0:
            raise ValueError("cannot sample from an empty buffer")
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        rng = rng or np.random.default_rng()
        total = self._tree.total
        segment = total / batch_size

        idxs, priorities, transitions = [], [], []
        for i in range(batch_size):
            s = rng.uniform(segment * i, segment * (i + 1))
            idx, p, data = self._tree.get(s)
            idxs.append(idx)
            priorities.append(p)
            transitions.append(data)

        # IS weights: w_i = (N * P(i)) ** -beta, scaled so that max(w) == 1.
        probs = np.array(priorities) / total
        weights = (n * probs) ** (-self.beta)
        weights /= weights.max()
        self.beta = min(1.0, self.beta + self.beta_increment)

        states, actions, rewards, next_states, dones = zip(*transitions)
        return (
            np.stack(states),
            np.stack(actions),
            np.array(rewards, dtype=np.float32),
            np.stack(next_states),
            np.array(dones, dtype=np.float32),
            np.array(weights, dtype=np.float32),
            np.array(idxs),
        )

    def update_priorities(self, idxs: np.ndarray, td_errors: np.ndarray) -> None:
        """Set the priority of each slot in ``idxs`` to (|td_error| + eps) ** alpha."""
        priorities = (np.abs(td_errors) + self.eps) ** self.alpha
        for idx, p in zip(idxs, priorities):
            self._tree.update(int(idx), float(p))

    def __len__(self) -> int:
        """Number of transitions currently stored."""
        return len(self._tree)
358
+
359
+
360
+ # ============================================================
361
+ # Neural Network (pure numpy MLP with backprop)
362
+ # ============================================================
363
+
364
+ def _relu(x): return np.maximum(0.0, x)
365
+ def _relu_d(x): return (x > 0).astype(float)
366
+ def _tanh(x): return np.tanh(x)
367
+ def _tanh_d(x): return 1.0 - np.tanh(x) ** 2
368
+ def _sigmoid(x): return 1.0 / (1.0 + np.exp(-np.clip(x, -500, 500)))
369
+ def _linear(x): return x
370
+ def _linear_d(x): return np.ones_like(x)
371
+
372
+
373
+ class MLP:
374
+ """
375
+ Multi-layer perceptron with configurable architecture.
376
+
377
+ Supports:
378
+ - Arbitrary depth / width
379
+ - ReLU hidden activations, configurable output activation
380
+ - Mini-batch gradient descent with Adam optimiser
381
+ - Soft / hard target-network parameter copy
382
+
383
+ Parameters
384
+ ----------
385
+ layer_sizes : list[int] e.g. [state_dim, 256, 256, action_dim]
386
+ output_activation : str 'linear' | 'tanh' | 'sigmoid'
387
+ lr : float
388
+ """
389
+
390
+ def __init__(
391
+ self,
392
+ layer_sizes: list[int],
393
+ output_activation: str = "linear",
394
+ lr: float = 1e-3,
395
+ random_state: int | None = None,
396
+ ):
397
+ rng = np.random.default_rng(random_state)
398
+ self.layer_sizes = layer_sizes
399
+ self.lr = lr
400
+
401
+ # Weight initialisation (He for ReLU)
402
+ self.W: list[np.ndarray] = []
403
+ self.b: list[np.ndarray] = []
404
+ for i in range(len(layer_sizes) - 1):
405
+ fan_in = layer_sizes[i]
406
+ scale = np.sqrt(2.0 / fan_in)
407
+ self.W.append(rng.normal(0, scale, (fan_in, layer_sizes[i + 1])))
408
+ self.b.append(np.zeros(layer_sizes[i + 1]))
409
+
410
+ # Adam moments
411
+ self.mW = [np.zeros_like(w) for w in self.W]
412
+ self.vW = [np.zeros_like(w) for w in self.W]
413
+ self.mb = [np.zeros_like(b) for b in self.b]
414
+ self.vb = [np.zeros_like(b) for b in self.b]
415
+ self._t = 0 # Adam time step
416
+
417
+ out_acts = {
418
+ "linear": (_linear, _linear_d),
419
+ "tanh": (_tanh, _tanh_d),
420
+ "sigmoid": (_sigmoid, None),
421
+ }
422
+ self._out_act, self._out_act_d = out_acts[output_activation]
423
+ self._hidden_act, self._hidden_act_d = _relu, _relu_d
424
+
425
+ # Cache for backprop
426
+ self._cache: dict = {}
427
+
428
+ def forward(self, x: np.ndarray, training: bool = False) -> np.ndarray:
429
+ """
430
+ x : (batch, in_dim) or (in_dim,) for single sample
431
+ Returns output of shape (batch, out_dim) or (out_dim,).
432
+ """
433
+ scalar = x.ndim == 1
434
+ if scalar:
435
+ x = x[np.newaxis, :]
436
+
437
+ a = x
438
+ if training:
439
+ self._cache = {"a": [a]}
440
+ for i, (W, b) in enumerate(zip(self.W, self.b)):
441
+ z = a @ W + b
442
+ if i < len(self.W) - 1:
443
+ a = self._hidden_act(z)
444
+ if training:
445
+ self._cache.setdefault("z", []).append(z)
446
+ else:
447
+ a = self._out_act(z)
448
+ if training:
449
+ self._cache.setdefault("z", []).append(z)
450
+ if training:
451
+ self._cache["a"].append(a)
452
+
453
+ return a[0] if scalar else a
454
+
455
+ def backward(self, d_out: np.ndarray) -> None:
456
+ """
457
+ Compute gradients and apply Adam update.
458
+ d_out : (batch, out_dim) — gradient of loss w.r.t. network output.
459
+ """
460
+ if d_out.ndim == 1:
461
+ d_out = d_out[np.newaxis, :]
462
+ n = d_out.shape[0]
463
+ self._t += 1
464
+ beta1, beta2, eps = 0.9, 0.999, 1e-8
465
+
466
+ # Output layer delta
467
+ z_out = self._cache["z"][-1]
468
+ if self._out_act_d is not None:
469
+ delta = d_out * self._out_act_d(z_out)
470
+ else:
471
+ delta = d_out # linear pass-through for sigmoid (handled externally)
472
+
473
+ for i in reversed(range(len(self.W))):
474
+ a_prev = self._cache["a"][i]
475
+ gW = a_prev.T @ delta / n
476
+ gb = delta.mean(axis=0)
477
+
478
+ # Adam
479
+ self.mW[i] = beta1 * self.mW[i] + (1 - beta1) * gW
480
+ self.vW[i] = beta2 * self.vW[i] + (1 - beta2) * gW ** 2
481
+ self.mb[i] = beta1 * self.mb[i] + (1 - beta1) * gb
482
+ self.vb[i] = beta2 * self.vb[i] + (1 - beta2) * gb ** 2
483
+
484
+ mW_hat = self.mW[i] / (1 - beta1 ** self._t)
485
+ vW_hat = self.vW[i] / (1 - beta2 ** self._t)
486
+ mb_hat = self.mb[i] / (1 - beta1 ** self._t)
487
+ vb_hat = self.vb[i] / (1 - beta2 ** self._t)
488
+
489
+ self.W[i] -= self.lr * mW_hat / (np.sqrt(vW_hat) + eps)
490
+ self.b[i] -= self.lr * mb_hat / (np.sqrt(vb_hat) + eps)
491
+
492
+ if i > 0:
493
+ delta = (delta @ self.W[i].T) * self._hidden_act_d(
494
+ self._cache["z"][i - 1]
495
+ )
496
+
497
+ def soft_update(self, target: "MLP", tau: float) -> None:
498
+ """θ_target ← τ θ_online + (1-τ) θ_target"""
499
+ for w_s, w_t in zip(self.W, target.W):
500
+ w_t[:] = tau * w_s + (1 - tau) * w_t
501
+ for b_s, b_t in zip(self.b, target.b):
502
+ b_t[:] = tau * b_s + (1 - tau) * b_t
503
+
504
+ def hard_update(self, target: "MLP") -> None:
505
+ """θ_target ← θ_online"""
506
+ for w_s, w_t in zip(self.W, target.W):
507
+ w_t[:] = w_s.copy()
508
+ for b_s, b_t in zip(self.b, target.b):
509
+ b_t[:] = b_s.copy()
510
+
511
+ def copy_weights_from(self, source: "MLP") -> None:
512
+ """Copy weights from another MLP of identical architecture."""
513
+ for i in range(len(self.W)):
514
+ self.W[i] = source.W[i].copy()
515
+ self.b[i] = source.b[i].copy()
516
+
517
+
518
+ # ============================================================
519
+ # Exploration Noise
520
+ # ============================================================
521
+
522
class OrnsteinUhlenbeckNoise:
    """
    Temporally correlated exploration noise from an Ornstein-Uhlenbeck process.

        dx_t = θ(μ - x_t)dt + σ dW_t

    Each call to ``sample`` advances the process by one Euler-Maruyama step
    of length ``dt`` and returns the new value.

    Parameters
    ----------
    size         : int         dimensionality of the noise vector
    mu           : float       long-run mean
    theta        : float       mean reversion rate
    sigma        : float       noise scale
    dt           : float       time step
    random_state : int | None  seed for the internal generator
    """

    def __init__(
        self,
        size: int,
        mu: float = 0.0,
        theta: float = 0.15,
        sigma: float = 0.2,
        dt: float = 1e-2,
        random_state: int | None = None,
    ):
        self.mu = np.full(size, mu)
        self.theta = theta
        self.sigma = sigma
        self.dt = dt
        self._rng = np.random.default_rng(random_state)
        self.reset()

    def reset(self) -> None:
        """Restart the process at its long-run mean."""
        self.x = self.mu.copy()

    def sample(self) -> np.ndarray:
        """Advance the process one step and return a copy of its state."""
        gaussian = self._rng.standard_normal(self.mu.shape)
        # Deterministic pull towards the mean ...
        drift = self.theta * (self.mu - self.x) * self.dt
        # ... plus a Brownian increment, whose std scales with sqrt(dt).
        diffusion = self.sigma * np.sqrt(self.dt) * gaussian
        self.x = self.x + (drift + diffusion)
        return self.x.copy()
563
+
564
+
565
class GaussianNoise:
    """
    Independent zero-mean Gaussian exploration noise with optional decay.

    Parameters
    ----------
    size         : int         dimensionality of the noise vector
    sigma        : float       initial standard deviation
    sigma_min    : float       floor applied to the standard deviation
    decay        : float       factor multiplied into sigma after each sample()
    random_state : int | None  seed for the internal generator
    """

    def __init__(
        self,
        size: int,
        sigma: float = 0.1,
        sigma_min: float = 0.01,
        decay: float = 1.0,
        random_state: int | None = None,
    ):
        self._rng = np.random.default_rng(random_state)
        self.size = size
        self.sigma = sigma
        self.sigma_min = sigma_min
        self.decay = decay

    def sample(self) -> np.ndarray:
        """Draw one noise vector with the current sigma, then decay sigma."""
        draw = self._rng.normal(0, self.sigma, self.size)
        # The decay takes effect from the next call; sigma never ends up
        # below sigma_min.
        decayed = self.sigma * self.decay
        if decayed > self.sigma_min:
            self.sigma = decayed
        else:
            self.sigma = self.sigma_min
        return draw
@@ -0,0 +1,76 @@
1
+ """
2
+ mlscratch.supervised
3
+ ====================
4
+ Supervised learning algorithms, implemented from scratch in pure numpy.
5
+
6
+ Linear models
7
+ -------------
8
+ LinearRegression, RidgeRegression, LassoRegression, ElasticNet,
9
+ LogisticRegression
10
+
11
+ Instance-based
12
+ --------------
13
+ KNeighboursClassifier, KNeighboursRegressor
14
+
15
+ Tree-based
16
+ ----------
17
+ DecisionTreeClassifier, DecisionTreeRegressor
18
+
19
+ Ensembles
20
+ ---------
21
+ RandomForestClassifier, RandomForestRegressor
22
+ GradientBoostingClassifier, GradientBoostingRegressor
23
+ AdaBoostClassifier
24
+
25
+ Kernel methods
26
+ --------------
27
+ SVC — kernel Support Vector Classifier (linear / poly / rbf / sigmoid),
28
+ trained via Sequential Minimal Optimization.
29
+ """
30
+
31
+ from .adaboost import AdaBoostClassifier # noqa: F401
32
+ from .decision_tree import DecisionTreeClassifier, DecisionTreeRegressor # noqa: F401
33
+ from .gradient_boosting import ( # noqa: F401
34
+ GradientBoostingClassifier,
35
+ GradientBoostingRegressor,
36
+ )
37
+ from .knn import ( # noqa: F401
38
+ KNeighborsClassifier,
39
+ KNeighborsRegressor,
40
+ KNeighboursClassifier,
41
+ KNeighboursRegressor,
42
+ )
43
+ from .linear_models import ( # noqa: F401
44
+ ElasticNet,
45
+ LassoRegression,
46
+ LinearRegression,
47
+ LogisticRegression,
48
+ RidgeRegression,
49
+ )
50
+ from .random_forest import RandomForestClassifier, RandomForestRegressor # noqa: F401
51
+ from .svm import SVC # noqa: F401
52
+
53
# Public names of this package, i.e. what ``from ... import *`` exposes.
# Entries are grouped under the same headings as the module docstring, and
# every name listed here is imported above.
__all__ = [
    # Linear models
    "LinearRegression",
    "RidgeRegression",
    "LassoRegression",
    "ElasticNet",
    "LogisticRegression",
    # Instance-based (both the "Neighbours" and "Neighbors" spellings are
    # imported from .knn and exported)
    "KNeighboursClassifier",
    "KNeighboursRegressor",
    "KNeighborsClassifier",
    "KNeighborsRegressor",
    # Tree-based
    "DecisionTreeClassifier",
    "DecisionTreeRegressor",
    # Ensembles
    "RandomForestClassifier",
    "RandomForestRegressor",
    "GradientBoostingClassifier",
    "GradientBoostingRegressor",
    "AdaBoostClassifier",
    # Kernel methods
    "SVC",
]