scratchkit 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlscratch/__init__.py +56 -0
- mlscratch/__main__.py +118 -0
- mlscratch/bayesian/__init__.py +53 -0
- mlscratch/bayesian/bayesian_linear_regression.py +171 -0
- mlscratch/bayesian/bayesian_network.py +248 -0
- mlscratch/bayesian/bayesian_nn.py +315 -0
- mlscratch/bayesian/gaussian_process.py +207 -0
- mlscratch/bayesian/hmm.py +277 -0
- mlscratch/bayesian/init.py +52 -0
- mlscratch/bayesian/kalman_filter.py +182 -0
- mlscratch/bayesian/naive_bayes.py +209 -0
- mlscratch/metrics/__init__.py +59 -0
- mlscratch/metrics/classification.py +365 -0
- mlscratch/metrics/regression.py +79 -0
- mlscratch/neural/__init__.py +121 -0
- mlscratch/neural/attention.py +420 -0
- mlscratch/neural/autoencoder.py +543 -0
- mlscratch/neural/boltzmann.py +231 -0
- mlscratch/neural/cnn.py +593 -0
- mlscratch/neural/cvnn.py +322 -0
- mlscratch/neural/gan.py +364 -0
- mlscratch/neural/hopfield.py +193 -0
- mlscratch/neural/perceptron.py +398 -0
- mlscratch/neural/rbf_network.py +230 -0
- mlscratch/neural/recurrent.py +569 -0
- mlscratch/preprocessing/__init__.py +38 -0
- mlscratch/preprocessing/encoders.py +140 -0
- mlscratch/preprocessing/model_selection.py +119 -0
- mlscratch/preprocessing/polynomial.py +105 -0
- mlscratch/preprocessing/scalers.py +220 -0
- mlscratch/py.typed +0 -0
- mlscratch/reinforcement/__init__.py +59 -0
- mlscratch/reinforcement/ddpg.py +363 -0
- mlscratch/reinforcement/dqn.py +319 -0
- mlscratch/reinforcement/ppo.py +452 -0
- mlscratch/reinforcement/q_learning.py +352 -0
- mlscratch/reinforcement/sac.py +382 -0
- mlscratch/reinforcement/utils.py +594 -0
- mlscratch/supervised/__init__.py +76 -0
- mlscratch/supervised/_validation.py +50 -0
- mlscratch/supervised/adaboost.py +255 -0
- mlscratch/supervised/decision_tree.py +495 -0
- mlscratch/supervised/gradient_boosting.py +354 -0
- mlscratch/supervised/knn.py +234 -0
- mlscratch/supervised/lasso_regression.py +125 -0
- mlscratch/supervised/linear_models.py +459 -0
- mlscratch/supervised/linear_regression.py +197 -0
- mlscratch/supervised/logistic_regression.py +119 -0
- mlscratch/supervised/naive_bayes.py +113 -0
- mlscratch/supervised/random_forest.py +321 -0
- mlscratch/supervised/ridge_regression.py +93 -0
- mlscratch/supervised/svm.py +356 -0
- mlscratch/unsupervised/__init__.py +39 -0
- mlscratch/unsupervised/apriori.py +178 -0
- mlscratch/unsupervised/dbscan.py +141 -0
- mlscratch/unsupervised/gmm.py +204 -0
- mlscratch/unsupervised/hierarchical_clustering.py +137 -0
- mlscratch/unsupervised/ica.py +167 -0
- mlscratch/unsupervised/kmeans.py +135 -0
- mlscratch/unsupervised/kmedoids.py +133 -0
- mlscratch/unsupervised/pca.py +103 -0
- mlscratch/unsupervised/tsne.py +200 -0
- scratchkit-0.2.0.dist-info/METADATA +241 -0
- scratchkit-0.2.0.dist-info/RECORD +68 -0
- scratchkit-0.2.0.dist-info/WHEEL +5 -0
- scratchkit-0.2.0.dist-info/entry_points.txt +2 -0
- scratchkit-0.2.0.dist-info/licenses/LICENSE +201 -0
- scratchkit-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,594 @@
|
|
|
1
|
+
"""
|
|
2
|
+
mlscratch.reinforcement.utils
|
|
3
|
+
==============================
|
|
4
|
+
Shared infrastructure used by all RL algorithms:
|
|
5
|
+
|
|
6
|
+
- GridWorld : discrete 4-action environment (tabular RL)
|
|
7
|
+
- ContinuousEnv : continuous state/action environment for deep RL
- DiscreteEnv : discrete-action wrapper around ContinuousEnv (DQN testing)
|
|
8
|
+
- ReplayBuffer : experience replay with uniform random sampling
|
|
9
|
+
- PrioritizedReplayBuffer : sum-tree prioritised experience replay (DQN+)
|
|
10
|
+
- MLP : multi-layer perceptron (forward + backward)
|
|
11
|
+
- OrnsteinUhlenbeckNoise : temporally-correlated exploration noise (DDPG)
|
|
12
|
+
- GaussianNoise : i.i.d. Gaussian exploration noise
|
|
13
|
+
|
|
14
|
+
Only numpy and Python stdlib are used throughout.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
import numpy as np
|
|
19
|
+
from collections import deque
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# ============================================================
|
|
23
|
+
# Environments
|
|
24
|
+
# ============================================================
|
|
25
|
+
|
|
26
|
+
class GridWorld:
    """
    Small deterministic grid environment for tabular RL.

    Default 4×4 layout:
        S . . .
        . # . .
        . . . .
        . . . G

    The agent starts at S = (0, 0); the goal G is the bottom-right cell
    ``(size - 1, size - 1)`` and the pit # is at ``pit`` (default (1, 1)).
    Actions: 0=up, 1=down, 2=left, 3=right. Moves that would leave the
    grid are clamped to the border.
    Rewards: +10 (goal), -10 (pit), -0.1 (any other step).
    ``step`` reports ``done=True`` on the goal or the pit.
    """

    # action index -> (row delta, column delta)
    ACTIONS = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}
    N_ACTIONS = 4

    def __init__(self, size: int = 4, pit: tuple = (1, 1)):
        self.size = size
        self.goal = (size - 1, size - 1)
        self.pit = pit
        # (row, col) of the agent; stays None until reset() is called.
        self._state: tuple | None = None

    # ------------------------------------------------------------------
    @property
    def n_states(self) -> int:
        """Number of cells in the grid."""
        return self.size * self.size

    def _encode(self, pos: tuple) -> int:
        """Map a (row, col) position to its row-major integer index."""
        row_offset = pos[0] * self.size
        return row_offset + pos[1]

    def reset(self) -> int:
        """Put the agent back at (0, 0) and return the encoded state."""
        self._state = (0, 0)
        return self._encode(self._state)

    def step(self, action: int) -> tuple[int, float, bool]:
        """Apply ``action`` and return ``(encoded_state, reward, done)``."""
        row, col = self._state
        d_row, d_col = self.ACTIONS[action]
        last = self.size - 1
        # Clamp each coordinate so the agent never leaves the grid.
        self._state = (
            max(0, min(last, row + d_row)),
            max(0, min(last, col + d_col)),
        )
        code = self._encode(self._state)

        # Goal is tested before pit, so it wins if both share a cell.
        if self._state == self.goal:
            reward, done = 10.0, True
        elif self._state == self.pit:
            reward, done = -10.0, True
        else:
            reward, done = -0.1, False
        return code, reward, done

    def render(self) -> str:
        """Return a text picture: A=agent, G=goal, #=pit, .=empty."""

        def glyph(cell: tuple) -> str:
            # Agent marker takes precedence over goal, goal over pit.
            if cell == self._state:
                return "A"
            if cell == self.goal:
                return "G"
            if cell == self.pit:
                return "#"
            return "."

        cells = range(self.size)
        return "\n".join(
            " ".join(glyph((r, c)) for c in cells) for r in cells
        )
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class ContinuousEnv:
    """
    Minimal continuous-control task: a point mass moving on a line.

    State  : [position, velocity]
    Action : a single force value, clipped to [ACTION_LOW, ACTION_HIGH]
    Goal   : bring the position to 0 with zero velocity.

    Reward : -(position² + 0.1 velocity² + 0.001 force²)
    ``step`` reports ``done=True`` once ``max_steps`` steps were taken.

    Intended as a dependency-free stand-in for MuJoCo-style environments
    when testing deep RL algorithms.
    """

    STATE_DIM = 2
    ACTION_DIM = 1
    ACTION_LOW = -1.0
    ACTION_HIGH = 1.0

    def __init__(self, max_steps: int = 200):
        self.max_steps = max_steps
        # Current [position, velocity]; None until reset() is called.
        self._state: np.ndarray | None = None
        # Steps taken in the current episode.
        self._t: int = 0

    def reset(self, rng: np.random.Generator | None = None) -> np.ndarray:
        """Draw a random start state and return a copy of it.

        Position is uniform in [-0.5, 0.5), velocity in [-0.2, 0.2).
        A fresh default generator is used when ``rng`` is not supplied.
        """
        generator = rng or np.random.default_rng()
        lows, highs = [-0.5, -0.2], [0.5, 0.2]
        self._state = generator.uniform(lows, highs)
        self._t = 0
        return self._state.copy()

    def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool]:
        """Advance one time step; return ``(next_state, reward, done)``."""
        # Only the first element of the (flattened) action is used.
        raw = np.asarray(action).ravel()[0]
        a = float(np.clip(raw, self.ACTION_LOW, self.ACTION_HIGH))

        pos, vel = self._state
        # Unit mass with drag 0.1 and step size 0.05; the position update
        # uses the freshly updated velocity.
        acc = a - 0.1 * vel
        vel = vel + 0.05 * acc
        pos = pos + 0.05 * vel
        # Keep both coordinates inside [-2, 2].
        pos = float(np.clip(pos, -2.0, 2.0))
        vel = float(np.clip(vel, -2.0, 2.0))
        self._state = np.array([pos, vel])

        cost = pos**2 + 0.1 * vel**2 + 0.001 * a**2
        self._t += 1
        finished = self._t >= self.max_steps
        return self._state.copy(), -cost, finished

    @property
    def state_dim(self) -> int:
        """Dimensionality of the state vector."""
        return self.STATE_DIM

    @property
    def action_dim(self) -> int:
        """Dimensionality of the action vector."""
        return self.ACTION_DIM
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class DiscreteEnv:
    """
    ContinuousEnv with a discrete action set, for testing DQN.

    An action index selects one of the forces
    {−1.0, −0.5, 0.0, +0.5, +1.0}.
    """

    # Force applied for each action index.
    DISCRETE_ACTIONS = np.array([-1.0, -0.5, 0.0, 0.5, 1.0])
    N_ACTIONS = 5
    STATE_DIM = 2

    def __init__(self, max_steps: int = 200):
        # All dynamics are delegated to the wrapped continuous environment.
        self._env = ContinuousEnv(max_steps)

    def reset(self, rng=None) -> np.ndarray:
        """Reset the wrapped environment and return its initial state."""
        return self._env.reset(rng)

    def step(self, action_idx: int) -> tuple[np.ndarray, float, bool]:
        """Translate ``action_idx`` into a force and step the wrapped env."""
        force = np.array([self.DISCRETE_ACTIONS[action_idx]])
        return self._env.step(force)

    @property
    def state_dim(self) -> int:
        """Dimensionality of the state vector."""
        return self.STATE_DIM
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
# ============================================================
|
|
176
|
+
# Replay Buffers
|
|
177
|
+
# ============================================================
|
|
178
|
+
|
|
179
|
+
class ReplayBuffer:
    """
    Fixed-capacity experience replay for off-policy algorithms.

    Holds (state, action, reward, next_state, done) transitions in a
    bounded deque, so the oldest transition is dropped once ``capacity``
    is exceeded. Mini-batches are drawn uniformly without replacement.
    """

    def __init__(self, capacity: int):
        self.capacity = capacity
        self._buf: deque = deque(maxlen=capacity)

    def push(
        self,
        state: np.ndarray,
        action,
        reward: float,
        next_state: np.ndarray,
        done: bool,
    ) -> None:
        """Store one transition.

        Arrays (including the action) are copied as float32; ``reward``
        and ``done`` are stored as Python floats.
        """
        transition = (
            np.array(state, dtype=np.float32),
            np.array(action, dtype=np.float32),
            float(reward),
            np.array(next_state, dtype=np.float32),
            float(done),
        )
        self._buf.append(transition)

    def sample(self, batch_size: int, rng: np.random.Generator | None = None
               ) -> tuple:
        """Draw ``batch_size`` distinct transitions uniformly at random.

        Returns ``(states, actions, rewards, next_states, dones)`` where
        every element is a numpy array with the batch on axis 0.
        A fresh default generator is used when ``rng`` is not supplied.
        """
        generator = rng or np.random.default_rng()
        picks = generator.choice(len(self._buf), size=batch_size, replace=False)
        # Transpose the list of transitions into one column per field.
        columns = list(zip(*(self._buf[i] for i in picks)))
        states, actions, rewards, next_states, dones = columns
        return (
            np.stack(states),
            np.stack(actions),
            np.array(rewards, dtype=np.float32),
            np.stack(next_states),
            np.array(dones, dtype=np.float32),
        )

    def __len__(self) -> int:
        """Number of transitions currently stored."""
        return len(self._buf)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
class _SumTree:
|
|
226
|
+
"""Binary sum tree for O(log n) priority sampling."""
|
|
227
|
+
|
|
228
|
+
def __init__(self, capacity: int):
|
|
229
|
+
self.capacity = capacity
|
|
230
|
+
self.tree = np.zeros(2 * capacity)
|
|
231
|
+
self.data: list = [None] * capacity
|
|
232
|
+
self._ptr = 0
|
|
233
|
+
self._size = 0
|
|
234
|
+
|
|
235
|
+
def _propagate(self, idx: int, delta: float) -> None:
|
|
236
|
+
parent = (idx - 1) // 2
|
|
237
|
+
self.tree[parent] += delta
|
|
238
|
+
if parent != 0:
|
|
239
|
+
self._propagate(parent, delta)
|
|
240
|
+
|
|
241
|
+
def update(self, idx: int, priority: float) -> None:
|
|
242
|
+
leaf = idx + self.capacity - 1
|
|
243
|
+
delta = priority - self.tree[leaf]
|
|
244
|
+
self.tree[leaf] = priority
|
|
245
|
+
self._propagate(leaf, delta)
|
|
246
|
+
|
|
247
|
+
def add(self, priority: float, data) -> None:
|
|
248
|
+
self.data[self._ptr] = data
|
|
249
|
+
self.update(self._ptr, priority)
|
|
250
|
+
self._ptr = (self._ptr + 1) % self.capacity
|
|
251
|
+
self._size = min(self._size + 1, self.capacity)
|
|
252
|
+
|
|
253
|
+
def _retrieve(self, idx: int, s: float) -> int:
|
|
254
|
+
left = 2 * idx + 1
|
|
255
|
+
right = 2 * idx + 2
|
|
256
|
+
if left >= len(self.tree):
|
|
257
|
+
return idx
|
|
258
|
+
if s <= self.tree[left]:
|
|
259
|
+
return self._retrieve(left, s)
|
|
260
|
+
return self._retrieve(right, s - self.tree[left])
|
|
261
|
+
|
|
262
|
+
def get(self, s: float) -> tuple[int, float, object]:
|
|
263
|
+
leaf = self._retrieve(0, s)
|
|
264
|
+
data_idx = leaf - self.capacity + 1
|
|
265
|
+
return data_idx, self.tree[leaf], self.data[data_idx]
|
|
266
|
+
|
|
267
|
+
@property
|
|
268
|
+
def total(self) -> float:
|
|
269
|
+
return float(self.tree[0])
|
|
270
|
+
|
|
271
|
+
def __len__(self) -> int:
|
|
272
|
+
return self._size
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
class PrioritizedReplayBuffer:
    """
    Proportional Prioritised Experience Replay (Schaul et al., 2015).

    Transitions are kept in a sum tree and sampled with probability
    proportional to their stored priority.

    Parameters
    ----------
    capacity : int
    alpha : float   priority exponent (0 = uniform, 1 = full priority)
    beta : float    IS-weight exponent (0 = no correction, 1 = full)
    beta_increment : float   added to beta on every sample() call (capped at 1)
    eps : float     small constant added to |TD-error| for stability
    """

    def __init__(
        self,
        capacity: int,
        alpha: float = 0.6,
        beta: float = 0.4,
        beta_increment: float = 1e-4,
        eps: float = 1e-5,
    ):
        self.alpha = alpha
        self.beta = beta
        self.beta_increment = beta_increment
        self.eps = eps
        self._tree = _SumTree(capacity)

    def push(self, state, action, reward, next_state, done) -> None:
        """Store a transition with the largest priority currently in the tree.

        Falls back to priority 1.0 when the buffer is empty or every stored
        priority is zero. Arrays are copied as float32; ``reward`` and
        ``done`` are stored as Python floats.
        """
        stored = len(self._tree)
        max_p = 0.0
        if stored:
            # Filled leaves occupy a contiguous slice of the tree array.
            first_leaf = self._tree.capacity - 1
            max_p = float(self._tree.tree[first_leaf:first_leaf + stored].max())
        if max_p == 0:
            max_p = 1.0
        self._tree.add(max_p, (
            np.array(state, dtype=np.float32),
            np.array(action, dtype=np.float32),
            float(reward),
            np.array(next_state, dtype=np.float32),
            float(done),
        ))

    def sample(self, batch_size: int, rng: np.random.Generator | None = None
               ) -> tuple:
        """Draw a prioritised mini-batch.

        The total priority mass is split into ``batch_size`` equal segments
        and one point is drawn uniformly from each segment.

        Returns
        -------
        (states, actions, rewards, next_states, dones, weights, idxs)
            ``weights`` are importance-sampling weights normalised by their
            maximum; ``idxs`` are the data indices to hand back to
            ``update_priorities``.

        Raises
        ------
        ValueError
            If the buffer holds no transitions.
        """
        rng = rng or np.random.default_rng()
        n = len(self._tree)
        if n == 0:
            # Without this guard the tree hands back empty (None) slots and
            # the failure only surfaces later as an obscure unpacking error.
            raise ValueError("cannot sample from an empty PrioritizedReplayBuffer")
        total = self._tree.total
        segment = total / batch_size

        idxs, priorities, transitions = [], [], []
        for i in range(batch_size):
            s = rng.uniform(segment * i, segment * (i + 1))
            idx, p, data = self._tree.get(s)
            idxs.append(idx)
            priorities.append(p)
            transitions.append(data)

        # Importance-sampling weights: w_i = (n * P(i))^(-beta) / max_j w_j
        probs = np.array(priorities) / total
        weights = (n * probs) ** (-self.beta)
        weights /= weights.max()
        # Anneal beta towards 1 (full correction).
        self.beta = min(1.0, self.beta + self.beta_increment)

        states, actions, rewards, next_states, dones = zip(*transitions)
        return (
            np.stack(states),
            np.stack(actions),
            np.array(rewards, dtype=np.float32),
            np.stack(next_states),
            np.array(dones, dtype=np.float32),
            np.array(weights, dtype=np.float32),
            np.array(idxs),
        )

    def update_priorities(self, idxs: np.ndarray, td_errors: np.ndarray) -> None:
        """Set the priority of each sampled slot to (|TD-error| + eps) ** alpha."""
        priorities = (np.abs(td_errors) + self.eps) ** self.alpha
        for idx, p in zip(idxs, priorities):
            self._tree.update(int(idx), float(p))

    def __len__(self) -> int:
        """Number of transitions currently stored."""
        return len(self._tree)
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
# ============================================================
|
|
361
|
+
# Neural Network (pure numpy MLP with backprop)
|
|
362
|
+
# ============================================================
|
|
363
|
+
|
|
364
|
+
def _relu(x): return np.maximum(0.0, x)
|
|
365
|
+
def _relu_d(x): return (x > 0).astype(float)
|
|
366
|
+
def _tanh(x): return np.tanh(x)
|
|
367
|
+
def _tanh_d(x): return 1.0 - np.tanh(x) ** 2
|
|
368
|
+
def _sigmoid(x): return 1.0 / (1.0 + np.exp(-np.clip(x, -500, 500)))
|
|
369
|
+
def _linear(x): return x
|
|
370
|
+
def _linear_d(x): return np.ones_like(x)
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
class MLP:
|
|
374
|
+
"""
|
|
375
|
+
Multi-layer perceptron with configurable architecture.
|
|
376
|
+
|
|
377
|
+
Supports:
|
|
378
|
+
- Arbitrary depth / width
|
|
379
|
+
- ReLU hidden activations, configurable output activation
|
|
380
|
+
- Mini-batch gradient descent with Adam optimiser
|
|
381
|
+
- Soft / hard target-network parameter copy
|
|
382
|
+
|
|
383
|
+
Parameters
|
|
384
|
+
----------
|
|
385
|
+
layer_sizes : list[int] e.g. [state_dim, 256, 256, action_dim]
|
|
386
|
+
output_activation : str 'linear' | 'tanh' | 'sigmoid'
|
|
387
|
+
lr : float
|
|
388
|
+
"""
|
|
389
|
+
|
|
390
|
+
def __init__(
|
|
391
|
+
self,
|
|
392
|
+
layer_sizes: list[int],
|
|
393
|
+
output_activation: str = "linear",
|
|
394
|
+
lr: float = 1e-3,
|
|
395
|
+
random_state: int | None = None,
|
|
396
|
+
):
|
|
397
|
+
rng = np.random.default_rng(random_state)
|
|
398
|
+
self.layer_sizes = layer_sizes
|
|
399
|
+
self.lr = lr
|
|
400
|
+
|
|
401
|
+
# Weight initialisation (He for ReLU)
|
|
402
|
+
self.W: list[np.ndarray] = []
|
|
403
|
+
self.b: list[np.ndarray] = []
|
|
404
|
+
for i in range(len(layer_sizes) - 1):
|
|
405
|
+
fan_in = layer_sizes[i]
|
|
406
|
+
scale = np.sqrt(2.0 / fan_in)
|
|
407
|
+
self.W.append(rng.normal(0, scale, (fan_in, layer_sizes[i + 1])))
|
|
408
|
+
self.b.append(np.zeros(layer_sizes[i + 1]))
|
|
409
|
+
|
|
410
|
+
# Adam moments
|
|
411
|
+
self.mW = [np.zeros_like(w) for w in self.W]
|
|
412
|
+
self.vW = [np.zeros_like(w) for w in self.W]
|
|
413
|
+
self.mb = [np.zeros_like(b) for b in self.b]
|
|
414
|
+
self.vb = [np.zeros_like(b) for b in self.b]
|
|
415
|
+
self._t = 0 # Adam time step
|
|
416
|
+
|
|
417
|
+
out_acts = {
|
|
418
|
+
"linear": (_linear, _linear_d),
|
|
419
|
+
"tanh": (_tanh, _tanh_d),
|
|
420
|
+
"sigmoid": (_sigmoid, None),
|
|
421
|
+
}
|
|
422
|
+
self._out_act, self._out_act_d = out_acts[output_activation]
|
|
423
|
+
self._hidden_act, self._hidden_act_d = _relu, _relu_d
|
|
424
|
+
|
|
425
|
+
# Cache for backprop
|
|
426
|
+
self._cache: dict = {}
|
|
427
|
+
|
|
428
|
+
def forward(self, x: np.ndarray, training: bool = False) -> np.ndarray:
|
|
429
|
+
"""
|
|
430
|
+
x : (batch, in_dim) or (in_dim,) for single sample
|
|
431
|
+
Returns output of shape (batch, out_dim) or (out_dim,).
|
|
432
|
+
"""
|
|
433
|
+
scalar = x.ndim == 1
|
|
434
|
+
if scalar:
|
|
435
|
+
x = x[np.newaxis, :]
|
|
436
|
+
|
|
437
|
+
a = x
|
|
438
|
+
if training:
|
|
439
|
+
self._cache = {"a": [a]}
|
|
440
|
+
for i, (W, b) in enumerate(zip(self.W, self.b)):
|
|
441
|
+
z = a @ W + b
|
|
442
|
+
if i < len(self.W) - 1:
|
|
443
|
+
a = self._hidden_act(z)
|
|
444
|
+
if training:
|
|
445
|
+
self._cache.setdefault("z", []).append(z)
|
|
446
|
+
else:
|
|
447
|
+
a = self._out_act(z)
|
|
448
|
+
if training:
|
|
449
|
+
self._cache.setdefault("z", []).append(z)
|
|
450
|
+
if training:
|
|
451
|
+
self._cache["a"].append(a)
|
|
452
|
+
|
|
453
|
+
return a[0] if scalar else a
|
|
454
|
+
|
|
455
|
+
def backward(self, d_out: np.ndarray) -> None:
|
|
456
|
+
"""
|
|
457
|
+
Compute gradients and apply Adam update.
|
|
458
|
+
d_out : (batch, out_dim) — gradient of loss w.r.t. network output.
|
|
459
|
+
"""
|
|
460
|
+
if d_out.ndim == 1:
|
|
461
|
+
d_out = d_out[np.newaxis, :]
|
|
462
|
+
n = d_out.shape[0]
|
|
463
|
+
self._t += 1
|
|
464
|
+
beta1, beta2, eps = 0.9, 0.999, 1e-8
|
|
465
|
+
|
|
466
|
+
# Output layer delta
|
|
467
|
+
z_out = self._cache["z"][-1]
|
|
468
|
+
if self._out_act_d is not None:
|
|
469
|
+
delta = d_out * self._out_act_d(z_out)
|
|
470
|
+
else:
|
|
471
|
+
delta = d_out # linear pass-through for sigmoid (handled externally)
|
|
472
|
+
|
|
473
|
+
for i in reversed(range(len(self.W))):
|
|
474
|
+
a_prev = self._cache["a"][i]
|
|
475
|
+
gW = a_prev.T @ delta / n
|
|
476
|
+
gb = delta.mean(axis=0)
|
|
477
|
+
|
|
478
|
+
# Adam
|
|
479
|
+
self.mW[i] = beta1 * self.mW[i] + (1 - beta1) * gW
|
|
480
|
+
self.vW[i] = beta2 * self.vW[i] + (1 - beta2) * gW ** 2
|
|
481
|
+
self.mb[i] = beta1 * self.mb[i] + (1 - beta1) * gb
|
|
482
|
+
self.vb[i] = beta2 * self.vb[i] + (1 - beta2) * gb ** 2
|
|
483
|
+
|
|
484
|
+
mW_hat = self.mW[i] / (1 - beta1 ** self._t)
|
|
485
|
+
vW_hat = self.vW[i] / (1 - beta2 ** self._t)
|
|
486
|
+
mb_hat = self.mb[i] / (1 - beta1 ** self._t)
|
|
487
|
+
vb_hat = self.vb[i] / (1 - beta2 ** self._t)
|
|
488
|
+
|
|
489
|
+
self.W[i] -= self.lr * mW_hat / (np.sqrt(vW_hat) + eps)
|
|
490
|
+
self.b[i] -= self.lr * mb_hat / (np.sqrt(vb_hat) + eps)
|
|
491
|
+
|
|
492
|
+
if i > 0:
|
|
493
|
+
delta = (delta @ self.W[i].T) * self._hidden_act_d(
|
|
494
|
+
self._cache["z"][i - 1]
|
|
495
|
+
)
|
|
496
|
+
|
|
497
|
+
def soft_update(self, target: "MLP", tau: float) -> None:
|
|
498
|
+
"""θ_target ← τ θ_online + (1-τ) θ_target"""
|
|
499
|
+
for w_s, w_t in zip(self.W, target.W):
|
|
500
|
+
w_t[:] = tau * w_s + (1 - tau) * w_t
|
|
501
|
+
for b_s, b_t in zip(self.b, target.b):
|
|
502
|
+
b_t[:] = tau * b_s + (1 - tau) * b_t
|
|
503
|
+
|
|
504
|
+
def hard_update(self, target: "MLP") -> None:
|
|
505
|
+
"""θ_target ← θ_online"""
|
|
506
|
+
for w_s, w_t in zip(self.W, target.W):
|
|
507
|
+
w_t[:] = w_s.copy()
|
|
508
|
+
for b_s, b_t in zip(self.b, target.b):
|
|
509
|
+
b_t[:] = b_s.copy()
|
|
510
|
+
|
|
511
|
+
def copy_weights_from(self, source: "MLP") -> None:
|
|
512
|
+
"""Copy weights from another MLP of identical architecture."""
|
|
513
|
+
for i in range(len(self.W)):
|
|
514
|
+
self.W[i] = source.W[i].copy()
|
|
515
|
+
self.b[i] = source.b[i].copy()
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
# ============================================================
|
|
519
|
+
# Exploration Noise
|
|
520
|
+
# ============================================================
|
|
521
|
+
|
|
522
|
+
class OrnsteinUhlenbeckNoise:
    """
    Temporally correlated exploration noise from an Ornstein-Uhlenbeck process.

        dx_t = θ(μ - x_t)dt + σ dW_t

    Parameters
    ----------
    size : int             number of noise components
    mu : float             long-run mean
    theta : float          mean reversion rate
    sigma : float          noise scale
    dt : float             time step
    random_state : int | None   seed for the internal generator
    """

    def __init__(
        self,
        size: int,
        mu: float = 0.0,
        theta: float = 0.15,
        sigma: float = 0.2,
        dt: float = 1e-2,
        random_state: int | None = None,
    ):
        self.mu = np.full(size, mu)
        self.theta = theta
        self.sigma = sigma
        self.dt = dt
        self._rng = np.random.default_rng(random_state)
        self.reset()

    def reset(self) -> None:
        """Restart the process at its long-run mean."""
        self.x = self.mu.copy()

    def sample(self) -> np.ndarray:
        """Advance the process by one step of size ``dt``; return a copy of the state."""
        # Deterministic pull towards the mean.
        drift = self.theta * (self.mu - self.x) * self.dt
        # Random increment, scaled by sqrt(dt).
        shock = self.sigma * np.sqrt(self.dt) * self._rng.standard_normal(self.mu.shape)
        self.x = self.x + (drift + shock)
        return self.x.copy()
|
|
563
|
+
|
|
564
|
+
|
|
565
|
+
class GaussianNoise:
    """
    Independent zero-mean Gaussian exploration noise with optional decay.

    Parameters
    ----------
    size : int             number of noise components
    sigma : float          initial std
    sigma_min : float      floor for the std after decay
    decay : float          factor applied to sigma after each sample() call
    random_state : int | None   seed for the internal generator
    """

    def __init__(
        self,
        size: int,
        sigma: float = 0.1,
        sigma_min: float = 0.01,
        decay: float = 1.0,
        random_state: int | None = None,
    ):
        self.size = size
        self.sigma = sigma
        self.sigma_min = sigma_min
        self.decay = decay
        self._rng = np.random.default_rng(random_state)

    def sample(self) -> np.ndarray:
        """Draw noise with the current std, then decay the std for the next call."""
        current_sigma = self.sigma
        draw = self._rng.normal(0, current_sigma, self.size)
        # Decay is applied after drawing and never goes below sigma_min.
        decayed = current_sigma * self.decay
        self.sigma = max(self.sigma_min, decayed)
        return draw
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""
|
|
2
|
+
mlscratch.supervised
|
|
3
|
+
====================
|
|
4
|
+
Supervised learning algorithms, implemented from scratch in pure numpy.
|
|
5
|
+
|
|
6
|
+
Linear models
|
|
7
|
+
-------------
|
|
8
|
+
LinearRegression, RidgeRegression, LassoRegression, ElasticNet,
|
|
9
|
+
LogisticRegression
|
|
10
|
+
|
|
11
|
+
Instance-based
|
|
12
|
+
--------------
|
|
13
|
+
KNeighboursClassifier, KNeighboursRegressor
|
|
14
|
+
|
|
15
|
+
Tree-based
|
|
16
|
+
----------
|
|
17
|
+
DecisionTreeClassifier, DecisionTreeRegressor
|
|
18
|
+
|
|
19
|
+
Ensembles
|
|
20
|
+
---------
|
|
21
|
+
RandomForestClassifier, RandomForestRegressor
|
|
22
|
+
GradientBoostingClassifier, GradientBoostingRegressor
|
|
23
|
+
AdaBoostClassifier
|
|
24
|
+
|
|
25
|
+
Kernel methods
|
|
26
|
+
--------------
|
|
27
|
+
SVC — kernel Support Vector Classifier (linear / poly / rbf / sigmoid),
|
|
28
|
+
trained via Sequential Minimal Optimization.
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
from .adaboost import AdaBoostClassifier # noqa: F401
|
|
32
|
+
from .decision_tree import DecisionTreeClassifier, DecisionTreeRegressor # noqa: F401
|
|
33
|
+
from .gradient_boosting import ( # noqa: F401
|
|
34
|
+
GradientBoostingClassifier,
|
|
35
|
+
GradientBoostingRegressor,
|
|
36
|
+
)
|
|
37
|
+
from .knn import ( # noqa: F401
|
|
38
|
+
KNeighborsClassifier,
|
|
39
|
+
KNeighborsRegressor,
|
|
40
|
+
KNeighboursClassifier,
|
|
41
|
+
KNeighboursRegressor,
|
|
42
|
+
)
|
|
43
|
+
from .linear_models import ( # noqa: F401
|
|
44
|
+
ElasticNet,
|
|
45
|
+
LassoRegression,
|
|
46
|
+
LinearRegression,
|
|
47
|
+
LogisticRegression,
|
|
48
|
+
RidgeRegression,
|
|
49
|
+
)
|
|
50
|
+
from .random_forest import RandomForestClassifier, RandomForestRegressor # noqa: F401
|
|
51
|
+
from .svm import SVC # noqa: F401
|
|
52
|
+
|
|
53
|
+
__all__ = [
|
|
54
|
+
# Linear models
|
|
55
|
+
"LinearRegression",
|
|
56
|
+
"RidgeRegression",
|
|
57
|
+
"LassoRegression",
|
|
58
|
+
"ElasticNet",
|
|
59
|
+
"LogisticRegression",
|
|
60
|
+
# Instance-based
|
|
61
|
+
"KNeighboursClassifier",
|
|
62
|
+
"KNeighboursRegressor",
|
|
63
|
+
"KNeighborsClassifier",
|
|
64
|
+
"KNeighborsRegressor",
|
|
65
|
+
# Tree-based
|
|
66
|
+
"DecisionTreeClassifier",
|
|
67
|
+
"DecisionTreeRegressor",
|
|
68
|
+
# Ensembles
|
|
69
|
+
"RandomForestClassifier",
|
|
70
|
+
"RandomForestRegressor",
|
|
71
|
+
"GradientBoostingClassifier",
|
|
72
|
+
"GradientBoostingRegressor",
|
|
73
|
+
"AdaBoostClassifier",
|
|
74
|
+
# Kernel methods
|
|
75
|
+
"SVC",
|
|
76
|
+
]
|