scratchkit 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. mlscratch/__init__.py +56 -0
  2. mlscratch/__main__.py +118 -0
  3. mlscratch/bayesian/__init__.py +53 -0
  4. mlscratch/bayesian/bayesian_linear_regression.py +171 -0
  5. mlscratch/bayesian/bayesian_network.py +248 -0
  6. mlscratch/bayesian/bayesian_nn.py +315 -0
  7. mlscratch/bayesian/gaussian_process.py +207 -0
  8. mlscratch/bayesian/hmm.py +277 -0
  9. mlscratch/bayesian/init.py +52 -0
  10. mlscratch/bayesian/kalman_filter.py +182 -0
  11. mlscratch/bayesian/naive_bayes.py +209 -0
  12. mlscratch/metrics/__init__.py +59 -0
  13. mlscratch/metrics/classification.py +365 -0
  14. mlscratch/metrics/regression.py +79 -0
  15. mlscratch/neural/__init__.py +121 -0
  16. mlscratch/neural/attention.py +420 -0
  17. mlscratch/neural/autoencoder.py +543 -0
  18. mlscratch/neural/boltzmann.py +231 -0
  19. mlscratch/neural/cnn.py +593 -0
  20. mlscratch/neural/cvnn.py +322 -0
  21. mlscratch/neural/gan.py +364 -0
  22. mlscratch/neural/hopfield.py +193 -0
  23. mlscratch/neural/perceptron.py +398 -0
  24. mlscratch/neural/rbf_network.py +230 -0
  25. mlscratch/neural/recurrent.py +569 -0
  26. mlscratch/preprocessing/__init__.py +38 -0
  27. mlscratch/preprocessing/encoders.py +140 -0
  28. mlscratch/preprocessing/model_selection.py +119 -0
  29. mlscratch/preprocessing/polynomial.py +105 -0
  30. mlscratch/preprocessing/scalers.py +220 -0
  31. mlscratch/py.typed +0 -0
  32. mlscratch/reinforcement/__init__.py +59 -0
  33. mlscratch/reinforcement/ddpg.py +363 -0
  34. mlscratch/reinforcement/dqn.py +319 -0
  35. mlscratch/reinforcement/ppo.py +452 -0
  36. mlscratch/reinforcement/q_learning.py +352 -0
  37. mlscratch/reinforcement/sac.py +382 -0
  38. mlscratch/reinforcement/utils.py +594 -0
  39. mlscratch/supervised/__init__.py +76 -0
  40. mlscratch/supervised/_validation.py +50 -0
  41. mlscratch/supervised/adaboost.py +255 -0
  42. mlscratch/supervised/decision_tree.py +495 -0
  43. mlscratch/supervised/gradient_boosting.py +354 -0
  44. mlscratch/supervised/knn.py +234 -0
  45. mlscratch/supervised/lasso_regression.py +125 -0
  46. mlscratch/supervised/linear_models.py +459 -0
  47. mlscratch/supervised/linear_regression.py +197 -0
  48. mlscratch/supervised/logistic_regression.py +119 -0
  49. mlscratch/supervised/naive_bayes.py +113 -0
  50. mlscratch/supervised/random_forest.py +321 -0
  51. mlscratch/supervised/ridge_regression.py +93 -0
  52. mlscratch/supervised/svm.py +356 -0
  53. mlscratch/unsupervised/__init__.py +39 -0
  54. mlscratch/unsupervised/apriori.py +178 -0
  55. mlscratch/unsupervised/dbscan.py +141 -0
  56. mlscratch/unsupervised/gmm.py +204 -0
  57. mlscratch/unsupervised/hierarchical_clustering.py +137 -0
  58. mlscratch/unsupervised/ica.py +167 -0
  59. mlscratch/unsupervised/kmeans.py +135 -0
  60. mlscratch/unsupervised/kmedoids.py +133 -0
  61. mlscratch/unsupervised/pca.py +103 -0
  62. mlscratch/unsupervised/tsne.py +200 -0
  63. scratchkit-0.2.0.dist-info/METADATA +241 -0
  64. scratchkit-0.2.0.dist-info/RECORD +68 -0
  65. scratchkit-0.2.0.dist-info/WHEEL +5 -0
  66. scratchkit-0.2.0.dist-info/entry_points.txt +2 -0
  67. scratchkit-0.2.0.dist-info/licenses/LICENSE +201 -0
  68. scratchkit-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,593 @@
1
+ """
2
+ Convolutional Neural Network (CNN)
3
+ ====================================
4
+ Building blocks for 2-D spatial feature extraction:
5
+
6
+ Conv2D — learnable filters, valid convolution, forward + backward
7
+ MaxPool2D — spatial downsampling, forward + backward (max-index mask)
8
+ AvgPool2D — average pooling, forward + backward
9
+ BatchNorm2D — channel-wise normalisation with learnable γ, β
10
+ Flatten — reshape (B, C, H, W) → (B, C*H*W)
11
+ Dense — fully-connected layer with optional activation
12
+ SimpleCNN — pre-wired model for quick experiments
13
+
14
+ All layers expose ``forward(x)`` / ``backward(grad)`` with weight
15
+ updates performed in-place, matching the style of the repo.
16
+
17
+ Reference
18
+ ----------
19
+ LeCun et al. (1998). Gradient-based learning applied to document
20
+ recognition. Proceedings of the IEEE, 86(11), 2278–2324.
21
+
22
+ Only numpy is used (no PIL / scipy dependency in this file).
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import numpy as np
28
+
29
+
30
+ # ============================================================
31
+ # Helper activations
32
+ # ============================================================
33
+
34
+ def _relu(x: np.ndarray) -> np.ndarray:
35
+ return np.maximum(0.0, x)
36
+
37
+
38
+ def _relu_grad(x: np.ndarray) -> np.ndarray:
39
+ return (x > 0).astype(float)
40
+
41
+
42
+ def _softmax(x: np.ndarray) -> np.ndarray:
43
+ e = np.exp(x - x.max(axis=-1, keepdims=True))
44
+ return e / e.sum(axis=-1, keepdims=True)
45
+
46
+
47
+ # ============================================================
48
+ # Conv2D
49
+ # ============================================================
50
+
51
class Conv2D:
    """
    2-D convolutional layer (valid padding, stride=1).

    The forward pass unrolls every K×K patch into a column (im2col) so the
    convolution becomes one matrix product; the backward pass folds the
    column gradient back onto the input (col2im) and applies a plain SGD
    step to the weights and bias in place.

    Parameters
    ----------
    in_channels : int
    out_channels : int
    kernel_size : int
        Square kernel side length.
    learning_rate : float
        Step size used by the in-place update inside ``backward``.
    random_state : int or None
        Seed for the He-normal weight initialisation.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        learning_rate: float = 1e-3,
        random_state: int | None = None,
    ) -> None:
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.learning_rate = learning_rate

        rng = np.random.default_rng(random_state)
        # He initialisation: std = sqrt(2 / fan_in)
        scale = np.sqrt(2.0 / (in_channels * kernel_size * kernel_size))
        # Shape: (out_channels, in_channels, kH, kW)
        self.weights = rng.normal(
            0, scale, (out_channels, in_channels, kernel_size, kernel_size)
        )
        self.bias = np.zeros(out_channels)

        self._cache: dict = {}

    # ------------------------------------------------------------------
    # Forward
    # ------------------------------------------------------------------

    def forward(self, x: np.ndarray) -> np.ndarray:
        """
        Parameters
        ----------
        x : (B, C_in, H, W)

        Returns
        -------
        out : (B, C_out, H_out, W_out)
            With ``H_out = H - K + 1`` and ``W_out = W - K + 1``.

        Raises
        ------
        ValueError
            If ``x`` is not 4-D, its channel count differs from
            ``in_channels``, or it is spatially smaller than the kernel.
        """
        if x.ndim != 4:
            raise ValueError(
                f"Conv2D expects a 4-D input (B, C, H, W); got shape {x.shape}."
            )
        B, C_in, H, W = x.shape
        K = self.kernel_size
        if C_in != self.in_channels:
            raise ValueError(
                f"Conv2D expects {self.in_channels} input channels; got {C_in}."
            )
        if H < K or W < K:
            raise ValueError(
                f"Input spatial size ({H}, {W}) is smaller than kernel size {K}."
            )
        H_out = H - K + 1
        W_out = W - K + 1
        C_out = self.out_channels

        col = self._im2col(x, K, H_out, W_out)      # (B, C_in*K², H_out*W_out)
        W_flat = self.weights.reshape(C_out, -1)    # (C_out, C_in*K²)

        # (C_out, C_in*K²) @ (B, C_in*K², N) broadcasts over the batch axis.
        out = (W_flat @ col).reshape(B, C_out, H_out, W_out)
        out += self.bias.reshape(1, -1, 1, 1)

        self._cache = {"x": x, "col": col, "H_out": H_out, "W_out": W_out}
        return out

    # ------------------------------------------------------------------
    # Backward
    # ------------------------------------------------------------------

    def backward(self, d_out: np.ndarray) -> np.ndarray:
        """
        Back-propagate through the layer and update its parameters in place.

        Parameter gradients are summed over spatial positions and averaged
        over the batch, for both the weights and the bias.

        Parameters
        ----------
        d_out : (B, C_out, H_out, W_out)

        Returns
        -------
        d_x : (B, C_in, H, W)
        """
        x, col = self._cache["x"], self._cache["col"]
        B = x.shape[0]
        K = self.kernel_size
        C_out = self.out_channels
        H_out, W_out = self._cache["H_out"], self._cache["W_out"]

        W_flat = self.weights.reshape(C_out, -1)    # (C_out, C_in*K²)

        # d_out: (B, C_out, H_out, W_out) → (B, C_out, H_out*W_out)
        d_out_flat = d_out.reshape(B, C_out, -1)

        # Gradient w.r.t. weights: sum over batch and spatial, mean over batch
        d_W_flat = np.einsum("bci,bki->ck", d_out_flat, col) / B
        # The bias feeds every output position, so its gradient is likewise the
        # spatial *sum*; only the batch axis is averaged (same scale as d_W).
        d_B = d_out_flat.sum(axis=(0, 2)) / B

        # Gradient w.r.t. col (input patches); must use the pre-update weights
        d_col = np.einsum("ck,bci->bki", W_flat, d_out_flat)

        # col2im: overlapping patches accumulate
        d_x = self._col2im(d_col, x.shape, K, H_out, W_out)

        self.weights -= self.learning_rate * d_W_flat.reshape(self.weights.shape)
        self.bias -= self.learning_rate * d_B

        return d_x

    # ------------------------------------------------------------------
    # im2col / col2im helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _im2col(
        x: np.ndarray, K: int, H_out: int, W_out: int
    ) -> np.ndarray:
        """Unroll every K×K patch of ``x`` into a column, row-major over (i, j)."""
        B, C, H, W = x.shape
        col = np.zeros((B, C * K * K, H_out * W_out))
        idx = 0
        for i in range(H_out):
            for j in range(W_out):
                patch = x[:, :, i:i + K, j:j + K]    # (B, C, K, K)
                col[:, :, idx] = patch.reshape(B, -1)
                idx += 1
        return col

    @staticmethod
    def _col2im(
        d_col: np.ndarray, x_shape: tuple,
        K: int, H_out: int, W_out: int
    ) -> np.ndarray:
        """Inverse of ``_im2col`` for gradients: scatter-add columns onto the input grid."""
        B, C, H, W = x_shape
        d_x = np.zeros(x_shape)
        idx = 0
        for i in range(H_out):
            for j in range(W_out):
                patch = d_col[:, :, idx].reshape(B, C, K, K)
                d_x[:, :, i:i + K, j:j + K] += patch
                idx += 1
        return d_x
191
+
192
+
193
+ # ============================================================
194
+ # MaxPool2D
195
+ # ============================================================
196
+
197
class MaxPool2D:
    """
    2-D Max Pooling layer with backward support.

    Windows are non-overlapping (stride equals ``pool_size``). Trailing rows
    or columns that do not fill a complete window are ignored in the forward
    pass and receive zero gradient.

    Parameters
    ----------
    pool_size : int
        Square pooling window side length.
    """

    def __init__(self, pool_size: int = 2) -> None:
        self.pool_size = pool_size
        self._cache: dict = {}

    def forward(self, x: np.ndarray) -> np.ndarray:
        """
        Parameters
        ----------
        x : (B, C, H, W)

        Returns
        -------
        out : (B, C, H//pool, W//pool)
        """
        B, C, H, W = x.shape
        P = self.pool_size
        H_out, W_out = H // P, W // P
        out = np.zeros((B, C, H_out, W_out))
        mask = np.zeros(x.shape)
        slots = np.arange(P * P)

        for i in range(H_out):
            for j in range(W_out):
                rows = slice(i * P, (i + 1) * P)
                cols = slice(j * P, (j + 1) * P)
                flat = x[:, :, rows, cols].reshape(B, C, P * P)
                out[:, :, i, j] = flat.max(axis=2)
                # Mark exactly one winner per window (first argmax). An
                # equality mask would flag every tied cell and multiply the
                # routed gradient by the number of ties.
                winner = flat.argmax(axis=2)
                one_hot = (slots == winner[:, :, np.newaxis]).astype(float)
                mask[:, :, rows, cols] = one_hot.reshape(B, C, P, P)

        self._cache = {"mask": mask, "x_shape": x.shape}
        return out

    def backward(self, d_out: np.ndarray) -> np.ndarray:
        """
        Route gradients back through the max positions.

        Parameters
        ----------
        d_out : (B, C, H//pool, W//pool)

        Returns
        -------
        d_x : (B, C, H, W)
            Each window's gradient lands on the single cell recorded as its
            maximum during ``forward``; every other cell gets zero.
        """
        mask = self._cache["mask"]
        x_shape = self._cache["x_shape"]
        H, W = x_shape[2], x_shape[3]
        P = self.pool_size
        H_out, W_out = H // P, W // P
        d_x = np.zeros(x_shape)

        for i in range(H_out):
            for j in range(W_out):
                rows = slice(i * P, (i + 1) * P)
                cols = slice(j * P, (j + 1) * P)
                d_region = d_out[:, :, i, j][:, :, np.newaxis, np.newaxis]
                d_x[:, :, rows, cols] += mask[:, :, rows, cols] * d_region
        return d_x
253
+
254
+
255
+ # ============================================================
256
+ # AvgPool2D
257
+ # ============================================================
258
+
259
class AvgPool2D:
    """
    2-D Average Pooling over non-overlapping square windows.

    Parameters
    ----------
    pool_size : int
        Square pooling window side length (also the stride).
    """

    def __init__(self, pool_size: int = 2) -> None:
        self._cache: dict = {}
        self.pool_size = pool_size

    def forward(self, x: np.ndarray) -> np.ndarray:
        """Average each window of ``x`` (B, C, H, W) → (B, C, H//pool, W//pool)."""
        batch, channels, height, width = x.shape
        size = self.pool_size
        n_rows, n_cols = height // size, width // size
        pooled = np.zeros((batch, channels, n_rows, n_cols))
        for r, c in np.ndindex(n_rows, n_cols):
            window = x[:, :, r * size:(r + 1) * size, c * size:(c + 1) * size]
            pooled[:, :, r, c] = window.mean(axis=(2, 3))
        self._cache = {"x_shape": x.shape}
        return pooled

    def backward(self, d_out: np.ndarray) -> np.ndarray:
        """Spread each output gradient evenly over the window it was averaged from."""
        batch, channels, height, width = self._cache["x_shape"]
        size = self.pool_size
        n_rows, n_cols = height // size, width // size
        grad_in = np.zeros((batch, channels, height, width))
        for r, c in np.ndindex(n_rows, n_cols):
            upstream = d_out[:, :, r, c][:, :, np.newaxis, np.newaxis]
            share = upstream / (size * size)
            grad_in[:, :, r * size:(r + 1) * size, c * size:(c + 1) * size] += share
        return grad_in
288
+
289
+
290
+ # ============================================================
291
+ # BatchNorm2D
292
+ # ============================================================
293
+
294
class BatchNorm2D:
    """
    Batch Normalisation over channel dimension.

    Normalises each channel independently across (B, H, W),
    then applies learnable scale γ and shift β.

    Parameters
    ----------
    num_features : int
        Number of channels C.
    eps : float
        Added to the variance before the square root for numerical stability.
    momentum : float
        Weight of the current batch statistic in the running-average update.
    learning_rate : float
        Step size of the in-place SGD update applied to γ and β in ``backward``.
    """

    def __init__(
        self,
        num_features: int,
        eps: float = 1e-5,
        momentum: float = 0.1,
        learning_rate: float = 1e-3,
    ) -> None:
        self.num_features = num_features
        self.eps = eps
        self.momentum = momentum
        self.learning_rate = learning_rate

        self.gamma = np.ones(num_features)
        self.beta = np.zeros(num_features)

        self.running_mean = np.zeros(num_features)
        self.running_var = np.ones(num_features)
        self._cache: dict = {}

    def forward(self, x: np.ndarray, training: bool = True) -> np.ndarray:
        """
        Normalise ``x`` and apply the affine transform.

        Parameters
        ----------
        x : (B, C, H, W)
        training : bool
            If True, use the statistics of ``x`` and fold them into the running
            averages; if False, use the stored running averages.

        Returns
        -------
        out : (B, C, H, W)

        Raises
        ------
        ValueError
            If ``x`` is not 4-D.
        """
        if x.ndim != 4:
            raise ValueError(
                f"BatchNorm2D expects a 4-D input (B, C, H, W); got shape {x.shape}."
            )

        if training:
            mean = x.mean(axis=(0, 2, 3))  # (C,)
            var = x.var(axis=(0, 2, 3))    # (C,)
            self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean
            self.running_var = (1 - self.momentum) * self.running_var + self.momentum * var
        else:
            mean = self.running_mean
            var = self.running_var

        x_hat = (x - mean.reshape(1, -1, 1, 1)) / np.sqrt(var.reshape(1, -1, 1, 1) + self.eps)
        out = self.gamma.reshape(1, -1, 1, 1) * x_hat + self.beta.reshape(1, -1, 1, 1)

        self._cache = {
            "x": x, "x_hat": x_hat, "mean": mean, "var": var, "training": training,
        }
        return out

    def backward(self, d_out: np.ndarray) -> np.ndarray:
        """
        Back-propagate through the layer and update γ, β in place.

        Parameters
        ----------
        d_out : (B, C, H, W)

        Returns
        -------
        d_x : (B, C, H, W)
            Gradient w.r.t. the input of the most recent ``forward`` call,
            evaluated with the γ that produced that forward output.
        """
        x_hat = self._cache["x_hat"]
        var = self._cache["var"]
        B, _, H, W = x_hat.shape
        N = B * H * W

        d_gamma = (d_out * x_hat).sum(axis=(0, 2, 3))
        d_beta = d_out.sum(axis=(0, 2, 3))

        # The input gradient must be formed BEFORE γ is stepped: the reshape
        # below is a view of self.gamma, so an earlier in-place update would
        # leak the new value into d_x.
        d_x_hat = d_out * self.gamma.reshape(1, -1, 1, 1)
        inv_std = 1.0 / np.sqrt(var.reshape(1, -1, 1, 1) + self.eps)
        if self._cache.get("training", True):
            # Batch statistics depend on x, hence the two correction terms.
            d_x = (
                inv_std / N * (
                    N * d_x_hat
                    - d_x_hat.sum(axis=(0, 2, 3), keepdims=True)
                    - x_hat * (d_x_hat * x_hat).sum(axis=(0, 2, 3), keepdims=True)
                )
            )
        else:
            # Running statistics are constants w.r.t. x: plain rescaling.
            d_x = d_x_hat * inv_std

        self.gamma -= self.learning_rate * d_gamma
        self.beta -= self.learning_rate * d_beta
        return d_x
364
+
365
+
366
+ # ============================================================
367
+ # Flatten
368
+ # ============================================================
369
+
370
class Flatten:
    """
    Collapse every non-batch axis: (B, C, H, W) → (B, C*H*W).

    The incoming shape is remembered so ``backward`` can restore it.
    """

    def __init__(self) -> None:
        self._input_shape: tuple | None = None

    def forward(self, x: np.ndarray) -> np.ndarray:
        """Record the shape of ``x`` and return it as a 2-D array."""
        batch = x.shape[0]
        self._input_shape = x.shape
        return x.reshape(batch, -1)

    def backward(self, d_out: np.ndarray) -> np.ndarray:
        """Reshape the gradient back to the shape seen by the last ``forward``."""
        original_shape = self._input_shape
        return d_out.reshape(original_shape)
382
+
383
+
384
+ # ============================================================
385
+ # Dense
386
+ # ============================================================
387
+
388
class Dense:
    """
    Fully-connected layer with a selectable output activation.

    Parameters
    ----------
    in_features : int
    out_features : int
    activation : str
        ``'relu'``, ``'softmax'``, or ``'linear'`` (identity).
    learning_rate : float
        Step size of the in-place SGD update performed by ``backward``.
    random_state : int or None
        Seed for the He-normal weight initialisation.
    """

    def __init__(
        self,
        in_features: int,
        out_features: int,
        activation: str = "linear",
        learning_rate: float = 1e-3,
        random_state: int | None = None,
    ) -> None:
        supported = ("relu", "softmax", "linear")
        if activation not in supported:
            raise ValueError("activation must be 'relu', 'softmax', or 'linear'.")
        self.activation = activation
        self.learning_rate = learning_rate

        generator = np.random.default_rng(random_state)
        std = np.sqrt(2.0 / in_features)
        self.weights = generator.normal(0, std, (in_features, out_features))
        self.bias = np.zeros(out_features)
        self._cache: dict = {}

    def forward(self, x: np.ndarray) -> np.ndarray:
        """Affine map plus activation: (B, in_features) → (B, out_features)."""
        pre_activation = x @ self.weights + self.bias
        self._cache = {"x": x, "z": pre_activation}
        if self.activation == "relu":
            return _relu(pre_activation)
        if self.activation == "softmax":
            return _softmax(pre_activation)
        return pre_activation

    def backward(self, d_out: np.ndarray) -> np.ndarray:
        """
        Back-propagate ``d_out`` and take one SGD step on weights and bias.

        For ``'softmax'`` no activation derivative is applied here: the
        incoming gradient is used as the gradient w.r.t. the pre-activation
        (the caller supplies the combined softmax + cross-entropy delta).

        Returns
        -------
        d_x : (B, in_features)
            Computed with the weights as they were before this call's update.
        """
        inputs = self._cache["x"]
        pre_activation = self._cache["z"]
        delta = d_out
        if self.activation == "relu":
            delta = delta * _relu_grad(pre_activation)

        batch = len(inputs)
        grad_w = inputs.T @ delta / batch
        grad_b = delta.mean(axis=0)
        grad_in = delta @ self.weights.T

        self.weights -= self.learning_rate * grad_w
        self.bias -= self.learning_rate * grad_b
        return grad_in
446
+
447
+
448
+ # ============================================================
449
+ # SimpleCNN (pre-wired model)
450
+ # ============================================================
451
+
452
class SimpleCNN:
    """
    Pre-wired CNN for small grayscale image classification.

    Architecture:
    Conv2D(in, 16, 3) → ReLU → MaxPool(2)
    Conv2D(16, 32, 3) → ReLU → MaxPool(2)
    Flatten → Dense(flat_dim, 128, relu) → Dense(128, n_classes, softmax)

    Parameters
    ----------
    in_channels : int
    image_size : int
        Height (and width) of the square input image. Must be large enough
        to leave at least a 1×1 feature map after both conv+pool stages.
    n_classes : int
    learning_rate : float
        Shared by every trainable layer.
    random_state : int or None
        Seed passed to every trainable layer's weight initialiser.

    Attributes
    ----------
    losses_ : list of float
        Mean mini-batch cross-entropy recorded once per epoch by ``fit``.

    Raises
    ------
    ValueError
        If ``image_size`` is too small for the architecture.
    """

    def __init__(
        self,
        in_channels: int = 1,
        image_size: int = 28,
        n_classes: int = 10,
        learning_rate: float = 1e-3,
        random_state: int | None = None,
    ) -> None:
        self.n_classes = n_classes
        self.learning_rate = learning_rate

        # Compute feature-map size after two conv+pool blocks
        def _after_conv_pool(size: int, k: int = 3, p: int = 2) -> int:
            return (size - k + 1) // p

        h = _after_conv_pool(_after_conv_pool(image_size))
        if h < 1:
            # Floor division of negatives can yield a negative h whose square
            # is positive, which would silently build a mis-sized Dense layer.
            raise ValueError(
                f"image_size={image_size} is too small: no feature map is left "
                "after two conv(3)+pool(2) stages."
            )
        flat_dim = 32 * h * h

        self.conv1 = Conv2D(in_channels, 16, 3, learning_rate, random_state)
        self.pool1 = MaxPool2D(2)
        self.conv2 = Conv2D(16, 32, 3, learning_rate, random_state)
        self.pool2 = MaxPool2D(2)
        self.flat = Flatten()
        self.dense1 = Dense(flat_dim, 128, "relu", learning_rate, random_state)
        self.dense2 = Dense(128, n_classes, "softmax", learning_rate, random_state)

        self.losses_: list[float] = []

    # ------------------------------------------------------------------
    # Forward / backward
    # ------------------------------------------------------------------

    def _forward(self, X: np.ndarray) -> tuple:
        """Return (output, intermediate activations) for backprop."""
        z1 = self.conv1.forward(X)
        a1 = _relu(z1)
        a2 = self.pool1.forward(a1)
        z2 = self.conv2.forward(a2)
        a3 = _relu(z2)
        a4 = self.pool2.forward(a3)
        a5 = self.flat.forward(a4)
        a6 = self.dense1.forward(a5)
        a7 = self.dense2.forward(a6)
        # Pre-activations of the conv stages are needed for the ReLU gradient.
        self._fwd_cache = {"z1": z1, "z2": z2}
        return a7, (X, a1, a2, a3, a4, a5, a6, a7)

    def _backward(self, y_hot: np.ndarray, a7: np.ndarray) -> None:
        """Back-propagate cross-entropy loss through all layers."""
        # Per-sample softmax + cross-entropy delta. It is deliberately NOT
        # divided by the batch size: each layer averages its own parameter
        # gradients over the batch, so dividing here would scale them by 1/n².
        d = a7 - y_hot

        d = self.dense2.backward(d)
        d = self.dense1.backward(d)
        d = self.flat.backward(d)
        d = self.pool2.backward(d)
        d = d * _relu_grad(self._fwd_cache["z2"])
        d = self.conv2.backward(d)
        d = self.pool1.backward(d)
        d = d * _relu_grad(self._fwd_cache["z1"])
        d = self.conv1.backward(d)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def fit(
        self,
        X: np.ndarray,
        y: np.ndarray,
        epochs: int = 10,
        batch_size: int = 32,
    ) -> "SimpleCNN":
        """
        Train the CNN with mini-batch SGD.

        Parameters
        ----------
        X : ndarray (n_samples, in_channels, H, W)
        y : ndarray (n_samples,) — integer class labels
        epochs : int
        batch_size : int

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` is empty, ``X`` and ``y`` differ in length, or
            ``batch_size`` is not positive.
        """
        n = len(X)
        y = np.asarray(y)
        if n == 0:
            raise ValueError("X must contain at least one sample.")
        if len(y) != n:
            raise ValueError(
                f"X and y have inconsistent lengths: {n} != {len(y)}."
            )
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer.")

        # Fixed seed: the shuffling order is reproducible across calls.
        rng = np.random.default_rng(0)

        # One-hot targets
        y_hot = np.zeros((n, self.n_classes))
        y_hot[np.arange(n), y.astype(int)] = 1.0

        for _ in range(epochs):
            idx = rng.permutation(n)
            ep_loss = 0.0
            n_batches = 0

            for start in range(0, n, batch_size):
                mb = idx[start:start + batch_size]
                Xb = X[mb]
                yb = y_hot[mb]

                out, _ = self._forward(Xb)
                # 1e-8 guards log(0) when a probability underflows
                loss = float(-np.mean(np.sum(yb * np.log(out + 1e-8), axis=1)))
                ep_loss += loss
                n_batches += 1

                self._backward(yb, out)

            self.losses_.append(ep_loss / n_batches)

        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return class predictions for X."""
        out, _ = self._forward(X)
        return np.argmax(out, axis=1)

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """Return softmax class probabilities."""
        out, _ = self._forward(X)
        return out