scratchkit 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. mlscratch/__init__.py +56 -0
  2. mlscratch/__main__.py +118 -0
  3. mlscratch/bayesian/__init__.py +53 -0
  4. mlscratch/bayesian/bayesian_linear_regression.py +171 -0
  5. mlscratch/bayesian/bayesian_network.py +248 -0
  6. mlscratch/bayesian/bayesian_nn.py +315 -0
  7. mlscratch/bayesian/gaussian_process.py +207 -0
  8. mlscratch/bayesian/hmm.py +277 -0
  9. mlscratch/bayesian/init.py +52 -0
  10. mlscratch/bayesian/kalman_filter.py +182 -0
  11. mlscratch/bayesian/naive_bayes.py +209 -0
  12. mlscratch/metrics/__init__.py +59 -0
  13. mlscratch/metrics/classification.py +365 -0
  14. mlscratch/metrics/regression.py +79 -0
  15. mlscratch/neural/__init__.py +121 -0
  16. mlscratch/neural/attention.py +420 -0
  17. mlscratch/neural/autoencoder.py +543 -0
  18. mlscratch/neural/boltzmann.py +231 -0
  19. mlscratch/neural/cnn.py +593 -0
  20. mlscratch/neural/cvnn.py +322 -0
  21. mlscratch/neural/gan.py +364 -0
  22. mlscratch/neural/hopfield.py +193 -0
  23. mlscratch/neural/perceptron.py +398 -0
  24. mlscratch/neural/rbf_network.py +230 -0
  25. mlscratch/neural/recurrent.py +569 -0
  26. mlscratch/preprocessing/__init__.py +38 -0
  27. mlscratch/preprocessing/encoders.py +140 -0
  28. mlscratch/preprocessing/model_selection.py +119 -0
  29. mlscratch/preprocessing/polynomial.py +105 -0
  30. mlscratch/preprocessing/scalers.py +220 -0
  31. mlscratch/py.typed +0 -0
  32. mlscratch/reinforcement/__init__.py +59 -0
  33. mlscratch/reinforcement/ddpg.py +363 -0
  34. mlscratch/reinforcement/dqn.py +319 -0
  35. mlscratch/reinforcement/ppo.py +452 -0
  36. mlscratch/reinforcement/q_learning.py +352 -0
  37. mlscratch/reinforcement/sac.py +382 -0
  38. mlscratch/reinforcement/utils.py +594 -0
  39. mlscratch/supervised/__init__.py +76 -0
  40. mlscratch/supervised/_validation.py +50 -0
  41. mlscratch/supervised/adaboost.py +255 -0
  42. mlscratch/supervised/decision_tree.py +495 -0
  43. mlscratch/supervised/gradient_boosting.py +354 -0
  44. mlscratch/supervised/knn.py +234 -0
  45. mlscratch/supervised/lasso_regression.py +125 -0
  46. mlscratch/supervised/linear_models.py +459 -0
  47. mlscratch/supervised/linear_regression.py +197 -0
  48. mlscratch/supervised/logistic_regression.py +119 -0
  49. mlscratch/supervised/naive_bayes.py +113 -0
  50. mlscratch/supervised/random_forest.py +321 -0
  51. mlscratch/supervised/ridge_regression.py +93 -0
  52. mlscratch/supervised/svm.py +356 -0
  53. mlscratch/unsupervised/__init__.py +39 -0
  54. mlscratch/unsupervised/apriori.py +178 -0
  55. mlscratch/unsupervised/dbscan.py +141 -0
  56. mlscratch/unsupervised/gmm.py +204 -0
  57. mlscratch/unsupervised/hierarchical_clustering.py +137 -0
  58. mlscratch/unsupervised/ica.py +167 -0
  59. mlscratch/unsupervised/kmeans.py +135 -0
  60. mlscratch/unsupervised/kmedoids.py +133 -0
  61. mlscratch/unsupervised/pca.py +103 -0
  62. mlscratch/unsupervised/tsne.py +200 -0
  63. scratchkit-0.2.0.dist-info/METADATA +241 -0
  64. scratchkit-0.2.0.dist-info/RECORD +68 -0
  65. scratchkit-0.2.0.dist-info/WHEEL +5 -0
  66. scratchkit-0.2.0.dist-info/entry_points.txt +2 -0
  67. scratchkit-0.2.0.dist-info/licenses/LICENSE +201 -0
  68. scratchkit-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,569 @@
1
+ """
2
+ Recurrent Neural Networks
3
+ ==========================
4
+ Sequential-data architectures that maintain a hidden state across timesteps.
5
+
6
+ SimpleRNN
7
+ ---------
8
+ Elman recurrent network:
9
+ h_t = tanh(W_xh x_t + W_hh h_{t-1} + b_h)
10
+ y_t = W_hy h_t + b_y (output layer, optional)
11
+
12
+ LSTMCell / LSTM
13
+ ---------------
14
+ Long Short-Term Memory (Hochreiter & Schmidhuber, 1997).
15
+ Four gates operating on the concatenated [x_t; h_{t-1}]:
16
+
17
+ i_t = σ(W_i [x_t; h_{t-1}] + b_i) input gate
18
+ f_t = σ(W_f [x_t; h_{t-1}] + b_f) forget gate
19
+ g_t = tanh(W_g [x_t; h_{t-1}] + b_g) cell gate (candidate)
20
+ o_t = σ(W_o [x_t; h_{t-1}] + b_o) output gate
21
+ c_t = f_t ⊙ c_{t-1} + i_t ⊙ g_t
22
+ h_t = o_t ⊙ tanh(c_t)
23
+
24
+ EncoderDecoder
25
+ --------------
26
+ Sequence-to-sequence architecture with an RNN encoder that compresses
27
+ an input sequence to a context vector, and an RNN decoder that
28
+ unrolls to produce the output sequence.
29
+
30
+ References
31
+ ----------
32
+ Elman, J. (1990). Finding structure in time. Cognitive Science, 14(2), 179-211.
33
+ Hochreiter & Schmidhuber (1997). Long short-term memory. Neural Computation.
34
+ Sutskever et al. (2014). Sequence to sequence learning with neural networks. NeurIPS.
35
+
36
+ Only numpy is used.
37
+ """
38
+
39
+ from __future__ import annotations
40
+
41
+ import numpy as np
42
+
43
+
44
+ # ============================================================
45
+ # Helpers
46
+ # ============================================================
47
+
48
def _sigmoid(x: np.ndarray) -> np.ndarray:
    """Element-wise logistic function ``1 / (1 + exp(-x))``.

    The argument is clamped to ``[-500, 500]`` before it is exponentiated,
    which keeps ``np.exp`` inside the finite float64 range for extreme
    inputs.
    """
    clamped = np.clip(x, -500, 500)
    denominator = 1.0 + np.exp(-clamped)
    return 1.0 / denominator
50
+
51
+
52
def _softmax(x: np.ndarray) -> np.ndarray:
    """Softmax over the last axis of ``x``.

    The maximum along the last axis is subtracted before exponentiation;
    this leaves the result unchanged mathematically but keeps the
    exponentials from overflowing.
    """
    shifted = x - x.max(axis=-1, keepdims=True)
    weights = np.exp(shifted)
    normaliser = weights.sum(axis=-1, keepdims=True)
    return weights / normaliser
55
+
56
+
57
+ # ============================================================
58
+ # SimpleRNN
59
+ # ============================================================
60
+
61
class SimpleRNN:
    """
    Simple Elman RNN.

    Supports sequence classification (uses final hidden state),
    sequence regression, and returning all hidden states.

    Parameters
    ----------
    input_size : int
    hidden_size : int
    output_size : int or None
        If None, the network is a feature extractor (returns hidden states).
    return_sequences : bool
        If True, return hidden state at every timestep.
        If False (default), return only the final hidden state.
        Only affects ``forward``/``predict``; ``fit`` always trains on the
        projection of the final hidden state.
    learning_rate : float
    epochs : int
    random_state : int or None
    """

    # Element-wise bound applied to every gradient before the SGD update.
    _GRAD_CLIP = 5

    def __init__(
        self,
        input_size: int,
        hidden_size: int,
        output_size: int | None = None,
        return_sequences: bool = False,
        learning_rate: float = 1e-3,
        epochs: int = 50,
        random_state: int | None = None,
    ) -> None:
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.return_sequences = return_sequences
        self.learning_rate = learning_rate
        self.epochs = epochs
        self._rng = np.random.default_rng(random_state)

        self._init_params()
        self.losses_: list[float] = []

    # ------------------------------------------------------------------
    # Init
    # ------------------------------------------------------------------

    def _init_params(self) -> None:
        """Draw He-scaled Gaussian weights and zero biases."""
        D, H = self.input_size, self.hidden_size
        s_xh = np.sqrt(2.0 / D)
        s_hh = np.sqrt(2.0 / H)

        self.W_xh = self._rng.normal(0, s_xh, (D, H))
        self.W_hh = self._rng.normal(0, s_hh, (H, H))
        self.b_h = np.zeros(H)

        if self.output_size is not None:
            self.W_hy = self._rng.normal(0, np.sqrt(2.0 / H), (H, self.output_size))
            self.b_y = np.zeros(self.output_size)

    # ------------------------------------------------------------------
    # Forward
    # ------------------------------------------------------------------

    def forward(self, X: np.ndarray) -> np.ndarray:
        """
        Forward pass through the RNN.

        Parameters
        ----------
        X : array-like of shape (seq_len, input_size) or
            (batch, seq_len, input_size)

        Returns
        -------
        ndarray — hidden states (and optionally output projections).
        The leading batch axis is present only if the input had one.

        Raises
        ------
        ValueError
            If X is neither 2-D nor 3-D.
        """
        X = np.asarray(X)
        if X.ndim not in (2, 3):
            raise ValueError(
                "X must have shape (seq_len, input_size) or "
                f"(batch, seq_len, input_size); got {X.shape}."
            )
        batched = X.ndim == 3
        if not batched:
            X = X[np.newaxis, :]  # (1, T, D)

        B, T, _ = X.shape
        h = np.zeros((B, self.hidden_size))
        hidden_states = []

        for t in range(T):
            h = np.tanh(X[:, t, :] @ self.W_xh + h @ self.W_hh + self.b_h)
            hidden_states.append(h)

        hidden_states = np.stack(hidden_states, axis=1)  # (B, T, H)

        if self.return_sequences:
            out = hidden_states
        else:
            out = hidden_states[:, -1, :]  # (B, H)

        if self.output_size is not None:
            out = out @ self.W_hy + self.b_y

        return out if batched else out[0]

    # ------------------------------------------------------------------
    # Training
    # ------------------------------------------------------------------

    def fit(self, X: np.ndarray, y: np.ndarray) -> "SimpleRNN":
        """
        Train the RNN on sequences X with targets y.

        Uses per-sample SGD with full (untruncated) backpropagation through
        time. Samples are visited in a fresh random order every epoch and
        the mean per-sample squared error of each epoch is appended to
        ``losses_``.

        Parameters
        ----------
        X : ndarray (n_samples, seq_len, input_size), or a sequence of
            (seq_len, input_size) arrays
        y : array-like (n_samples,) or (n_samples, output_size)

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``output_size`` is None, X is empty, X and y disagree in
            length, or a sample is not a 2-D sequence.
        """
        if self.output_size is None:
            raise ValueError("output_size must be set to use fit().")

        y = np.asarray(y, dtype=float)
        n = len(X)
        if n == 0:
            # Guard the per-epoch `loss / n` below.
            raise ValueError("X must contain at least one sequence.")
        if y.ndim == 0 or y.shape[0] != n:
            raise ValueError(
                f"X and y must have the same number of samples; "
                f"got {n} sequences and targets of shape {y.shape}."
            )

        self.losses_ = []

        for _ in range(self.epochs):
            epoch_loss = 0.0
            for i in self._rng.permutation(n):
                # Converted per sample so X may be any sequence of 2-D arrays.
                xi = np.asarray(X[i], dtype=float)  # (T, D)
                if xi.ndim != 2:
                    raise ValueError(
                        "each sample must have shape (seq_len, input_size); "
                        f"sample {i} has shape {xi.shape}."
                    )
                target = y[i:i + 1].reshape(1, -1)
                epoch_loss += self._train_step(xi, target)

            self.losses_.append(epoch_loss / n)

        return self

    def _train_step(self, xi: np.ndarray, target: np.ndarray) -> float:
        """Run forward + BPTT + one SGD update on a single sequence.

        Returns the mean squared error of the sample before the update.
        """
        T_len = xi.shape[0]
        H = self.hidden_size
        lr = self.learning_rate

        # Forward; hs[0] is the zero initial state, hs[t + 1] the state after step t.
        hs = np.zeros((T_len + 1, H))
        for t in range(T_len):
            hs[t + 1] = np.tanh(
                xi[t:t + 1] @ self.W_xh + hs[t:t + 1] @ self.W_hh + self.b_h
            )

        out = hs[-1:] @ self.W_hy + self.b_y
        error = out - target
        loss = float(np.mean(error ** 2))

        # Backward through output layer.
        # NOTE: 2 * error is the gradient of the *summed* squared error,
        # while the reported loss is the mean over outputs.
        d_out = 2.0 * error
        dW_hy = hs[-1:].T @ d_out
        db_y = d_out[0]  # row 0 keeps shape (output_size,) even when it is 1

        # BPTT
        dh_next = d_out @ self.W_hy.T
        dW_xh = np.zeros_like(self.W_xh)
        dW_hh = np.zeros_like(self.W_hh)
        db_h = np.zeros(H)

        for t in reversed(range(T_len)):
            dtanh = dh_next * (1.0 - hs[t + 1] ** 2)
            dW_xh += xi[t:t + 1].T @ dtanh
            dW_hh += hs[t:t + 1].T @ dtanh
            db_h += dtanh[0]
            dh_next = dtanh @ self.W_hh.T

        # Clip gradients element-wise, in place.
        clip = self._GRAD_CLIP
        for grad in (dW_xh, dW_hh, dW_hy, db_h, db_y):
            np.clip(grad, -clip, clip, out=grad)

        self.W_xh -= lr * dW_xh
        self.W_hh -= lr * dW_hh
        self.b_h -= lr * db_h
        self.W_hy -= lr * dW_hy
        self.b_y -= lr * db_y

        return loss

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Run forward pass on X."""
        return self.forward(X)
238
+
239
+
240
+ # ============================================================
241
+ # LSTMCell
242
+ # ============================================================
243
+
244
class LSTMCell:
    """
    A single LSTM cell — stateful, processes one timestep at a time.

    The hidden state ``h_t`` and cell state ``c_t`` are kept on the
    instance with shape (1, hidden_size) and are updated by every call to
    ``forward``. Call ``reset_state`` before starting a new sequence.

    Parameters
    ----------
    input_size : int
    hidden_size : int
    random_state : int or None
    """

    def __init__(
        self,
        input_size: int,
        hidden_size: int,
        random_state: int | None = None,
    ) -> None:
        self.input_size = input_size
        self.hidden_size = hidden_size
        self._rng = np.random.default_rng(random_state)

        H, D = hidden_size, input_size
        scale = np.sqrt(2.0 / (D + H))
        # Single stacked weight matrix for efficiency: row blocks [i, f, g, o]
        self.W = self._rng.normal(0, scale, (4 * H, D + H))
        self.b = np.zeros(4 * H)

        self.reset_state()

    def reset_state(self) -> None:
        """Reset hidden and cell state to zeros."""
        H = self.hidden_size
        self.h_t = np.zeros((1, H))
        self.c_t = np.zeros((1, H))

    def forward(self, x_t: np.ndarray) -> np.ndarray:
        """
        Process one timestep and advance the internal state.

        Parameters
        ----------
        x_t : ndarray of shape (input_size,) or (1, input_size)

        Returns
        -------
        h_t : ndarray of shape (hidden_size,)
            A copy of the new hidden state; always 1-D, including when
            ``hidden_size == 1``.

        Raises
        ------
        ValueError
            If x_t does not describe exactly one input vector.
        """
        x_t = np.atleast_2d(x_t)  # (1, D)
        if x_t.shape != (1, self.input_size):
            raise ValueError(
                f"x_t must have shape ({self.input_size},) or "
                f"(1, {self.input_size}); got {x_t.shape}."
            )

        xh = np.concatenate([x_t, self.h_t], axis=1)  # (1, D+H)
        gates = xh @ self.W.T + self.b                # (1, 4H)

        H = self.hidden_size
        i_t = _sigmoid(gates[:, :H])
        f_t = _sigmoid(gates[:, H:2 * H])
        g_t = np.tanh(gates[:, 2 * H:3 * H])
        o_t = _sigmoid(gates[:, 3 * H:])

        self.c_t = f_t * self.c_t + i_t * g_t
        self.h_t = o_t * np.tanh(self.c_t)

        # Index the single batch row instead of squeeze(): squeeze() would
        # also drop the feature axis when hidden_size == 1 and return a
        # 0-d array. Copy so callers cannot mutate the stored state.
        return self.h_t[0].copy()
305
+
306
+
307
+ # ============================================================
308
+ # LSTM (multi-layer, with optional linear output head)
309
+ # ============================================================
310
+
311
class LSTM:
    """
    Multi-layer LSTM for sequence modelling.

    Parameters
    ----------
    input_size : int
    hidden_size : int
    num_layers : int
        Number of stacked LSTM layers (must be >= 1).
    output_size : int or None
        If set, a linear projection layer is added on top of the last
        layer's hidden state(s).
    return_sequences : bool
        Return all hidden states (True) or just the final one (False).
    dropout : float
        Dropout probability applied between LSTM layers (0 = no dropout).
    random_state : int or None
        Integer seed for reproducible weights. With None, every cell is
        initialised from fresh entropy.

    Raises
    ------
    ValueError
        If ``num_layers`` is smaller than 1.
    """

    def __init__(
        self,
        input_size: int,
        hidden_size: int,
        num_layers: int = 1,
        output_size: int | None = None,
        return_sequences: bool = False,
        dropout: float = 0.0,
        random_state: int | None = None,
    ) -> None:
        if num_layers < 1:
            # Without any layer forward() has nothing to return.
            raise ValueError(f"num_layers must be >= 1; got {num_layers}.")

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size
        self.return_sequences = return_sequences
        self.dropout = dropout
        self._rng = np.random.default_rng(random_state)

        # Build one cell per layer; layer 0 consumes the raw input, every
        # later layer consumes the previous layer's hidden states.
        layer_input = input_size
        self.cells: list[LSTMCell] = []
        for i in range(num_layers):
            # Seeded runs keep the per-layer seeds random_state + i.
            # An unseeded run must stay unseeded: mapping None to 0 would
            # make random_state=None silently deterministic.
            seed = None if random_state is None else random_state + i
            self.cells.append(LSTMCell(layer_input, hidden_size, seed))
            layer_input = hidden_size

        # Optional linear output head
        if output_size is not None:
            scale = np.sqrt(2.0 / hidden_size)
            self.W_out = self._rng.normal(0, scale, (hidden_size, output_size))
            self.b_out = np.zeros(output_size)
        else:
            self.W_out = None
            self.b_out = None

    def reset_states(self) -> None:
        """Reset all cell hidden and cell states."""
        for cell in self.cells:
            cell.reset_state()

    def forward(self, X: np.ndarray, training: bool = False) -> np.ndarray:
        """
        Forward pass through the stacked LSTM.

        Parameters
        ----------
        X : array-like of shape (seq_len, input_size) or
            (batch, seq_len, input_size)
        training : bool
            If True and dropout > 0, apply dropout between layers.

        Returns
        -------
        ndarray — shape depends on return_sequences and output_size

        Raises
        ------
        ValueError
            If a sequence is empty.
        """
        X = np.asarray(X)
        if X.ndim == 3:
            # The cells hold state for one sequence only, so batch members
            # are processed independently, one after another.
            return np.stack([self._forward_single(seq, training) for seq in X])
        return self._forward_single(X, training)

    def _forward_single(self, X: np.ndarray, training: bool) -> np.ndarray:
        """Forward pass for a single (unbatched) sequence (T, D)."""
        if len(X) == 0:
            raise ValueError("cannot run the LSTM on an empty sequence.")

        # Reset states for fresh inference
        self.reset_states()

        keep_scale = 1.0 - self.dropout + 1e-8
        last_layer = self.num_layers - 1
        current_input = X  # (T, D)

        for layer_idx, cell in enumerate(self.cells):
            # atleast_1d keeps each step 1-D even if the cell hands back a
            # 0-d value for hidden_size == 1; np.stack copies the data.
            layer_outputs = np.stack(
                [np.atleast_1d(cell.forward(step)) for step in current_input]
            )  # (T, H)

            # Inverted dropout between layers (not on last layer)
            if training and self.dropout > 0 and layer_idx < last_layer:
                mask = (self._rng.random(layer_outputs.shape) > self.dropout).astype(float)
                layer_outputs = layer_outputs * mask / keep_scale

            current_input = layer_outputs

        final_hidden = current_input  # (T, H) from last layer
        out = final_hidden if self.return_sequences else final_hidden[-1]

        if self.W_out is not None:
            out = out @ self.W_out + self.b_out

        return out
431
+
432
+
433
+ # ============================================================
434
+ # Encoder-Decoder (Seq2Seq)
435
+ # ============================================================
436
+
437
class EncoderDecoder:
    """
    Sequence-to-sequence Encoder-Decoder with RNN encoder and decoder.

    The encoder reads the input sequence and produces a context vector
    (its final hidden state). The decoder starts from a zero hidden state
    and receives that context vector as its input at every step while it
    unrolls to generate the output sequence.

    Parameters
    ----------
    input_vocab_size : int
        Vocabulary size of the input sequence (one-hot encoded).
    output_vocab_size : int
        Vocabulary size of the output sequence.
    hidden_size : int
        Hidden state size for both encoder and decoder.
    random_state : int or None
    """

    def __init__(
        self,
        input_vocab_size: int,
        output_vocab_size: int,
        hidden_size: int,
        random_state: int | None = None,
    ) -> None:
        self.input_vocab_size = input_vocab_size
        self.output_vocab_size = output_vocab_size
        self.hidden_size = hidden_size
        self._rng = np.random.default_rng(random_state)

        self._init_params()

    @staticmethod
    def _he_std(fan_in: int) -> float:
        """Standard deviation sqrt(2 / fan_in) used for every weight matrix."""
        return np.sqrt(2.0 / fan_in)

    def _init_params(self) -> None:
        """Draw Gaussian weights and zero biases for encoder and decoder."""
        D_in = self.input_vocab_size
        D_out = self.output_vocab_size
        H = self.hidden_size
        std = self._he_std

        # Encoder
        self.We_xh = self._rng.normal(0, std(D_in), (D_in, H))
        self.We_hh = self._rng.normal(0, std(H), (H, H))
        self.be_h = np.zeros(H)

        # Decoder (its per-step input is the H-dimensional context vector)
        self.Wd_xh = self._rng.normal(0, std(H), (H, H))
        self.Wd_hh = self._rng.normal(0, std(H), (H, H))
        self.bd_h = np.zeros(H)

        # Decoder output projection
        self.Wd_hy = self._rng.normal(0, std(H), (H, D_out))
        self.bd_y = np.zeros(D_out)

    def _encode(self, X_one_hot: np.ndarray) -> np.ndarray:
        """
        Encode input sequence into a context vector.

        Parameters
        ----------
        X_one_hot : ndarray (seq_len, input_vocab_size)

        Returns
        -------
        context : ndarray (hidden_size,) — final hidden state; all zeros
            for an empty sequence
        """
        h = np.zeros(self.hidden_size)
        for x_t in X_one_hot:
            h = np.tanh(x_t @ self.We_xh + h @ self.We_hh + self.be_h)
        return h

    def _decode(self, context: np.ndarray, output_len: int) -> np.ndarray:
        """
        Decode context vector into an output sequence.

        Parameters
        ----------
        context : ndarray (hidden_size,)
        output_len : int

        Returns
        -------
        outputs : ndarray (output_len, output_vocab_size)
            One softmax distribution per step; shape (0, output_vocab_size)
            when ``output_len`` is 0.

        Raises
        ------
        ValueError
            If ``output_len`` is negative.
        """
        if output_len < 0:
            raise ValueError(f"output_len must be >= 0; got {output_len}.")
        if output_len == 0:
            # np.stack cannot stack an empty list; return a well-shaped
            # empty result instead.
            return np.empty((0, self.output_vocab_size))

        # The context is the same at every step, so project it once.
        context_drive = context @ self.Wd_xh
        h = np.zeros(self.hidden_size)
        outputs = []

        for _ in range(output_len):
            h = np.tanh(context_drive + h @ self.Wd_hh + self.bd_h)
            # Softmax over the 1-D logits directly: a reshape/squeeze round
            # trip would collapse to 0-d when output_vocab_size == 1.
            outputs.append(_softmax(h @ self.Wd_hy + self.bd_y))

        return np.stack(outputs)

    def forward(
        self,
        input_sequence: np.ndarray,
        output_len: int | None = None,
    ) -> np.ndarray:
        """
        Encode input_sequence and decode to output_len tokens.

        Parameters
        ----------
        input_sequence : ndarray (seq_len, input_vocab_size)
            One-hot encoded input.
        output_len : int or None
            Target sequence length. Defaults to len(input_sequence).

        Returns
        -------
        outputs : ndarray (output_len, output_vocab_size)
        """
        if output_len is None:
            output_len = len(input_sequence)
        context = self._encode(input_sequence)
        return self._decode(context, output_len)

    def predict_sequence(self, input_sequence: np.ndarray) -> np.ndarray:
        """
        Predict token indices for an input sequence.

        Returns
        -------
        ndarray of shape (output_len,) — integer token indices (argmax of
        each step's output distribution)
        """
        outputs = self.forward(input_sequence)
        return np.argmax(outputs, axis=1)
@@ -0,0 +1,38 @@
1
+ """
2
+ mlscratch.preprocessing
3
+ =========================
4
+ Feature scaling, categorical encoding, polynomial feature expansion,
5
+ and train/test splitting utilities — pure numpy, sklearn-familiar API.
6
+
7
+ Scalers
8
+ -------
9
+ StandardScaler, MinMaxScaler, RobustScaler, Normalizer
10
+
11
+ Encoders
12
+ --------
13
+ LabelEncoder, OneHotEncoder
14
+
15
+ Feature expansion
16
+ ------------------
17
+ PolynomialFeatures
18
+
19
+ Splitting
20
+ ---------
21
+ train_test_split
22
+ """
23
+
24
+ from .encoders import LabelEncoder, OneHotEncoder # noqa: F401
25
+ from .model_selection import train_test_split # noqa: F401
26
+ from .polynomial import PolynomialFeatures # noqa: F401
27
+ from .scalers import MinMaxScaler, Normalizer, RobustScaler, StandardScaler # noqa: F401
28
+
29
# Names re-exported as the public API of the preprocessing package,
# grouped the same way as in the module docstring.
__all__ = [
    # Scalers
    "StandardScaler", "MinMaxScaler", "RobustScaler", "Normalizer",
    # Encoders
    "LabelEncoder", "OneHotEncoder",
    # Feature expansion
    "PolynomialFeatures",
    # Splitting
    "train_test_split",
]