scratchkit 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlscratch/__init__.py +56 -0
- mlscratch/__main__.py +118 -0
- mlscratch/bayesian/__init__.py +53 -0
- mlscratch/bayesian/bayesian_linear_regression.py +171 -0
- mlscratch/bayesian/bayesian_network.py +248 -0
- mlscratch/bayesian/bayesian_nn.py +315 -0
- mlscratch/bayesian/gaussian_process.py +207 -0
- mlscratch/bayesian/hmm.py +277 -0
- mlscratch/bayesian/init.py +52 -0
- mlscratch/bayesian/kalman_filter.py +182 -0
- mlscratch/bayesian/naive_bayes.py +209 -0
- mlscratch/metrics/__init__.py +59 -0
- mlscratch/metrics/classification.py +365 -0
- mlscratch/metrics/regression.py +79 -0
- mlscratch/neural/__init__.py +121 -0
- mlscratch/neural/attention.py +420 -0
- mlscratch/neural/autoencoder.py +543 -0
- mlscratch/neural/boltzmann.py +231 -0
- mlscratch/neural/cnn.py +593 -0
- mlscratch/neural/cvnn.py +322 -0
- mlscratch/neural/gan.py +364 -0
- mlscratch/neural/hopfield.py +193 -0
- mlscratch/neural/perceptron.py +398 -0
- mlscratch/neural/rbf_network.py +230 -0
- mlscratch/neural/recurrent.py +569 -0
- mlscratch/preprocessing/__init__.py +38 -0
- mlscratch/preprocessing/encoders.py +140 -0
- mlscratch/preprocessing/model_selection.py +119 -0
- mlscratch/preprocessing/polynomial.py +105 -0
- mlscratch/preprocessing/scalers.py +220 -0
- mlscratch/py.typed +0 -0
- mlscratch/reinforcement/__init__.py +59 -0
- mlscratch/reinforcement/ddpg.py +363 -0
- mlscratch/reinforcement/dqn.py +319 -0
- mlscratch/reinforcement/ppo.py +452 -0
- mlscratch/reinforcement/q_learning.py +352 -0
- mlscratch/reinforcement/sac.py +382 -0
- mlscratch/reinforcement/utils.py +594 -0
- mlscratch/supervised/__init__.py +76 -0
- mlscratch/supervised/_validation.py +50 -0
- mlscratch/supervised/adaboost.py +255 -0
- mlscratch/supervised/decision_tree.py +495 -0
- mlscratch/supervised/gradient_boosting.py +354 -0
- mlscratch/supervised/knn.py +234 -0
- mlscratch/supervised/lasso_regression.py +125 -0
- mlscratch/supervised/linear_models.py +459 -0
- mlscratch/supervised/linear_regression.py +197 -0
- mlscratch/supervised/logistic_regression.py +119 -0
- mlscratch/supervised/naive_bayes.py +113 -0
- mlscratch/supervised/random_forest.py +321 -0
- mlscratch/supervised/ridge_regression.py +93 -0
- mlscratch/supervised/svm.py +356 -0
- mlscratch/unsupervised/__init__.py +39 -0
- mlscratch/unsupervised/apriori.py +178 -0
- mlscratch/unsupervised/dbscan.py +141 -0
- mlscratch/unsupervised/gmm.py +204 -0
- mlscratch/unsupervised/hierarchical_clustering.py +137 -0
- mlscratch/unsupervised/ica.py +167 -0
- mlscratch/unsupervised/kmeans.py +135 -0
- mlscratch/unsupervised/kmedoids.py +133 -0
- mlscratch/unsupervised/pca.py +103 -0
- mlscratch/unsupervised/tsne.py +200 -0
- scratchkit-0.2.0.dist-info/METADATA +241 -0
- scratchkit-0.2.0.dist-info/RECORD +68 -0
- scratchkit-0.2.0.dist-info/WHEEL +5 -0
- scratchkit-0.2.0.dist-info/entry_points.txt +2 -0
- scratchkit-0.2.0.dist-info/licenses/LICENSE +201 -0
- scratchkit-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Restricted Boltzmann Machine (RBM)
|
|
3
|
+
====================================
|
|
4
|
+
A bipartite, undirected probabilistic graphical model with visible units
|
|
5
|
+
v and hidden units h, trained to model the joint distribution P(v, h)
|
|
6
|
+
via an energy function (Smolensky, 1986; Hinton, 2002).
|
|
7
|
+
|
|
8
|
+
Energy function
|
|
9
|
+
-----------------
|
|
10
|
+
E(v, h) = -aᵗv - bᵗh - vᵗWh
|
|
11
|
+
|
|
12
|
+
Conditional distributions (Gibbs sampling)
|
|
13
|
+
---------------------------------------------
|
|
14
|
+
P(h_j=1 | v) = σ(b_j + Σ_i v_i W_ij)
|
|
15
|
+
P(v_i=1 | h) = σ(a_i + Σ_j h_j W_ij)
|
|
16
|
+
|
|
17
|
+
Training — Contrastive Divergence (CD-k)
|
|
18
|
+
-------------------------------------------
|
|
19
|
+
1. v⁰ = data
|
|
20
|
+
2. h⁰ ~ P(h | v⁰)
|
|
21
|
+
3. Repeat for t = 0, …, k-1: v^(t+1) ~ P(v | h^t), h^(t+1) ~ P(h | v^(t+1))
|
|
22
|
+
4. Update:
|
|
23
|
+
ΔW ∝ ⟨v⁰h⁰ᵗ⟩ - ⟨v^k h^kᵗ⟩
|
|
24
|
+
Δa ∝ v⁰ - v^k
|
|
25
|
+
Δb ∝ h⁰ - h^k
|
|
26
|
+
|
|
27
|
+
Free energy
|
|
28
|
+
-------------
|
|
29
|
+
F(v) = -aᵗv - Σ_j log(1 + exp(b_j + (Wᵗv)_j))
|
|
30
|
+
|
|
31
|
+
Used as a building block for Deep Belief Networks and for unsupervised
|
|
32
|
+
feature learning / dimensionality reduction.
|
|
33
|
+
|
|
34
|
+
Reference
|
|
35
|
+
----------
|
|
36
|
+
Hinton, G. E. (2002). Training products of experts by minimizing
|
|
37
|
+
contrastive divergence. Neural Computation, 14(8), 1771-1800.
|
|
38
|
+
|
|
39
|
+
Only numpy is used.
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
from __future__ import annotations
|
|
43
|
+
|
|
44
|
+
import numpy as np
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _sigmoid(x: np.ndarray) -> np.ndarray:
    """Element-wise logistic function 1 / (1 + exp(-x)).

    The argument is clamped to [-500, 500] before exponentiation.
    """
    bounded = np.clip(x, -500, 500)
    denominator = 1.0 + np.exp(-bounded)
    return 1.0 / denominator
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class RestrictedBoltzmannMachine:
    """
    Restricted Boltzmann Machine trained with Contrastive Divergence.

    Parameters
    ----------
    n_visible : int
        Number of visible units (columns of the training data).
    n_hidden : int
        Number of hidden units.
    learning_rate : float
        Step size applied to every CD parameter update.
    cd_k : int
        Number of Gibbs sampling steps for CD-k (default 1, i.e. CD-1).
    epochs : int
        Number of passes over the training data made by ``fit``.
    batch_size : int or None
        Mini-batch size. ``None`` (or 0) uses the whole data set as a
        single batch.
    random_state : int or None
        Seed forwarded to ``np.random.default_rng``.

    Attributes
    ----------
    W : ndarray of shape (n_visible, n_hidden)
        Weight matrix, initialised from N(0, 0.01²).
    a : ndarray of shape (n_visible,)
        Visible bias, initialised to zero.
    b : ndarray of shape (n_hidden,)
        Hidden bias, initialised to zero.
    reconstruction_errors_ : list of float
        One entry per epoch: the mean over mini-batches of the mean squared
        difference between the batch and its sampled CD-k reconstruction.
    """

    def __init__(
        self,
        n_visible: int,
        n_hidden: int,
        learning_rate: float = 0.05,
        cd_k: int = 1,
        epochs: int = 50,
        batch_size: int | None = 32,
        random_state: int | None = None,
    ) -> None:
        self.n_visible = n_visible
        self.n_hidden = n_hidden
        self.learning_rate = learning_rate
        self.cd_k = cd_k
        self.epochs = epochs
        self.batch_size = batch_size
        self._rng = np.random.default_rng(random_state)

        scale = 0.01
        self.W = self._rng.normal(0, scale, (n_visible, n_hidden))
        self.a = np.zeros(n_visible)   # visible bias
        self.b = np.zeros(n_hidden)    # hidden bias

        self.reconstruction_errors_: list[float] = []

    # ------------------------------------------------------------------
    # Sampling
    # ------------------------------------------------------------------

    def _sample_hidden(self, v: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Return (P(h=1|v), sampled h)."""
        p_h = _sigmoid(v @ self.W + self.b)
        # Bernoulli draw: a unit fires when a uniform variate falls below p.
        h = (self._rng.random(p_h.shape) < p_h).astype(float)
        return p_h, h

    def _sample_visible(self, h: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Return (P(v=1|h), sampled v)."""
        p_v = _sigmoid(h @ self.W.T + self.a)
        v = (self._rng.random(p_v.shape) < p_v).astype(float)
        return p_v, v

    # ------------------------------------------------------------------
    # Training — Contrastive Divergence (CD-k)
    # ------------------------------------------------------------------

    def fit(self, X: np.ndarray) -> "RestrictedBoltzmannMachine":
        """
        Train the RBM on binary (or [0,1]-valued) data X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_visible)

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If X is not 2-D with ``n_visible`` columns, if X has no rows,
            if ``cd_k`` is smaller than 1, or if ``batch_size`` is negative.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2 or X.shape[1] != self.n_visible:
            raise ValueError(
                f"X must have shape (n_samples, {self.n_visible}); "
                f"got {X.shape}"
            )
        n = X.shape[0]
        if n == 0:
            raise ValueError("X must contain at least one sample")
        if self.cd_k < 1:
            # With zero Gibbs steps there is no negative phase to contrast.
            raise ValueError("cd_k must be >= 1")
        if self.batch_size is not None and self.batch_size < 0:
            raise ValueError("batch_size must be a positive integer or None")

        bs = self.batch_size or n  # None (or 0) -> one full batch
        self.reconstruction_errors_ = []

        for _epoch in range(self.epochs):
            idx = self._rng.permutation(n)
            epoch_err = 0.0
            n_batches = 0

            for start in range(0, n, bs):
                mb = idx[start:start + bs]
                v0 = X[mb]
                B = len(v0)

                # Positive phase
                p_h0, h0 = self._sample_hidden(v0)

                # Gibbs chain for CD-k
                v_k, h_k = v0, h0
                for _step in range(self.cd_k):
                    _, v_k = self._sample_visible(h_k)
                    p_hk, h_k = self._sample_hidden(v_k)

                # Gradient (use probabilities, not samples, for hidden — standard trick)
                pos_assoc = v0.T @ p_h0
                neg_assoc = v_k.T @ p_hk

                self.W += self.learning_rate * (pos_assoc - neg_assoc) / B
                self.a += self.learning_rate * (v0 - v_k).mean(axis=0)
                self.b += self.learning_rate * (p_h0 - p_hk).mean(axis=0)

                epoch_err += float(np.mean((v0 - v_k) ** 2))
                n_batches += 1

            self.reconstruction_errors_.append(epoch_err / n_batches)

        return self

    # ------------------------------------------------------------------
    # Inference
    # ------------------------------------------------------------------

    def transform(self, X: np.ndarray) -> np.ndarray:
        """
        Compute hidden-unit activation probabilities P(h=1|v).

        Returns
        -------
        ndarray of shape (n_samples, n_hidden)
        """
        return _sigmoid(X @ self.W + self.b)

    def reconstruct(self, X: np.ndarray) -> np.ndarray:
        """
        One Gibbs step v → h → v̂ (probabilities, not samples).

        Returns
        -------
        ndarray of shape (n_samples, n_visible)
        """
        p_h = self.transform(X)
        return _sigmoid(p_h @ self.W.T + self.a)

    def free_energy(self, X: np.ndarray) -> np.ndarray:
        """
        Free energy F(v) = -aᵗv - Σ_j log(1 + exp(b_j + (Wᵗv)_j)).
        Lower free energy ⇒ more "typical" under the model.

        Returns
        -------
        ndarray of shape (n_samples,)
        """
        vbias_term = X @ self.a
        wx_b = X @ self.W + self.b
        # log(1 + exp(z)) computed as logaddexp(0, z): exp(z) is never formed
        # on its own, so large pre-activations stay finite instead of
        # overflowing to inf.
        hidden_term = np.sum(np.logaddexp(0.0, wx_b), axis=1)
        return -vbias_term - hidden_term

    def sample(
        self,
        n_samples: int,
        n_gibbs_steps: int = 1000,
        v_init: np.ndarray | None = None,
    ) -> np.ndarray:
        """
        Generate samples via Gibbs sampling from a random (or given) start.

        Parameters
        ----------
        n_samples : int
            Number of chains to start when ``v_init`` is None; not used
            when ``v_init`` is given.
        n_gibbs_steps : int
            Number of full Gibbs sweeps per chain.
        v_init : array-like of shape (n_samples, n_visible) or None
            Initial visible state. If None, random binary init.

        Returns
        -------
        ndarray of shape (n_samples, n_visible)
        """
        if v_init is None:
            v = (self._rng.random((n_samples, self.n_visible)) < 0.5).astype(float)
        else:
            # np.array always copies, so the caller's data is never mutated,
            # and plain nested lists are accepted as well as ndarrays.
            v = np.array(v_init, dtype=float)

        for _ in range(n_gibbs_steps):
            _, h = self._sample_hidden(v)
            _, v = self._sample_visible(h)

        return v
|