scratchkit 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. mlscratch/__init__.py +56 -0
  2. mlscratch/__main__.py +118 -0
  3. mlscratch/bayesian/__init__.py +53 -0
  4. mlscratch/bayesian/bayesian_linear_regression.py +171 -0
  5. mlscratch/bayesian/bayesian_network.py +248 -0
  6. mlscratch/bayesian/bayesian_nn.py +315 -0
  7. mlscratch/bayesian/gaussian_process.py +207 -0
  8. mlscratch/bayesian/hmm.py +277 -0
  9. mlscratch/bayesian/init.py +52 -0
  10. mlscratch/bayesian/kalman_filter.py +182 -0
  11. mlscratch/bayesian/naive_bayes.py +209 -0
  12. mlscratch/metrics/__init__.py +59 -0
  13. mlscratch/metrics/classification.py +365 -0
  14. mlscratch/metrics/regression.py +79 -0
  15. mlscratch/neural/__init__.py +121 -0
  16. mlscratch/neural/attention.py +420 -0
  17. mlscratch/neural/autoencoder.py +543 -0
  18. mlscratch/neural/boltzmann.py +231 -0
  19. mlscratch/neural/cnn.py +593 -0
  20. mlscratch/neural/cvnn.py +322 -0
  21. mlscratch/neural/gan.py +364 -0
  22. mlscratch/neural/hopfield.py +193 -0
  23. mlscratch/neural/perceptron.py +398 -0
  24. mlscratch/neural/rbf_network.py +230 -0
  25. mlscratch/neural/recurrent.py +569 -0
  26. mlscratch/preprocessing/__init__.py +38 -0
  27. mlscratch/preprocessing/encoders.py +140 -0
  28. mlscratch/preprocessing/model_selection.py +119 -0
  29. mlscratch/preprocessing/polynomial.py +105 -0
  30. mlscratch/preprocessing/scalers.py +220 -0
  31. mlscratch/py.typed +0 -0
  32. mlscratch/reinforcement/__init__.py +59 -0
  33. mlscratch/reinforcement/ddpg.py +363 -0
  34. mlscratch/reinforcement/dqn.py +319 -0
  35. mlscratch/reinforcement/ppo.py +452 -0
  36. mlscratch/reinforcement/q_learning.py +352 -0
  37. mlscratch/reinforcement/sac.py +382 -0
  38. mlscratch/reinforcement/utils.py +594 -0
  39. mlscratch/supervised/__init__.py +76 -0
  40. mlscratch/supervised/_validation.py +50 -0
  41. mlscratch/supervised/adaboost.py +255 -0
  42. mlscratch/supervised/decision_tree.py +495 -0
  43. mlscratch/supervised/gradient_boosting.py +354 -0
  44. mlscratch/supervised/knn.py +234 -0
  45. mlscratch/supervised/lasso_regression.py +125 -0
  46. mlscratch/supervised/linear_models.py +459 -0
  47. mlscratch/supervised/linear_regression.py +197 -0
  48. mlscratch/supervised/logistic_regression.py +119 -0
  49. mlscratch/supervised/naive_bayes.py +113 -0
  50. mlscratch/supervised/random_forest.py +321 -0
  51. mlscratch/supervised/ridge_regression.py +93 -0
  52. mlscratch/supervised/svm.py +356 -0
  53. mlscratch/unsupervised/__init__.py +39 -0
  54. mlscratch/unsupervised/apriori.py +178 -0
  55. mlscratch/unsupervised/dbscan.py +141 -0
  56. mlscratch/unsupervised/gmm.py +204 -0
  57. mlscratch/unsupervised/hierarchical_clustering.py +137 -0
  58. mlscratch/unsupervised/ica.py +167 -0
  59. mlscratch/unsupervised/kmeans.py +135 -0
  60. mlscratch/unsupervised/kmedoids.py +133 -0
  61. mlscratch/unsupervised/pca.py +103 -0
  62. mlscratch/unsupervised/tsne.py +200 -0
  63. scratchkit-0.2.0.dist-info/METADATA +241 -0
  64. scratchkit-0.2.0.dist-info/RECORD +68 -0
  65. scratchkit-0.2.0.dist-info/WHEEL +5 -0
  66. scratchkit-0.2.0.dist-info/entry_points.txt +2 -0
  67. scratchkit-0.2.0.dist-info/licenses/LICENSE +201 -0
  68. scratchkit-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,135 @@
1
+ r"""
2
+ K-Means Clustering
3
+ ===================
4
+
5
+ A classic unsupervised clustering algorithm using Lloyd's iteration with
6
+ K-Means++ initialization.
7
+
8
+ The objective minimized is:
9
+
10
+ .. math::
11
+ J = \sum_{i=1}^n \min_{1 \leq k \leq K} \|x_i - \mu_k\|^2
12
+
13
+ Complexity
14
+ ----------
15
+ - Training: O(n K d \cdot n\_iter)
16
+ - Inference: O(n K d)
17
+ - Space: O(K d)
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import numpy as np
23
+ from numpy.typing import ArrayLike, NDArray
24
+
25
+ FloatArray = NDArray[np.float64]
26
+
27
+
28
+ def _validate_input(X: ArrayLike) -> FloatArray:
29
+ X_arr = np.asarray(X, dtype=float)
30
+ if X_arr.ndim != 2:
31
+ raise ValueError("X must be a 2D array of shape (n_samples, n_features).")
32
+ return X_arr
33
+
34
+
35
class KMeans:
    """K-Means clustering (Lloyd's iteration) with K-Means++ initialization.

    Parameters
    ----------
    n_clusters : int, default=8
        The number of clusters to form.
    max_iter : int, default=300
        Maximum number of iterations of the k-means algorithm for a single run.
    tol : float, default=1e-4
        Convergence tolerance. The algorithm stops when the largest centroid
        movement (Euclidean norm) is less than or equal to this threshold.
    random_state : int | None, default=None
        Seed for centroid initialization and for re-seeding empty clusters.

    Attributes
    ----------
    cluster_centers_ : ndarray of shape (n_clusters, n_features) or None
        Final centroids; ``None`` until :meth:`fit` has been called.
    labels_ : ndarray of shape (n_samples,) or None
        Index of the closest final centroid for every training sample.
    inertia_ : float or None
        Sum of squared distances of training samples to their centroid.
    n_iter_ : int or None
        Number of Lloyd iterations that were run.
    """

    def __init__(
        self,
        n_clusters: int = 8,
        max_iter: int = 300,
        tol: float = 1e-4,
        random_state: int | None = None,
    ) -> None:
        self.n_clusters = int(n_clusters)
        self.max_iter = int(max_iter)
        self.tol = float(tol)
        self.random_state = random_state
        self.cluster_centers_: FloatArray | None = None
        self.labels_: NDArray[np.int64] | None = None
        self.inertia_: float | None = None
        self.n_iter_: int | None = None

    def fit(self, X: ArrayLike) -> "KMeans":
        """Run K-Means on ``X`` and store the fitted attributes.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        KMeans
            ``self``, to allow call chaining.

        Raises
        ------
        ValueError
            If ``X`` is rejected by ``_validate_input``, if ``n_clusters`` is
            not in ``[1, n_samples]``, or if ``max_iter`` is smaller than 1.
        """
        X_arr = _validate_input(X)
        n_samples, n_features = X_arr.shape
        if self.n_clusters <= 0 or self.n_clusters > n_samples:
            raise ValueError("n_clusters must be between 1 and n_samples.")
        if self.max_iter < 1:
            # With zero iterations there would be no assignment to report.
            raise ValueError("max_iter must be at least 1.")

        rng = np.random.default_rng(self.random_state)
        centers = self._initialize_centroids(X_arr, rng)

        n_iter = 0
        for n_iter in range(1, self.max_iter + 1):
            labels = self._assign_clusters(X_arr, centers)
            # Share one generator so several empty clusters are not all
            # re-seeded to the same sample.
            new_centers = self._compute_centers(X_arr, labels, n_features, rng)

            shift = np.linalg.norm(centers - new_centers, axis=1).max()
            centers = new_centers
            if shift <= self.tol:
                break

        # The loop's last assignment was made against the previous centroids;
        # reassign once so labels_ and inertia_ match cluster_centers_ and
        # predict(X) reproduces labels_.
        labels = self._assign_clusters(X_arr, centers)

        self.cluster_centers_ = centers
        self.labels_ = labels
        self.inertia_ = float(self._compute_inertia(X_arr, centers, labels))
        self.n_iter_ = n_iter
        return self

    def predict(self, X: ArrayLike) -> NDArray[np.int64]:
        """Return the index of the closest fitted centroid for each row of ``X``.

        Raises
        ------
        RuntimeError
            If the model has not been fitted.
        ValueError
            If ``X`` has a different number of features than the training data.
        """
        if self.cluster_centers_ is None:
            raise RuntimeError("Call fit() before predict().")
        X_arr = _validate_input(X)
        if X_arr.shape[1] != self.cluster_centers_.shape[1]:
            raise ValueError("X has a different number of features than the training data.")
        return self._assign_clusters(X_arr, self.cluster_centers_)

    def _initialize_centroids(self, X: FloatArray, rng: np.random.Generator) -> FloatArray:
        """Pick ``n_clusters`` starting centroids from the rows of ``X``.

        The first centroid is drawn uniformly; each further one is drawn with
        probability proportional to the squared distance to the nearest
        centroid chosen so far (K-Means++ seeding).
        """
        n_samples = X.shape[0]
        centers = np.empty((self.n_clusters, X.shape[1]), dtype=float)
        centers[0] = X[rng.integers(n_samples)]

        closest_sq = np.full(n_samples, np.inf, dtype=float)
        for i in range(1, self.n_clusters):
            squared_distances = np.sum((X - centers[i - 1]) ** 2, axis=1)
            closest_sq = np.minimum(closest_sq, squared_distances)
            total = closest_sq.sum()
            if total > 0.0:
                cumulative = np.cumsum(closest_sq / total)
                # side="right" never lands on a zero-probability sample, and
                # the clip guards against cumulative[-1] rounding below the draw.
                idx = int(np.searchsorted(cumulative, rng.random(), side="right"))
                idx = min(idx, n_samples - 1)
            else:
                # Every sample coincides with a chosen centroid: the weights
                # are all zero, so fall back to a uniform draw.
                idx = int(rng.integers(n_samples))
            centers[i] = X[idx]

        return centers

    def _assign_clusters(self, X: FloatArray, centers: FloatArray) -> NDArray[np.int64]:
        """Return, for each row of ``X``, the index of its nearest centroid."""
        distances = np.linalg.norm(X[:, np.newaxis, :] - centers[np.newaxis, :, :], axis=2)
        return np.argmin(distances, axis=1).astype(np.int64)

    def _compute_centers(
        self,
        X: FloatArray,
        labels: NDArray[np.int64],
        n_features: int,
        rng: np.random.Generator | None = None,
    ) -> FloatArray:
        """Return the mean of each cluster's members as the new centroids.

        A cluster with no members is re-seeded with a randomly drawn sample.
        ``rng`` supplies those draws; when omitted, one generator seeded with
        ``random_state`` is created and reused for the whole call.
        """
        centers = np.zeros((self.n_clusters, n_features), dtype=float)
        for cluster_index in range(self.n_clusters):
            members = X[labels == cluster_index]
            if members.shape[0] == 0:
                if rng is None:
                    rng = np.random.default_rng(self.random_state)
                centers[cluster_index] = X[rng.integers(X.shape[0])]
            else:
                centers[cluster_index] = members.mean(axis=0)
        return centers

    def _compute_inertia(
        self, X: FloatArray, centers: FloatArray, labels: NDArray[np.int64]
    ) -> float:
        """Return the sum of squared distances from samples to their centroid."""
        diff = X - centers[labels]
        return float(np.sum(diff ** 2))
@@ -0,0 +1,133 @@
1
+ """
2
+ K-Medoids Clustering (PAM — Partitioning Around Medoids)
3
+ =========================================================
4
+ Similar to K-Means but the cluster representatives (medoids) must be actual
5
+ data points, making the algorithm more robust to outliers and compatible
6
+ with non-Euclidean distances.
7
+
8
+ Algorithm (simplified PAM)
9
+ --------------------------
10
+ 1. Randomly initialise K medoids from the dataset.
11
+ 2. Assign every point to its nearest medoid.
12
+ 3. For each cluster, try every non-medoid point as a new medoid;
13
+ keep the swap if it reduces the total cluster cost.
14
+ 4. Repeat steps 2-3 until no swap improves the cost.
15
+
16
+ Only numpy and Python stdlib are used.
17
+ """
18
+
19
+ import numpy as np
20
+
21
+
22
+ class KMedoids:
23
+ """
24
+ K-Medoids clustering.
25
+
26
+ Parameters
27
+ ----------
28
+ n_clusters : int
29
+ Number of clusters / medoids.
30
+ max_iter : int
31
+ Maximum number of swap iterations.
32
+ random_state : int or None
33
+ Seed for reproducible medoid initialisation.
34
+ """
35
+
36
+ def __init__(
37
+ self,
38
+ n_clusters: int = 3,
39
+ max_iter: int = 100,
40
+ random_state: int | None = None,
41
+ ):
42
+ self.n_clusters = n_clusters
43
+ self.max_iter = max_iter
44
+ self.random_state = random_state
45
+ self.medoid_indices_ = None # indices into X of the medoids
46
+ self.labels_ = None
47
+ self.inertia_ = None
48
+
49
+ # ------------------------------------------------------------------
50
+ # Helpers
51
+ # ------------------------------------------------------------------
52
+
53
+ @staticmethod
54
+ def _pairwise_distances(X: np.ndarray) -> np.ndarray:
55
+ """Return Euclidean distance matrix of shape (n, n)."""
56
+ n = len(X)
57
+ D = np.zeros((n, n))
58
+ for i in range(n):
59
+ for j in range(i + 1, n):
60
+ d = np.sqrt(np.sum((X[i] - X[j]) ** 2))
61
+ D[i, j] = D[j, i] = d
62
+ return D
63
+
64
+ def _assign_labels(self, D: np.ndarray, medoids: list) -> np.ndarray:
65
+ """Assign each point to its nearest medoid."""
66
+ dist_to_medoids = D[:, medoids] # (n, K)
67
+ return np.argmin(dist_to_medoids, axis=1)
68
+
69
+ def _total_cost(self, D: np.ndarray, medoids: list, labels: np.ndarray) -> float:
70
+ """Sum of distances from each point to its medoid."""
71
+ cost = 0.0
72
+ for k, m in enumerate(medoids):
73
+ members = np.where(labels == k)[0]
74
+ cost += D[members, m].sum()
75
+ return float(cost)
76
+
77
+ # ------------------------------------------------------------------
78
+ # Public API
79
+ # ------------------------------------------------------------------
80
+
81
+ def fit(self, X: np.ndarray) -> "KMedoids":
82
+ """
83
+ Fit K-Medoids to X.
84
+
85
+ Parameters
86
+ ----------
87
+ X : ndarray of shape (n_samples, n_features)
88
+ """
89
+ rng = np.random.default_rng(self.random_state)
90
+ n_samples = len(X)
91
+ D = self._pairwise_distances(X)
92
+
93
+ # 1. Initialise medoids
94
+ medoids = rng.choice(n_samples, self.n_clusters, replace=False).tolist()
95
+
96
+ for _ in range(self.max_iter):
97
+ labels = self._assign_labels(D, medoids)
98
+ current_cost = self._total_cost(D, medoids, labels)
99
+ improved = False
100
+
101
+ for k in range(self.n_clusters):
102
+ cluster_members = np.where(labels == k)[0].tolist()
103
+ for candidate in cluster_members:
104
+ if candidate in medoids:
105
+ continue
106
+ new_medoids = medoids.copy()
107
+ new_medoids[k] = candidate
108
+ new_labels = self._assign_labels(D, new_medoids)
109
+ new_cost = self._total_cost(D, new_medoids, new_labels)
110
+
111
+ if new_cost < current_cost:
112
+ medoids = new_medoids
113
+ labels = new_labels
114
+ current_cost = new_cost
115
+ improved = True
116
+
117
+ if not improved:
118
+ break
119
+
120
+ self.medoid_indices_ = np.array(medoids, dtype=int)
121
+ self.labels_ = self._assign_labels(D, medoids)
122
+ self.inertia_ = self._total_cost(D, medoids, self.labels_)
123
+ return self
124
+
125
+ def fit_predict(self, X: np.ndarray) -> np.ndarray:
126
+ """Fit and return cluster labels."""
127
+ self.fit(X)
128
+ return self.labels_
129
+
130
+ @property
131
+ def cluster_centers_(self) -> np.ndarray:
132
+ """Return the actual medoid data points."""
133
+ return None # set by fit via fit_predict path; use medoid_indices_
@@ -0,0 +1,103 @@
1
+ """
2
+ Principal Component Analysis (PCA)
3
+ ====================================
4
+ Linear dimensionality reduction via eigen-decomposition of the covariance
5
+ matrix. Projects data onto the directions of maximum variance.
6
+
7
+ Key steps
8
+ ---------
9
+ 1. Centre the data (subtract column means).
10
+ 2. Compute the covariance matrix C = X^T X / (n - 1).
11
+ 3. Eigendecompose C to get eigenvalues and eigenvectors.
12
+ 4. Sort eigenvectors by descending eigenvalue.
13
+ 5. Project: X_reduced = X_centered @ W, where W holds the top-k eigenvectors.
14
+
15
+ Only numpy is used; no scipy or sklearn.
16
+ """
17
+
18
+ import numpy as np
19
+
20
+
21
+ class PCA:
22
+ """
23
+ Principal Component Analysis.
24
+
25
+ Parameters
26
+ ----------
27
+ n_components : int or None
28
+ Number of components to keep. If None, all components are kept.
29
+ """
30
+
31
+ def __init__(self, n_components: int | None = None):
32
+ self.n_components = n_components
33
+ self.components_ = None # shape (n_components, n_features)
34
+ self.explained_variance_ = None
35
+ self.explained_variance_ratio_ = None
36
+ self.mean_ = None
37
+
38
+ # ------------------------------------------------------------------
39
+ # Fit
40
+ # ------------------------------------------------------------------
41
+
42
+ def fit(self, X: np.ndarray) -> "PCA":
43
+ """
44
+ Compute principal components from X.
45
+
46
+ Parameters
47
+ ----------
48
+ X : ndarray of shape (n_samples, n_features)
49
+ """
50
+ n_samples, n_features = X.shape
51
+
52
+ # 1. Centre
53
+ self.mean_ = np.mean(X, axis=0)
54
+ X_centered = X - self.mean_
55
+
56
+ # 2. Covariance matrix (unbiased, divide by n-1)
57
+ cov = np.dot(X_centered.T, X_centered) / (n_samples - 1)
58
+
59
+ # 3. Eigen-decomposition
60
+ eigenvalues, eigenvectors = np.linalg.eigh(cov)
61
+
62
+ # 4. Sort descending by eigenvalue
63
+ order = np.argsort(eigenvalues)[::-1]
64
+ eigenvalues = eigenvalues[order]
65
+ eigenvectors = eigenvectors[:, order] # columns are eigenvectors
66
+
67
+ # 5. Keep top-k
68
+ k = self.n_components if self.n_components is not None else n_features
69
+ self.components_ = eigenvectors[:, :k].T # (k, n_features)
70
+ self.explained_variance_ = eigenvalues[:k]
71
+ total_var = np.sum(eigenvalues)
72
+ self.explained_variance_ratio_ = (
73
+ self.explained_variance_ / total_var if total_var > 0
74
+ else np.zeros(k)
75
+ )
76
+ return self
77
+
78
+ # ------------------------------------------------------------------
79
+ # Transform / inverse_transform
80
+ # ------------------------------------------------------------------
81
+
82
+ def transform(self, X: np.ndarray) -> np.ndarray:
83
+ """
84
+ Project X onto the principal components.
85
+
86
+ Returns
87
+ -------
88
+ X_new : ndarray of shape (n_samples, n_components)
89
+ """
90
+ X_centered = X - self.mean_
91
+ return np.dot(X_centered, self.components_.T)
92
+
93
+ def fit_transform(self, X: np.ndarray) -> np.ndarray:
94
+ """Fit and immediately transform X."""
95
+ self.fit(X)
96
+ return self.transform(X)
97
+
98
+ def inverse_transform(self, X_reduced: np.ndarray) -> np.ndarray:
99
+ """
100
+ Map data from reduced space back to original feature space
101
+ (approximate reconstruction).
102
+ """
103
+ return np.dot(X_reduced, self.components_) + self.mean_
@@ -0,0 +1,200 @@
1
+ """
2
+ t-Distributed Stochastic Neighbour Embedding (t-SNE)
3
+ =====================================================
4
+ Non-linear dimensionality reduction that preserves local structure.
5
+ Converts high-dimensional Euclidean distances into conditional
6
+ probabilities (Gaussian in high-dim, Student-t in low-dim) and minimises
7
+ the KL divergence between the two distributions via gradient descent.
8
+
9
+ Key steps
10
+ ---------
11
+ 1. Compute pairwise affinities p_{j|i} in the high-dimensional space using
12
+ a Gaussian kernel; perplexity controls the effective number of neighbours.
13
+ 2. Symmetrise: p_{ij} = (p_{j|i} + p_{i|j}) / 2n.
14
+ 3. Initialise low-dimensional embedding Y randomly.
15
+ 4. Compute q_{ij} in Y using a Student-t kernel (df=1).
16
+ 5. Gradient descent on KL(P || Q) with momentum.
17
+
18
+ Reference: van der Maaten & Hinton (2008).
19
+ Only numpy is used.
20
+ """
21
+
22
+ import numpy as np
23
+
24
+
25
+ class TSNE:
26
+ """
27
+ t-SNE dimensionality reduction.
28
+
29
+ Parameters
30
+ ----------
31
+ n_components : int
32
+ Dimension of the embedding (almost always 2 or 3).
33
+ perplexity : float
34
+ Effective number of neighbours; typical values 5–50.
35
+ n_iter : int
36
+ Number of gradient-descent iterations.
37
+ learning_rate : float
38
+ Step size for gradient descent.
39
+ momentum : float
40
+ Momentum coefficient for gradient updates.
41
+ random_state : int or None
42
+ Seed for reproducibility.
43
+ """
44
+
45
+ def __init__(
46
+ self,
47
+ n_components: int = 2,
48
+ perplexity: float = 30.0,
49
+ n_iter: int = 1000,
50
+ learning_rate: float = 200.0,
51
+ momentum: float = 0.9,
52
+ random_state: int | None = None,
53
+ ):
54
+ self.n_components = n_components
55
+ self.perplexity = perplexity
56
+ self.n_iter = n_iter
57
+ self.learning_rate = learning_rate
58
+ self.momentum = momentum
59
+ self.random_state = random_state
60
+ self.embedding_ = None
61
+
62
+ # ------------------------------------------------------------------
63
+ # High-dimensional affinities
64
+ # ------------------------------------------------------------------
65
+
66
+ def _pairwise_sq_distances(self, X: np.ndarray) -> np.ndarray:
67
+ """Return matrix of squared Euclidean distances."""
68
+ sum_sq = np.sum(X ** 2, axis=1, keepdims=True)
69
+ D_sq = sum_sq + sum_sq.T - 2.0 * (X @ X.T)
70
+ np.fill_diagonal(D_sq, 0.0)
71
+ return np.maximum(D_sq, 0.0)
72
+
73
+ def _conditional_probabilities(
74
+ self, D_sq: np.ndarray, sigma: float, i: int
75
+ ) -> np.ndarray:
76
+ """Compute p_{j|i} for a given bandwidth sigma."""
77
+ d = D_sq[i].copy()
78
+ d[i] = np.inf # exclude self
79
+ exp_d = np.exp(-d / (2.0 * sigma ** 2))
80
+ denom = exp_d.sum()
81
+ return exp_d / (denom + 1e-12)
82
+
83
+ def _binary_search_sigma(
84
+ self, D_sq: np.ndarray, i: int, target_perp: float,
85
+ tol: float = 1e-5, max_iter: int = 50
86
+ ) -> float:
87
+ """Find sigma_i such that perplexity(p_{.|i}) == target_perp."""
88
+ sigma_low, sigma_high = 1e-10, 1e5
89
+ sigma = 1.0
90
+
91
+ for _ in range(max_iter):
92
+ p = self._conditional_probabilities(D_sq, sigma, i)
93
+ # Shannon entropy
94
+ p_safe = np.maximum(p, 1e-12)
95
+ H = -np.sum(p_safe * np.log2(p_safe))
96
+ perp = 2.0 ** H
97
+
98
+ if abs(perp - target_perp) < tol:
99
+ break
100
+ if perp < target_perp:
101
+ sigma_low = sigma
102
+ sigma = (sigma + sigma_high) / 2.0
103
+ else:
104
+ sigma_high = sigma
105
+ sigma = (sigma + sigma_low) / 2.0
106
+
107
+ return sigma
108
+
109
+ def _compute_P(self, X: np.ndarray) -> np.ndarray:
110
+ """Compute symmetric joint probabilities P."""
111
+ n = len(X)
112
+ D_sq = self._pairwise_sq_distances(X)
113
+ P = np.zeros((n, n))
114
+
115
+ for i in range(n):
116
+ sigma = self._binary_search_sigma(D_sq, i, self.perplexity)
117
+ P[i] = self._conditional_probabilities(D_sq, sigma, i)
118
+
119
+ # Symmetrise and normalise
120
+ P = (P + P.T) / (2.0 * n)
121
+ P = np.maximum(P, 1e-12)
122
+ return P
123
+
124
+ # ------------------------------------------------------------------
125
+ # Low-dimensional affinities
126
+ # ------------------------------------------------------------------
127
+
128
+ def _compute_Q(self, Y: np.ndarray) -> tuple:
129
+ """
130
+ Compute Student-t affinities in the embedding.
131
+
132
+ Returns
133
+ -------
134
+ Q : normalised affinities
135
+ num : unnormalised numerator (needed for gradient)
136
+ """
137
+ D_sq = self._pairwise_sq_distances(Y)
138
+ num = 1.0 / (1.0 + D_sq)
139
+ np.fill_diagonal(num, 0.0)
140
+ denom = num.sum()
141
+ Q = num / (denom + 1e-12)
142
+ Q = np.maximum(Q, 1e-12)
143
+ return Q, num
144
+
145
+ # ------------------------------------------------------------------
146
+ # Public API
147
+ # ------------------------------------------------------------------
148
+
149
+ def fit_transform(self, X: np.ndarray) -> np.ndarray:
150
+ """
151
+ Fit t-SNE and return 2-D (or n_components-D) embedding.
152
+
153
+ Parameters
154
+ ----------
155
+ X : ndarray of shape (n_samples, n_features)
156
+
157
+ Returns
158
+ -------
159
+ Y : ndarray of shape (n_samples, n_components)
160
+ """
161
+ rng = np.random.default_rng(self.random_state)
162
+ n = len(X)
163
+
164
+ # Step 1: compute high-dim affinities
165
+ P = self._compute_P(X)
166
+ # Early exaggeration (first 250 iters)
167
+ P_exag = P * 4.0
168
+
169
+ # Step 2: random initialisation of embedding
170
+ Y = rng.standard_normal((n, self.n_components)) * 1e-4
171
+ velocity = np.zeros_like(Y)
172
+
173
+ for t in range(self.n_iter):
174
+ p_use = P_exag if t < 250 else P
175
+ Q, num = self._compute_Q(Y)
176
+
177
+ # Gradient of KL divergence
178
+ PQ_diff = p_use - Q # (n, n)
179
+ grad = np.zeros_like(Y)
180
+ for i in range(n):
181
+ # dC/dY_i = 4 * sum_j (p_ij - q_ij) * (y_i - y_j) * (1 + ||y_i-y_j||^2)^-1
182
+ diff = Y[i] - Y # (n, n_components)
183
+ grad[i] = 4.0 * (PQ_diff[i] * num[i] @ diff.reshape(n, -1)).sum(axis=0) \
184
+ if self.n_components == 1 \
185
+ else 4.0 * np.dot(PQ_diff[i] * num[i], diff)
186
+
187
+ # Momentum update
188
+ velocity = self.momentum * velocity - self.learning_rate * grad
189
+ Y = Y + velocity
190
+
191
+ # Centre embedding
192
+ Y -= Y.mean(axis=0)
193
+
194
+ self.embedding_ = Y
195
+ return Y
196
+
197
+ def fit(self, X: np.ndarray) -> "TSNE":
198
+ """Fit t-SNE (embedding stored in self.embedding_)."""
199
+ self.fit_transform(X)
200
+ return self