scikit-learn-som 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ Metadata-Version: 2.4
2
+ Name: scikit-learn-som
3
+ Version: 0.0.2
4
+ Requires-Python: >=3.12
5
+ Description-Content-Type: text/markdown
6
+ Requires-Dist: scikit-learn>=1.6.1
File without changes
@@ -0,0 +1,16 @@
1
+ [project]
2
+ name = "scikit-learn-som"
3
+ version = "0.0.2"
4
+ description = ""
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "scikit-learn>=1.6.1",
9
+ ]
10
+
11
+ [dependency-groups]
12
+ dev = [
13
+ "ipython>=9.1.0",
14
+ "jupyter>=1.1.1",
15
+ "matplotlib>=3.10.1",
16
+ ]
@@ -0,0 +1,6 @@
1
+ Metadata-Version: 2.4
2
+ Name: scikit-learn-som
3
+ Version: 0.0.2
4
+ Requires-Python: >=3.12
5
+ Description-Content-Type: text/markdown
6
+ Requires-Dist: scikit-learn>=1.6.1
@@ -0,0 +1,8 @@
1
+ README.md
2
+ pyproject.toml
3
+ som.py
4
+ scikit_learn_som.egg-info/PKG-INFO
5
+ scikit_learn_som.egg-info/SOURCES.txt
6
+ scikit_learn_som.egg-info/dependency_links.txt
7
+ scikit_learn_som.egg-info/requires.txt
8
+ scikit_learn_som.egg-info/top_level.txt
@@ -0,0 +1 @@
1
+ scikit-learn>=1.6.1
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,174 @@
1
+ import numpy as np
2
+
3
+ from sklearn.base import (
4
+ BaseEstimator,
5
+ ClassNamePrefixFeaturesOutMixin,
6
+ ClusterMixin,
7
+ TransformerMixin,
8
+ _fit_context
9
+ )
10
+
11
+
12
+ from sklearn.utils.validation import (
13
+ _check_sample_weight,
14
+ _is_arraylike_not_scalar,
15
+ check_random_state,
16
+ check_is_fitted,
17
+ validate_data
18
+ )
19
+
20
+ from numbers import Integral, Real
21
+ from sklearn.utils._param_validation import Interval, StrOptions, validate_params
22
+
23
+
24
class SOM(ClassNamePrefixFeaturesOutMixin, TransformerMixin, ClusterMixin, BaseEstimator):
    """Self-organising map clusterer on a rectangular lattice of neurons.

    Every neuron of a ``lattice_rows x lattice_columns`` grid holds a weight
    vector with one entry per input feature. Training presents the samples
    one at a time; for each sample the best matching unit (BMU, the neuron
    whose weights are closest in Euclidean distance) and its lattice
    neighbours are pulled towards the sample. Learning rate and
    neighbourhood width both decay as ``exp(-(epoch + 1) / max_iters)``.

    Parameters
    ----------
    lattice_rows : int, default=10
        Number of rows of the neuron lattice (>= 1).
    lattice_columns : int, default=10
        Number of columns of the neuron lattice (>= 1).
    initial_learning_rate : float, default=1
        Learning rate before decay (>= 0).
    neighbourhood_radius : int or None, default=None
        Initial width of the Gaussian neighbourhood (>= 1). ``None`` means
        ``max(lattice_rows, lattice_columns) // 2`` (at least 1), resolved
        when ``fit`` is called so that it follows later ``set_params`` calls.
    max_iters : int, default=300
        Number of passes (epochs) over the training data (>= 1).
    random_state : int, RandomState instance or None, default=None
        Seed or generator used to initialise the lattice weights.
    verbose : bool or int, default=False
        When truthy, print per-epoch progress and a final summary.

    Attributes
    ----------
    weights_ : ndarray of shape (lattice_rows, lattice_columns, n_features)
        Snapshot of the lattice weights at the end of the lowest-inertia epoch.
    best_winner_neurons_ : ndarray of shape (n_samples, 2)
        (row, column) of the BMU of every training sample in that epoch.
    labels_ : ndarray of shape (n_samples,)
        Flat cluster label ``row * lattice_columns + column`` of each BMU.
    coord_label_map_ : dict
        Maps every ``(row, column)`` lattice coordinate to its flat label.
    inertia_ : float
        Lowest per-epoch inertia: the sum of Euclidean distances between each
        sample and its BMU, measured as the samples are presented.
    inertia_history_ : ndarray of shape (max_iters,)
        Inertia of every epoch.
    clusters_ : int
        Number of distinct labels in ``labels_``.
    neighbourhood_radius_ : int
        Neighbourhood radius actually used during ``fit``.
    """

    _parameter_constraints: dict = {
        "lattice_rows": [Interval(Integral, 1, None, closed="left")],
        "lattice_columns": [Interval(Integral, 1, None, closed="left")],
        # None is resolved to a lattice-dependent default inside fit().
        "neighbourhood_radius": [Interval(Integral, 1, None, closed="left"), None],
        "initial_learning_rate": [Interval(Real, 0, None, closed="left")],
        # The key must match the constructor argument name, otherwise the
        # parameter is silently skipped by the validation machinery.
        "max_iters": [Interval(Integral, 1, None, closed="left")],
        "verbose": ["verbose"],
        "random_state": ["random_state"],
    }

    @property
    def grid_shape(self):
        """Return the lattice shape as ``(lattice_rows, lattice_columns)``."""
        return (self.lattice_rows, self.lattice_columns)

    def __init__(
        self,
        *,
        lattice_rows=10,
        lattice_columns=10,
        initial_learning_rate=1,
        neighbourhood_radius=None,
        max_iters=300,
        random_state=None,
        verbose=False,
    ):
        # Parameters are stored exactly as given (no derived values here) so
        # that get_params / set_params / clone round-trip faithfully.
        self.lattice_rows = lattice_rows
        self.lattice_columns = lattice_columns
        self.initial_learning_rate = initial_learning_rate
        self.neighbourhood_radius = neighbourhood_radius
        self.max_iters = max_iters
        self.random_state = random_state
        self.verbose = verbose

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None, sample_weight=None):
        """Train the map on ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Dense training data. Sparse input is rejected because the update
            rule operates on dense rows.
        y : Ignored
            Present for API consistency.
        sample_weight : array-like of shape (n_samples,), default=None
            Validated for shape and dtype only; the training rule does not
            currently use the weights.

        Returns
        -------
        self : SOM
            The fitted estimator.
        """
        X = validate_data(
            self,
            X,
            accept_sparse=False,
            dtype=[np.float64, np.float32],
            order="C",
        )

        random_state = check_random_state(self.random_state)
        # Called for its validation side effect only (raises on bad input).
        _check_sample_weight(sample_weight, X, dtype=X.dtype)

        n_features = X.shape[1]
        n_rows, n_cols = self.lattice_rows, self.lattice_columns

        radius = self.neighbourhood_radius
        if radius is None:
            # Clamp to 1 so a 1x1 (or 1xN with N < 2) lattice does not yield a
            # zero-width neighbourhood and a division by zero below.
            radius = max(1, max(n_rows, n_cols) // 2)

        lattice_weights = random_state.rand(n_rows, n_cols, n_features)

        # Lattice coordinates of every neuron, used for distances to the BMU.
        row_grid, col_grid = np.indices((n_rows, n_cols))

        best_inertia, best_winner_neurons, best_weights = None, None, None
        inertia_history = []

        for itr in range(self.max_iters):
            decay = np.exp(-(itr + 1) / self.max_iters)
            learning_rate = self.initial_learning_rate * decay
            neighbour_hood_factor = radius * decay

            inertia = 0.0
            winner_neurons = []

            for input_vector in X:
                dist = np.linalg.norm(lattice_weights - input_vector, axis=2)

                # Best matching unit; unravel with the real lattice shape so
                # that non-square lattices are indexed correctly.
                bmu_index = np.unravel_index(np.argmin(dist), dist.shape)
                winner_neurons.append(bmu_index)
                inertia += dist[bmu_index]

                # Gaussian neighbourhood over squared lattice distance to the
                # BMU, applied to all neurons at once.
                sq_dist_to_bmu = (
                    (row_grid - bmu_index[0]) ** 2 + (col_grid - bmu_index[1]) ** 2
                )
                neighbour_hood_value = np.exp(
                    -sq_dist_to_bmu / (2 * neighbour_hood_factor * neighbour_hood_factor)
                )
                lattice_weights += (
                    learning_rate
                    * neighbour_hood_value[:, :, np.newaxis]
                    * (input_vector - lattice_weights)
                )

            inertia_history.append(inertia)

            if best_inertia is None or inertia < best_inertia:
                best_inertia = inertia
                best_winner_neurons = winner_neurons
                # Copy: lattice_weights keeps being updated in place during
                # later epochs and would otherwise overwrite this snapshot.
                best_weights = lattice_weights.copy()

            if self.verbose:
                print(f"Iter: {itr+1}: inertia: {inertia:.2f} | Learning Rate: {learning_rate:.3f} | Neighbourhood factor: {neighbour_hood_factor:.3f}")

        winner_coords = np.array(best_winner_neurons)
        self.best_winner_neurons_ = winner_coords

        # Row-major flat label for every lattice coordinate; the stride is
        # the number of columns so labels stay unique on non-square lattices.
        self.coord_label_map_ = {
            (i, j): i * n_cols + j for i in range(n_rows) for j in range(n_cols)
        }
        self.labels_ = winner_coords[:, 0] * n_cols + winner_coords[:, 1]
        self.inertia_ = best_inertia
        self.inertia_history_ = np.array(inertia_history)
        self.weights_ = best_weights
        self.neighbourhood_radius_ = radius

        distinct_clusters = int(np.unique(self.labels_).size)
        self.clusters_ = distinct_clusters

        if self.verbose:
            print(f"Number of Unique Clusters: {distinct_clusters}")

        return self

    def predict(self, X, return_inertia=False):
        """Return the best matching unit of every sample in ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Dense data with the same number of features as seen in ``fit``.
        return_inertia : bool, default=False
            When true, also return the sum of Euclidean distances between
            each sample and the weights of its BMU.

        Returns
        -------
        winner_neurons : ndarray of shape (n_samples, 2)
            (row, column) lattice coordinates of each sample's BMU. Use
            ``coord_label_map_`` to convert them to the flat labels stored
            in ``labels_``.
        inertia : float
            Only returned when ``return_inertia`` is true.
        """
        check_is_fitted(self)

        X = validate_data(
            self,
            X,
            accept_sparse=False,
            dtype=[np.float64, np.float32],
            order="C",
            # Keep n_features_in_ from fit and check X against it.
            reset=False,
        )

        # Use the shape of the fitted weights so the result stays correct even
        # if lattice parameters were changed after fitting.
        lattice_shape = self.weights_.shape[:2]

        winner_neurons = np.empty((X.shape[0], 2), dtype=np.intp)
        inertia = 0.0

        for sample_no, input_vector in enumerate(X):
            dist = np.linalg.norm(self.weights_ - input_vector, axis=2)
            bmu_index = np.unravel_index(np.argmin(dist), lattice_shape)
            winner_neurons[sample_no] = bmu_index
            inertia += dist[bmu_index]

        if return_inertia:
            return winner_neurons, inertia
        return winner_neurons