scikit-learn-som 0.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scikit_learn_som-0.0.2/PKG-INFO +6 -0
- scikit_learn_som-0.0.2/README.md +0 -0
- scikit_learn_som-0.0.2/pyproject.toml +16 -0
- scikit_learn_som-0.0.2/scikit_learn_som.egg-info/PKG-INFO +6 -0
- scikit_learn_som-0.0.2/scikit_learn_som.egg-info/SOURCES.txt +8 -0
- scikit_learn_som-0.0.2/scikit_learn_som.egg-info/dependency_links.txt +1 -0
- scikit_learn_som-0.0.2/scikit_learn_som.egg-info/requires.txt +1 -0
- scikit_learn_som-0.0.2/scikit_learn_som.egg-info/top_level.txt +1 -0
- scikit_learn_som-0.0.2/setup.cfg +4 -0
- scikit_learn_som-0.0.2/som.py +174 -0
|
File without changes
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "scikit-learn-som"
|
|
3
|
+
version = "0.0.2"
|
|
4
|
+
description = ""
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.12"
|
|
7
|
+
dependencies = [
|
|
8
|
+
"scikit-learn>=1.6.1",
|
|
9
|
+
]
|
|
10
|
+
|
|
11
|
+
[dependency-groups]
|
|
12
|
+
dev = [
|
|
13
|
+
"ipython>=9.1.0",
|
|
14
|
+
"jupyter>=1.1.1",
|
|
15
|
+
"matplotlib>=3.10.1",
|
|
16
|
+
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
scikit-learn>=1.6.1
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
som
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from sklearn.base import (
|
|
4
|
+
BaseEstimator,
|
|
5
|
+
ClassNamePrefixFeaturesOutMixin,
|
|
6
|
+
ClusterMixin,
|
|
7
|
+
TransformerMixin,
|
|
8
|
+
_fit_context
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
from sklearn.utils.validation import (
|
|
13
|
+
_check_sample_weight,
|
|
14
|
+
_is_arraylike_not_scalar,
|
|
15
|
+
check_random_state,
|
|
16
|
+
check_is_fitted,
|
|
17
|
+
validate_data
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
from numbers import Integral, Real
|
|
21
|
+
from sklearn.utils._param_validation import Interval, StrOptions, validate_params
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class SOM(ClassNamePrefixFeaturesOutMixin, TransformerMixin, ClusterMixin, BaseEstimator):
    """Self-Organizing Map (Kohonen network) clustering estimator.

    Fits a 2-D lattice of neurons to the training data.  Each sample is
    assigned to its Best Matching Unit (BMU) — the neuron whose weight
    vector is closest in Euclidean distance — and the BMU's flat lattice
    index is used as the sample's cluster label.

    Parameters
    ----------
    lattice_rows : int, default=10
        Number of rows in the neuron lattice.
    lattice_columns : int, default=10
        Number of columns in the neuron lattice.
    initial_learning_rate : float, default=1
        Starting learning rate; decays exponentially across iterations.
    neighbourhood_radius : int or None, default=None
        Initial neighbourhood radius.  When ``None`` it is resolved in
        ``fit`` to ``max(lattice_rows, lattice_columns) // 2``.
    max_iters : int, default=300
        Number of full passes over the training data.
    random_state : int, RandomState instance or None, default=None
        Controls the random initialisation of the lattice weights.
    verbose : bool, default=False
        If True, print per-iteration progress.

    Attributes
    ----------
    weights_ : ndarray of shape (lattice_rows, lattice_columns, n_features)
        Lattice weights from the best (lowest-inertia) iteration.
    labels_ : ndarray of shape (n_samples,)
        Flat cluster label of each training sample.
    inertia_ : float
        Lowest total BMU distance observed over all iterations.
    inertia_history_ : ndarray of shape (max_iters,)
        Inertia recorded at every iteration.
    best_winner_neurons_ : ndarray of shape (n_samples, 2)
        (row, column) BMU coordinate of each training sample at the best
        iteration.
    coord_label_map_ : dict
        Mapping from (row, column) lattice coordinate to flat label.
    clusters_ : int
        Number of distinct clusters actually occupied by the training data.
    """

    _parameter_constraints: dict = {
        "lattice_rows": [Interval(Integral, 1, None, closed="left")],
        "lattice_columns": [Interval(Integral, 1, None, closed="left")],
        # None means "derive from the lattice size"; resolved in fit().
        "neighbourhood_radius": [Interval(Integral, 1, None, closed="left"), None],
        "initial_learning_rate": [Interval(Real, 0, None, closed="left")],
        # BUG FIX: key was "max_iter" but the __init__ parameter is "max_iters",
        # so the constraint never validated anything.
        "max_iters": [Interval(Integral, 1, None, closed="left")],
        "verbose": ["verbose"],
        "random_state": ["random_state"],
    }

    @property
    def grid_shape(self):
        """Return the (lattice_rows, lattice_columns) shape of the neuron grid."""
        return (self.lattice_rows, self.lattice_columns)

    def __init__(self, *, lattice_rows=10, lattice_columns=10, initial_learning_rate=1,
                 neighbourhood_radius=None, max_iters=300, random_state=None, verbose=False):
        # Per the scikit-learn estimator contract, __init__ stores parameters
        # verbatim (no resolution/validation here), so clone()/get_params()
        # round-trip correctly.  The previous version replaced a None radius
        # with a derived value at construction time, breaking that contract.
        self.lattice_rows = lattice_rows
        self.lattice_columns = lattice_columns
        self.initial_learning_rate = initial_learning_rate
        self.neighbourhood_radius = neighbourhood_radius
        self.max_iters = max_iters
        self.random_state = random_state
        self.verbose = verbose

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None, sample_weight=None):
        """Train the SOM on X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.
        y : Ignored
            Present for API consistency.
        sample_weight : array-like of shape (n_samples,), default=None
            Validated but not yet used by the training loop.

        Returns
        -------
        self : SOM
            The fitted estimator.
        """
        X = validate_data(
            self,
            X,
            accept_sparse="csr",
            dtype=[np.float64, np.float32],
            order="C",
            accept_large_sparse=False,
        )

        random_state = check_random_state(self.random_state)
        # NOTE(review): sample_weight is validated but never applied below —
        # TODO: weight the inertia / updates, or document it as unsupported.
        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)

        n_samples, n_features = X.shape

        # Resolve the "auto" radius here so self.neighbourhood_radius stays
        # exactly as the user passed it (see __init__).
        radius = self.neighbourhood_radius
        if radius is None:
            radius = max(self.lattice_columns, self.lattice_rows) // 2

        lattice_weights = random_state.rand(self.lattice_rows, self.lattice_columns, n_features)

        # Grid coordinate of every neuron, shape (rows, cols, 2); hoisted out
        # of the loops so the neighbourhood update is fully vectorised.
        neuron_positions = np.stack(
            np.meshgrid(
                np.arange(self.lattice_rows),
                np.arange(self.lattice_columns),
                indexing="ij",
            ),
            axis=-1,
        )

        best_inertia, best_winner_neurons, best_weights = None, None, None
        inertia_history = []

        for itr in range(self.max_iters):
            # Learning rate and neighbourhood width both decay exponentially.
            learning_rate = self.initial_learning_rate * np.exp(-(itr + 1) / self.max_iters)
            neighbourhood_factor = radius * np.exp(-(itr + 1) / self.max_iters)

            inertia = 0.0
            winner_neurons = []

            for sample_no in range(n_samples):
                input_vector = X[sample_no]
                diff = lattice_weights - input_vector.reshape(1, 1, -1)
                dist = np.linalg.norm(diff, axis=2)

                # Best Matching Unit (BMU).
                # BUG FIX: the unravel shape was (rows, rows), which mislocated
                # the BMU whenever the lattice is not square.
                bmu_index = np.unravel_index(
                    np.argmin(dist), (self.lattice_rows, self.lattice_columns)
                )

                winner_neurons.append(bmu_index)
                inertia += np.linalg.norm(diff[bmu_index])

                # Pull every neuron toward the sample, weighted by a Gaussian
                # of its squared grid distance to the BMU.  Vectorised over the
                # whole lattice — identical to the previous per-neuron double
                # loop (each neuron read and wrote only its own weight).
                sq_grid_dist = np.sum((neuron_positions - np.asarray(bmu_index)) ** 2, axis=2)
                neighbourhood_value = np.exp(
                    -sq_grid_dist / (2 * neighbourhood_factor * neighbourhood_factor)
                )
                lattice_weights += (
                    learning_rate * neighbourhood_value[..., np.newaxis]
                    * (input_vector - lattice_weights)
                )

            inertia_history.append(inertia)

            if best_inertia is None or inertia < best_inertia:
                best_inertia = inertia
                best_winner_neurons = winner_neurons
                # BUG FIX: must copy — lattice_weights keeps mutating in later
                # iterations, so aliasing it meant "best" was really "last".
                best_weights = lattice_weights.copy()

            if self.verbose:
                print(f"Iter: {itr+1}: inertia: {inertia:.2f} | Learning Rate: {learning_rate:.3f} | Neighbourhood factor: {neighbourhood_factor:.3f}")

        self.best_winner_neurons_ = np.array(best_winner_neurons)

        # Map each lattice coordinate to a unique flat label (row-major).
        # BUG FIX: the stride must be lattice_columns, not lattice_rows —
        # otherwise labels collide on non-square grids.
        coord_to_label = {
            (i, j): i * self.lattice_columns + j
            for i in range(self.lattice_rows)
            for j in range(self.lattice_columns)
        }

        cluster_labels = [coord_to_label[coord] for coord in best_winner_neurons]
        self.coord_label_map_ = coord_to_label
        self.labels_ = np.array(cluster_labels)
        self.inertia_ = best_inertia
        self.inertia_history_ = np.array(inertia_history)
        self.weights_ = best_weights

        # BUG FIX: count distinct labels, not samples — the previous
        # len(cluster_labels) always reported n_samples.
        distinct_clusters = len(set(cluster_labels))
        self.clusters_ = distinct_clusters

        if self.verbose:
            print(f"Number of Unique Clusters: {distinct_clusters}")

        return self

    def predict(self, X, return_inertia=False):
        """Assign each sample in X to its Best Matching Unit.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to map onto the fitted lattice.
        return_inertia : bool, default=False
            If True, also return the summed BMU distance over X.

        Returns
        -------
        winner_neurons : ndarray of shape (n_samples, 2)
            (row, column) lattice coordinate of each sample's BMU.
        inertia : float
            Total Euclidean distance of samples to their BMUs; only
            returned when ``return_inertia`` is True.
        """
        check_is_fitted(self)

        # BUG FIX: reset=False — predict must check n_features against the
        # fitted n_features_in_, not overwrite it.
        X = validate_data(
            self,
            X,
            reset=False,
            accept_sparse="csr",
            dtype=[np.float64, np.float32],
            order="C",
            accept_large_sparse=False,
        )

        winner_neurons = []
        inertia = 0.0

        for sample_no in range(X.shape[0]):
            input_vector = X[sample_no]
            diff = self.weights_ - input_vector.reshape(1, 1, -1)
            # argmin over plain norms equals argmin over squared norms, so the
            # previous **2 is dropped without changing the selected BMU.
            dist = np.linalg.norm(diff, axis=2)

            # BUG FIX: the unravel shape was (rows, rows); wrong for
            # non-square lattices.
            bmu_index = np.unravel_index(
                np.argmin(dist), (self.lattice_rows, self.lattice_columns)
            )

            winner_neurons.append(bmu_index)
            inertia += np.linalg.norm(diff[bmu_index])

        if return_inertia:
            return np.array(winner_neurons), inertia
        return np.array(winner_neurons)