sil-score 0.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sil_score-0.1.4/LICENSE +21 -0
- sil_score-0.1.4/PKG-INFO +244 -0
- sil_score-0.1.4/README.md +210 -0
- sil_score-0.1.4/setup.cfg +4 -0
- sil_score-0.1.4/setup.py +31 -0
- sil_score-0.1.4/sil_score/__init__.py +15 -0
- sil_score-0.1.4/sil_score/sil_score.py +253 -0
- sil_score-0.1.4/sil_score.egg-info/PKG-INFO +244 -0
- sil_score-0.1.4/sil_score.egg-info/SOURCES.txt +10 -0
- sil_score-0.1.4/sil_score.egg-info/dependency_links.txt +1 -0
- sil_score-0.1.4/sil_score.egg-info/requires.txt +2 -0
- sil_score-0.1.4/sil_score.egg-info/top_level.txt +1 -0
sil_score-0.1.4/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Aggelos Semoglou
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
sil_score-0.1.4/PKG-INFO
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sil-score
|
|
3
|
+
Version: 0.1.4
|
|
4
|
+
Summary: Exact and approximate silhouette scoring with micro, macro, and weighted cluster averages.
|
|
5
|
+
Home-page: https://github.com/semoglou/sil_score
|
|
6
|
+
Author: Aggelos Semoglou
|
|
7
|
+
License: MIT
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Science/Research
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
19
|
+
Requires-Python: >=3.8
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: numpy>=1.21
|
|
23
|
+
Requires-Dist: scikit-learn>=1.0
|
|
24
|
+
Dynamic: author
|
|
25
|
+
Dynamic: classifier
|
|
26
|
+
Dynamic: description
|
|
27
|
+
Dynamic: description-content-type
|
|
28
|
+
Dynamic: home-page
|
|
29
|
+
Dynamic: license
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
Dynamic: requires-dist
|
|
32
|
+
Dynamic: requires-python
|
|
33
|
+
Dynamic: summary
|
|
34
|
+
|
|
35
|
+
# sil_score
|
|
36
|
+
|
|
37
|
+
[](https://pypi.org/project/sil-score/)
|
|
38
|
+
[](https://pypi.org/project/sil-score/)
|
|
39
|
+
[](LICENSE)
|
|
40
|
+
|
|
41
|
+
`sil-score` is a small Python package for exact and fast approximate silhouette scoring.
|
|
42
|
+
|
|
43
|
+
It extends the usual silhouette workflow with:
|
|
44
|
+
|
|
45
|
+
- per-sample silhouette scores
|
|
46
|
+
- micro-averaged silhouette score
|
|
47
|
+
- macro-averaged silhouette score
|
|
48
|
+
- cluster-weighted macro silhouette score
|
|
49
|
+
- exact vs approximate comparison report
|
|
50
|
+
|
|
51
|
+
The exact mode uses scikit-learn's `silhouette_samples`.
|
|
52
|
+
The approximate mode uses Euclidean distances to cluster centroids, making it faster but not identical to the classical silhouette definition.
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Installation
|
|
57
|
+
|
|
58
|
+
Install from PyPI:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
pip install sil-score
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Quick example
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
import numpy as np
|
|
68
|
+
from sil_score import (
|
|
69
|
+
sil_samples,
|
|
70
|
+
micro_sil_score,
|
|
71
|
+
macro_sil_score,
|
|
72
|
+
weighted_macro_sil_score,
|
|
73
|
+
sil_approximation_report,
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
X = np.array([
|
|
77
|
+
[0.0],
|
|
78
|
+
[2.0],
|
|
79
|
+
[10.0],
|
|
80
|
+
[12.0],
|
|
81
|
+
])
|
|
82
|
+
|
|
83
|
+
labels = np.array([0, 0, 1, 1])
|
|
84
|
+
|
|
85
|
+
samples = sil_samples(X, labels)
|
|
86
|
+
micro = micro_sil_score(X, labels)
|
|
87
|
+
macro = macro_sil_score(X, labels)
|
|
88
|
+
|
|
89
|
+
print(samples)
|
|
90
|
+
print(micro)
|
|
91
|
+
print(macro)
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Output:
|
|
95
|
+
|
|
96
|
+
[0.81818182 0.77777778 0.77777778 0.81818182]
|
|
97
|
+
0.797979797979798
|
|
98
|
+
0.797979797979798
|
|
99
|
+
|
|
100
|
+
## Functions
|
|
101
|
+
|
|
102
|
+
### `sil_samples`
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
sil_samples(X, labels, approximation=False, centers=None)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Computes the silhouette score for each sample.
|
|
109
|
+
|
|
110
|
+
By default, it computes the exact silhouette values using scikit-learn.
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
scores = sil_samples(X, labels)
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
For a faster centroid-based approximation:
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
scores = sil_samples(X, labels, approximation=True)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
You can also pass precomputed cluster centers:
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
scores = sil_samples(
|
|
126
|
+
X,
|
|
127
|
+
labels,
|
|
128
|
+
approximation=True,
|
|
129
|
+
centers=centers,
|
|
130
|
+
)
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
### `micro_sil_score`
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
micro_sil_score(X, labels, approximation=False, centers=None)
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
Computes the mean of all sample-level silhouette scores. This is the usual average silhouette score. Larger clusters naturally have more influence because they contain more samples.
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
# Standard usage
|
|
145
|
+
score = micro_sil_score(X, labels)
|
|
146
|
+
|
|
147
|
+
# Approximate version
|
|
148
|
+
score = micro_sil_score(X, labels, approximation=True)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
### `macro_sil_score`
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
macro_sil_score(X, labels, approximation=False, centers=None)
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
Computes the mean silhouette score inside each cluster, then averages the cluster means equally. This gives every cluster the same importance, regardless of its size.
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
# Standard usage
|
|
163
|
+
score = macro_sil_score(X, labels)
|
|
164
|
+
|
|
165
|
+
# Approximate version
|
|
166
|
+
score = macro_sil_score(X, labels, approximation=True)
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
### `weighted_macro_sil_score`
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
weighted_macro_sil_score(X, labels, cluster_weights, approximation=False, centers=None)
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
Computes a cluster-weighted macro silhouette score. First, it computes the mean silhouette score for each cluster, then combines those cluster means using custom cluster weights.
|
|
178
|
+
|
|
179
|
+
Using a dictionary:
|
|
180
|
+
```python
|
|
181
|
+
weights = {
|
|
182
|
+
0: 0.2,
|
|
183
|
+
1: 0.3,
|
|
184
|
+
2: 0.5,
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
score = weighted_macro_sil_score(X, labels, cluster_weights=weights)
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
Using an array:
|
|
191
|
+
|
|
192
|
+
```python
|
|
193
|
+
weights = [0.2, 0.3, 0.5]
|
|
194
|
+
|
|
195
|
+
score = weighted_macro_sil_score(X, labels, cluster_weights=weights)
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
### `sil_approximation_report`
|
|
201
|
+
|
|
202
|
+
```python
|
|
203
|
+
sil_approximation_report(X, labels, centers=None, return_samples=False)
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
Compares exact silhouette scores with centroid-based approximate scores. It returns the Pearson correlation and error metrics:
|
|
207
|
+
|
|
208
|
+
```python
|
|
209
|
+
report = sil_approximation_report(X, labels)
|
|
210
|
+
print(report)
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
Example output:
|
|
214
|
+
|
|
215
|
+
```
|
|
216
|
+
{
|
|
217
|
+
"correlation": 0.96,
|
|
218
|
+
"mean_absolute_error": 0.03,
|
|
219
|
+
"mean_squared_error": 0.002,
|
|
220
|
+
"root_mean_squared_error": 0.045,
|
|
221
|
+
"max_absolute_error": 0.12,
|
|
222
|
+
"mean_error": 0.01,
|
|
223
|
+
"mean_exact_score": 0.52,
|
|
224
|
+
"mean_approximate_score": 0.53,
|
|
225
|
+
"n_samples": 300,
|
|
226
|
+
}
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
Use `return_samples=True` to also include the exact scores, approximate scores, and per-sample errors.
|
|
230
|
+
|
|
231
|
+
---
|
|
232
|
+
|
|
233
|
+
### Exact vs Approximate mode
|
|
234
|
+
|
|
235
|
+
- **Exact mode**: `sil_samples(X, labels, approximation=False)`. Uses the classical silhouette definition based on distances between samples.
|
|
236
|
+
- **Approximate mode**: `sil_samples(X, labels, approximation=True)`. Uses distances from each sample to cluster centroids. This can be significantly faster for larger datasets.
|
|
237
|
+
|
|
238
|
+
## Requirements
|
|
239
|
+
`sil-score` depends on:
|
|
240
|
+
- NumPy
|
|
241
|
+
- scikit-learn
|
|
242
|
+
|
|
243
|
+
## License
|
|
244
|
+
This project is licensed under the MIT License.
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
# sil_score
|
|
2
|
+
|
|
3
|
+
[](https://pypi.org/project/sil-score/)
|
|
4
|
+
[](https://pypi.org/project/sil-score/)
|
|
5
|
+
[](LICENSE)
|
|
6
|
+
|
|
7
|
+
`sil-score` is a small Python package for exact and fast approximate silhouette scoring.
|
|
8
|
+
|
|
9
|
+
It extends the usual silhouette workflow with:
|
|
10
|
+
|
|
11
|
+
- per-sample silhouette scores
|
|
12
|
+
- micro-averaged silhouette score
|
|
13
|
+
- macro-averaged silhouette score
|
|
14
|
+
- cluster-weighted macro silhouette score
|
|
15
|
+
- exact vs approximate comparison report
|
|
16
|
+
|
|
17
|
+
The exact mode uses scikit-learn's `silhouette_samples`.
|
|
18
|
+
The approximate mode uses Euclidean distances to cluster centroids, making it faster but not identical to the classical silhouette definition.
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
Install from PyPI:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install sil-score
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Quick example
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
import numpy as np
|
|
34
|
+
from sil_score import (
|
|
35
|
+
sil_samples,
|
|
36
|
+
micro_sil_score,
|
|
37
|
+
macro_sil_score,
|
|
38
|
+
weighted_macro_sil_score,
|
|
39
|
+
sil_approximation_report,
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
X = np.array([
|
|
43
|
+
[0.0],
|
|
44
|
+
[2.0],
|
|
45
|
+
[10.0],
|
|
46
|
+
[12.0],
|
|
47
|
+
])
|
|
48
|
+
|
|
49
|
+
labels = np.array([0, 0, 1, 1])
|
|
50
|
+
|
|
51
|
+
samples = sil_samples(X, labels)
|
|
52
|
+
micro = micro_sil_score(X, labels)
|
|
53
|
+
macro = macro_sil_score(X, labels)
|
|
54
|
+
|
|
55
|
+
print(samples)
|
|
56
|
+
print(micro)
|
|
57
|
+
print(macro)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Output:
|
|
61
|
+
|
|
62
|
+
[0.81818182 0.77777778 0.77777778 0.81818182]
|
|
63
|
+
0.797979797979798
|
|
64
|
+
0.797979797979798
|
|
65
|
+
|
|
66
|
+
## Functions
|
|
67
|
+
|
|
68
|
+
### `sil_samples`
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
sil_samples(X, labels, approximation=False, centers=None)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Computes the silhouette score for each sample.
|
|
75
|
+
|
|
76
|
+
By default, it computes the exact silhouette values using scikit-learn.
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
scores = sil_samples(X, labels)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
For a faster centroid-based approximation:
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
scores = sil_samples(X, labels, approximation=True)
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
You can also pass precomputed cluster centers:
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
scores = sil_samples(
|
|
92
|
+
X,
|
|
93
|
+
labels,
|
|
94
|
+
approximation=True,
|
|
95
|
+
centers=centers,
|
|
96
|
+
)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
### `micro_sil_score`
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
micro_sil_score(X, labels, approximation=False, centers=None)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Computes the mean of all sample-level silhouette scores. This is the usual average silhouette score. Larger clusters naturally have more influence because they contain more samples.
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
# Standard usage
|
|
111
|
+
score = micro_sil_score(X, labels)
|
|
112
|
+
|
|
113
|
+
# Approximate version
|
|
114
|
+
score = micro_sil_score(X, labels, approximation=True)
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
### `macro_sil_score`
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
macro_sil_score(X, labels, approximation=False, centers=None)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
Computes the mean silhouette score inside each cluster, then averages the cluster means equally. This gives every cluster the same importance, regardless of its size.
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
# Standard usage
|
|
129
|
+
score = macro_sil_score(X, labels)
|
|
130
|
+
|
|
131
|
+
# Approximate version
|
|
132
|
+
score = macro_sil_score(X, labels, approximation=True)
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
---
|
|
136
|
+
|
|
137
|
+
### `weighted_macro_sil_score`
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
weighted_macro_sil_score(X, labels, cluster_weights, approximation=False, centers=None)
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
Computes a cluster-weighted macro silhouette score. First, it computes the mean silhouette score for each cluster, then combines those cluster means using custom cluster weights.
|
|
144
|
+
|
|
145
|
+
Using a dictionary:
|
|
146
|
+
```python
|
|
147
|
+
weights = {
|
|
148
|
+
0: 0.2,
|
|
149
|
+
1: 0.3,
|
|
150
|
+
2: 0.5,
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
score = weighted_macro_sil_score(X, labels, cluster_weights=weights)
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Using an array:
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
weights = [0.2, 0.3, 0.5]
|
|
160
|
+
|
|
161
|
+
score = weighted_macro_sil_score(X, labels, cluster_weights=weights)
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
### `sil_approximation_report`
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
sil_approximation_report(X, labels, centers=None, return_samples=False)
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Compares exact silhouette scores with centroid-based approximate scores. It returns the Pearson correlation and error metrics:
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
report = sil_approximation_report(X, labels)
|
|
176
|
+
print(report)
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
Example output:
|
|
180
|
+
|
|
181
|
+
```
|
|
182
|
+
{
|
|
183
|
+
"correlation": 0.96,
|
|
184
|
+
"mean_absolute_error": 0.03,
|
|
185
|
+
"mean_squared_error": 0.002,
|
|
186
|
+
"root_mean_squared_error": 0.045,
|
|
187
|
+
"max_absolute_error": 0.12,
|
|
188
|
+
"mean_error": 0.01,
|
|
189
|
+
"mean_exact_score": 0.52,
|
|
190
|
+
"mean_approximate_score": 0.53,
|
|
191
|
+
"n_samples": 300,
|
|
192
|
+
}
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
Use `return_samples=True` to also include the exact scores, approximate scores, and per-sample errors.
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
### Exact vs Approximate mode
|
|
200
|
+
|
|
201
|
+
- **Exact mode**: `sil_samples(X, labels, approximation=False)`. Uses the classical silhouette definition based on distances between samples.
|
|
202
|
+
- **Approximate mode**: `sil_samples(X, labels, approximation=True)`. Uses distances from each sample to cluster centroids. This can be significantly faster for larger datasets.
|
|
203
|
+
|
|
204
|
+
## Requirements
|
|
205
|
+
`sil-score` depends on:
|
|
206
|
+
- NumPy
|
|
207
|
+
- scikit-learn
|
|
208
|
+
|
|
209
|
+
## License
|
|
210
|
+
This project is licensed under the MIT License.
|
sil_score-0.1.4/setup.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Packaging script for the ``sil-score`` distribution."""
from pathlib import Path

from setuptools import setup, find_packages

# Resolve README.md relative to this script (not the current working
# directory) and read it through pathlib so the file handle is closed
# deterministically instead of being left to the garbage collector.
README_PATH = Path(__file__).resolve().parent / "README.md"
LONG_DESCRIPTION = README_PATH.read_text(encoding="utf-8")

setup(
    name="sil-score",
    version="0.1.4",
    description="Exact and approximate silhouette scoring with micro, macro, and weighted cluster averages.",
    long_description=LONG_DESCRIPTION,
    long_description_content_type="text/markdown",
    author="Aggelos Semoglou",
    license="MIT",
    url="https://github.com/semoglou/sil_score",
    packages=find_packages(),
    python_requires=">=3.8",
    install_requires=[
        "numpy>=1.21",
        "scikit-learn>=1.0",
    ],
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Science/Research",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: Python :: 3.12",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from .sil_score import (
|
|
2
|
+
sil_samples,
|
|
3
|
+
micro_sil_score,
|
|
4
|
+
macro_sil_score,
|
|
5
|
+
weighted_macro_sil_score,
|
|
6
|
+
sil_approximation_report,
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"sil_samples",
|
|
11
|
+
"micro_sil_score",
|
|
12
|
+
"macro_sil_score",
|
|
13
|
+
"weighted_macro_sil_score",
|
|
14
|
+
"sil_approximation_report",
|
|
15
|
+
]
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
"""
|
|
2
|
+
sil_score.sil_score
|
|
3
|
+
|
|
4
|
+
Utilities for exact and approximate silhouette scoring.
|
|
5
|
+
|
|
6
|
+
The exact mode delegates to scikit-learn's silhouette_samples.
|
|
7
|
+
The approximate mode uses Euclidean distances to cluster centroids, so it is
|
|
8
|
+
faster but not identical to the classical silhouette definition.
|
|
9
|
+
"""
|
|
10
|
+
import numpy as np
|
|
11
|
+
from sklearn.metrics import silhouette_samples
|
|
12
|
+
from sklearn.metrics.pairwise import euclidean_distances
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def sil_samples(X, labels, approximation: bool = False, centers=None) -> np.ndarray:
    """
    Compute the silhouette score of every sample.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Input data.

    labels : array-like of shape (n_samples,)
        Cluster label of each sample. At least two distinct labels
        are required.

    approximation : bool, default=False
        If falsy, delegate to scikit-learn's ``silhouette_samples``.
        If truthy, use the centroid-based approximation: a(i) is the
        Euclidean distance from sample i to its own cluster centre and
        b(i) is the distance to the nearest other centre.

    centers : array-like of shape (n_clusters, n_features), optional
        Cluster centres for the approximate mode; ignored in exact mode.
        Row ``i`` is used as the centre of label ``i``, so the labels must
        be exactly ``0..k-1``. If None, each centre is the mean of the
        samples carrying that label.

    Returns
    -------
    scores : ndarray of shape (n_samples,)
        Per-sample silhouette scores. In approximate mode the values are
        clipped to [-1, 1] and members of singleton clusters get 0.

    Raises
    ------
    ValueError
        If ``X`` is not 2D, ``labels`` is not 1D with one entry per row
        of ``X``, fewer than two clusters are present, or ``centers`` has
        the wrong shape or is passed together with non-dense labels.
    """
    X = np.asarray(X)
    labels = np.asarray(labels)
    if X.ndim != 2:
        raise ValueError("X must be a 2D array of shape (n_samples, n_features).")
    if labels.ndim != 1 or labels.shape[0] != X.shape[0]:
        raise ValueError("labels must be a 1D array of length n_samples.")

    # `inv` maps every sample to the position of its label in `unique_labels`.
    unique_labels, inv = np.unique(labels, return_inverse=True)
    k = unique_labels.size
    if k < 2:
        raise ValueError("Silhouette computation requires at least 2 clusters.")

    # Exact silhouette scores. Test truthiness rather than `== False` so
    # that falsy values such as None do not fall through to the
    # approximate path.
    if not approximation:
        return silhouette_samples(X, labels=labels)

    # Centroid-based approximate silhouette scores
    n_samples, n_features = X.shape

    if centers is None:
        centers = np.array([X[inv == i].mean(axis=0) for i in range(k)], dtype=float)
    else:
        centers = np.asarray(centers, dtype=float)
        if centers.ndim != 2 or centers.shape[1] != n_features:
            raise ValueError(f"centers must have shape (k, d) with d={n_features}.")
        if centers.shape[0] != k:
            raise ValueError(f"centers.shape[0] must equal number of clusters k={k}.")
        # Rows of `centers` are matched to clusters by position, which is
        # only well defined when the labels are exactly 0..k-1.
        if not np.array_equal(unique_labels, np.arange(k)):
            raise ValueError("When passing ndarray centers, labels must be dense 0..k-1.")

    # Squared distances to all centroids
    D_sq = euclidean_distances(X, centers, squared=True)
    rows = np.arange(n_samples)

    # a(i): distance to own centroid
    a = np.sqrt(np.maximum(D_sq[rows, inv], 0.0))

    # b(i): distance to nearest other centroid; the own-centroid entry is
    # masked with +inf so the row minimum ignores it.
    D_sq[rows, inv] = np.inf
    b = np.sqrt(np.min(D_sq, axis=1))

    # Silhouette per point; the floor on the denominator avoids 0/0 when a
    # sample coincides with both its own and the nearest other centre.
    denom = np.maximum(np.maximum(a, b), 1e-12)
    s_point = (b - a) / denom

    # Singleton clusters -> silhouette = 0
    counts = np.bincount(inv, minlength=k)
    s_point[counts[inv] < 2] = 0.0

    return np.clip(s_point, -1.0, 1.0)
|
|
73
|
+
|
|
74
|
+
def micro_sil_score(X, labels, approximation: bool = False, centers=None) -> float:
    """
    Return the micro-averaged silhouette score.

    The per-sample silhouette scores from ``sil_samples`` are averaged
    directly, so every sample counts once and larger clusters carry more
    weight simply because they hold more samples. All arguments are
    forwarded unchanged to ``sil_samples``.
    """
    per_sample = sil_samples(X, labels, approximation=approximation, centers=centers)
    return float(per_sample.mean())
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def macro_sil_score(X, labels, approximation: bool = False, centers=None) -> float:
    """
    Return the macro-averaged silhouette score.

    The per-sample scores from ``sil_samples`` are first averaged inside
    each cluster; the resulting cluster means are then averaged with equal
    weight, so a cluster's size does not affect its influence.
    """
    labels = np.asarray(labels)
    per_sample = sil_samples(X, labels, approximation=approximation, centers=centers)

    # One mean per distinct label, in the sorted order np.unique returns.
    cluster_means = []
    for cluster in np.unique(labels):
        members = labels == cluster
        cluster_means.append(np.mean(per_sample[members]))

    return float(np.mean(cluster_means))
|
|
113
|
+
|
|
114
|
+
def sil_approximation_report(X, labels, centers=None, return_samples=False):
    """
    Summarise how closely the centroid-based scores track the exact ones.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Input data.

    labels : array-like of shape (n_samples,)
        Cluster labels for each sample.

    centers : array-like of shape (n_clusters, n_features), optional
        Cluster centers passed to the approximate computation.
        If None, ``sil_samples`` derives them from X and labels.

    return_samples : bool, default=False
        If True, the per-sample exact scores, approximate scores, signed
        errors and absolute errors are added to the returned dictionary.

    Returns
    -------
    report : dict
        Pearson correlation (NaN when either score vector is constant),
        error statistics of ``approximate - exact``, the mean of each
        score vector, and the number of samples.
    """
    exact = sil_samples(X, labels, approximation=False)
    approx = sil_samples(X, labels, approximation=True, centers=centers)

    # Signed error: positive where the approximation overshoots.
    diff = approx - exact
    abs_diff = np.abs(diff)
    mean_sq = np.mean(diff ** 2)

    # The correlation coefficient is undefined for a constant vector.
    degenerate = np.std(exact) == 0 or np.std(approx) == 0
    correlation = np.nan if degenerate else float(np.corrcoef(exact, approx)[0, 1])

    report = {
        "correlation": correlation,
        "mean_absolute_error": float(np.mean(abs_diff)),
        "mean_squared_error": float(mean_sq),
        "root_mean_squared_error": float(np.sqrt(mean_sq)),
        "max_absolute_error": float(np.max(abs_diff)),
        "mean_error": float(np.mean(diff)),
        "mean_exact_score": float(np.mean(exact)),
        "mean_approximate_score": float(np.mean(approx)),
        "n_samples": int(len(exact)),
    }

    if return_samples:
        report.update({
            "exact_scores": exact,
            "approximate_scores": approx,
            "errors": diff,
            "absolute_errors": abs_diff,
        })

    return report
|
|
175
|
+
|
|
176
|
+
def weighted_macro_sil_score(X, labels, cluster_weights, approximation: bool = False, centers=None) -> float:
    """
    Compute a cluster-weighted macro silhouette score.

    First computes the mean silhouette score inside each cluster.
    Then averages those cluster means using user-provided cluster weights.
    The weights are normalised by their sum, so they need not add up to 1.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Input data.

    labels : array-like of shape (n_samples,)
        Cluster labels.

    cluster_weights : dict or array-like
        Weights for each cluster.

        If dict:
            keys must be cluster labels, values must be weights.
            Example: {0: 0.2, 1: 0.3, 2: 0.5}

        If array-like:
            labels must be dense integers 0, 1, ..., k-1.
            Example: [0.2, 0.3, 0.5]

    approximation : bool, default=False
        If False, use exact silhouette scores.
        If True, use centroid-based approximate silhouette scores.

    centers : array-like of shape (n_clusters, n_features), optional
        Cluster centers for approximate mode.

    Returns
    -------
    score : float
        Cluster-weighted macro silhouette score.

    Raises
    ------
    KeyError
        If ``cluster_weights`` is a dict without an entry for one of the
        cluster labels.
    ValueError
        If an array-like ``cluster_weights`` does not have exactly one
        weight per cluster or the labels are not dense 0..k-1, or if any
        weight is non-finite or negative, or the weights sum to zero.
    """
    labels = np.asarray(labels)

    silhouette_scores = sil_samples(
        X,
        labels,
        approximation=approximation,
        centers=centers,
    )

    unique_labels = np.unique(labels)

    cluster_scores = np.array([
        np.mean(silhouette_scores[labels == label])
        for label in unique_labels
    ])

    if isinstance(cluster_weights, dict):
        try:
            weights = np.array([
                cluster_weights[label]
                for label in unique_labels
            ], dtype=float)
        except KeyError as err:
            # Same exception type as before, but say which label is missing.
            raise KeyError(
                f"cluster_weights has no weight for cluster label {err.args[0]!r}."
            ) from err
    else:
        weights = np.asarray(cluster_weights, dtype=float)

        if weights.ndim != 1 or weights.shape[0] != unique_labels.size:
            raise ValueError("cluster_weights must have one weight per cluster.")

        # Positional weights only make sense when position i is label i.
        if not np.array_equal(unique_labels, np.arange(unique_labels.size)):
            raise ValueError(
                "When cluster_weights is array-like, labels must be dense integers 0..k-1. "
                "Use a dict for non-dense labels."
            )

    # NaN and +inf slip past the `< 0` and `sum == 0` guards below and would
    # make the weighted average NaN, so reject them explicitly.
    if not np.all(np.isfinite(weights)):
        raise ValueError("cluster_weights must be finite.")

    if np.any(weights < 0):
        raise ValueError("cluster_weights must be non-negative.")

    if np.sum(weights) == 0:
        raise ValueError("cluster_weights must sum to a positive value.")

    return float(np.average(cluster_scores, weights=weights))
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sil-score
|
|
3
|
+
Version: 0.1.4
|
|
4
|
+
Summary: Exact and approximate silhouette scoring with micro, macro, and weighted cluster averages.
|
|
5
|
+
Home-page: https://github.com/semoglou/sil_score
|
|
6
|
+
Author: Aggelos Semoglou
|
|
7
|
+
License: MIT
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Science/Research
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
19
|
+
Requires-Python: >=3.8
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: numpy>=1.21
|
|
23
|
+
Requires-Dist: scikit-learn>=1.0
|
|
24
|
+
Dynamic: author
|
|
25
|
+
Dynamic: classifier
|
|
26
|
+
Dynamic: description
|
|
27
|
+
Dynamic: description-content-type
|
|
28
|
+
Dynamic: home-page
|
|
29
|
+
Dynamic: license
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
Dynamic: requires-dist
|
|
32
|
+
Dynamic: requires-python
|
|
33
|
+
Dynamic: summary
|
|
34
|
+
|
|
35
|
+
# sil_score
|
|
36
|
+
|
|
37
|
+
[](https://pypi.org/project/sil-score/)
|
|
38
|
+
[](https://pypi.org/project/sil-score/)
|
|
39
|
+
[](LICENSE)
|
|
40
|
+
|
|
41
|
+
`sil-score` is a small Python package for exact and fast approximate silhouette scoring.
|
|
42
|
+
|
|
43
|
+
It extends the usual silhouette workflow with:
|
|
44
|
+
|
|
45
|
+
- per-sample silhouette scores
|
|
46
|
+
- micro-averaged silhouette score
|
|
47
|
+
- macro-averaged silhouette score
|
|
48
|
+
- cluster-weighted macro silhouette score
|
|
49
|
+
- exact vs approximate comparison report
|
|
50
|
+
|
|
51
|
+
The exact mode uses scikit-learn's `silhouette_samples`.
|
|
52
|
+
The approximate mode uses Euclidean distances to cluster centroids, making it faster but not identical to the classical silhouette definition.
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Installation
|
|
57
|
+
|
|
58
|
+
Install from PyPI:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
pip install sil-score
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Quick example
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
import numpy as np
|
|
68
|
+
from sil_score import (
|
|
69
|
+
sil_samples,
|
|
70
|
+
micro_sil_score,
|
|
71
|
+
macro_sil_score,
|
|
72
|
+
weighted_macro_sil_score,
|
|
73
|
+
sil_approximation_report,
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
X = np.array([
|
|
77
|
+
[0.0],
|
|
78
|
+
[2.0],
|
|
79
|
+
[10.0],
|
|
80
|
+
[12.0],
|
|
81
|
+
])
|
|
82
|
+
|
|
83
|
+
labels = np.array([0, 0, 1, 1])
|
|
84
|
+
|
|
85
|
+
samples = sil_samples(X, labels)
|
|
86
|
+
micro = micro_sil_score(X, labels)
|
|
87
|
+
macro = macro_sil_score(X, labels)
|
|
88
|
+
|
|
89
|
+
print(samples)
|
|
90
|
+
print(micro)
|
|
91
|
+
print(macro)
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Output:
|
|
95
|
+
|
|
96
|
+
[0.81818182 0.77777778 0.77777778 0.81818182]
|
|
97
|
+
0.797979797979798
|
|
98
|
+
0.797979797979798
|
|
99
|
+
|
|
100
|
+
## Functions
|
|
101
|
+
|
|
102
|
+
### `sil_samples`
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
sil_samples(X, labels, approximation=False, centers=None)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Computes the silhouette score for each sample.
|
|
109
|
+
|
|
110
|
+
By default, it computes the exact silhouette values using scikit-learn.
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
scores = sil_samples(X, labels)
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
For a faster centroid-based approximation:
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
scores = sil_samples(X, labels, approximation=True)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
You can also pass precomputed cluster centers:
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
scores = sil_samples(
|
|
126
|
+
X,
|
|
127
|
+
labels,
|
|
128
|
+
approximation=True,
|
|
129
|
+
centers=centers,
|
|
130
|
+
)
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
### `micro_sil_score`
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
micro_sil_score(X, labels, approximation=False, centers=None)
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
Computes the mean of all sample-level silhouette scores. This is the usual average silhouette score. Larger clusters naturally have more influence because they contain more samples.
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
# Standard usage
|
|
145
|
+
score = micro_sil_score(X, labels)
|
|
146
|
+
|
|
147
|
+
# Approximate version
|
|
148
|
+
score = micro_sil_score(X, labels, approximation=True)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
### `macro_sil_score`
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
macro_sil_score(X, labels, approximation=False, centers=None)
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
Computes the mean silhouette score inside each cluster, then averages the cluster means equally. This gives every cluster the same importance, regardless of its size.
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
# Standard usage
|
|
163
|
+
score = macro_sil_score(X, labels)
|
|
164
|
+
|
|
165
|
+
# Approximate version
|
|
166
|
+
score = macro_sil_score(X, labels, approximation=True)
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
### `weighted_macro_sil_score`
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
weighted_macro_sil_score(X, labels, cluster_weights, approximation=False, centers=None)
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
Computes a cluster-weighted macro silhouette score. First, it computes the mean silhouette score for each cluster, then combines those cluster means using custom cluster weights.
|
|
178
|
+
|
|
179
|
+
Using a dictionary:
|
|
180
|
+
```python
|
|
181
|
+
weights = {
|
|
182
|
+
0: 0.2,
|
|
183
|
+
1: 0.3,
|
|
184
|
+
2: 0.5,
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
score = weighted_macro_sil_score(X, labels, cluster_weights=weights)
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
Using an array:
|
|
191
|
+
|
|
192
|
+
```python
|
|
193
|
+
weights = [0.2, 0.3, 0.5]
|
|
194
|
+
|
|
195
|
+
score = weighted_macro_sil_score(X, labels, cluster_weights=weights)
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
### `sil_approximation_report`
|
|
201
|
+
|
|
202
|
+
```python
|
|
203
|
+
sil_approximation_report(X, labels, centers=None, return_samples=False)
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
Compares exact silhouette scores with centroid-based approximate scores. It returns the Pearson correlation between the two, along with error metrics:
|
|
207
|
+
|
|
208
|
+
```python
|
|
209
|
+
report = sil_approximation_report(X, labels)
|
|
210
|
+
print(report)
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
Example output:
|
|
214
|
+
|
|
215
|
+
```
|
|
216
|
+
{
|
|
217
|
+
"correlation": 0.96,
|
|
218
|
+
"mean_absolute_error": 0.03,
|
|
219
|
+
"mean_squared_error": 0.002,
|
|
220
|
+
"root_mean_squared_error": 0.045,
|
|
221
|
+
"max_absolute_error": 0.12,
|
|
222
|
+
"mean_error": 0.01,
|
|
223
|
+
"mean_exact_score": 0.52,
|
|
224
|
+
"mean_approximate_score": 0.53,
|
|
225
|
+
"n_samples": 300,
|
|
226
|
+
}
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
Use `return_samples=True` to also include the exact scores, approximate scores, and per-sample errors.
|
|
230
|
+
|
|
231
|
+
---
|
|
232
|
+
|
|
233
|
+
### Exact vs Approximate mode
|
|
234
|
+
|
|
235
|
+
- **Exact mode**: `sil_samples(X, labels, approximation=False)`. Uses the classical silhouette definition based on distances between samples.
|
|
236
|
+
- **Approximate mode**: `sil_samples(X, labels, approximation=True)`. Uses distances from each sample to cluster centroids. This can be significantly faster for larger datasets.
|
|
237
|
+
|
|
238
|
+
## Requirements
|
|
239
|
+
`sil-score` depends on:
|
|
240
|
+
- NumPy
|
|
241
|
+
- scikit-learn
|
|
242
|
+
|
|
243
|
+
## License
|
|
244
|
+
This project is licensed under the MIT License.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
sil_score
|