distclassipy 0.1.5__py3-none-any.whl → 0.2.0a0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in the public registry.
- distclassipy/__init__.py +1 -1
- distclassipy/classifier.py +104 -190
- distclassipy/distances.py +903 -903
- {distclassipy-0.1.5.dist-info → distclassipy-0.2.0a0.dist-info}/METADATA +24 -21
- distclassipy-0.2.0a0.dist-info/RECORD +8 -0
- {distclassipy-0.1.5.dist-info → distclassipy-0.2.0a0.dist-info}/WHEEL +1 -1
- distclassipy-0.1.5.dist-info/RECORD +0 -8
- {distclassipy-0.1.5.dist-info → distclassipy-0.2.0a0.dist-info}/LICENSE +0 -0
- {distclassipy-0.1.5.dist-info → distclassipy-0.2.0a0.dist-info}/top_level.txt +0 -0
distclassipy/__init__.py
CHANGED
distclassipy/classifier.py
CHANGED
```diff
@@ -28,7 +28,6 @@ import pandas as pd
 import scipy
 
 from sklearn.base import BaseEstimator, ClassifierMixin
-from sklearn.neighbors import KernelDensity
 from sklearn.utils.multiclass import unique_labels
 from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
 
```
```diff
@@ -41,6 +40,52 @@ METRIC_SOURCES_ = {
 }
 
 
+def initialize_metric_function(metric):
+    """Set the metric function based on the provided metric.
+
+    If the metric is a string, the function will look for a corresponding
+    function in scipy.spatial.distance or distances.Distance. If the metric
+    is a function, it will be used directly.
+    """
+    if callable(metric):
+        metric_fn_ = metric
+        metric_arg_ = metric
+
+    elif isinstance(metric, str):
+        metric_str_lowercase = metric.lower()
+        metric_found = False
+        for package_str, source in METRIC_SOURCES_.items():
+
+            # Don't use scipy for jaccard as their implementation only works with
+            # booleans - use custom jaccard instead
+            if (
+                package_str == "scipy.spatial.distance"
+                and metric_str_lowercase == "jaccard"
+            ):
+                continue
+
+            if hasattr(source, metric_str_lowercase):
+                metric_fn_ = getattr(source, metric_str_lowercase)
+                metric_found = True
+
+                # Use the string as an argument if it belongs to scipy as it is
+                # optimized
+                metric_arg_ = (
+                    metric if package_str == "scipy.spatial.distance" else metric_fn_
+                )
+                break
+        if not metric_found:
+            raise ValueError(
+                f"{metric} metric not found. Please pass a string of the "
+                "name of a metric in scipy.spatial.distance or "
+                "distances.Distance, or pass a metric function directly. For a "
+                "list of available metrics, see: "
+                "https://sidchaini.github.io/DistClassiPy/distances.html or "
+                "https://docs.scipy.org/doc/scipy/reference/spatial.distance.html"
+            )
+    return metric_fn_, metric_arg_
+
+
 class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
     """A distance-based classifier that supports different distance metrics.
 
```
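The hunk above moves metric resolution out of the class and into a module-level helper, so it can run once per prediction call instead of being bound at construction time. A minimal sketch of the two resolution paths, assuming 0.2.0a0 is installed, that the helper is imported from `distclassipy.classifier`, and that `scipy.spatial.distance` is listed before `distances.Distance` in `METRIC_SOURCES_`:

```python
from distclassipy.classifier import initialize_metric_function

# String path: the lowercased name is looked up in METRIC_SOURCES_. For a
# SciPy-backed metric the original string is kept as metric_arg_, so that
# scipy.spatial.distance.cdist can dispatch to its optimized C implementation.
fn, arg = initialize_metric_function("cityblock")
print(fn)   # <function cityblock ...> (the resolved Python callable)
print(arg)  # "cityblock" (the string form, passed straight to cdist)


# Callable path: the function itself is used for both return values.
def manhattan(u, v):
    return abs(u - v).sum()

fn, arg = initialize_metric_function(manhattan)
assert fn is manhattan and arg is manhattan

# An unrecognized name raises ValueError pointing to the lists of valid metrics.
```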
```diff
@@ -54,8 +99,6 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
 
     Parameters
     ----------
-    metric : str or callable, default="euclidean"
-        The distance metric to use for calculating the distance between features.
     scale : bool, default=True
         Whether to scale the distance between the test object and the centroid for a
         class in the feature space. If True, the data will be scaled based on the
```
```diff
@@ -70,36 +113,15 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
 
     .. versionadded:: 0.1.0
 
-    calculate_kde : bool, default=False
-        Whether to calculate a kernel density estimate based confidence parameter.
-    calculate_1d_dist : bool, default=False
-        Whether to calculate the 1-dimensional distance based confidence parameter.
 
     Attributes
     ----------
-    metric : str or callable
-        The distance metric used for classification.
     scale : bool
         Indicates whether the data is scaled.
     central_stat : str
         The statistic used for calculating central tendency.
     dispersion_stat : str
         The statistic used for calculating dispersion.
-    calculate_kde : bool
-        Indicates whether a kernel density estimate is calculated.
-    calculate_1d_dist : bool
-        Indicates whether 1-dimensional distances are calculated.
-
-    See Also
-    --------
-    scipy.spatial.dist : Other distance metrics provided in SciPy
-    distclassipy.Distance : Distance metrics included with DistClassiPy
-
-    Notes
-    -----
-    If using distance metrics supported by SciPy, it is desirable to pass a string,
-    which allows SciPy to use an optimized C version of the code instead of the slower
-    Python version.
 
     References
     ----------
```
```diff
@@ -122,66 +144,14 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
 
     def __init__(
         self,
-        metric: str | Callable = "euclidean",
         scale: bool = True,
         central_stat: str = "median",
         dispersion_stat: str = "std",
-        calculate_kde: bool = True,
-        calculate_1d_dist: bool = True,
     ):
         """Initialize the classifier with specified parameters."""
-        self.metric = metric
         self.scale = scale
         self.central_stat = central_stat
         self.dispersion_stat = dispersion_stat
-        self.calculate_kde = calculate_kde
-        self.calculate_1d_dist = calculate_1d_dist
-
-    def initialize_metric_function(self):
-        """Set the metric function based on the provided metric.
-
-        If the metric is a string, the function will look for a corresponding
-        function in scipy.spatial.distance or distances.Distance. If the metric
-        is a function, it will be used directly.
-        """
-        if callable(self.metric):
-            self.metric_fn_ = self.metric
-            self.metric_arg_ = self.metric
-
-        elif isinstance(self.metric, str):
-            metric_str_lowercase = self.metric.lower()
-            metric_found = False
-            for package_str, source in METRIC_SOURCES_.items():
-
-                # Don't use scipy for jaccard as their implementation only works with
-                # booleans - use custom jaccard instead
-                if (
-                    package_str == "scipy.spatial.distance"
-                    and metric_str_lowercase == "jaccard"
-                ):
-                    continue
-
-                if hasattr(source, metric_str_lowercase):
-                    self.metric_fn_ = getattr(source, metric_str_lowercase)
-                    metric_found = True
-
-                    # Use the string as an argument if it belongs to scipy as it is
-                    # optimized
-                    self.metric_arg_ = (
-                        self.metric
-                        if package_str == "scipy.spatial.distance"
-                        else self.metric_fn_
-                    )
-                    break
-            if not metric_found:
-                raise ValueError(
-                    f"{self.metric} metric not found. Please pass a string of the "
-                    "name of a metric in scipy.spatial.distance or "
-                    "distances.Distance, or pass a metric function directly. For a "
-                    "list of available metrics, see: "
-                    "https://sidchaini.github.io/DistClassiPy/distances.html or "
-                    "https://docs.scipy.org/doc/scipy/reference/spatial.distance.html"
-                )
 
     def fit(self, X: np.array, y: np.array, feat_labels: list[str] = None):
         """Calculate the feature space centroid for all classes.
```
```diff
@@ -209,9 +179,9 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
         """
         X, y = check_X_y(X, y)
         self.classes_ = unique_labels(y)
-        self.n_features_in_ = X.shape[
-
-
+        self.n_features_in_ = X.shape[
+            1
+        ]  # Number of features seen during fit - required for sklearn compatibility.
 
         if feat_labels is None:
             feat_labels = [f"Feature_{x}" for x in range(X.shape[1])]
```
```diff
@@ -256,27 +226,15 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
         )
         self.df_iqr_ = df_iqr
 
-        if self.calculate_kde:
-            self.kde_dict_ = {}
-
-            for cl in self.classes_:
-                subX = X[y == cl]
-                # Implement the following in an if-else to save computational time.
-                # kde = KernelDensity(bandwidth='scott', metric=self.metric)
-                # kde.fit(subX)
-                kde = KernelDensity(
-                    bandwidth="scott",
-                    metric="pyfunc",
-                    metric_params={"func": self.metric_fn_},
-                )
-                kde.fit(subX)
-                self.kde_dict_[cl] = kde
-
         self.is_fitted_ = True
 
         return self
 
-    def predict(self, X: np.array):
+    def predict(
+        self,
+        X: np.array,
+        metric: str | Callable = "euclidean",
+    ):
         """Predict the class labels for the provided X.
 
         The prediction is based on the distance of each data point in the input sample
```
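This hunk deletes the per-class `KernelDensity` fitting from `fit`, so 0.2.0a0 no longer builds KDE models or KDE-based confidences at all. For code that relied on `kde_dict_`, a user-side sketch of the equivalent computation, reusing the same scikit-learn calls as the deleted block (`fit_class_kdes` is a hypothetical helper, not part of the package; the `"scott"` bandwidth string requires a reasonably recent scikit-learn):

```python
import numpy as np
from sklearn.neighbors import KernelDensity


def fit_class_kdes(X, y, classes, metric_fn):
    """Fit one KDE per class, mirroring the block removed from fit()."""
    kde_dict = {}
    for cl in classes:
        kde = KernelDensity(
            bandwidth="scott",
            metric="pyfunc",  # wrap an arbitrary Python distance function
            metric_params={"func": metric_fn},
        )
        kde.fit(X[y == cl])
        kde_dict[cl] = kde
    return kde_dict


# Per-class likelihoods for new samples, as the deleted analysis code computed:
# likelihoods = np.column_stack(
#     [np.exp(kde.score_samples(X_new)) for kde in kde_dict.values()]
# )
```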
```diff
@@ -287,18 +245,33 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
         ----------
         X : array-like of shape (n_samples, n_features)
             The input samples.
+        metric : str or callable, default="euclidean"
+            The distance metric to use for calculating the distance between features.
 
         Returns
         -------
         y : ndarray of shape (n_samples,)
             The predicted classes.
+
+        See Also
+        --------
+        scipy.spatial.dist : Other distance metrics provided in SciPy
+        distclassipy.Distance : Distance metrics included with DistClassiPy
+
+        Notes
+        -----
+        If using distance metrics supported by SciPy, it is desirable to pass a string,
+        which allows SciPy to use an optimized C version of the code instead of the
+        slower Python version.
         """
         check_is_fitted(self, "is_fitted_")
         X = check_array(X)
 
+        metric_fn_, metric_arg_ = initialize_metric_function(metric)
+
         if not self.scale:
             dist_arr = scipy.spatial.distance.cdist(
-                XA=X, XB=self.df_centroid_.to_numpy(), metric=self.metric_arg_
+                XA=X, XB=self.df_centroid_.to_numpy(), metric=metric_arg_
             )
 
         else:
```
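Together with the `__init__` hunk above, this moves metric selection from construction time to call time: the centroids computed in `fit` do not depend on the metric, so one fitted model can be scored under several metrics. A usage sketch with synthetic data, assuming `DistanceMetricClassifier` is still exported at the package top level as in 0.1.5:

```python
import numpy as np
from distclassipy import DistanceMetricClassifier

rng = np.random.default_rng(0)
X = np.vstack([rng.normal(0, 1, (50, 4)), rng.normal(3, 1, (50, 4))])
y = np.repeat([0, 1], 50)

clf = DistanceMetricClassifier(scale=True)  # 0.2.0a0: no metric argument here
clf.fit(X, y)  # centroids and dispersions are metric-independent

# Compare several metrics without refitting; strings let cdist use SciPy's C code.
for metric in ["euclidean", "cityblock", "braycurtis"]:
    y_pred = clf.predict(X, metric=metric)
    print(metric, (y_pred == y).mean())
```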
```diff
@@ -315,16 +288,18 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
                 w = wtdf.loc[cl].to_numpy()  # 1/std dev
                 XB = XB * w  # w is for this class only
                 XA = X * w  # w is for this class only
-                cl_dist = scipy.spatial.distance.cdist(
-                    XA=XA, XB=XB, metric=self.metric_arg_
-                )
+                cl_dist = scipy.spatial.distance.cdist(XA=XA, XB=XB, metric=metric_arg_)
                 dist_arr_list.append(cl_dist)
             dist_arr = np.column_stack(dist_arr_list)
 
         y_pred = self.classes_[dist_arr.argmin(axis=1)]
         return y_pred
 
-    def predict_and_analyse(self, X: np.array):
+    def predict_and_analyse(
+        self,
+        X: np.array,
+        metric: str | Callable = "euclidean",
+    ):
         """Predict the class labels for the provided X and perform analysis.
 
         The prediction is based on the distance of each data point in the input sample
```
```diff
@@ -338,18 +313,35 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
         ----------
         X : array-like of shape (n_samples, n_features)
             The input samples.
+        metric : str or callable, default="euclidean"
+            The distance metric to use for calculating the distance between features.
+
 
         Returns
         -------
         y : ndarray of shape (n_samples,)
             The predicted classes.
+
+        See Also
+        --------
+        scipy.spatial.dist : Other distance metrics provided in SciPy
+        distclassipy.Distance : Distance metrics included with DistClassiPy
+
+        Notes
+        -----
+        If using distance metrics supported by SciPy, it is desirable to pass a string,
+        which allows SciPy to use an optimized C version of the code instead
+        of the slower Python version.
+
         """
         check_is_fitted(self, "is_fitted_")
         X = check_array(X)
 
+        metric_fn_, metric_arg_ = initialize_metric_function(metric)
+
         if not self.scale:
             dist_arr = scipy.spatial.distance.cdist(
-                XA=X, XB=self.df_centroid_.to_numpy(), metric=self.metric_arg_
+                XA=X, XB=self.df_centroid_.to_numpy(), metric=metric_arg_
             )
 
         else:
```
```diff
@@ -366,9 +358,7 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
                 w = wtdf.loc[cl].to_numpy()  # 1/std dev
                 XB = XB * w  # w is for this class only
                 XA = X * w  # w is for this class only
-                cl_dist = scipy.spatial.distance.cdist(
-                    XA=XA, XB=XB, metric=self.metric_arg_
-                )
+                cl_dist = scipy.spatial.distance.cdist(XA=XA, XB=XB, metric=metric_arg_)
                 dist_arr_list.append(cl_dist)
             dist_arr = np.column_stack(dist_arr_list)
 
```
```diff
@@ -381,68 +371,15 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
 
         y_pred = self.classes_[dist_arr.argmin(axis=1)]
 
-        if self.calculate_kde:
-            # NEW: Rescale in terms of median likelihoods - calculate here
-            scale_factors = np.exp(
-                [
-                    self.kde_dict_[cl].score_samples(
-                        self.df_centroid_.loc[cl].to_numpy().reshape(1, -1)
-                    )[0]
-                    for cl in self.classes_
-                ]
-            )
-
-            likelihood_arr = []
-            for k in self.kde_dict_.keys():
-                log_pdf = self.kde_dict_[k].score_samples(X)
-                likelihood_val = np.exp(log_pdf)
-                likelihood_arr.append(likelihood_val)
-            self.likelihood_arr_ = np.array(likelihood_arr).T
-
-            # NEW: Rescale in terms of median likelihoods - rescale here
-            self.likelihood_arr_ = self.likelihood_arr_ / scale_factors
-
-        if self.calculate_1d_dist:
-            conf_cl = []
-            Xdf_temp = pd.DataFrame(data=X, columns=self.df_centroid_.columns)
-            for cl in self.classes_:
-                sum_1d_dists = np.zeros(shape=(len(Xdf_temp)))
-                for feat in Xdf_temp.columns:
-                    dists = scipy.spatial.distance.cdist(
-                        XA=np.zeros(shape=(1, 1)),
-                        XB=(self.df_centroid_.loc[cl] - Xdf_temp)[feat]
-                        .to_numpy()
-                        .reshape(-1, 1),
-                        metric=self.metric_arg_,
-                    ).ravel()
-                    if self.scale and self.dispersion_stat == "std":
-                        sum_1d_dists = sum_1d_dists + dists / self.df_std_.loc[cl, feat]
-                    elif self.scale and self.dispersion_stat == "std":
-                        sum_1d_dists = sum_1d_dists + dists / self.df_iqr_.loc[cl, feat]
-                    else:
-                        sum_1d_dists = sum_1d_dists + dists
-                confs = 1 / np.clip(sum_1d_dists, a_min=np.finfo(float).eps, a_max=None)
-                conf_cl.append(confs)
-            conf_cl = np.array(conf_cl)
-            self.conf_cl_ = conf_cl
-
         self.analyis_ = True
 
         return y_pred
 
-    def calculate_confidence(self, method="distance_inverse"):
+    def calculate_confidence(self):
         """Calculate the confidence for each prediction.
 
-        The confidence is calculated
-        the centroids of the training data
-        1-dimensional distance.
-
-        Parameters
-        ----------
-        method : {"distance_inverse", "1d_distance_inverse","kde_likelihood"},
-            default="distance_inverse"
-            The method to use for calculating confidence. Default is
-            'distance_inverse'.
+        The confidence is calculated as the inverse of the distance of each data point
+        to the centroids of the training data.
         """
         check_is_fitted(self, "is_fitted_")
         if not hasattr(self, "analyis_"):
```
```diff
@@ -452,34 +389,11 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
             )
 
         # Calculate confidence for each prediction
-        if method == "distance_inverse":
-            self.confidence_df_ = 1 / np.clip(
-                self.centroid_dist_df_, a_min=np.finfo(float).eps, a_max=None
-            )
-            self.confidence_df_.columns = [
-                x.replace("_dist", "_conf") for x in self.confidence_df_.columns
-            ]
-
-        elif method == "1d_distance_inverse":
-            if not self.calculate_1d_dist:
-                raise ValueError(
-                    "method='1d_distance_inverse' is only valid if calculate_1d_dist "
-                    "is set to True"
-                )
-            self.confidence_df_ = pd.DataFrame(
-                data=self.conf_cl_.T, columns=[f"{x}_conf" for x in self.classes_]
-            )
-
-        elif method == "kde_likelihood":
-            if not self.calculate_kde:
-                raise ValueError(
-                    "method='kde_likelihood' is only valid if calculate_kde is set "
-                    "to True"
-                )
-
-            self.confidence_df_ = pd.DataFrame(
-                data=self.likelihood_arr_,
-                columns=[f"{x}_conf" for x in self.kde_dict_.keys()],
-            )
+        self.confidence_df_ = 1 / np.clip(
+            self.centroid_dist_df_, a_min=np.finfo(float).eps, a_max=None
+        )
+        self.confidence_df_.columns = [
+            x.replace("_dist", "_conf") for x in self.confidence_df_.columns
+        ]
 
         return self.confidence_df_.to_numpy()
```
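With the `method` parameter gone, `calculate_confidence` always uses the inverse-distance form that survives above. A standalone sketch of that computation on hypothetical distances (`centroid_dist_df_` stands in for the attribute populated by `predict_and_analyse`):

```python
import numpy as np
import pandas as pd

# Hypothetical distances from three samples to two class centroids.
centroid_dist_df_ = pd.DataFrame(
    {"A_dist": [0.5, 2.0, 0.0], "B_dist": [1.5, 0.25, 3.0]}
)

# Inverse distance, clipped away from zero so an exact centroid hit yields a
# large finite confidence (1/eps) instead of a division by zero.
confidence_df_ = 1 / np.clip(centroid_dist_df_, np.finfo(float).eps, None)
confidence_df_.columns = [
    c.replace("_dist", "_conf") for c in confidence_df_.columns
]
print(confidence_df_)  # higher confidence for the nearer centroid
```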
|