distclassipy 0.1.4__py3-none-any.whl → 0.1.6a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- distclassipy/__init__.py +22 -10
- distclassipy/classifier.py +162 -58
- distclassipy/distances.py +1177 -1141
- {distclassipy-0.1.4.dist-info → distclassipy-0.1.6a0.dist-info}/METADATA +22 -19
- distclassipy-0.1.6a0.dist-info/RECORD +8 -0
- {distclassipy-0.1.4.dist-info → distclassipy-0.1.6a0.dist-info}/WHEEL +1 -1
- distclassipy-0.1.4.dist-info/RECORD +0 -8
- {distclassipy-0.1.4.dist-info → distclassipy-0.1.6a0.dist-info}/LICENSE +0 -0
- {distclassipy-0.1.4.dist-info → distclassipy-0.1.6a0.dist-info}/top_level.txt +0 -0
distclassipy/__init__.py
CHANGED
|
@@ -1,16 +1,28 @@
|
|
|
1
|
-
"""
|
|
2
|
-
A module for using distance metrics for classification.
|
|
1
|
+
"""A module for using distance metrics for classification.
|
|
3
2
|
|
|
4
3
|
Classes:
|
|
5
|
-
DistanceMetricClassifier - A classifier that uses a specified distance metric for
|
|
4
|
+
DistanceMetricClassifier - A classifier that uses a specified distance metric for
|
|
5
|
+
classification.
|
|
6
6
|
Distance - A class that provides various distance metrics for use in classification.
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
Copyright (C) 2024 Siddharth Chaini
|
|
10
|
+
-----
|
|
11
|
+
This program is free software: you can redistribute it and/or modify
|
|
12
|
+
it under the terms of the GNU General Public License as published by
|
|
13
|
+
the Free Software Foundation, either version 3 of the License, or
|
|
14
|
+
(at your option) any later version.
|
|
15
|
+
|
|
16
|
+
This program is distributed in the hope that it will be useful,
|
|
17
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
18
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
19
|
+
GNU General Public License for more details.
|
|
20
|
+
|
|
21
|
+
You should have received a copy of the GNU General Public License
|
|
22
|
+
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
7
23
|
"""
|
|
8
24
|
|
|
9
|
-
from .classifier import
|
|
10
|
-
|
|
11
|
-
) # Importing the DistanceMetricClassifier from the classifier module
|
|
12
|
-
from .distances import (
|
|
13
|
-
Distance,
|
|
14
|
-
) # Importing the Distance class from the distances module
|
|
25
|
+
from .classifier import DistanceMetricClassifier # noqa
|
|
26
|
+
from .distances import Distance # noqa
|
|
15
27
|
|
|
16
|
-
__version__ = "0.1.
|
|
28
|
+
__version__ = "0.1.6a0"
|
distclassipy/classifier.py
CHANGED
|
@@ -1,41 +1,87 @@
|
|
|
1
|
-
"""
|
|
2
|
-
|
|
1
|
+
"""A module containing the distance metric classifier.
|
|
2
|
+
|
|
3
|
+
This module contains the DistanceMetricClassifier introduced by Chaini et al. (2024)
|
|
4
|
+
in "Light Curve Classification with DistClassiPy: a new distance-based classifier"
|
|
5
|
+
|
|
6
|
+
Copyright (C) 2024 Siddharth Chaini
|
|
7
|
+
-----
|
|
8
|
+
This program is free software: you can redistribute it and/or modify
|
|
9
|
+
it under the terms of the GNU General Public License as published by
|
|
10
|
+
the Free Software Foundation, either version 3 of the License, or
|
|
11
|
+
(at your option) any later version.
|
|
12
|
+
|
|
13
|
+
This program is distributed in the hope that it will be useful,
|
|
14
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
16
|
+
GNU General Public License for more details.
|
|
17
|
+
|
|
18
|
+
You should have received a copy of the GNU General Public License
|
|
19
|
+
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
3
20
|
"""
|
|
4
21
|
|
|
22
|
+
import warnings
|
|
23
|
+
from typing import Callable
|
|
24
|
+
|
|
5
25
|
import numpy as np
|
|
26
|
+
|
|
6
27
|
import pandas as pd
|
|
28
|
+
|
|
7
29
|
import scipy
|
|
8
|
-
|
|
30
|
+
|
|
9
31
|
from sklearn.base import BaseEstimator, ClassifierMixin
|
|
10
|
-
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
|
|
11
|
-
from sklearn.utils.multiclass import unique_labels
|
|
12
32
|
from sklearn.neighbors import KernelDensity
|
|
13
|
-
from
|
|
33
|
+
from sklearn.utils.multiclass import unique_labels
|
|
34
|
+
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
|
|
35
|
+
|
|
36
|
+
from .distances import Distance
|
|
37
|
+
|
|
38
|
+
# Hardcoded source packages to check for distance metrics.
|
|
39
|
+
METRIC_SOURCES_ = {
|
|
40
|
+
"scipy.spatial.distance": scipy.spatial.distance,
|
|
41
|
+
"distances.Distance": Distance(),
|
|
42
|
+
}
|
|
14
43
|
|
|
15
44
|
|
|
16
45
|
class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
|
|
17
|
-
"""
|
|
18
|
-
A distance-based classifier that supports the use of various distance metrics.
|
|
46
|
+
"""A distance-based classifier that supports different distance metrics.
|
|
19
47
|
|
|
20
|
-
The distance metric classifier determines the similarity between features in a
|
|
48
|
+
The distance metric classifier determines the similarity between features in a
|
|
49
|
+
dataset by leveraging the use of different distance metrics. A specified
|
|
50
|
+
distance metric is used to compute the distance between a given object and a
|
|
51
|
+
centroid for every training class in the feature space. The classifier supports
|
|
52
|
+
the use of different statistical measures for constructing the centroid and scaling
|
|
53
|
+
the computed distance. Additionally, the distance metric classifier also
|
|
54
|
+
optionally provides an estimate of the confidence of the classifier's predictions.
|
|
21
55
|
|
|
22
56
|
Parameters
|
|
23
57
|
----------
|
|
24
58
|
metric : str or callable, default="euclidean"
|
|
25
59
|
The distance metric to use for calculating the distance between features.
|
|
26
60
|
scale : bool, default=True
|
|
27
|
-
Whether to scale the distance between the test object and the centroid for a
|
|
61
|
+
Whether to scale the distance between the test object and the centroid for a
|
|
62
|
+
class in the feature space. If True, the data will be scaled based on the
|
|
63
|
+
specified dispersion statistic.
|
|
28
64
|
central_stat : {"mean", "median"}, default="median"
|
|
29
|
-
The statistic used to calculate the central tendency of the data to construct
|
|
65
|
+
The statistic used to calculate the central tendency of the data to construct
|
|
66
|
+
the feature-space centroid. Supported statistics are "mean" and "median".
|
|
30
67
|
dispersion_stat : {"std", "iqr"}, default="std"
|
|
31
|
-
The statistic used to calculate the dispersion of the data for scaling the
|
|
68
|
+
The statistic used to calculate the dispersion of the data for scaling the
|
|
69
|
+
distance. Supported statistics are "std" for standard deviation and "iqr"
|
|
70
|
+
for inter-quartile range.
|
|
32
71
|
|
|
33
72
|
.. versionadded:: 0.1.0
|
|
34
73
|
|
|
35
74
|
calculate_kde : bool, default=True
|
|
36
75
|
Whether to calculate a kernel density estimate based confidence parameter.
|
|
76
|
+
.. deprecated:: 0.2.0
|
|
77
|
+
This parameter will be removed in a future version and only the
|
|
78
|
+
distance confidence parameter will be available.
|
|
37
79
|
calculate_1d_dist : bool, default=True
|
|
38
80
|
Whether to calculate the 1-dimensional distance based confidence parameter.
|
|
81
|
+
.. deprecated:: 0.2.0
|
|
82
|
+
This parameter will be removed in a future version and only the
|
|
83
|
+
distance confidence parameter will be available.
|
|
84
|
+
Whether to calculate the 1-dimensional distance based confidence parameter.
|
|
39
85
|
|
|
40
86
|
Attributes
|
|
41
87
|
----------
|
|
@@ -49,8 +95,12 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
|
|
|
49
95
|
The statistic used for calculating dispersion.
|
|
50
96
|
calculate_kde : bool
|
|
51
97
|
Indicates whether a kernel density estimate is calculated.
|
|
98
|
+
.. deprecated:: 0.2.0
|
|
99
|
+
This parameter will be removed in a future version.
|
|
52
100
|
calculate_1d_dist : bool
|
|
53
101
|
Indicates whether 1-dimensional distances are calculated.
|
|
102
|
+
.. deprecated:: 0.2.0
|
|
103
|
+
This parameter will be removed in a future version.
|
|
54
104
|
|
|
55
105
|
See Also
|
|
56
106
|
--------
|
|
@@ -59,11 +109,14 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
|
|
|
59
109
|
|
|
60
110
|
Notes
|
|
61
111
|
-----
|
|
62
|
-
If using distance metrics supported by SciPy, it is desirable to pass a string,
|
|
112
|
+
If using distance metrics supported by SciPy, it is desirable to pass a string,
|
|
113
|
+
which allows SciPy to use an optimized C version of the code instead of the slower
|
|
114
|
+
Python version.
|
|
63
115
|
|
|
64
116
|
References
|
|
65
117
|
----------
|
|
66
|
-
.. [1] "Light Curve Classification with DistClassiPy: a new distance-based
|
|
118
|
+
.. [1] "Light Curve Classification with DistClassiPy: a new distance-based
|
|
119
|
+
classifier"
|
|
67
120
|
|
|
68
121
|
Examples
|
|
69
122
|
--------
|
|
@@ -85,32 +138,35 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
|
|
|
85
138
|
scale: bool = True,
|
|
86
139
|
central_stat: str = "median",
|
|
87
140
|
dispersion_stat: str = "std",
|
|
88
|
-
calculate_kde: bool = True,
|
|
89
|
-
calculate_1d_dist: bool = True,
|
|
141
|
+
calculate_kde: bool = True, # deprecated in 0.2.0
|
|
142
|
+
calculate_1d_dist: bool = True, # deprecated in 0.2.0
|
|
90
143
|
):
|
|
91
|
-
"""
|
|
92
|
-
Initialize the classifier with specified parameters.
|
|
93
|
-
"""
|
|
144
|
+
"""Initialize the classifier with specified parameters."""
|
|
94
145
|
self.metric = metric
|
|
95
146
|
self.scale = scale
|
|
96
147
|
self.central_stat = central_stat
|
|
97
148
|
self.dispersion_stat = dispersion_stat
|
|
149
|
+
if calculate_kde:
|
|
150
|
+
warnings.warn(
|
|
151
|
+
"calculate_kde is deprecated and will be removed in version 0.2.0",
|
|
152
|
+
DeprecationWarning,
|
|
153
|
+
)
|
|
98
154
|
self.calculate_kde = calculate_kde
|
|
155
|
+
|
|
156
|
+
if calculate_1d_dist:
|
|
157
|
+
warnings.warn(
|
|
158
|
+
"calculate_1d_dist is deprecated and will be removed in version 0.2.0",
|
|
159
|
+
DeprecationWarning,
|
|
160
|
+
)
|
|
99
161
|
self.calculate_1d_dist = calculate_1d_dist
|
|
100
162
|
|
|
101
|
-
def
|
|
102
|
-
"""
|
|
103
|
-
Set the metric function based on the provided metric.
|
|
163
|
+
def initialize_metric_function(self):
|
|
164
|
+
"""Set the metric function based on the provided metric.
|
|
104
165
|
|
|
105
|
-
If the metric is a string, the function will look for a corresponding
|
|
166
|
+
If the metric is a string, the function will look for a corresponding
|
|
167
|
+
function in scipy.spatial.distance or distances.Distance. If the metric
|
|
168
|
+
is a function, it will be used directly.
|
|
106
169
|
"""
|
|
107
|
-
|
|
108
|
-
# Hardcoded source packages to check for distance metrics.
|
|
109
|
-
metric_sources_ = {
|
|
110
|
-
"scipy.spatial.distance": scipy.spatial.distance,
|
|
111
|
-
"distances.Distance": Distance(),
|
|
112
|
-
}
|
|
113
|
-
|
|
114
170
|
if callable(self.metric):
|
|
115
171
|
self.metric_fn_ = self.metric
|
|
116
172
|
self.metric_arg_ = self.metric
|
|
@@ -118,9 +174,10 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
|
|
|
118
174
|
elif isinstance(self.metric, str):
|
|
119
175
|
metric_str_lowercase = self.metric.lower()
|
|
120
176
|
metric_found = False
|
|
121
|
-
for package_str, source in
|
|
177
|
+
for package_str, source in METRIC_SOURCES_.items():
|
|
122
178
|
|
|
123
|
-
# Don't use scipy for jaccard as their implementation only works with
|
|
179
|
+
# Don't use scipy for jaccard as their implementation only works with
|
|
180
|
+
# booleans - use custom jaccard instead
|
|
124
181
|
if (
|
|
125
182
|
package_str == "scipy.spatial.distance"
|
|
126
183
|
and metric_str_lowercase == "jaccard"
|
|
@@ -131,7 +188,8 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
|
|
|
131
188
|
self.metric_fn_ = getattr(source, metric_str_lowercase)
|
|
132
189
|
metric_found = True
|
|
133
190
|
|
|
134
|
-
# Use the string as an argument if it belongs to scipy as it is
|
|
191
|
+
# Use the string as an argument if it belongs to scipy as it is
|
|
192
|
+
# optimized
|
|
135
193
|
self.metric_arg_ = (
|
|
136
194
|
self.metric
|
|
137
195
|
if package_str == "scipy.spatial.distance"
|
|
@@ -140,14 +198,22 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
|
|
|
140
198
|
break
|
|
141
199
|
if not metric_found:
|
|
142
200
|
raise ValueError(
|
|
143
|
-
f"{self.metric} metric not found. Please pass a string of the
|
|
201
|
+
f"{self.metric} metric not found. Please pass a string of the "
|
|
202
|
+
"name of a metric in scipy.spatial.distance or "
|
|
203
|
+
"distances.Distance, or pass a metric function directly. For a "
|
|
204
|
+
"list of available metrics, see: "
|
|
205
|
+
"https://sidchaini.github.io/DistClassiPy/distances.html or "
|
|
206
|
+
"https://docs.scipy.org/doc/scipy/reference/spatial.distance.html"
|
|
144
207
|
)
|
|
145
208
|
|
|
146
209
|
def fit(self, X: np.array, y: np.array, feat_labels: list[str] = None):
|
|
147
|
-
"""
|
|
148
|
-
Calculate the feature space centroid for all classes in the training set (X,y) using the central statistic. If scaling is enabled, also calculate the appropriate dispersion statistic.
|
|
210
|
+
"""Calculate the feature space centroid for all classes.
|
|
149
211
|
|
|
150
|
-
This
|
|
212
|
+
This function calculates the feature space centroid in the training
|
|
213
|
+
set (X, y) for all classes using the central statistic. If scaling
|
|
214
|
+
is enabled, it also calculates the appropriate dispersion statistic.
|
|
215
|
+
This involves computing the centroid for every class in the feature space and
|
|
216
|
+
optionally calculating the kernel density estimate and 1-dimensional distance.
|
|
151
217
|
|
|
152
218
|
Parameters
|
|
153
219
|
----------
|
|
@@ -156,7 +222,8 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
|
|
|
156
222
|
y : array-like of shape (n_samples,)
|
|
157
223
|
The target values (class labels).
|
|
158
224
|
feat_labels : list of str, optional, default=None
|
|
159
|
-
The feature labels. If not provided, default labels representing feature
|
|
225
|
+
The feature labels. If not provided, default labels representing feature
|
|
226
|
+
number will be used.
|
|
160
227
|
|
|
161
228
|
Returns
|
|
162
229
|
-------
|
|
@@ -165,9 +232,11 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
|
|
|
165
232
|
"""
|
|
166
233
|
X, y = check_X_y(X, y)
|
|
167
234
|
self.classes_ = unique_labels(y)
|
|
168
|
-
self.n_features_in_ = X.shape[
|
|
235
|
+
self.n_features_in_ = X.shape[
|
|
236
|
+
1
|
|
237
|
+
] # Number of features seen during fit - required for sklearn compatibility.
|
|
169
238
|
|
|
170
|
-
self.
|
|
239
|
+
self.initialize_metric_function()
|
|
171
240
|
|
|
172
241
|
if feat_labels is None:
|
|
173
242
|
feat_labels = [f"Feature_{x}" for x in range(X.shape[1])]
|
|
@@ -188,7 +257,8 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
|
|
|
188
257
|
std_list = []
|
|
189
258
|
for cur_class in self.classes_:
|
|
190
259
|
cur_X = X[y == cur_class]
|
|
191
|
-
# Note we're using ddof=1 because we're dealing with a sample.
|
|
260
|
+
# Note we're using ddof=1 because we're dealing with a sample.
|
|
261
|
+
# See more: https://stackoverflow.com/a/46083501/10743245
|
|
192
262
|
std_list.append(np.std(cur_X, axis=0, ddof=1).ravel())
|
|
193
263
|
df_std = pd.DataFrame(
|
|
194
264
|
data=np.array(std_list), index=self.classes_, columns=feat_labels
|
|
@@ -200,7 +270,8 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
|
|
|
200
270
|
|
|
201
271
|
for cur_class in self.classes_:
|
|
202
272
|
cur_X = X[y == cur_class]
|
|
203
|
-
# Note we're using ddof=1 because we're dealing with a sample.
|
|
273
|
+
# Note we're using ddof=1 because we're dealing with a sample.
|
|
274
|
+
# See more: https://stackoverflow.com/a/46083501/10743245
|
|
204
275
|
iqr_list.append(
|
|
205
276
|
np.quantile(cur_X, q=0.75, axis=0).ravel()
|
|
206
277
|
- np.quantile(cur_X, q=0.25, axis=0).ravel()
|
|
@@ -211,6 +282,10 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
|
|
|
211
282
|
self.df_iqr_ = df_iqr
|
|
212
283
|
|
|
213
284
|
if self.calculate_kde:
|
|
285
|
+
warnings.warn(
|
|
286
|
+
"KDE calculation is deprecated and will be removed in version 0.2.0",
|
|
287
|
+
DeprecationWarning,
|
|
288
|
+
)
|
|
214
289
|
self.kde_dict_ = {}
|
|
215
290
|
|
|
216
291
|
for cl in self.classes_:
|
|
@@ -225,7 +300,6 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
|
|
|
225
300
|
)
|
|
226
301
|
kde.fit(subX)
|
|
227
302
|
self.kde_dict_[cl] = kde
|
|
228
|
-
|
|
229
303
|
self.is_fitted_ = True
|
|
230
304
|
|
|
231
305
|
return self
|
|
@@ -233,7 +307,9 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
|
|
|
233
307
|
def predict(self, X: np.array):
|
|
234
308
|
"""Predict the class labels for the provided X.
|
|
235
309
|
|
|
236
|
-
The prediction is based on the distance of each data point in the input sample
|
|
310
|
+
The prediction is based on the distance of each data point in the input sample
|
|
311
|
+
to the centroid for each class in the feature space. The predicted class is the
|
|
312
|
+
one whose centroid is the closest to the input sample.
|
|
237
313
|
|
|
238
314
|
Parameters
|
|
239
315
|
----------
|
|
@@ -277,12 +353,14 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
|
|
|
277
353
|
return y_pred
|
|
278
354
|
|
|
279
355
|
def predict_and_analyse(self, X: np.array):
|
|
280
|
-
"""
|
|
281
|
-
Predict the class labels for the provided X and perform analysis.
|
|
356
|
+
"""Predict the class labels for the provided X and perform analysis.
|
|
282
357
|
|
|
283
|
-
The prediction is based on the distance of each data point in the input sample
|
|
358
|
+
The prediction is based on the distance of each data point in the input sample
|
|
359
|
+
to the centroid for each class in the feature space. The predicted class is the
|
|
360
|
+
one whose centroid is the closest to the input sample.
|
|
284
361
|
|
|
285
|
-
The analysis involves saving all calculated distances and confidences as an
|
|
362
|
+
The analysis involves saving all calculated distances and confidences as an
|
|
363
|
+
attribute for inspection and analysis later.
|
|
286
364
|
|
|
287
365
|
Parameters
|
|
288
366
|
----------
|
|
@@ -332,6 +410,11 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
|
|
|
332
410
|
y_pred = self.classes_[dist_arr.argmin(axis=1)]
|
|
333
411
|
|
|
334
412
|
if self.calculate_kde:
|
|
413
|
+
warnings.warn(
|
|
414
|
+
"KDE calculation in predict_and_analyse is deprecated "
|
|
415
|
+
"and will be removed in version 0.2.0",
|
|
416
|
+
DeprecationWarning,
|
|
417
|
+
)
|
|
335
418
|
# NEW: Rescale in terms of median likelihoods - calculate here
|
|
336
419
|
scale_factors = np.exp(
|
|
337
420
|
[
|
|
@@ -351,8 +434,11 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
|
|
|
351
434
|
|
|
352
435
|
# NEW: Rescale in terms of median likelihoods - rescale here
|
|
353
436
|
self.likelihood_arr_ = self.likelihood_arr_ / scale_factors
|
|
354
|
-
|
|
355
437
|
if self.calculate_1d_dist:
|
|
438
|
+
warnings.warn(
|
|
439
|
+
"calculate_1d_dist is deprecated and will be removed in version 0.2.0",
|
|
440
|
+
DeprecationWarning,
|
|
441
|
+
)
|
|
356
442
|
conf_cl = []
|
|
357
443
|
Xdf_temp = pd.DataFrame(data=X, columns=self.df_centroid_.columns)
|
|
358
444
|
for cl in self.classes_:
|
|
@@ -375,26 +461,32 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
|
|
|
375
461
|
conf_cl.append(confs)
|
|
376
462
|
conf_cl = np.array(conf_cl)
|
|
377
463
|
self.conf_cl_ = conf_cl
|
|
378
|
-
|
|
379
464
|
self.analyis_ = True
|
|
380
465
|
|
|
381
466
|
return y_pred
|
|
382
467
|
|
|
383
468
|
def calculate_confidence(self, method: str = "distance_inverse"):
|
|
384
|
-
"""
|
|
385
|
-
Calculate the confidence for each prediction.
|
|
469
|
+
"""Calculate the confidence for each prediction.
|
|
386
470
|
|
|
387
|
-
The confidence is calculated based on either the distance of each data point to
|
|
471
|
+
The confidence is calculated based on either the distance of each data point to
|
|
472
|
+
the centroids of the training data or, optionally, the kernel density estimate or
|
|
473
|
+
1-dimensional distance.
|
|
388
474
|
|
|
389
475
|
Parameters
|
|
390
476
|
----------
|
|
391
|
-
method : {"distance_inverse", "1d_distance_inverse", "kde_likelihood"},
|
|
392
|
-
|
|
477
|
+
method : {"distance_inverse", "1d_distance_inverse", "kde_likelihood"},
|
|
478
|
+
default="distance_inverse"
|
|
479
|
+
The method to use for calculating confidence. Default is
|
|
480
|
+
'distance_inverse'.
|
|
481
|
+
.. deprecated:: 0.2.0
|
|
482
|
+
The methods '1d_distance_inverse' and
|
|
483
|
+
'kde_likelihood' will be removed in version 0.2.0.
|
|
393
484
|
"""
|
|
394
485
|
check_is_fitted(self, "is_fitted_")
|
|
395
486
|
if not hasattr(self, "analyis_"):
|
|
396
487
|
raise ValueError(
|
|
397
|
-
"Use predict_and_analyse() instead of predict() for
|
|
488
|
+
"Use predict_and_analyse() instead of predict() for "
|
|
489
|
+
"confidence calculation."
|
|
398
490
|
)
|
|
399
491
|
|
|
400
492
|
# Calculate confidence for each prediction
|
|
@@ -407,18 +499,30 @@ class DistanceMetricClassifier(BaseEstimator, ClassifierMixin):
|
|
|
407
499
|
]
|
|
408
500
|
|
|
409
501
|
elif method == "1d_distance_inverse":
|
|
502
|
+
warnings.warn(
|
|
503
|
+
"The '1d_distance_inverse' method is deprecated "
|
|
504
|
+
"and will be removed in version 0.2.0",
|
|
505
|
+
DeprecationWarning,
|
|
506
|
+
)
|
|
410
507
|
if not self.calculate_1d_dist:
|
|
411
508
|
raise ValueError(
|
|
412
|
-
"method='1d_distance_inverse' is only valid if calculate_1d_dist
|
|
509
|
+
"method='1d_distance_inverse' is only valid if calculate_1d_dist "
|
|
510
|
+
"is set to True"
|
|
413
511
|
)
|
|
414
512
|
self.confidence_df_ = pd.DataFrame(
|
|
415
513
|
data=self.conf_cl_.T, columns=[f"{x}_conf" for x in self.classes_]
|
|
416
514
|
)
|
|
417
515
|
|
|
418
516
|
elif method == "kde_likelihood":
|
|
517
|
+
warnings.warn(
|
|
518
|
+
"The 'kde_likelihood' method is deprecated and will be "
|
|
519
|
+
"removed in version 0.2.0",
|
|
520
|
+
DeprecationWarning,
|
|
521
|
+
)
|
|
419
522
|
if not self.calculate_kde:
|
|
420
523
|
raise ValueError(
|
|
421
|
-
"method='kde_likelihood' is only valid if calculate_kde is set
|
|
524
|
+
"method='kde_likelihood' is only valid if calculate_kde is set "
|
|
525
|
+
"to True"
|
|
422
526
|
)
|
|
423
527
|
|
|
424
528
|
self.confidence_df_ = pd.DataFrame(
|