scikit-learn-intelex 2024.0.1__py312-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-learn-intelex might be problematic. Click here for more details.
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/__init__.py +61 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/__main__.py +59 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/_config.py +110 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/_device_offload.py +223 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/_utils.py +95 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/basic_statistics/__init__.py +20 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/basic_statistics/basic_statistics.py +17 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/cluster/__init__.py +21 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/cluster/dbscan.py +187 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/cluster/k_means.py +18 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/cluster/tests/test_dbscan.py +37 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/cluster/tests/test_kmeans.py +31 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/decomposition/__init__.py +20 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/decomposition/pca.py +18 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/decomposition/tests/test_pca.py +28 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/dispatcher.py +329 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/doc/third-party-programs.txt +424 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/ensemble/__init__.py +30 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/ensemble/_forest.py +1947 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/ensemble/tests/test_forest.py +118 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/glob/__main__.py +73 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/glob/dispatcher.py +88 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/linear_model/__init__.py +30 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/linear_model/coordinate_descent.py +18 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/linear_model/linear.py +373 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/linear_model/logistic_path.py +18 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/linear_model/ridge.py +18 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/linear_model/tests/test_linear.py +77 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/linear_model/tests/test_logreg.py +29 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/manifold/__init__.py +20 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/manifold/t_sne.py +18 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/manifold/tests/test_tsne.py +27 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/metrics/__init__.py +24 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/metrics/pairwise.py +18 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/metrics/ranking.py +18 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/metrics/tests/test_metrics.py +40 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/model_selection/__init__.py +22 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/model_selection/split.py +18 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/model_selection/tests/test_model_selection.py +35 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/__init__.py +28 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/common.py +264 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/knn_classification.py +331 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/knn_regression.py +307 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/knn_unsupervised.py +220 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/lof.py +437 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/tests/test_neighbors.py +85 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/preview/__init__.py +18 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/preview/cluster/__init__.py +20 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/preview/cluster/_common.py +84 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/preview/cluster/k_means.py +370 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/preview/decomposition/__init__.py +20 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/preview/decomposition/pca.py +376 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/preview/decomposition/tests/test_preview_pca.py +38 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/__init__.py +24 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/basic_statistics/__init__.py +19 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/cluster/__init__.py +30 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/cluster/dbscan.py +50 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/cluster/kmeans.py +21 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/decomposition/__init__.py +19 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/decomposition/pca.py +21 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/ensemble/__init__.py +19 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/ensemble/forest.py +79 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/linear_model/__init__.py +19 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/linear_model/linear_model.py +21 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/neighbors/__init__.py +19 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/neighbors/neighbors.py +25 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/svm/__init__.py +30 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/svm/_common.py +188 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/svm/nusvc.py +272 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/svm/nusvr.py +163 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/svm/svc.py +301 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/svm/svr.py +164 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/svm/tests/test_svm.py +102 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/_models_info.py +170 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/test_config.py +39 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/test_memory_usage.py +225 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/test_monkeypatch.py +210 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/test_parallel.py +50 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/test_patching.py +122 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/test_run_to_run_stability_tests.py +428 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/utils/_launch_algorithms.py +118 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/utils/__init__.py +19 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/utils/parallel.py +59 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/utils/validation.py +18 -0
- scikit_learn_intelex-2024.0.1.dist-info/LICENSE.txt +202 -0
- scikit_learn_intelex-2024.0.1.dist-info/METADATA +230 -0
- scikit_learn_intelex-2024.0.1.dist-info/RECORD +90 -0
- scikit_learn_intelex-2024.0.1.dist-info/WHEEL +5 -0
- scikit_learn_intelex-2024.0.1.dist-info/top_level.txt +1 -0
scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/knn_unsupervised.py
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# ===============================================================================
|
|
3
|
+
# Copyright 2021 Intel Corporation
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
# ===============================================================================
|
|
17
|
+
|
|
18
|
+
try:
|
|
19
|
+
from packaging.version import Version
|
|
20
|
+
except ImportError:
|
|
21
|
+
from distutils.version import LooseVersion as Version
|
|
22
|
+
|
|
23
|
+
import warnings
|
|
24
|
+
|
|
25
|
+
import numpy as np
|
|
26
|
+
from sklearn import __version__ as sklearn_version
|
|
27
|
+
from sklearn.neighbors._ball_tree import BallTree
|
|
28
|
+
from sklearn.neighbors._base import VALID_METRICS
|
|
29
|
+
from sklearn.neighbors._base import NeighborsBase as sklearn_NeighborsBase
|
|
30
|
+
from sklearn.neighbors._kd_tree import KDTree
|
|
31
|
+
from sklearn.neighbors._unsupervised import NearestNeighbors as sklearn_NearestNeighbors
|
|
32
|
+
from sklearn.utils.validation import _deprecate_positional_args, check_is_fitted
|
|
33
|
+
|
|
34
|
+
from daal4py.sklearn._utils import sklearn_check_version
|
|
35
|
+
from onedal.neighbors import NearestNeighbors as onedal_NearestNeighbors
|
|
36
|
+
from onedal.utils import _check_array, _num_features, _num_samples
|
|
37
|
+
|
|
38
|
+
from .._device_offload import dispatch, wrap_output_data
|
|
39
|
+
from .common import KNeighborsDispatchingBase
|
|
40
|
+
|
|
41
|
+
if sklearn_check_version("0.22") and Version(sklearn_version) < Version("0.23"):
    # Branch taken only when the installed scikit-learn is below 0.23 and
    # `sklearn_check_version("0.22")` holds (presumably "at least 0.22" --
    # confirm against daal4py). The constructor accepts positional arguments.

    class NearestNeighbors_(sklearn_NearestNeighbors):
        """Constructor shim over scikit-learn's ``NearestNeighbors``.

        All hyper-parameters are forwarded unchanged, by keyword, to the
        parent constructor. This variant allows positional arguments.
        """

        def __init__(
            self,
            n_neighbors=5,
            radius=1.0,
            algorithm="auto",
            leaf_size=30,
            metric="minkowski",
            p=2,
            metric_params=None,
            n_jobs=None,
        ):
            super().__init__(
                n_neighbors=n_neighbors, radius=radius,
                algorithm=algorithm, leaf_size=leaf_size,
                metric=metric, p=p,
                metric_params=metric_params, n_jobs=n_jobs,
            )

else:
    # Every other scikit-learn version: hyper-parameters are keyword-only.

    class NearestNeighbors_(sklearn_NearestNeighbors):
        """Constructor shim over scikit-learn's ``NearestNeighbors``.

        All hyper-parameters are keyword-only and are forwarded unchanged to
        the parent constructor.
        """

        if sklearn_check_version("1.2"):
            # Shallow copy so this class owns its own constraints mapping.
            _parameter_constraints: dict = dict(
                sklearn_NearestNeighbors._parameter_constraints
            )

        @_deprecate_positional_args
        def __init__(
            self,
            *,
            n_neighbors=5,
            radius=1.0,
            algorithm="auto",
            leaf_size=30,
            metric="minkowski",
            p=2,
            metric_params=None,
            n_jobs=None,
        ):
            super().__init__(
                n_neighbors=n_neighbors, radius=radius,
                algorithm=algorithm, leaf_size=leaf_size,
                metric=metric, p=p,
                metric_params=metric_params, n_jobs=n_jobs,
            )
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class NearestNeighbors(NearestNeighbors_, KNeighborsDispatchingBase):
    """Unsupervised nearest-neighbors estimator with a oneDAL backend.

    ``fit`` and ``kneighbors`` go through ``dispatch``, which is handed both a
    oneDAL-backed implementation and the stock scikit-learn one.
    ``radius_neighbors`` always uses the scikit-learn implementation.
    """

    if sklearn_check_version("1.2"):
        # Shallow copy of the parent's constraints mapping.
        _parameter_constraints: dict = dict(NearestNeighbors_._parameter_constraints)

    @_deprecate_positional_args
    def __init__(
        self,
        n_neighbors=5,
        radius=1.0,
        algorithm="auto",
        leaf_size=30,
        metric="minkowski",
        p=2,
        metric_params=None,
        n_jobs=None,
    ):
        """Store the hyper-parameters by forwarding them to the parent class."""
        super().__init__(
            n_neighbors=n_neighbors, radius=radius,
            algorithm=algorithm, leaf_size=leaf_size,
            metric=metric, p=p,
            metric_params=metric_params, n_jobs=n_jobs,
        )

    def fit(self, X, y=None):
        """Fit the estimator on ``X`` and return ``self``.

        ``y`` is passed to ``_fit_validation`` only; the dispatched fit call
        always receives ``None`` in its place.
        """
        self._fit_validation(X, y)
        backends = {
            "onedal": self.__class__._onedal_fit,
            "sklearn": sklearn_NearestNeighbors.fit,
        }
        dispatch(self, "fit", backends, X, None)
        return self

    @wrap_output_data
    def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
        """Find the nearest neighbors of ``X`` via the dispatched backend.

        Requires a fitted estimator. On scikit-learn 1.0+ the feature names of
        a supplied ``X`` are checked against those seen during fit.
        """
        check_is_fitted(self)
        if sklearn_check_version("1.0") and X is not None:
            self._check_feature_names(X, reset=False)
        backends = {
            "onedal": self.__class__._onedal_kneighbors,
            "sklearn": sklearn_NearestNeighbors.kneighbors,
        }
        return dispatch(
            self, "kneighbors", backends, X, n_neighbors, return_distance
        )

    @wrap_output_data
    def radius_neighbors(
        self, X=None, radius=None, return_distance=True, sort_results=False
    ):
        """Find neighbors within ``radius`` using scikit-learn's implementation.

        If a oneDAL estimator is attached, or no tree exists although the fit
        method is ``"kd_tree"``, scikit-learn's ``fit`` is first re-run on the
        stored training data.
        """
        onedal_backend = getattr(self, "_onedal_estimator", None)

        # `and` binds tighter than `or`; the parentheses only make that explicit.
        refit_with_sklearn = onedal_backend is not None or (
            getattr(self, "_tree", 0) is None and self._fit_method == "kd_tree"
        )
        if refit_with_sklearn:
            # Only scikit-learn 0.24+ is given the stored target in ``fit``.
            if sklearn_check_version("0.24"):
                fit_args = (self._fit_X, getattr(self, "_y", None))
            else:
                fit_args = (self._fit_X,)
            sklearn_NearestNeighbors.fit(self, *fit_args)

        # ``sort_results`` is only forwarded on scikit-learn 0.22+.
        if sklearn_check_version("0.22"):
            return sklearn_NearestNeighbors.radius_neighbors(
                self, X, radius, return_distance, sort_results
            )
        return sklearn_NearestNeighbors.radius_neighbors(
            self, X, radius, return_distance
        )

    def _onedal_fit(self, X, y=None, queue=None):
        """Create the oneDAL estimator, fit it on ``queue`` and mirror its state."""
        backend_kwargs = dict(
            n_neighbors=self.n_neighbors,
            algorithm=self.algorithm,
            metric=self.effective_metric_,
            p=self.effective_metric_params_["p"],
        )

        # A tag set without "requires_y" is treated as "y not required".
        try:
            requires_y = self._get_tags()["requires_y"]
        except KeyError:
            requires_y = False

        backend = self._onedal_estimator = onedal_NearestNeighbors(**backend_kwargs)
        backend.requires_y = requires_y
        backend.effective_metric_ = self.effective_metric_
        backend.effective_metric_params_ = self.effective_metric_params_
        backend.fit(X, y, queue=queue)

        self._save_attributes()

    def _onedal_predict(self, X, queue=None):
        """Delegate ``predict`` to the attached oneDAL estimator."""
        return self._onedal_estimator.predict(X, queue=queue)

    def _onedal_kneighbors(
        self, X=None, n_neighbors=None, return_distance=True, queue=None
    ):
        """Delegate ``kneighbors`` to the attached oneDAL estimator."""
        backend = self._onedal_estimator
        return backend.kneighbors(X, n_neighbors, return_distance, queue=queue)

    def _save_attributes(self):
        """Copy the fitted attributes from the oneDAL estimator onto ``self``."""
        fitted_names = (
            "classes_",
            "n_features_in_",
            "n_samples_fit_",
            "_fit_X",
            "_fit_method",
            "_tree",
        )
        for attr_name in fitted_names:
            setattr(self, attr_name, getattr(self._onedal_estimator, attr_name))
|
|
scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/lof.py
ADDED
|
@@ -0,0 +1,437 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# ===============================================================================
|
|
3
|
+
# Copyright 2023 Intel Corporation
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
# ===============================================================================
|
|
17
|
+
|
|
18
|
+
import warnings
|
|
19
|
+
|
|
20
|
+
import numpy as np
|
|
21
|
+
from sklearn.neighbors._lof import LocalOutlierFactor as sklearn_LocalOutlierFactor
|
|
22
|
+
|
|
23
|
+
from .knn_unsupervised import NearestNeighbors
|
|
24
|
+
|
|
25
|
+
try:
|
|
26
|
+
from sklearn.utils.metaestimators import available_if
|
|
27
|
+
except ImportError:
|
|
28
|
+
pass
|
|
29
|
+
|
|
30
|
+
from sklearn.utils import check_array
|
|
31
|
+
from sklearn.utils.validation import check_is_fitted
|
|
32
|
+
|
|
33
|
+
from daal4py.sklearn._utils import sklearn_check_version
|
|
34
|
+
|
|
35
|
+
from .._config import config_context
|
|
36
|
+
from .._device_offload import dispatch, wrap_output_data
|
|
37
|
+
from .._utils import PatchingConditionsChain
|
|
38
|
+
|
|
39
|
+
if sklearn_check_version("1.0"):

    class LocalOutlierFactor(sklearn_LocalOutlierFactor):
        """Local Outlier Factor built on the sklearnex ``NearestNeighbors``.

        Variant used when ``sklearn_check_version("1.0")`` holds. Neighbor
        search is delegated to an internal ``NearestNeighbors`` stored in
        ``_knn``. Public entry points go through ``dispatch`` and register
        only an "onedal" implementation; the "sklearn" entry is ``None``.
        """

        if sklearn_check_version("1.2"):
            # Shallow copy of the parent's constraints mapping.
            _parameter_constraints: dict = dict(
                sklearn_LocalOutlierFactor._parameter_constraints
            )

        def __init__(
            self,
            n_neighbors=20,
            *,
            algorithm="auto",
            leaf_size=30,
            metric="minkowski",
            p=2,
            metric_params=None,
            contamination="auto",
            novelty=False,
            n_jobs=None,
        ):
            """Forward every hyper-parameter to the scikit-learn constructor."""
            super().__init__(
                n_neighbors=n_neighbors,
                algorithm=algorithm, leaf_size=leaf_size,
                metric=metric, p=p, metric_params=metric_params,
                n_jobs=n_jobs,
                contamination=contamination, novelty=novelty,
            )

        def _fit(self, X, y, queue=None):
            """Fit the detector on ``X`` with ``target_offload`` set to ``queue``.

            Sets ``_knn``, ``n_neighbors_``, ``_distances_fit_X_``, ``_lrd``,
            ``negative_outlier_factor_`` and ``offset_``, then copies onto
            ``self`` every ``_knn`` instance attribute that ``self`` lacks.
            ``y`` is not used.

            Raises ``ValueError`` when ``contamination`` is not ``"auto"`` and
            lies outside ``(0, 0.5]``.
            """
            with config_context(target_offload=queue):
                if sklearn_check_version("1.2"):
                    self._validate_params()

                self._knn = NearestNeighbors(
                    n_neighbors=self.n_neighbors,
                    algorithm=self.algorithm, leaf_size=self.leaf_size,
                    metric=self.metric, p=self.p,
                    metric_params=self.metric_params,
                    n_jobs=self.n_jobs,
                )
                self._knn.fit(X)

                if self.contamination != "auto" and not (
                    0.0 < self.contamination <= 0.5
                ):
                    raise ValueError(
                        "contamination must be in (0, 0.5], "
                        "got: %f" % self.contamination
                    )

                n_samples = self._knn.n_samples_fit_
                if self.n_neighbors > n_samples:
                    warnings.warn(
                        "n_neighbors (%s) is greater than the "
                        "total number of samples (%s). n_neighbors "
                        "will be set to (n_samples - 1) for estimation."
                        % (self.n_neighbors, n_samples)
                    )
                # Clamp to [1, n_samples - 1].
                self.n_neighbors_ = max(1, min(self.n_neighbors, n_samples - 1))

                # No query data is passed: neighbors of the training samples.
                self._distances_fit_X_, train_neighbors = self._knn.kneighbors(
                    n_neighbors=self.n_neighbors_
                )
                self._lrd = self._local_reachability_density(
                    self._distances_fit_X_, train_neighbors
                )

                # LOF score of the training samples, used to define offset_.
                density_ratios = self._lrd[train_neighbors] / self._lrd[:, np.newaxis]
                self.negative_outlier_factor_ = -np.mean(density_ratios, axis=1)

                if self.contamination == "auto":
                    # Inliers score around -1 (the higher, the less abnormal).
                    self.offset_ = -1.5
                else:
                    self.offset_ = np.percentile(
                        self.negative_outlier_factor_, 100.0 * self.contamination
                    )

                # Expose the neighbor model's fitted state on this estimator
                # without overwriting anything already set here.
                own_state = self.__dict__
                for attr_name, attr_value in self._knn.__dict__.items():
                    if attr_name not in own_state:
                        setattr(self, attr_name, attr_value)

                return self

        def fit(self, X, y=None):
            """Fit the detector by dispatching to ``_fit``."""
            backends = {"onedal": self.__class__._fit, "sklearn": None}
            return dispatch(self, "neighbors.LocalOutlierFactor.fit", backends, X, y)

        def _onedal_predict(self, X, queue=None):
            """Return +1 (inlier) / -1 (outlier) labels.

            With ``X`` given, samples whose ``decision_function`` is negative
            are labelled -1. With ``X=None`` the training samples are labelled
            by comparing ``negative_outlier_factor_`` with ``offset_``.
            """
            with config_context(target_offload=queue):
                check_is_fitted(self)

                if X is None:
                    labels = np.ones(self._knn.n_samples_fit_, dtype=int)
                    labels[self.negative_outlier_factor_ < self.offset_] = -1
                else:
                    X = check_array(X, accept_sparse="csr")
                    labels = np.ones(X.shape[0], dtype=int)
                    labels[self.decision_function(X) < 0] = -1

                return labels

        @wrap_output_data
        def _predict(self, X=None):
            """Dispatch prediction to ``_onedal_predict``."""
            backends = {"onedal": self.__class__._onedal_predict, "sklearn": None}
            return dispatch(
                self, "neighbors.LocalOutlierFactor.predict", backends, X
            )

        def _score_samples(self, X, queue=None):
            """Return the negated mean local-density ratio for each row of ``X``."""
            with config_context(target_offload=queue):
                check_is_fitted(self)
                X = check_array(X, accept_sparse="csr")

                query_distances, query_neighbors = self._knn.kneighbors(
                    X, n_neighbors=self.n_neighbors_
                )
                query_lrd = self._local_reachability_density(
                    query_distances, query_neighbors
                )
                density_ratios = self._lrd[query_neighbors] / query_lrd[:, np.newaxis]

                # Negated because bigger is better.
                return -np.mean(density_ratios, axis=1)

        def _check_novelty_score_samples(self):
            """Return True when ``novelty`` is set, else raise ``AttributeError``."""
            if self.novelty:
                return True
            msg = (
                "score_samples is not available when novelty=False. The "
                "scores of the training samples are always available "
                "through the negative_outlier_factor_ attribute. Use "
                "novelty=True if you want to use LOF for novelty detection "
                "and compute score_samples for new unseen data."
            )
            raise AttributeError(msg)

        @available_if(_check_novelty_score_samples)
        @wrap_output_data
        def score_samples(self, X):
            """Score ``X`` by dispatching to ``_score_samples``.

            Guarded by ``_check_novelty_score_samples``.
            """
            backends = {"onedal": self.__class__._score_samples, "sklearn": None}
            return dispatch(
                self, "neighbors.LocalOutlierFactor.score_samples", backends, X
            )

        def _check_novelty_fit_predict(self):
            """Return True when ``novelty`` is unset, else raise ``AttributeError``."""
            if not self.novelty:
                return True
            msg = (
                "fit_predict is not available when novelty=True. Use "
                "novelty=False if you want to predict on the training set."
            )
            raise AttributeError(msg)

        def _fit_predict(self, X, y, queue=None):
            """Fit on ``X`` and label the training samples; ``y`` is not used."""
            with config_context(target_offload=queue):
                fitted = self.fit(X)
                return fitted._predict()

        @available_if(_check_novelty_fit_predict)
        @wrap_output_data
        def fit_predict(self, X, y=None):
            """Fit and label ``X`` by dispatching to ``_fit_predict``.

            Guarded by ``_check_novelty_fit_predict``.
            """
            backends = {"onedal": self.__class__._fit_predict, "sklearn": None}
            return dispatch(
                self, "neighbors.LocalOutlierFactor.fit_predict", backends, X, y
            )

        def _onedal_gpu_supported(self, method_name, *data):
            """Return a fresh ``PatchingConditionsChain`` named after the method.

            No conditions are added to the chain here.
            """
            return PatchingConditionsChain(
                f"sklearn.neighbors.{self.__class__.__name__}.{method_name}"
            )

        def _onedal_cpu_supported(self, method_name, *data):
            """Return a fresh ``PatchingConditionsChain`` named after the method.

            No conditions are added to the chain here.
            """
            return PatchingConditionsChain(
                f"sklearn.neighbors.{self.__class__.__name__}.{method_name}"
            )

else:

    class LocalOutlierFactor(sklearn_LocalOutlierFactor):
        """Local Outlier Factor built on the sklearnex ``NearestNeighbors``.

        Variant used when ``sklearn_check_version("1.0")`` does not hold. It
        overrides the private hooks ``_predict``, ``_score_samples`` and
        ``_fit_predict`` instead of the public methods.
        """

        def __init__(
            self,
            n_neighbors=20,
            *,
            algorithm="auto",
            leaf_size=30,
            metric="minkowski",
            p=2,
            metric_params=None,
            contamination="auto",
            novelty=False,
            n_jobs=None,
        ):
            """Forward every hyper-parameter to the scikit-learn constructor."""
            super().__init__(
                n_neighbors=n_neighbors,
                algorithm=algorithm, leaf_size=leaf_size,
                metric=metric, p=p, metric_params=metric_params,
                n_jobs=n_jobs,
                contamination=contamination, novelty=novelty,
            )

        def _fit(self, X, y=None, queue=None):
            """Fit the detector on ``X`` with ``target_offload`` set to ``queue``.

            Sets ``_knn``, ``n_neighbors_``, ``_distances_fit_X_``, ``_lrd``,
            ``negative_outlier_factor_`` and ``offset_``, then copies onto
            ``self`` every ``_knn`` instance attribute that ``self`` lacks.
            ``y`` is not used.

            Raises ``ValueError`` when ``contamination`` is not ``"auto"`` and
            lies outside ``(0, 0.5]``.
            """
            with config_context(target_offload=queue):
                self._knn = NearestNeighbors(
                    n_neighbors=self.n_neighbors,
                    algorithm=self.algorithm, leaf_size=self.leaf_size,
                    metric=self.metric, p=self.p,
                    metric_params=self.metric_params,
                    n_jobs=self.n_jobs,
                )
                self._knn.fit(X)

                if self.contamination != "auto" and not (
                    0.0 < self.contamination <= 0.5
                ):
                    raise ValueError(
                        "contamination must be in (0, 0.5], "
                        "got: %f" % self.contamination
                    )

                n_samples = self._knn.n_samples_fit_
                if self.n_neighbors > n_samples:
                    warnings.warn(
                        "n_neighbors (%s) is greater than the "
                        "total number of samples (%s). n_neighbors "
                        "will be set to (n_samples - 1) for estimation."
                        % (self.n_neighbors, n_samples)
                    )
                # Clamp to [1, n_samples - 1].
                self.n_neighbors_ = max(1, min(self.n_neighbors, n_samples - 1))

                # No query data is passed: neighbors of the training samples.
                self._distances_fit_X_, train_neighbors = self._knn.kneighbors(
                    n_neighbors=self.n_neighbors_
                )
                self._lrd = self._local_reachability_density(
                    self._distances_fit_X_, train_neighbors
                )

                # LOF score of the training samples, used to define offset_.
                density_ratios = self._lrd[train_neighbors] / self._lrd[:, np.newaxis]
                self.negative_outlier_factor_ = -np.mean(density_ratios, axis=1)

                if self.contamination == "auto":
                    # Inliers score around -1 (the higher, the less abnormal).
                    self.offset_ = -1.5
                else:
                    self.offset_ = np.percentile(
                        self.negative_outlier_factor_, 100.0 * self.contamination
                    )

                # Expose the neighbor model's fitted state on this estimator
                # without overwriting anything already set here.
                own_state = self.__dict__
                for attr_name, attr_value in self._knn.__dict__.items():
                    if attr_name not in own_state:
                        setattr(self, attr_name, attr_value)

                return self

        def fit(self, X, y=None):
            """Fit the detector by dispatching to ``_fit``."""
            backends = {"onedal": self.__class__._fit, "sklearn": None}
            return dispatch(self, "neighbors.LocalOutlierFactor.fit", backends, X, y)

        def _onedal_predict(self, X, queue=None):
            """Return +1 (inlier) / -1 (outlier) labels.

            With ``X`` given, samples whose ``decision_function`` is negative
            are labelled -1. With ``X=None`` the training samples are labelled
            by comparing ``negative_outlier_factor_`` with ``offset_``.
            """
            with config_context(target_offload=queue):
                check_is_fitted(self)

                if X is None:
                    labels = np.ones(self._knn.n_samples_fit_, dtype=int)
                    labels[self.negative_outlier_factor_ < self.offset_] = -1
                else:
                    X = check_array(X, accept_sparse="csr")
                    labels = np.ones(X.shape[0], dtype=int)
                    labels[self.decision_function(X) < 0] = -1

                return labels

        @wrap_output_data
        def _predict(self, X=None):
            """Dispatch prediction to ``_onedal_predict``."""
            backends = {"onedal": self.__class__._onedal_predict, "sklearn": None}
            return dispatch(
                self, "neighbors.LocalOutlierFactor.predict", backends, X
            )

        def _onedal_score_samples(self, X, queue=None):
            """Return the negated mean local-density ratio for each row of ``X``."""
            with config_context(target_offload=queue):
                check_is_fitted(self)
                X = check_array(X, accept_sparse="csr")

                query_distances, query_neighbors = self._knn.kneighbors(
                    X, n_neighbors=self.n_neighbors_
                )
                query_lrd = self._local_reachability_density(
                    query_distances, query_neighbors
                )
                density_ratios = self._lrd[query_neighbors] / query_lrd[:, np.newaxis]

                # Negated because bigger is better.
                return -np.mean(density_ratios, axis=1)

        @wrap_output_data
        def _score_samples(self, X):
            """Score ``X`` by dispatching to ``_onedal_score_samples``.

            Raises ``AttributeError`` when ``novelty`` is false.
            """
            if not self.novelty:
                msg = (
                    "score_samples is not available when novelty=False. The "
                    "scores of the training samples are always available "
                    "through the negative_outlier_factor_ attribute. Use "
                    "novelty=True if you want to use LOF for novelty detection "
                    "and compute score_samples for new unseen data."
                )
                raise AttributeError(msg)

            backends = {
                "onedal": self.__class__._onedal_score_samples,
                "sklearn": None,
            }
            return dispatch(
                self, "neighbors.LocalOutlierFactor.score_samples", backends, X
            )

        def _onedal_fit_predict(self, X, y, queue=None):
            """Fit on ``X`` and label the training samples; ``y`` is not used."""
            with config_context(target_offload=queue):
                fitted = self.fit(X)
                return fitted._predict()

        @wrap_output_data
        def _fit_predict(self, X, y=None):
            """Fit and label ``X`` by dispatching to ``_onedal_fit_predict``."""
            backends = {
                "onedal": self.__class__._onedal_fit_predict,
                "sklearn": None,
            }
            return dispatch(
                self,
                "neighbors.LocalOutlierFactor._onedal_fit_predict",
                backends,
                X,
                y,
            )

        # NOTE(review): the variant in the other branch returns a
        # PatchingConditionsChain from these two hooks, while this one returns
        # a bare bool -- confirm that `dispatch` accepts both.
        def _onedal_gpu_supported(self, method_name, *data):
            """Unconditionally return True."""
            return True

        def _onedal_cpu_supported(self, method_name, *data):
            """Unconditionally return True."""
            return True
|