scikit-learn-intelex 2024.1.0__py38-none-manylinux1_x86_64.whl → 2024.2.0__py38-none-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-learn-intelex might be problematic. Click here for more details.
- {scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.2.0.dist-info}/METADATA +2 -2
- {scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.2.0.dist-info}/RECORD +38 -34
- sklearnex/cluster/dbscan.py +3 -3
- sklearnex/{preview/linear_model → covariance}/__init__.py +3 -3
- sklearnex/covariance/incremental_covariance.py +130 -0
- sklearnex/covariance/tests/test_incremental_covariance.py +143 -0
- sklearnex/dispatcher.py +19 -18
- sklearnex/ensemble/_forest.py +5 -10
- sklearnex/linear_model/__init__.py +1 -2
- sklearnex/linear_model/linear.py +3 -10
- sklearnex/{preview/linear_model → linear_model}/logistic_regression.py +19 -38
- sklearnex/linear_model/tests/test_logreg.py +70 -5
- sklearnex/neighbors/__init__.py +1 -1
- sklearnex/neighbors/_lof.py +167 -0
- sklearnex/neighbors/knn_classification.py +6 -9
- sklearnex/neighbors/knn_regression.py +6 -8
- sklearnex/neighbors/knn_unsupervised.py +5 -7
- sklearnex/neighbors/tests/test_neighbors.py +12 -11
- sklearnex/preview/__init__.py +1 -1
- sklearnex/preview/cluster/k_means.py +3 -8
- sklearnex/preview/covariance/covariance.py +46 -12
- sklearnex/preview/decomposition/pca.py +3 -5
- sklearnex/spmd/__init__.py +1 -0
- sklearnex/spmd/covariance/__init__.py +19 -0
- sklearnex/spmd/covariance/covariance.py +21 -0
- sklearnex/spmd/linear_model/__init__.py +2 -1
- sklearnex/spmd/linear_model/logistic_regression.py +21 -0
- sklearnex/svm/nusvc.py +5 -6
- sklearnex/svm/nusvr.py +3 -4
- sklearnex/svm/svc.py +5 -6
- sklearnex/svm/svr.py +3 -4
- sklearnex/tests/test_memory_usage.py +1 -4
- sklearnex/tests/test_monkeypatch.py +33 -20
- sklearnex/tests/test_n_jobs_support.py +71 -9
- sklearnex/tests/test_patching.py +19 -5
- sklearnex/neighbors/lof.py +0 -436
- sklearnex/preview/linear_model/tests/test_preview_logistic_regression.py +0 -59
- {scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.2.0.dist-info}/LICENSE.txt +0 -0
- {scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.2.0.dist-info}/WHEEL +0 -0
- {scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.2.0.dist-info}/top_level.txt +0 -0
sklearnex/linear_model/linear.py
CHANGED
|
@@ -65,13 +65,8 @@ if daal_check_version((2023, "P", 100)):
|
|
|
65
65
|
import numpy as np
|
|
66
66
|
from sklearn.linear_model import LinearRegression as sklearn_LinearRegression
|
|
67
67
|
|
|
68
|
-
from daal4py.sklearn.
|
|
69
|
-
|
|
70
|
-
get_dtype,
|
|
71
|
-
make2d,
|
|
72
|
-
run_with_n_jobs,
|
|
73
|
-
sklearn_check_version,
|
|
74
|
-
)
|
|
68
|
+
from daal4py.sklearn._n_jobs_support import control_n_jobs
|
|
69
|
+
from daal4py.sklearn._utils import get_dtype, make2d, sklearn_check_version
|
|
75
70
|
|
|
76
71
|
from .._device_offload import dispatch, wrap_output_data
|
|
77
72
|
from .._utils import (
|
|
@@ -93,7 +88,7 @@ if daal_check_version((2023, "P", 100)):
|
|
|
93
88
|
from onedal.utils import _num_features, _num_samples
|
|
94
89
|
|
|
95
90
|
@register_hyperparameters({"fit": get_hyperparameters("linear_regression", "train")})
|
|
96
|
-
@control_n_jobs
|
|
91
|
+
@control_n_jobs(decorated_methods=["fit", "predict"])
|
|
97
92
|
class LinearRegression(sklearn_LinearRegression, BaseLinearRegression):
|
|
98
93
|
__doc__ = sklearn_LinearRegression.__doc__
|
|
99
94
|
intercept_, coef_ = None, None
|
|
@@ -330,7 +325,6 @@ if daal_check_version((2023, "P", 100)):
|
|
|
330
325
|
onedal_params = {"fit_intercept": self.fit_intercept, "copy_X": self.copy_X}
|
|
331
326
|
self._onedal_estimator = onedal_LinearRegression(**onedal_params)
|
|
332
327
|
|
|
333
|
-
@run_with_n_jobs
|
|
334
328
|
def _onedal_fit(self, X, y, sample_weight, queue=None):
|
|
335
329
|
assert sample_weight is None
|
|
336
330
|
|
|
@@ -369,7 +363,6 @@ if daal_check_version((2023, "P", 100)):
|
|
|
369
363
|
del self._onedal_estimator
|
|
370
364
|
super().fit(X, y)
|
|
371
365
|
|
|
372
|
-
@run_with_n_jobs
|
|
373
366
|
def _onedal_predict(self, X, queue=None):
|
|
374
367
|
X = self._validate_data(X, accept_sparse=False, reset=False)
|
|
375
368
|
if not hasattr(self, "_onedal_estimator"):
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# ===============================================================================
|
|
2
|
-
# Copyright
|
|
2
|
+
# Copyright 2024 Intel Corporation
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
@@ -17,14 +17,11 @@
|
|
|
17
17
|
import logging
|
|
18
18
|
from abc import ABC
|
|
19
19
|
|
|
20
|
-
import sklearn.linear_model._logistic as logistic_module
|
|
21
|
-
|
|
22
20
|
from daal4py.sklearn._utils import daal_check_version
|
|
23
21
|
from daal4py.sklearn.linear_model.logistic_path import (
|
|
24
|
-
LogisticRegression,
|
|
25
|
-
daal4py_predict,
|
|
26
|
-
logistic_regression_path,
|
|
22
|
+
LogisticRegression as LogisticRegression_daal4py,
|
|
27
23
|
)
|
|
24
|
+
from daal4py.sklearn.linear_model.logistic_path import daal4py_fit, daal4py_predict
|
|
28
25
|
|
|
29
26
|
|
|
30
27
|
class BaseLogisticRegression(ABC):
|
|
@@ -43,14 +40,18 @@ if daal_check_version((2024, "P", 1)):
|
|
|
43
40
|
from sklearn.linear_model import LogisticRegression as sklearn_LogisticRegression
|
|
44
41
|
from sklearn.utils.validation import check_X_y
|
|
45
42
|
|
|
43
|
+
from daal4py.sklearn._n_jobs_support import control_n_jobs
|
|
46
44
|
from daal4py.sklearn._utils import sklearn_check_version
|
|
47
45
|
from onedal.linear_model import LogisticRegression as onedal_LogisticRegression
|
|
48
46
|
from onedal.utils import _num_features, _num_samples
|
|
49
47
|
|
|
50
|
-
from
|
|
51
|
-
from
|
|
52
|
-
from
|
|
48
|
+
from .._device_offload import dispatch, wrap_output_data
|
|
49
|
+
from .._utils import PatchingConditionsChain, get_patch_message
|
|
50
|
+
from ..utils.validation import _assert_all_finite
|
|
53
51
|
|
|
52
|
+
@control_n_jobs(
|
|
53
|
+
decorated_methods=["fit", "predict", "predict_proba", "predict_log_proba"]
|
|
54
|
+
)
|
|
54
55
|
class LogisticRegression(sklearn_LogisticRegression, BaseLogisticRegression):
|
|
55
56
|
__doc__ = sklearn_LogisticRegression.__doc__
|
|
56
57
|
intercept_, coef_, n_iter_ = None, None, None
|
|
@@ -97,6 +98,8 @@ if daal_check_version((2024, "P", 1)):
|
|
|
97
98
|
l1_ratio=l1_ratio,
|
|
98
99
|
)
|
|
99
100
|
|
|
101
|
+
_onedal_cpu_fit = daal4py_fit
|
|
102
|
+
|
|
100
103
|
def fit(self, X, y, sample_weight=None):
|
|
101
104
|
if sklearn_check_version("1.0"):
|
|
102
105
|
self._check_feature_names(X, reset=True)
|
|
@@ -160,10 +163,8 @@ if daal_check_version((2024, "P", 1)):
|
|
|
160
163
|
def _test_type_and_finiteness(self, X_in):
|
|
161
164
|
X = np.asarray(X_in)
|
|
162
165
|
|
|
163
|
-
|
|
164
|
-
if "complex" in str(type(dtype)):
|
|
166
|
+
if np.iscomplexobj(X):
|
|
165
167
|
return False
|
|
166
|
-
|
|
167
168
|
try:
|
|
168
169
|
_assert_all_finite(X)
|
|
169
170
|
except BaseException:
|
|
@@ -268,15 +269,6 @@ if daal_check_version((2024, "P", 1)):
|
|
|
268
269
|
}
|
|
269
270
|
self._onedal_estimator = onedal_LogisticRegression(**onedal_params)
|
|
270
271
|
|
|
271
|
-
def _onedal_cpu_fit(self, X, y, sample_weight):
|
|
272
|
-
which, what = logistic_module, "_logistic_regression_path"
|
|
273
|
-
replacer = logistic_regression_path
|
|
274
|
-
descriptor = getattr(which, what, None)
|
|
275
|
-
setattr(which, what, replacer)
|
|
276
|
-
clf = super().fit(X, y, sample_weight)
|
|
277
|
-
setattr(which, what, descriptor)
|
|
278
|
-
return clf
|
|
279
|
-
|
|
280
272
|
def _onedal_fit(self, X, y, sample_weight, queue=None):
|
|
281
273
|
if queue is None or queue.sycl_device.is_cpu:
|
|
282
274
|
return self._onedal_cpu_fit(X, y, sample_weight)
|
|
@@ -313,38 +305,27 @@ if daal_check_version((2024, "P", 1)):
|
|
|
313
305
|
return daal4py_predict(self, X, "computeClassLabels")
|
|
314
306
|
|
|
315
307
|
X = self._validate_data(X, accept_sparse=False, reset=False)
|
|
316
|
-
|
|
317
|
-
self._initialize_onedal_estimator()
|
|
318
|
-
self._onedal_estimator.coef_ = self.coef_
|
|
319
|
-
self._onedal_estimator.intercept_ = self.intercept_
|
|
320
|
-
self._onedal_estimator.classes_ = self.classes_
|
|
321
|
-
|
|
308
|
+
assert hasattr(self, "_onedal_estimator")
|
|
322
309
|
return self._onedal_estimator.predict(X, queue=queue)
|
|
323
310
|
|
|
324
311
|
def _onedal_predict_proba(self, X, queue=None):
|
|
325
312
|
if queue is None or queue.sycl_device.is_cpu:
|
|
326
313
|
return daal4py_predict(self, X, "computeClassProbabilities")
|
|
327
|
-
X = self._validate_data(X, accept_sparse=False, reset=False)
|
|
328
|
-
if not hasattr(self, "_onedal_estimator"):
|
|
329
|
-
self._initialize_onedal_estimator()
|
|
330
|
-
self._onedal_estimator.coef_ = self.coef_
|
|
331
|
-
self._onedal_estimator.intercept_ = self.intercept_
|
|
332
314
|
|
|
315
|
+
X = self._validate_data(X, accept_sparse=False, reset=False)
|
|
316
|
+
assert hasattr(self, "_onedal_estimator")
|
|
333
317
|
return self._onedal_estimator.predict_proba(X, queue=queue)
|
|
334
318
|
|
|
335
319
|
def _onedal_predict_log_proba(self, X, queue=None):
|
|
336
320
|
if queue is None or queue.sycl_device.is_cpu:
|
|
337
321
|
return daal4py_predict(self, X, "computeClassLogProbabilities")
|
|
338
|
-
X = self._validate_data(X, accept_sparse=False, reset=False)
|
|
339
|
-
if not hasattr(self, "_onedal_estimator"):
|
|
340
|
-
self._initialize_onedal_estimator()
|
|
341
|
-
self._onedal_estimator.coef_ = self.coef_
|
|
342
|
-
self._onedal_estimator.intercept_ = self.intercept_
|
|
343
322
|
|
|
323
|
+
X = self._validate_data(X, accept_sparse=False, reset=False)
|
|
324
|
+
assert hasattr(self, "_onedal_estimator")
|
|
344
325
|
return self._onedal_estimator.predict_log_proba(X, queue=queue)
|
|
345
326
|
|
|
346
327
|
else:
|
|
347
|
-
|
|
328
|
+
LogisticRegression = LogisticRegression_daal4py
|
|
348
329
|
|
|
349
330
|
logging.warning(
|
|
350
331
|
"Sklearnex LogisticRegression requires oneDAL version >= 2024.0.1 "
|
|
@@ -15,14 +15,79 @@
|
|
|
15
15
|
# ===============================================================================
|
|
16
16
|
|
|
17
17
|
import numpy as np
|
|
18
|
+
import pytest
|
|
18
19
|
from numpy.testing import assert_allclose
|
|
19
|
-
from sklearn.datasets import load_iris
|
|
20
|
+
from sklearn.datasets import load_breast_cancer, load_iris
|
|
21
|
+
from sklearn.metrics import accuracy_score
|
|
22
|
+
from sklearn.model_selection import train_test_split
|
|
20
23
|
|
|
24
|
+
from daal4py.sklearn._utils import daal_check_version
|
|
25
|
+
from onedal.tests.utils._dataframes_support import (
|
|
26
|
+
_as_numpy,
|
|
27
|
+
_convert_to_dataframe,
|
|
28
|
+
get_dataframes_and_queues,
|
|
29
|
+
)
|
|
21
30
|
|
|
22
|
-
|
|
31
|
+
|
|
32
|
+
def prepare_input(X, y, dataframe, queue):
|
|
33
|
+
X_train, X_test, y_train, y_test = train_test_split(
|
|
34
|
+
X, y, train_size=0.8, random_state=42
|
|
35
|
+
)
|
|
36
|
+
X_train = _convert_to_dataframe(X_train, sycl_queue=queue, target_df=dataframe)
|
|
37
|
+
y_train = _convert_to_dataframe(y_train, sycl_queue=queue, target_df=dataframe)
|
|
38
|
+
X_test = _convert_to_dataframe(X_test, sycl_queue=queue, target_df=dataframe)
|
|
39
|
+
return X_train, X_test, y_train, y_test
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@pytest.mark.parametrize(
|
|
43
|
+
"dataframe,queue",
|
|
44
|
+
get_dataframes_and_queues(device_filter_="cpu"),
|
|
45
|
+
)
|
|
46
|
+
def test_sklearnex_multiclass_classification(dataframe, queue):
|
|
23
47
|
from sklearnex.linear_model import LogisticRegression
|
|
24
48
|
|
|
25
49
|
X, y = load_iris(return_X_y=True)
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
50
|
+
X_train, X_test, y_train, y_test = prepare_input(X, y, dataframe, queue)
|
|
51
|
+
|
|
52
|
+
logreg = LogisticRegression(fit_intercept=True, solver="lbfgs", max_iter=200).fit(
|
|
53
|
+
X_train, y_train
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
if daal_check_version((2024, "P", 1)):
|
|
57
|
+
assert "sklearnex" in logreg.__module__
|
|
58
|
+
else:
|
|
59
|
+
assert "daal4py" in logreg.__module__
|
|
60
|
+
|
|
61
|
+
y_pred = _as_numpy(logreg.predict(X_test))
|
|
62
|
+
assert accuracy_score(y_test, y_pred) > 0.99
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@pytest.mark.parametrize(
|
|
66
|
+
"dataframe,queue",
|
|
67
|
+
get_dataframes_and_queues(),
|
|
68
|
+
)
|
|
69
|
+
def test_sklearnex_binary_classification(dataframe, queue):
|
|
70
|
+
from sklearnex.linear_model import LogisticRegression
|
|
71
|
+
|
|
72
|
+
X, y = load_breast_cancer(return_X_y=True)
|
|
73
|
+
X_train, X_test, y_train, y_test = prepare_input(X, y, dataframe, queue)
|
|
74
|
+
|
|
75
|
+
logreg = LogisticRegression(fit_intercept=True, solver="newton-cg", max_iter=100).fit(
|
|
76
|
+
X_train, y_train
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
if daal_check_version((2024, "P", 1)):
|
|
80
|
+
assert "sklearnex" in logreg.__module__
|
|
81
|
+
else:
|
|
82
|
+
assert "daal4py" in logreg.__module__
|
|
83
|
+
if (
|
|
84
|
+
dataframe != "numpy"
|
|
85
|
+
and queue is not None
|
|
86
|
+
and queue.sycl_device.is_gpu
|
|
87
|
+
and daal_check_version((2024, "P", 1))
|
|
88
|
+
):
|
|
89
|
+
# fit was done on gpu
|
|
90
|
+
assert hasattr(logreg, "_onedal_estimator")
|
|
91
|
+
|
|
92
|
+
y_pred = _as_numpy(logreg.predict(X_test))
|
|
93
|
+
assert accuracy_score(y_test, y_pred) > 0.95
|
sklearnex/neighbors/__init__.py
CHANGED
|
@@ -14,10 +14,10 @@
|
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
# ===============================================================================
|
|
16
16
|
|
|
17
|
+
from ._lof import LocalOutlierFactor
|
|
17
18
|
from .knn_classification import KNeighborsClassifier
|
|
18
19
|
from .knn_regression import KNeighborsRegressor
|
|
19
20
|
from .knn_unsupervised import NearestNeighbors
|
|
20
|
-
from .lof import LocalOutlierFactor
|
|
21
21
|
|
|
22
22
|
__all__ = [
|
|
23
23
|
"KNeighborsClassifier",
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
# ===============================================================================
|
|
2
|
+
# Copyright 2024 Intel Corporation
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# ===============================================================================
|
|
16
|
+
|
|
17
|
+
import warnings
|
|
18
|
+
|
|
19
|
+
import numpy as np
|
|
20
|
+
from sklearn.neighbors import LocalOutlierFactor as sklearn_LocalOutlierFactor
|
|
21
|
+
from sklearn.utils.metaestimators import available_if
|
|
22
|
+
from sklearn.utils.validation import check_is_fitted
|
|
23
|
+
|
|
24
|
+
from daal4py.sklearn._n_jobs_support import control_n_jobs
|
|
25
|
+
from daal4py.sklearn._utils import sklearn_check_version
|
|
26
|
+
|
|
27
|
+
from .._device_offload import dispatch, wrap_output_data
|
|
28
|
+
from .common import KNeighborsDispatchingBase
|
|
29
|
+
from .knn_unsupervised import NearestNeighbors
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@control_n_jobs(decorated_methods=["fit", "kneighbors"])
|
|
33
|
+
class LocalOutlierFactor(KNeighborsDispatchingBase, sklearn_LocalOutlierFactor):
|
|
34
|
+
__doc__ = (
|
|
35
|
+
sklearn_LocalOutlierFactor.__doc__
|
|
36
|
+
+ "\n NOTE: When X=None, methods kneighbors, kneighbors_graph, and predict will"
|
|
37
|
+
+ "\n only output numpy arrays. In that case, the only way to offload to gpu"
|
|
38
|
+
+ "\n is to use a global queue (e.g. using config_context)"
|
|
39
|
+
)
|
|
40
|
+
if sklearn_check_version("1.2"):
|
|
41
|
+
_parameter_constraints: dict = {
|
|
42
|
+
**sklearn_LocalOutlierFactor._parameter_constraints
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
# Only certain methods should be taken from knn to prevent code
|
|
46
|
+
# duplication. Inheriting would yield a complicated inheritance
|
|
47
|
+
# structure and violate the sklearn inheritance path.
|
|
48
|
+
_save_attributes = NearestNeighbors._save_attributes
|
|
49
|
+
_onedal_knn_fit = NearestNeighbors._onedal_fit
|
|
50
|
+
_onedal_kneighbors = NearestNeighbors._onedal_kneighbors
|
|
51
|
+
|
|
52
|
+
def _onedal_fit(self, X, y, queue=None):
|
|
53
|
+
if sklearn_check_version("1.2"):
|
|
54
|
+
self._validate_params()
|
|
55
|
+
|
|
56
|
+
self._onedal_knn_fit(X, y, queue)
|
|
57
|
+
|
|
58
|
+
if self.contamination != "auto":
|
|
59
|
+
if not (0.0 < self.contamination <= 0.5):
|
|
60
|
+
raise ValueError(
|
|
61
|
+
"contamination must be in (0, 0.5], " "got: %f" % self.contamination
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
n_samples = self.n_samples_fit_
|
|
65
|
+
|
|
66
|
+
if self.n_neighbors > n_samples:
|
|
67
|
+
warnings.warn(
|
|
68
|
+
"n_neighbors (%s) is greater than the "
|
|
69
|
+
"total number of samples (%s). n_neighbors "
|
|
70
|
+
"will be set to (n_samples - 1) for estimation."
|
|
71
|
+
% (self.n_neighbors, n_samples)
|
|
72
|
+
)
|
|
73
|
+
self.n_neighbors_ = max(1, min(self.n_neighbors, n_samples - 1))
|
|
74
|
+
|
|
75
|
+
(
|
|
76
|
+
self._distances_fit_X_,
|
|
77
|
+
_neighbors_indices_fit_X_,
|
|
78
|
+
) = self._onedal_kneighbors(n_neighbors=self.n_neighbors_, queue=queue)
|
|
79
|
+
|
|
80
|
+
# Sklearn includes a check for float32 at this point which may not be
|
|
81
|
+
# necessary for onedal
|
|
82
|
+
|
|
83
|
+
self._lrd = self._local_reachability_density(
|
|
84
|
+
self._distances_fit_X_, _neighbors_indices_fit_X_
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
# Compute lof score over training samples to define offset_:
|
|
88
|
+
lrd_ratios_array = self._lrd[_neighbors_indices_fit_X_] / self._lrd[:, np.newaxis]
|
|
89
|
+
|
|
90
|
+
self.negative_outlier_factor_ = -np.mean(lrd_ratios_array, axis=1)
|
|
91
|
+
|
|
92
|
+
if self.contamination == "auto":
|
|
93
|
+
# inliers score around -1 (the higher, the less abnormal).
|
|
94
|
+
self.offset_ = -1.5
|
|
95
|
+
else:
|
|
96
|
+
self.offset_ = np.percentile(
|
|
97
|
+
self.negative_outlier_factor_, 100.0 * self.contamination
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
return self
|
|
101
|
+
|
|
102
|
+
def fit(self, X, y=None):
|
|
103
|
+
self._fit_validation(X, y)
|
|
104
|
+
result = dispatch(
|
|
105
|
+
self,
|
|
106
|
+
"fit",
|
|
107
|
+
{
|
|
108
|
+
"onedal": self.__class__._onedal_fit,
|
|
109
|
+
"sklearn": sklearn_LocalOutlierFactor.fit,
|
|
110
|
+
},
|
|
111
|
+
X,
|
|
112
|
+
None,
|
|
113
|
+
)
|
|
114
|
+
return result
|
|
115
|
+
|
|
116
|
+
# Subtle order change to remove check_array and preserve dpnp and
|
|
117
|
+
# dpctl conformance. decision_function will return a dpnp or dpctl
|
|
118
|
+
# instance via kneighbors and an equivalent check_array exists in
|
|
119
|
+
# that call already in sklearn so no loss of functionality occurs
|
|
120
|
+
def _predict(self, X=None):
|
|
121
|
+
check_is_fitted(self)
|
|
122
|
+
|
|
123
|
+
if X is not None:
|
|
124
|
+
output = self.decision_function(X) < 0
|
|
125
|
+
is_inlier = np.ones(output.shape[0], dtype=int)
|
|
126
|
+
is_inlier[output] = -1
|
|
127
|
+
else:
|
|
128
|
+
is_inlier = np.ones(self.n_samples_fit_, dtype=int)
|
|
129
|
+
is_inlier[self.negative_outlier_factor_ < self.offset_] = -1
|
|
130
|
+
|
|
131
|
+
return is_inlier
|
|
132
|
+
|
|
133
|
+
# This had to be done because predict loses the queue when no
|
|
134
|
+
# argument is given and it is a dpctl tensor or dpnp array.
|
|
135
|
+
# This would cause issues in fit_predict. Also, available_if
|
|
136
|
+
# is hard to unwrap, and this is the most straightforward way.
|
|
137
|
+
@available_if(sklearn_LocalOutlierFactor._check_novelty_fit_predict)
|
|
138
|
+
@wrap_output_data
|
|
139
|
+
def fit_predict(self, X, y=None):
|
|
140
|
+
return self.fit(X)._predict()
|
|
141
|
+
|
|
142
|
+
@available_if(sklearn_LocalOutlierFactor._check_novelty_predict)
|
|
143
|
+
@wrap_output_data
|
|
144
|
+
def predict(self, X=None):
|
|
145
|
+
return self._predict(X)
|
|
146
|
+
|
|
147
|
+
@wrap_output_data
|
|
148
|
+
def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
|
|
149
|
+
check_is_fitted(self)
|
|
150
|
+
if sklearn_check_version("1.0") and X is not None:
|
|
151
|
+
self._check_feature_names(X, reset=False)
|
|
152
|
+
return dispatch(
|
|
153
|
+
self,
|
|
154
|
+
"kneighbors",
|
|
155
|
+
{
|
|
156
|
+
"onedal": self.__class__._onedal_kneighbors,
|
|
157
|
+
"sklearn": sklearn_LocalOutlierFactor.kneighbors,
|
|
158
|
+
},
|
|
159
|
+
X,
|
|
160
|
+
n_neighbors=n_neighbors,
|
|
161
|
+
return_distance=return_distance,
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
fit.__doc__ = sklearn_LocalOutlierFactor.fit.__doc__
|
|
165
|
+
fit_predict.__doc__ = sklearn_LocalOutlierFactor.fit_predict.__doc__
|
|
166
|
+
predict.__doc__ = sklearn_LocalOutlierFactor.predict.__doc__
|
|
167
|
+
kneighbors.__doc__ = sklearn_LocalOutlierFactor.kneighbors.__doc__
|
|
@@ -20,7 +20,8 @@ from sklearn.neighbors._ball_tree import BallTree
|
|
|
20
20
|
from sklearn.neighbors._base import NeighborsBase as sklearn_NeighborsBase
|
|
21
21
|
from sklearn.neighbors._kd_tree import KDTree
|
|
22
22
|
|
|
23
|
-
from daal4py.sklearn.
|
|
23
|
+
from daal4py.sklearn._n_jobs_support import control_n_jobs
|
|
24
|
+
from daal4py.sklearn._utils import sklearn_check_version
|
|
24
25
|
|
|
25
26
|
if not sklearn_check_version("1.2"):
|
|
26
27
|
from sklearn.neighbors._base import _check_weights
|
|
@@ -140,7 +141,7 @@ else:
|
|
|
140
141
|
self.weights = _check_weights(weights)
|
|
141
142
|
|
|
142
143
|
|
|
143
|
-
@control_n_jobs
|
|
144
|
+
@control_n_jobs(decorated_methods=["fit", "predict", "predict_proba", "kneighbors"])
|
|
144
145
|
class KNeighborsClassifier(KNeighborsClassifier_, KNeighborsDispatchingBase):
|
|
145
146
|
if sklearn_check_version("1.2"):
|
|
146
147
|
_parameter_constraints: dict = {**KNeighborsClassifier_._parameter_constraints}
|
|
@@ -245,7 +246,7 @@ class KNeighborsClassifier(KNeighborsClassifier_, KNeighborsDispatchingBase):
|
|
|
245
246
|
@wrap_output_data
|
|
246
247
|
def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
|
|
247
248
|
check_is_fitted(self)
|
|
248
|
-
if sklearn_check_version("1.0"):
|
|
249
|
+
if sklearn_check_version("1.0") and X is not None:
|
|
249
250
|
self._check_feature_names(X, reset=False)
|
|
250
251
|
return dispatch(
|
|
251
252
|
self,
|
|
@@ -255,8 +256,8 @@ class KNeighborsClassifier(KNeighborsClassifier_, KNeighborsDispatchingBase):
|
|
|
255
256
|
"sklearn": sklearn_KNeighborsClassifier.kneighbors,
|
|
256
257
|
},
|
|
257
258
|
X,
|
|
258
|
-
n_neighbors,
|
|
259
|
-
return_distance,
|
|
259
|
+
n_neighbors=n_neighbors,
|
|
260
|
+
return_distance=return_distance,
|
|
260
261
|
)
|
|
261
262
|
|
|
262
263
|
@wrap_output_data
|
|
@@ -285,7 +286,6 @@ class KNeighborsClassifier(KNeighborsClassifier_, KNeighborsDispatchingBase):
|
|
|
285
286
|
|
|
286
287
|
return result
|
|
287
288
|
|
|
288
|
-
@run_with_n_jobs
|
|
289
289
|
def _onedal_fit(self, X, y, queue=None):
|
|
290
290
|
onedal_params = {
|
|
291
291
|
"n_neighbors": self.n_neighbors,
|
|
@@ -308,15 +308,12 @@ class KNeighborsClassifier(KNeighborsClassifier_, KNeighborsDispatchingBase):
|
|
|
308
308
|
|
|
309
309
|
self._save_attributes()
|
|
310
310
|
|
|
311
|
-
@run_with_n_jobs
|
|
312
311
|
def _onedal_predict(self, X, queue=None):
|
|
313
312
|
return self._onedal_estimator.predict(X, queue=queue)
|
|
314
313
|
|
|
315
|
-
@run_with_n_jobs
|
|
316
314
|
def _onedal_predict_proba(self, X, queue=None):
|
|
317
315
|
return self._onedal_estimator.predict_proba(X, queue=queue)
|
|
318
316
|
|
|
319
|
-
@run_with_n_jobs
|
|
320
317
|
def _onedal_kneighbors(
|
|
321
318
|
self, X=None, n_neighbors=None, return_distance=True, queue=None
|
|
322
319
|
):
|
|
@@ -20,7 +20,8 @@ from sklearn.neighbors._ball_tree import BallTree
|
|
|
20
20
|
from sklearn.neighbors._base import NeighborsBase as sklearn_NeighborsBase
|
|
21
21
|
from sklearn.neighbors._kd_tree import KDTree
|
|
22
22
|
|
|
23
|
-
from daal4py.sklearn.
|
|
23
|
+
from daal4py.sklearn._n_jobs_support import control_n_jobs
|
|
24
|
+
from daal4py.sklearn._utils import sklearn_check_version
|
|
24
25
|
|
|
25
26
|
if not sklearn_check_version("1.2"):
|
|
26
27
|
from sklearn.neighbors._base import _check_weights
|
|
@@ -136,7 +137,7 @@ else:
|
|
|
136
137
|
self.weights = _check_weights(weights)
|
|
137
138
|
|
|
138
139
|
|
|
139
|
-
@control_n_jobs
|
|
140
|
+
@control_n_jobs(decorated_methods=["fit", "predict", "kneighbors"])
|
|
140
141
|
class KNeighborsRegressor(KNeighborsRegressor_, KNeighborsDispatchingBase):
|
|
141
142
|
if sklearn_check_version("1.2"):
|
|
142
143
|
_parameter_constraints: dict = {**KNeighborsRegressor_._parameter_constraints}
|
|
@@ -226,7 +227,7 @@ class KNeighborsRegressor(KNeighborsRegressor_, KNeighborsDispatchingBase):
|
|
|
226
227
|
@wrap_output_data
|
|
227
228
|
def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
|
|
228
229
|
check_is_fitted(self)
|
|
229
|
-
if sklearn_check_version("1.0"):
|
|
230
|
+
if sklearn_check_version("1.0") and X is not None:
|
|
230
231
|
self._check_feature_names(X, reset=False)
|
|
231
232
|
return dispatch(
|
|
232
233
|
self,
|
|
@@ -236,8 +237,8 @@ class KNeighborsRegressor(KNeighborsRegressor_, KNeighborsDispatchingBase):
|
|
|
236
237
|
"sklearn": sklearn_KNeighborsRegressor.kneighbors,
|
|
237
238
|
},
|
|
238
239
|
X,
|
|
239
|
-
n_neighbors,
|
|
240
|
-
return_distance,
|
|
240
|
+
n_neighbors=n_neighbors,
|
|
241
|
+
return_distance=return_distance,
|
|
241
242
|
)
|
|
242
243
|
|
|
243
244
|
@wrap_output_data
|
|
@@ -266,7 +267,6 @@ class KNeighborsRegressor(KNeighborsRegressor_, KNeighborsDispatchingBase):
|
|
|
266
267
|
|
|
267
268
|
return result
|
|
268
269
|
|
|
269
|
-
@run_with_n_jobs
|
|
270
270
|
def _onedal_fit(self, X, y, queue=None):
|
|
271
271
|
onedal_params = {
|
|
272
272
|
"n_neighbors": self.n_neighbors,
|
|
@@ -289,11 +289,9 @@ class KNeighborsRegressor(KNeighborsRegressor_, KNeighborsDispatchingBase):
|
|
|
289
289
|
|
|
290
290
|
self._save_attributes()
|
|
291
291
|
|
|
292
|
-
@run_with_n_jobs
|
|
293
292
|
def _onedal_predict(self, X, queue=None):
|
|
294
293
|
return self._onedal_estimator.predict(X, queue=queue)
|
|
295
294
|
|
|
296
|
-
@run_with_n_jobs
|
|
297
295
|
def _onedal_kneighbors(
|
|
298
296
|
self, X=None, n_neighbors=None, return_distance=True, queue=None
|
|
299
297
|
):
|
|
@@ -30,7 +30,8 @@ from sklearn.neighbors._kd_tree import KDTree
|
|
|
30
30
|
from sklearn.neighbors._unsupervised import NearestNeighbors as sklearn_NearestNeighbors
|
|
31
31
|
from sklearn.utils.validation import _deprecate_positional_args, check_is_fitted
|
|
32
32
|
|
|
33
|
-
from daal4py.sklearn.
|
|
33
|
+
from daal4py.sklearn._n_jobs_support import control_n_jobs
|
|
34
|
+
from daal4py.sklearn._utils import sklearn_check_version
|
|
34
35
|
from onedal.neighbors import NearestNeighbors as onedal_NearestNeighbors
|
|
35
36
|
from onedal.utils import _check_array, _num_features, _num_samples
|
|
36
37
|
|
|
@@ -95,7 +96,7 @@ else:
|
|
|
95
96
|
)
|
|
96
97
|
|
|
97
98
|
|
|
98
|
-
@control_n_jobs
|
|
99
|
+
@control_n_jobs(decorated_methods=["fit", "kneighbors"])
|
|
99
100
|
class NearestNeighbors(NearestNeighbors_, KNeighborsDispatchingBase):
|
|
100
101
|
if sklearn_check_version("1.2"):
|
|
101
102
|
_parameter_constraints: dict = {**NearestNeighbors_._parameter_constraints}
|
|
@@ -150,8 +151,8 @@ class NearestNeighbors(NearestNeighbors_, KNeighborsDispatchingBase):
|
|
|
150
151
|
"sklearn": sklearn_NearestNeighbors.kneighbors,
|
|
151
152
|
},
|
|
152
153
|
X,
|
|
153
|
-
n_neighbors,
|
|
154
|
-
return_distance,
|
|
154
|
+
n_neighbors=n_neighbors,
|
|
155
|
+
return_distance=return_distance,
|
|
155
156
|
)
|
|
156
157
|
|
|
157
158
|
@wrap_output_data
|
|
@@ -180,7 +181,6 @@ class NearestNeighbors(NearestNeighbors_, KNeighborsDispatchingBase):
|
|
|
180
181
|
|
|
181
182
|
return result
|
|
182
183
|
|
|
183
|
-
@run_with_n_jobs
|
|
184
184
|
def _onedal_fit(self, X, y=None, queue=None):
|
|
185
185
|
onedal_params = {
|
|
186
186
|
"n_neighbors": self.n_neighbors,
|
|
@@ -202,11 +202,9 @@ class NearestNeighbors(NearestNeighbors_, KNeighborsDispatchingBase):
|
|
|
202
202
|
|
|
203
203
|
self._save_attributes()
|
|
204
204
|
|
|
205
|
-
@run_with_n_jobs
|
|
206
205
|
def _onedal_predict(self, X, queue=None):
|
|
207
206
|
return self._onedal_estimator.predict(X, queue=queue)
|
|
208
207
|
|
|
209
|
-
@run_with_n_jobs
|
|
210
208
|
def _onedal_kneighbors(
|
|
211
209
|
self, X=None, n_neighbors=None, return_distance=True, queue=None
|
|
212
210
|
):
|
|
@@ -23,11 +23,16 @@ from onedal.tests.utils._dataframes_support import (
|
|
|
23
23
|
_convert_to_dataframe,
|
|
24
24
|
get_dataframes_and_queues,
|
|
25
25
|
)
|
|
26
|
+
from sklearnex.neighbors import (
|
|
27
|
+
KNeighborsClassifier,
|
|
28
|
+
KNeighborsRegressor,
|
|
29
|
+
LocalOutlierFactor,
|
|
30
|
+
NearestNeighbors,
|
|
31
|
+
)
|
|
26
32
|
|
|
27
33
|
|
|
28
34
|
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
|
|
29
35
|
def test_sklearnex_import_knn_classifier(dataframe, queue):
|
|
30
|
-
from sklearnex.neighbors import KNeighborsClassifier
|
|
31
36
|
|
|
32
37
|
X = _convert_to_dataframe([[0], [1], [2], [3]], sycl_queue=queue, target_df=dataframe)
|
|
33
38
|
y = _convert_to_dataframe([0, 0, 1, 1], sycl_queue=queue, target_df=dataframe)
|
|
@@ -40,7 +45,6 @@ def test_sklearnex_import_knn_classifier(dataframe, queue):
|
|
|
40
45
|
|
|
41
46
|
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
|
|
42
47
|
def test_sklearnex_import_knn_regression(dataframe, queue):
|
|
43
|
-
from sklearnex.neighbors import KNeighborsRegressor
|
|
44
48
|
|
|
45
49
|
X = _convert_to_dataframe([[0], [1], [2], [3]], sycl_queue=queue, target_df=dataframe)
|
|
46
50
|
y = _convert_to_dataframe([0, 0, 1, 1], sycl_queue=queue, target_df=dataframe)
|
|
@@ -51,18 +55,17 @@ def test_sklearnex_import_knn_regression(dataframe, queue):
|
|
|
51
55
|
assert_allclose(pred, [0.5])
|
|
52
56
|
|
|
53
57
|
|
|
54
|
-
|
|
55
|
-
# investigate failure for `dpnp.ndarrays` and `dpctl.tensors`.
|
|
58
|
+
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
|
|
56
59
|
@pytest.mark.parametrize(
|
|
57
|
-
"
|
|
60
|
+
"estimator",
|
|
61
|
+
[LocalOutlierFactor, NearestNeighbors],
|
|
58
62
|
)
|
|
59
|
-
def
|
|
60
|
-
from sklearnex.neighbors import NearestNeighbors
|
|
63
|
+
def test_sklearnex_kneighbors(estimator, dataframe, queue):
|
|
61
64
|
|
|
62
65
|
X = [[0, 0, 2], [1, 0, 0], [0, 0, 1]]
|
|
63
66
|
X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
|
|
64
67
|
test = _convert_to_dataframe([[0, 0, 1.3]], sycl_queue=queue, target_df=dataframe)
|
|
65
|
-
neigh =
|
|
68
|
+
neigh = estimator(n_neighbors=2).fit(X)
|
|
66
69
|
result = neigh.kneighbors(test, 2, return_distance=False)
|
|
67
70
|
result = _as_numpy(result)
|
|
68
71
|
assert "sklearnex" in neigh.__module__
|
|
@@ -71,14 +74,12 @@ def test_sklearnex_import_nn(dataframe, queue):
|
|
|
71
74
|
|
|
72
75
|
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
|
|
73
76
|
def test_sklearnex_import_lof(dataframe, queue):
|
|
74
|
-
from sklearnex.neighbors import LocalOutlierFactor
|
|
75
77
|
|
|
76
78
|
X = [[7, 7, 7], [1, 0, 0], [0, 0, 1], [0, 0, 1]]
|
|
77
79
|
X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
|
|
78
80
|
lof = LocalOutlierFactor(n_neighbors=2)
|
|
79
81
|
result = lof.fit_predict(X)
|
|
80
82
|
result = _as_numpy(result)
|
|
81
|
-
assert hasattr(lof, "
|
|
83
|
+
assert hasattr(lof, "_onedal_estimator")
|
|
82
84
|
assert "sklearnex" in lof.__module__
|
|
83
|
-
assert "sklearnex" in lof._knn.__module__
|
|
84
85
|
assert_allclose(result, [-1, 1, 1, 1])
|
sklearnex/preview/__init__.py
CHANGED