scikit-learn-intelex 2024.3.0__py39-none-manylinux1_x86_64.whl → 2024.5.0__py39-none-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of scikit-learn-intelex might be problematic.
Files changed (43)
  1. {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/METADATA +2 -2
  2. {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/RECORD +43 -37
  3. sklearnex/_device_offload.py +39 -5
  4. sklearnex/basic_statistics/__init__.py +2 -1
  5. sklearnex/basic_statistics/incremental_basic_statistics.py +288 -0
  6. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +384 -0
  7. sklearnex/covariance/incremental_covariance.py +217 -30
  8. sklearnex/covariance/tests/test_incremental_covariance.py +54 -17
  9. sklearnex/decomposition/pca.py +71 -19
  10. sklearnex/decomposition/tests/test_pca.py +2 -2
  11. sklearnex/dispatcher.py +33 -2
  12. sklearnex/ensemble/_forest.py +73 -79
  13. sklearnex/linear_model/__init__.py +5 -3
  14. sklearnex/linear_model/incremental_linear.py +387 -0
  15. sklearnex/linear_model/linear.py +275 -340
  16. sklearnex/linear_model/logistic_regression.py +50 -9
  17. sklearnex/linear_model/tests/test_incremental_linear.py +200 -0
  18. sklearnex/linear_model/tests/test_linear.py +40 -5
  19. sklearnex/neighbors/_lof.py +53 -36
  20. sklearnex/neighbors/common.py +4 -1
  21. sklearnex/neighbors/knn_classification.py +37 -122
  22. sklearnex/neighbors/knn_regression.py +10 -117
  23. sklearnex/neighbors/knn_unsupervised.py +6 -78
  24. sklearnex/neighbors/tests/test_neighbors.py +2 -2
  25. sklearnex/preview/cluster/k_means.py +5 -73
  26. sklearnex/preview/covariance/covariance.py +6 -5
  27. sklearnex/preview/covariance/tests/test_covariance.py +18 -5
  28. sklearnex/svm/_common.py +4 -7
  29. sklearnex/svm/nusvc.py +66 -50
  30. sklearnex/svm/nusvr.py +3 -49
  31. sklearnex/svm/svc.py +66 -51
  32. sklearnex/svm/svr.py +3 -49
  33. sklearnex/tests/_utils.py +34 -16
  34. sklearnex/tests/test_memory_usage.py +5 -1
  35. sklearnex/tests/test_n_jobs_support.py +12 -2
  36. sklearnex/tests/test_patching.py +87 -58
  37. sklearnex/tests/test_run_to_run_stability_tests.py +1 -1
  38. sklearnex/utils/__init__.py +2 -1
  39. sklearnex/utils/_namespace.py +97 -0
  40. sklearnex/utils/tests/test_finite.py +89 -0
  41. {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/LICENSE.txt +0 -0
  42. {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/WHEEL +0 -0
  43. {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/top_level.txt +0 -0
sklearnex/linear_model/logistic_regression.py
@@ -38,19 +38,27 @@ if daal_check_version((2024, "P", 1)):
     import numpy as np
     from scipy.sparse import issparse
     from sklearn.linear_model import LogisticRegression as sklearn_LogisticRegression
+    from sklearn.metrics import accuracy_score
+    from sklearn.utils.multiclass import type_of_target
     from sklearn.utils.validation import check_X_y
 
     from daal4py.sklearn._n_jobs_support import control_n_jobs
     from daal4py.sklearn._utils import sklearn_check_version
     from onedal.linear_model import LogisticRegression as onedal_LogisticRegression
-    from onedal.utils import _num_features, _num_samples
+    from onedal.utils import _num_samples
 
     from .._device_offload import dispatch, wrap_output_data
     from .._utils import PatchingConditionsChain, get_patch_message
     from ..utils.validation import _assert_all_finite
 
     @control_n_jobs(
-        decorated_methods=["fit", "predict", "predict_proba", "predict_log_proba"]
+        decorated_methods=[
+            "fit",
+            "predict",
+            "predict_proba",
+            "predict_log_proba",
+            "score",
+        ]
     )
     class LogisticRegression(sklearn_LogisticRegression, BaseLogisticRegression):
         __doc__ = sklearn_LogisticRegression.__doc__
@@ -72,9 +80,9 @@ if daal_check_version((2024, "P", 1)):
             intercept_scaling=1,
             class_weight=None,
             random_state=None,
-            solver="lbfgs" if sklearn_check_version("0.22") else "liblinear",
+            solver="lbfgs",
             max_iter=100,
-            multi_class="auto" if sklearn_check_version("0.22") else "ovr",
+            multi_class="auto",
             verbose=0,
             warm_start=False,
             n_jobs=None,
@@ -160,6 +168,27 @@ if daal_check_version((2024, "P", 1)):
                 X,
             )
 
+        @wrap_output_data
+        def score(self, X, y, sample_weight=None):
+            if sklearn_check_version("1.0"):
+                self._check_feature_names(X, reset=False)
+            return dispatch(
+                self,
+                "score",
+                {
+                    "onedal": self.__class__._onedal_score,
+                    "sklearn": sklearn_LogisticRegression.score,
+                },
+                X,
+                y,
+                sample_weight=sample_weight,
+            )
+
+        def _onedal_score(self, X, y, sample_weight=None, queue=None):
+            return accuracy_score(
+                y, self._onedal_predict(X, queue=queue), sample_weight=sample_weight
+            )
+
         def _test_type_and_finiteness(self, X_in):
             X = np.asarray(X_in)
 
@@ -198,6 +227,10 @@ if daal_check_version((2024, "P", 1)):
                     (self.warm_start == False, "Warm start is not supported."),
                     (self.l1_ratio is None, "l1 ratio is not supported."),
                     (sample_weight is None, "Sample weight is not supported."),
+                    (
+                        type_of_target(y) == "binary",
+                        "Only binary classification is supported",
+                    ),
                 ]
             )
 
@@ -216,22 +249,29 @@ if daal_check_version((2024, "P", 1)):
             return patching_status
 
         def _onedal_gpu_predict_supported(self, method_name, *data):
-            assert method_name in ["predict", "predict_proba", "predict_log_proba"]
-            assert len(data) == 1
+            assert method_name in [
+                "predict",
+                "predict_proba",
+                "predict_log_proba",
+                "score",
+            ]
 
             class_name = self.__class__.__name__
             patching_status = PatchingConditionsChain(
                 f"sklearn.linear_model.{class_name}.{method_name}"
             )
 
-            n_samples = _num_samples(*data)
+            n_samples = _num_samples(data[0])
             model_is_sparse = issparse(self.coef_) or (
                 self.fit_intercept and issparse(self.intercept_)
             )
             dal_ready = patching_status.and_conditions(
                 [
                     (n_samples > 0, "Number of samples is less than 1."),
-                    (not issparse(*data), "Sparse input is not supported."),
+                    (
+                        not any([issparse(i) for i in data]),
+                        "Sparse input is not supported.",
+                    ),
                     (not model_is_sparse, "Sparse coefficients are not supported."),
                     (
                         hasattr(self, "_onedal_estimator"),
@@ -251,7 +291,7 @@ if daal_check_version((2024, "P", 1)):
         def _onedal_gpu_supported(self, method_name, *data):
             if method_name == "fit":
                 return self._onedal_gpu_fit_supported(method_name, *data)
-            if method_name in ["predict", "predict_proba", "predict_log_proba"]:
+            if method_name in ["predict", "predict_proba", "predict_log_proba", "score"]:
                 return self._onedal_gpu_predict_supported(method_name, *data)
             raise RuntimeError(
                 f"Unknown method {method_name} in {self.__class__.__name__}"
@@ -334,6 +374,7 @@ if daal_check_version((2024, "P", 1)):
         predict.__doc__ = sklearn_LogisticRegression.predict.__doc__
         predict_proba.__doc__ = sklearn_LogisticRegression.predict_proba.__doc__
         predict_log_proba.__doc__ = sklearn_LogisticRegression.predict_log_proba.__doc__
+        score.__doc__ = sklearn_LogisticRegression.score.__doc__
 
 else:
     LogisticRegression = LogisticRegression_daal4py
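
Taken together, the logistic_regression.py hunks add a dispatched score method. The assert len(data) == 1 is dropped because score passes both X and y, which is also why the GPU checks switch to _num_samples(data[0]) and any([issparse(i) for i in data]). A minimal usage sketch, assuming scikit-learn-intelex 2024.5.0; the config_context(target_offload=...) line needs dpctl and a SYCL GPU, so drop the context manager on CPU-only machines:

import numpy as np

from sklearnex import config_context
from sklearnex.linear_model import LogisticRegression

rng = np.random.default_rng(0)
X = rng.random((200, 4))
y = (X[:, 0] > 0.5).astype(int)  # binary target, the only case oneDAL accepts on GPU

clf = LogisticRegression(solver="lbfgs").fit(X, y)

# score() is now routed through dispatch(); on the oneDAL branch it is
# accuracy_score over _onedal_predict, so evaluation stays on the device.
with config_context(target_offload="gpu:0"):
    print(clf.score(X, y))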
sklearnex/linear_model/tests/test_incremental_linear.py (new file)
@@ -0,0 +1,200 @@
+# ===============================================================================
+# Copyright 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===============================================================================
+
+import numpy as np
+import pytest
+from numpy.testing import assert_allclose
+
+from onedal.tests.utils._dataframes_support import (
+    _as_numpy,
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
+from sklearnex.linear_model import IncrementalLinearRegression
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("fit_intercept", [True, False])
+@pytest.mark.parametrize("macro_block", [None, 1024])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_sklearnex_fit_on_gold_data(dataframe, queue, fit_intercept, macro_block, dtype):
+    X = np.array([[1], [2]])
+    X = X.astype(dtype=dtype)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y = np.array([1, 2])
+    y = y.astype(dtype=dtype)
+    y_df = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
+
+    inclin = IncrementalLinearRegression(fit_intercept=fit_intercept)
+    if macro_block is not None:
+        hparams = inclin.get_hyperparameters("fit")
+        hparams.cpu_macro_block = macro_block
+        hparams.gpu_macro_block = macro_block
+    inclin.fit(X_df, y_df)
+
+    y_pred = inclin.predict(X_df)
+
+    tol = 2e-6 if dtype == np.float32 else 1e-7
+    assert_allclose(inclin.coef_, [1], atol=tol)
+    if fit_intercept:
+        assert_allclose(inclin.intercept_, [0], atol=tol)
+    assert_allclose(_as_numpy(y_pred), y, atol=tol)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("fit_intercept", [True, False])
+@pytest.mark.parametrize("macro_block", [None, 1024])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_sklearnex_partial_fit_on_gold_data(
+    dataframe, queue, fit_intercept, macro_block, dtype
+):
+    X = np.array([[1], [2], [3], [4]])
+    X = X.astype(dtype=dtype)
+    y = X + 3
+    y = y.astype(dtype=dtype)
+    X_split = np.array_split(X, 2)
+    y_split = np.array_split(y, 2)
+
+    inclin = IncrementalLinearRegression()
+    if macro_block is not None:
+        hparams = inclin.get_hyperparameters("fit")
+        hparams.cpu_macro_block = macro_block
+        hparams.gpu_macro_block = macro_block
+    for i in range(2):
+        X_split_df = _convert_to_dataframe(
+            X_split[i], sycl_queue=queue, target_df=dataframe
+        )
+        y_split_df = _convert_to_dataframe(
+            y_split[i], sycl_queue=queue, target_df=dataframe
+        )
+        inclin.partial_fit(X_split_df, y_split_df)
+
+    assert inclin.n_features_in_ == 1
+    tol = 2e-6 if dtype == np.float32 else 1e-7
+    assert_allclose(inclin.coef_, [[1]], atol=tol)
+    if fit_intercept:
+        assert_allclose(inclin.intercept_, 3, atol=tol)
+
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y_pred = inclin.predict(X_df)
+
+    assert_allclose(_as_numpy(y_pred), y, atol=tol)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("fit_intercept", [True, False])
+@pytest.mark.parametrize("macro_block", [None, 1024])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_sklearnex_partial_fit_multitarget_on_gold_data(
+    dataframe, queue, fit_intercept, macro_block, dtype
+):
+    X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
+    X = X.astype(dtype=dtype)
+    y = np.dot(X, [1, 2]) + 3
+    y = y.astype(dtype=dtype)
+    X_split = np.array_split(X, 2)
+    y_split = np.array_split(y, 2)
+
+    inclin = IncrementalLinearRegression()
+    if macro_block is not None:
+        hparams = inclin.get_hyperparameters("fit")
+        hparams.cpu_macro_block = macro_block
+        hparams.gpu_macro_block = macro_block
+    for i in range(2):
+        X_split_df = _convert_to_dataframe(
+            X_split[i], sycl_queue=queue, target_df=dataframe
+        )
+        y_split_df = _convert_to_dataframe(
+            y_split[i], sycl_queue=queue, target_df=dataframe
+        )
+        inclin.partial_fit(X_split_df, y_split_df)
+
+    assert inclin.n_features_in_ == 2
+    tol = 7e-6 if dtype == np.float32 else 1e-7
+    assert_allclose(inclin.coef_, [1.0, 2.0], atol=tol)
+    if fit_intercept:
+        assert_allclose(inclin.intercept_, 3.0, atol=tol)
+
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y_pred = inclin.predict(X_df)
+
+    assert_allclose(_as_numpy(y_pred), y, atol=tol)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("fit_intercept", [True, False])
+@pytest.mark.parametrize("num_samples", [100, 1000])
+@pytest.mark.parametrize("num_features", [5, 10])
+@pytest.mark.parametrize("num_targets", [1, 2])
+@pytest.mark.parametrize("num_blocks", [1, 10])
+@pytest.mark.parametrize("macro_block", [None, 1024])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_sklearnex_partial_fit_on_random_data(
+    dataframe,
+    queue,
+    fit_intercept,
+    num_samples,
+    num_features,
+    num_targets,
+    num_blocks,
+    macro_block,
+    dtype,
+):
+    seed = 42
+    gen = np.random.default_rng(seed)
+    intercept = gen.random(size=num_targets, dtype=dtype)
+    coef = gen.random(size=(num_targets, num_features), dtype=dtype).T
+
+    X = gen.random(size=(num_samples, num_features), dtype=dtype)
+    if fit_intercept:
+        y = X @ coef + intercept[np.newaxis, :]
+    else:
+        y = X @ coef
+
+    X_split = np.array_split(X, num_blocks)
+    y_split = np.array_split(y, num_blocks)
+
+    inclin = IncrementalLinearRegression(fit_intercept=fit_intercept)
+    if macro_block is not None:
+        hparams = inclin.get_hyperparameters("fit")
+        hparams.cpu_macro_block = macro_block
+        hparams.gpu_macro_block = macro_block
+    for i in range(num_blocks):
+        X_split_df = _convert_to_dataframe(
+            X_split[i], sycl_queue=queue, target_df=dataframe
+        )
+        y_split_df = _convert_to_dataframe(
+            y_split[i], sycl_queue=queue, target_df=dataframe
+        )
+        inclin.partial_fit(X_split_df, y_split_df)
+
+    tol = 1e-4 if dtype == np.float32 else 1e-7
+    assert_allclose(coef, inclin.coef_.T, atol=tol)
+
+    if fit_intercept:
+        assert_allclose(intercept, inclin.intercept_, atol=tol)
+
+    X_test = gen.random(size=(num_samples, num_features), dtype=dtype)
+    if fit_intercept:
+        expected_y_pred = X_test @ coef + intercept[np.newaxis, :]
+    else:
+        expected_y_pred = X_test @ coef
+
+    X_test_df = _convert_to_dataframe(X_test, sycl_queue=queue, target_df=dataframe)
+
+    y_pred = inclin.predict(X_test_df)
+
+    assert_allclose(expected_y_pred, _as_numpy(y_pred), atol=tol)
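
The file above exercises the new IncrementalLinearRegression estimator (sklearnex/linear_model/incremental_linear.py, +387 lines in this release). Stripped of the dataframe/queue test harness, the streaming pattern it validates looks roughly like this; a sketch assuming only the partial_fit/predict surface shown in the tests:

import numpy as np

from sklearnex.linear_model import IncrementalLinearRegression

gen = np.random.default_rng(42)
X = gen.random((1000, 5))
y = X @ gen.random(5) + 3.0

inclin = IncrementalLinearRegression(fit_intercept=True)
for X_batch, y_batch in zip(np.array_split(X, 10), np.array_split(y, 10)):
    inclin.partial_fit(X_batch, y_batch)  # each block updates the partial result

# Recovers the generating coefficients and the 3.0 intercept.
print(inclin.coef_, inclin.intercept_)
print(inclin.predict(X[:5]))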
sklearnex/linear_model/tests/test_linear.py
@@ -28,26 +28,33 @@ from onedal.tests.utils._dataframes_support import (
 
 
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
 @pytest.mark.parametrize("macro_block", [None, 1024])
-def test_sklearnex_import_linear(dataframe, queue, macro_block):
+def test_sklearnex_import_linear(dataframe, queue, dtype, macro_block):
     from sklearnex.linear_model import LinearRegression
 
     X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
     y = np.dot(X, np.array([1, 2])) + 3
+    X = X.astype(dtype=dtype)
+    y = y.astype(dtype=dtype)
     X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
     y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
+
     linreg = LinearRegression()
     if daal_check_version((2024, "P", 0)) and macro_block is not None:
         hparams = linreg.get_hyperparameters("fit")
         hparams.cpu_macro_block = macro_block
         hparams.gpu_macro_block = macro_block
+
     linreg.fit(X, y)
-    if daal_check_version((2023, "P", 100)):
-        assert hasattr(linreg, "_onedal_estimator")
+
+    assert hasattr(linreg, "_onedal_estimator")
     assert "sklearnex" in linreg.__module__
     assert linreg.n_features_in_ == 2
-    assert_allclose(_as_numpy(linreg.intercept_), 3.0)
-    assert_allclose(_as_numpy(linreg.coef_), [1.0, 2.0])
+
+    tol = 1e-5 if dtype == np.float32 else 1e-7
+    assert_allclose(_as_numpy(linreg.intercept_), 3.0, rtol=tol)
+    assert_allclose(_as_numpy(linreg.coef_), [1.0, 2.0], rtol=tol)
 
 
 def test_sklearnex_import_ridge():
@@ -80,3 +87,31 @@ def test_sklearnex_import_elastic():
     assert "daal4py" in elasticnet.__module__
     assert_allclose(elasticnet.intercept_, 1.451, atol=1e-3)
     assert_allclose(elasticnet.coef_, [18.838, 64.559], atol=1e-3)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_sklearnex_reconstruct_model(dataframe, queue, dtype):
+    from sklearnex.linear_model import LinearRegression
+
+    seed = 42
+    num_samples = 3500
+    num_features, num_targets = 14, 9
+
+    gen = np.random.default_rng(seed)
+    intercept = gen.random(size=num_targets, dtype=dtype)
+    coef = gen.random(size=(num_targets, num_features), dtype=dtype).T
+
+    X = gen.random(size=(num_samples, num_features), dtype=dtype)
+    gtr = X @ coef + intercept[np.newaxis, :]
+
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+
+    linreg = LinearRegression(fit_intercept=True)
+    linreg.coef_ = coef.T
+    linreg.intercept_ = intercept
+
+    y_pred = linreg.predict(X)
+
+    tol = 1e-5 if dtype == np.float32 else 1e-7
+    assert_allclose(gtr, _as_numpy(y_pred), rtol=tol)
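
test_sklearnex_reconstruct_model checks a less obvious property: predict works on an estimator that was never fitted, as long as coef_ and intercept_ are assigned by hand. In isolation that pattern looks like the sketch below (single target for brevity; assumes the patched predict path accepts hand-assigned attributes, which is what the test asserts for the multi-target case):

import numpy as np

from sklearnex.linear_model import LinearRegression

linreg = LinearRegression(fit_intercept=True)
linreg.coef_ = np.array([1.0, 2.0])  # fit() is never called
linreg.intercept_ = 3.0

print(linreg.predict(np.array([[1.0, 1.0], [2.0, 3.0]])))  # [6. 11.]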
sklearnex/neighbors/_lof.py
@@ -23,13 +23,13 @@ from sklearn.utils.validation import check_is_fitted
 
 from daal4py.sklearn._n_jobs_support import control_n_jobs
 from daal4py.sklearn._utils import sklearn_check_version
+from sklearnex._device_offload import dispatch, wrap_output_data
+from sklearnex.neighbors.common import KNeighborsDispatchingBase
+from sklearnex.neighbors.knn_unsupervised import NearestNeighbors
+from sklearnex.utils import get_namespace
 
-from .._device_offload import dispatch, wrap_output_data
-from .common import KNeighborsDispatchingBase
-from .knn_unsupervised import NearestNeighbors
 
-
-@control_n_jobs(decorated_methods=["fit", "kneighbors"])
+@control_n_jobs(decorated_methods=["fit", "_kneighbors"])
 class LocalOutlierFactor(KNeighborsDispatchingBase, sklearn_LocalOutlierFactor):
     __doc__ = (
         sklearn_LocalOutlierFactor.__doc__
@@ -100,7 +100,6 @@ class LocalOutlierFactor(KNeighborsDispatchingBase, sklearn_LocalOutlierFactor):
         return self
 
     def fit(self, X, y=None):
-        self._fit_validation(X, y)
         result = dispatch(
             self,
             "fit",
@@ -113,16 +112,13 @@ class LocalOutlierFactor(KNeighborsDispatchingBase, sklearn_LocalOutlierFactor):
         )
         return result
 
-    # Subtle order change to remove check_array and preserve dpnp and
-    # dpctl conformance. decision_function will return a dpnp or dpctl
-    # instance via kneighbors and an equivalent check_array exists in
-    # that call already in sklearn so no loss of functionality occurs
     def _predict(self, X=None):
         check_is_fitted(self)
 
         if X is not None:
+            xp, _ = get_namespace(X)
             output = self.decision_function(X) < 0
-            is_inlier = np.ones(output.shape[0], dtype=int)
+            is_inlier = xp.ones_like(output, dtype=int)
             is_inlier[output] = -1
         else:
             is_inlier = np.ones(self.n_samples_fit_, dtype=int)
@@ -159,16 +155,40 @@ class LocalOutlierFactor(KNeighborsDispatchingBase, sklearn_LocalOutlierFactor):
         """
         return self.fit(X)._predict()
 
-    @available_if(sklearn_LocalOutlierFactor._check_novelty_predict)
+    def _kneighbors(self, X=None, n_neighbors=None, return_distance=True):
+        check_is_fitted(self)
+        if sklearn_check_version("1.0") and X is not None:
+            self._check_feature_names(X, reset=False)
+        return dispatch(
+            self,
+            "kneighbors",
+            {
+                "onedal": self.__class__._onedal_kneighbors,
+                "sklearn": sklearn_LocalOutlierFactor.kneighbors,
+            },
+            X,
+            n_neighbors=n_neighbors,
+            return_distance=return_distance,
+        )
+
+    kneighbors = wrap_output_data(_kneighbors)
+
+    @available_if(sklearn_LocalOutlierFactor._check_novelty_score_samples)
     @wrap_output_data
-    def predict(self, X=None):
-        """Predict the labels (1 inlier, -1 outlier) of X according to LOF.
+    def score_samples(self, X):
+        """Opposite of the Local Outlier Factor of X.
+
+        It is the opposite as bigger is better, i.e. large values correspond
+        to inliers.
 
         **Only available for novelty detection (when novelty is set to True).**
-        This method allows to generalize prediction to *new observations* (not
-        in the training set). Note that the result of ``clf.fit(X)`` then
-        ``clf.predict(X)`` with ``novelty=True`` may differ from the result
-        obtained by ``clf.fit_predict(X)`` with ``novelty=False``.
+        The argument X is supposed to contain *new data*: if X contains a
+        point from training, it considers the later in its own neighborhood.
+        Also, the samples in X are not considered in the neighborhood of any
+        point. Because of this, the scores obtained via ``score_samples`` may
+        differ from the standard LOF scores.
+        The standard LOF scores for the training data is available via the
+        ``negative_outlier_factor_`` attribute.
 
         Parameters
         ----------
@@ -178,27 +198,24 @@ class LocalOutlierFactor(KNeighborsDispatchingBase, sklearn_LocalOutlierFactor):
 
         Returns
         -------
-        is_inlier : ndarray of shape (n_samples,)
-            Returns -1 for anomalies/outliers and +1 for inliers.
+        opposite_lof_scores : ndarray of shape (n_samples,)
+            The opposite of the Local Outlier Factor of each input samples.
+            The lower, the more abnormal.
         """
-        return self._predict(X)
-
-    @wrap_output_data
-    def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
         check_is_fitted(self)
-        if sklearn_check_version("1.0") and X is not None:
-            self._check_feature_names(X, reset=False)
-        return dispatch(
-            self,
-            "kneighbors",
-            {
-                "onedal": self.__class__._onedal_kneighbors,
-                "sklearn": sklearn_LocalOutlierFactor.kneighbors,
-            },
-            X,
-            n_neighbors=n_neighbors,
-            return_distance=return_distance,
+
+        distances_X, neighbors_indices_X = self._kneighbors(
+            X, n_neighbors=self.n_neighbors_
         )
 
+        X_lrd = self._local_reachability_density(
+            distances_X,
+            neighbors_indices_X,
+        )
+
+        lrd_ratios_array = self._lrd[neighbors_indices_X] / X_lrd[:, np.newaxis]
+
+        return -np.mean(lrd_ratios_array, axis=1)
+
     fit.__doc__ = sklearn_LocalOutlierFactor.fit.__doc__
     kneighbors.__doc__ = sklearn_LocalOutlierFactor.kneighbors.__doc__
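
The net effect of the _lof.py hunks: kneighbors dispatching moves into a private _kneighbors (so control_n_jobs decorates the unwrapped body), fit validation moves into the dispatcher (see the common.py hunks below), and a dispatched score_samples is added for the novelty path. A short sketch of that novelty usage, assuming scikit-learn-intelex 2024.5.0:

import numpy as np

from sklearnex.neighbors import LocalOutlierFactor

gen = np.random.default_rng(0)
X_train = gen.normal(size=(500, 2))
X_new = np.vstack([gen.normal(size=(5, 2)), [[8.0, 8.0]]])  # last row is far out

lof = LocalOutlierFactor(n_neighbors=20, novelty=True).fit(X_train)
# score_samples now reuses the dispatched _kneighbors; it returns the
# opposite LOF, so the outlying last row gets a much more negative score.
print(lof.score_samples(X_new))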
sklearnex/neighbors/common.py
@@ -137,6 +137,9 @@ class KNeighborsDispatchingBase:
             self.n_features_in_ = X.data.shape[1]
 
     def _onedal_supported(self, device, method_name, *data):
+        if method_name == "fit":
+            self._fit_validation(data[0], data[1])
+
         class_name = self.__class__.__name__
         is_classifier = "Classifier" in class_name
         is_regressor = "Regressor" in class_name
@@ -249,7 +252,7 @@ class KNeighborsDispatchingBase:
                 class_count >= 2, "One-class case is not supported."
             )
             return patching_status
-        if method_name in ["predict", "predict_proba", "kneighbors"]:
+        if method_name in ["predict", "predict_proba", "kneighbors", "score"]:
            patching_status.and_condition(
                hasattr(self, "_onedal_estimator"), "oneDAL model was not trained."
            )
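
With "score" added to the dispatchable method list in KNeighborsDispatchingBase, score calls on the patched neighbors estimators can stay on the oneDAL branch once a model is trained. A hedged sketch; whether score itself is dispatched depends on the estimator, and KNeighborsClassifier.score computes plain accuracy either way:

import numpy as np

from sklearnex.neighbors import KNeighborsClassifier

gen = np.random.default_rng(0)
X = gen.random((300, 3))
y = (X.sum(axis=1) > 1.5).astype(int)

knn = KNeighborsClassifier(n_neighbors=5).fit(X, y)
print(knn.score(X, y))  # evaluated against the oneDAL-trained model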