scikit-learn-intelex 2024.1.0__py311-none-manylinux1_x86_64.whl → 2024.4.0__py311-none-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (62)
  1. {scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.4.0.dist-info}/METADATA +2 -2
  2. scikit_learn_intelex-2024.4.0.dist-info/RECORD +101 -0
  3. sklearnex/__init__.py +9 -7
  4. sklearnex/_device_offload.py +31 -4
  5. sklearnex/basic_statistics/__init__.py +2 -1
  6. sklearnex/basic_statistics/incremental_basic_statistics.py +288 -0
  7. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +386 -0
  8. sklearnex/cluster/dbscan.py +6 -4
  9. sklearnex/conftest.py +63 -0
  10. sklearnex/{preview/decomposition → covariance}/__init__.py +19 -19
  11. sklearnex/covariance/incremental_covariance.py +130 -0
  12. sklearnex/covariance/tests/test_incremental_covariance.py +143 -0
  13. sklearnex/decomposition/pca.py +319 -1
  14. sklearnex/decomposition/tests/test_pca.py +34 -5
  15. sklearnex/dispatcher.py +93 -61
  16. sklearnex/ensemble/_forest.py +81 -97
  17. sklearnex/ensemble/tests/test_forest.py +15 -19
  18. sklearnex/linear_model/__init__.py +1 -2
  19. sklearnex/linear_model/linear.py +275 -347
  20. sklearnex/{preview/linear_model → linear_model}/logistic_regression.py +83 -50
  21. sklearnex/linear_model/tests/test_linear.py +40 -5
  22. sklearnex/linear_model/tests/test_logreg.py +70 -7
  23. sklearnex/neighbors/__init__.py +1 -1
  24. sklearnex/neighbors/_lof.py +221 -0
  25. sklearnex/neighbors/common.py +4 -1
  26. sklearnex/neighbors/knn_classification.py +47 -137
  27. sklearnex/neighbors/knn_regression.py +20 -132
  28. sklearnex/neighbors/knn_unsupervised.py +16 -93
  29. sklearnex/neighbors/tests/test_neighbors.py +12 -16
  30. sklearnex/preview/__init__.py +1 -1
  31. sklearnex/preview/cluster/k_means.py +8 -81
  32. sklearnex/preview/covariance/covariance.py +51 -16
  33. sklearnex/preview/covariance/tests/test_covariance.py +18 -5
  34. sklearnex/spmd/__init__.py +1 -0
  35. sklearnex/{preview/linear_model → spmd/covariance}/__init__.py +5 -5
  36. sklearnex/spmd/covariance/covariance.py +21 -0
  37. sklearnex/spmd/ensemble/forest.py +4 -12
  38. sklearnex/spmd/linear_model/__init__.py +2 -1
  39. sklearnex/spmd/linear_model/logistic_regression.py +21 -0
  40. sklearnex/svm/_common.py +4 -7
  41. sklearnex/svm/nusvc.py +74 -55
  42. sklearnex/svm/nusvr.py +9 -56
  43. sklearnex/svm/svc.py +74 -56
  44. sklearnex/svm/svr.py +6 -53
  45. sklearnex/tests/_utils.py +164 -0
  46. sklearnex/tests/test_memory_usage.py +9 -7
  47. sklearnex/tests/test_monkeypatch.py +179 -138
  48. sklearnex/tests/test_n_jobs_support.py +77 -9
  49. sklearnex/tests/test_parallel.py +6 -8
  50. sklearnex/tests/test_patching.py +338 -89
  51. sklearnex/utils/__init__.py +2 -1
  52. sklearnex/utils/_namespace.py +97 -0
  53. scikit_learn_intelex-2024.1.0.dist-info/RECORD +0 -97
  54. sklearnex/neighbors/lof.py +0 -436
  55. sklearnex/preview/decomposition/pca.py +0 -376
  56. sklearnex/preview/decomposition/tests/test_preview_pca.py +0 -42
  57. sklearnex/preview/linear_model/tests/test_preview_logistic_regression.py +0 -59
  58. sklearnex/tests/_models_info.py +0 -170
  59. sklearnex/tests/utils/_launch_algorithms.py +0 -118
  60. {scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.4.0.dist-info}/LICENSE.txt +0 -0
  61. {scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.4.0.dist-info}/WHEEL +0 -0
  62. {scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.4.0.dist-info}/top_level.txt +0 -0
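Not part of the diff: a minimal usage sketch for orientation. Both import paths below exist in this release (sklearnex/dispatcher.py handles the patching); the dataset and parameters are illustrative only, and acceleration depends on the installed oneDAL backend.

from sklearnex import patch_sklearn
patch_sklearn()  # swaps supported scikit-learn estimators for their sklearnex/oneDAL versions

# Import estimators only after patch_sklearn(); they then resolve to the patched classes.
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

X, y = load_iris(return_X_y=True)
clf = LogisticRegression(max_iter=200).fit(X, y)
print(clf.score(X, y))

# The same estimators can also be used directly, without global patching:
from sklearnex.linear_model import LogisticRegression as XLogisticRegression
clf = XLogisticRegression(max_iter=200).fit(X, y)
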
@@ -1,5 +1,5 @@
 # ===============================================================================
-# Copyright 2023 Intel Corporation
+# Copyright 2024 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,14 +17,11 @@
 import logging
 from abc import ABC
 
-import sklearn.linear_model._logistic as logistic_module
-
 from daal4py.sklearn._utils import daal_check_version
 from daal4py.sklearn.linear_model.logistic_path import (
-    LogisticRegression,
-    daal4py_predict,
-    logistic_regression_path,
+    LogisticRegression as LogisticRegression_daal4py,
 )
+from daal4py.sklearn.linear_model.logistic_path import daal4py_fit, daal4py_predict
 
 
 class BaseLogisticRegression(ABC):
@@ -41,16 +38,28 @@ if daal_check_version((2024, "P", 1)):
     import numpy as np
     from scipy.sparse import issparse
     from sklearn.linear_model import LogisticRegression as sklearn_LogisticRegression
+    from sklearn.metrics import accuracy_score
+    from sklearn.utils.multiclass import type_of_target
     from sklearn.utils.validation import check_X_y
 
+    from daal4py.sklearn._n_jobs_support import control_n_jobs
     from daal4py.sklearn._utils import sklearn_check_version
     from onedal.linear_model import LogisticRegression as onedal_LogisticRegression
-    from onedal.utils import _num_features, _num_samples
-
-    from ..._device_offload import dispatch, wrap_output_data
-    from ..._utils import PatchingConditionsChain, get_patch_message
-    from ...utils.validation import _assert_all_finite
-
+    from onedal.utils import _num_samples
+
+    from .._device_offload import dispatch, wrap_output_data
+    from .._utils import PatchingConditionsChain, get_patch_message
+    from ..utils.validation import _assert_all_finite
+
+    @control_n_jobs(
+        decorated_methods=[
+            "fit",
+            "predict",
+            "predict_proba",
+            "predict_log_proba",
+            "score",
+        ]
+    )
     class LogisticRegression(sklearn_LogisticRegression, BaseLogisticRegression):
         __doc__ = sklearn_LogisticRegression.__doc__
         intercept_, coef_, n_iter_ = None, None, None
@@ -71,9 +80,9 @@ if daal_check_version((2024, "P", 1)):
             intercept_scaling=1,
             class_weight=None,
             random_state=None,
-            solver="lbfgs" if sklearn_check_version("0.22") else "liblinear",
+            solver="lbfgs",
             max_iter=100,
-            multi_class="auto" if sklearn_check_version("0.22") else "ovr",
+            multi_class="auto",
             verbose=0,
             warm_start=False,
             n_jobs=None,
@@ -97,6 +106,8 @@ if daal_check_version((2024, "P", 1)):
                 l1_ratio=l1_ratio,
             )
 
+        _onedal_cpu_fit = daal4py_fit
+
         def fit(self, X, y, sample_weight=None):
             if sklearn_check_version("1.0"):
                 self._check_feature_names(X, reset=True)
@@ -157,13 +168,32 @@ if daal_check_version((2024, "P", 1)):
                 X,
             )
 
+        @wrap_output_data
+        def score(self, X, y, sample_weight=None):
+            if sklearn_check_version("1.0"):
+                self._check_feature_names(X, reset=False)
+            return dispatch(
+                self,
+                "score",
+                {
+                    "onedal": self.__class__._onedal_score,
+                    "sklearn": sklearn_LogisticRegression.score,
+                },
+                X,
+                y,
+                sample_weight=sample_weight,
+            )
+
+        def _onedal_score(self, X, y, sample_weight=None, queue=None):
+            return accuracy_score(
+                y, self._onedal_predict(X, queue=queue), sample_weight=sample_weight
+            )
+
         def _test_type_and_finiteness(self, X_in):
             X = np.asarray(X_in)
 
-            dtype = X.dtype
-            if "complex" in str(type(dtype)):
+            if np.iscomplexobj(X):
                 return False
-
             try:
                 _assert_all_finite(X)
             except BaseException:
@@ -184,7 +214,10 @@ if daal_check_version((2024, "P", 1)):
                 [
                     (self.penalty == "l2", "Only l2 penalty is supported."),
                     (self.dual == False, "dual=True is not supported."),
-                    (self.intercept_scaling == 1, "Intercept scaling is not supported."),
+                    (
+                        self.intercept_scaling == 1,
+                        "Intercept scaling is not supported.",
+                    ),
                     (self.class_weight is None, "Class weight is not supported"),
                     (self.solver == "newton-cg", "Only newton-cg solver is supported."),
                     (
@@ -194,6 +227,10 @@ if daal_check_version((2024, "P", 1)):
                     (self.warm_start == False, "Warm start is not supported."),
                     (self.l1_ratio is None, "l1 ratio is not supported."),
                     (sample_weight is None, "Sample weight is not supported."),
+                    (
+                        type_of_target(y) == "binary",
+                        "Only binary classification is supported",
+                    ),
                 ]
             )
 
@@ -212,24 +249,34 @@ if daal_check_version((2024, "P", 1)):
             return patching_status
 
         def _onedal_gpu_predict_supported(self, method_name, *data):
-            assert method_name in ["predict", "predict_proba", "predict_log_proba"]
-            assert len(data) == 1
+            assert method_name in [
+                "predict",
+                "predict_proba",
+                "predict_log_proba",
+                "score",
+            ]
 
             class_name = self.__class__.__name__
             patching_status = PatchingConditionsChain(
                 f"sklearn.linear_model.{class_name}.{method_name}"
             )
 
-            n_samples = _num_samples(*data)
+            n_samples = _num_samples(data[0])
            model_is_sparse = issparse(self.coef_) or (
                 self.fit_intercept and issparse(self.intercept_)
             )
             dal_ready = patching_status.and_conditions(
                 [
                     (n_samples > 0, "Number of samples is less than 1."),
-                    (not issparse(*data), "Sparse input is not supported."),
+                    (
+                        not any([issparse(i) for i in data]),
+                        "Sparse input is not supported.",
+                    ),
                     (not model_is_sparse, "Sparse coefficients are not supported."),
-                    (hasattr(self, "_onedal_estimator"), "oneDAL model was not trained."),
+                    (
+                        hasattr(self, "_onedal_estimator"),
+                        "oneDAL model was not trained.",
+                    ),
                 ]
             )
             if not dal_ready:
@@ -244,7 +291,7 @@ if daal_check_version((2024, "P", 1)):
         def _onedal_gpu_supported(self, method_name, *data):
             if method_name == "fit":
                 return self._onedal_gpu_fit_supported(method_name, *data)
-            if method_name in ["predict", "predict_proba", "predict_log_proba"]:
+            if method_name in ["predict", "predict_proba", "predict_log_proba", "score"]:
                 return self._onedal_gpu_predict_supported(method_name, *data)
             raise RuntimeError(
                 f"Unknown method {method_name} in {self.__class__.__name__}"
@@ -268,15 +315,6 @@ if daal_check_version((2024, "P", 1)):
             }
             self._onedal_estimator = onedal_LogisticRegression(**onedal_params)
 
-        def _onedal_cpu_fit(self, X, y, sample_weight):
-            which, what = logistic_module, "_logistic_regression_path"
-            replacer = logistic_regression_path
-            descriptor = getattr(which, what, None)
-            setattr(which, what, replacer)
-            clf = super().fit(X, y, sample_weight)
-            setattr(which, what, descriptor)
-            return clf
-
         def _onedal_fit(self, X, y, sample_weight, queue=None):
             if queue is None or queue.sycl_device.is_cpu:
                 return self._onedal_cpu_fit(X, y, sample_weight)
@@ -313,38 +351,33 @@ if daal_check_version((2024, "P", 1)):
                 return daal4py_predict(self, X, "computeClassLabels")
 
             X = self._validate_data(X, accept_sparse=False, reset=False)
-            if not hasattr(self, "_onedal_estimator"):
-                self._initialize_onedal_estimator()
-                self._onedal_estimator.coef_ = self.coef_
-                self._onedal_estimator.intercept_ = self.intercept_
-                self._onedal_estimator.classes_ = self.classes_
-
+            assert hasattr(self, "_onedal_estimator")
             return self._onedal_estimator.predict(X, queue=queue)
 
         def _onedal_predict_proba(self, X, queue=None):
             if queue is None or queue.sycl_device.is_cpu:
                 return daal4py_predict(self, X, "computeClassProbabilities")
-            X = self._validate_data(X, accept_sparse=False, reset=False)
-            if not hasattr(self, "_onedal_estimator"):
-                self._initialize_onedal_estimator()
-                self._onedal_estimator.coef_ = self.coef_
-                self._onedal_estimator.intercept_ = self.intercept_
 
+            X = self._validate_data(X, accept_sparse=False, reset=False)
+            assert hasattr(self, "_onedal_estimator")
             return self._onedal_estimator.predict_proba(X, queue=queue)
 
         def _onedal_predict_log_proba(self, X, queue=None):
             if queue is None or queue.sycl_device.is_cpu:
                 return daal4py_predict(self, X, "computeClassLogProbabilities")
-            X = self._validate_data(X, accept_sparse=False, reset=False)
-            if not hasattr(self, "_onedal_estimator"):
-                self._initialize_onedal_estimator()
-                self._onedal_estimator.coef_ = self.coef_
-                self._onedal_estimator.intercept_ = self.intercept_
 
+            X = self._validate_data(X, accept_sparse=False, reset=False)
+            assert hasattr(self, "_onedal_estimator")
             return self._onedal_estimator.predict_log_proba(X, queue=queue)
 
+        fit.__doc__ = sklearn_LogisticRegression.fit.__doc__
+        predict.__doc__ = sklearn_LogisticRegression.predict.__doc__
+        predict_proba.__doc__ = sklearn_LogisticRegression.predict_proba.__doc__
+        predict_log_proba.__doc__ = sklearn_LogisticRegression.predict_log_proba.__doc__
+        score.__doc__ = sklearn_LogisticRegression.score.__doc__
+
 else:
-    from daal4py.sklearn.linear_model import LogisticRegression
+    LogisticRegression = LogisticRegression_daal4py
 
     logging.warning(
         "Sklearnex LogisticRegression requires oneDAL version >= 2024.0.1 "
@@ -28,26 +28,33 @@ from onedal.tests.utils._dataframes_support import (
 
 
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
 @pytest.mark.parametrize("macro_block", [None, 1024])
-def test_sklearnex_import_linear(dataframe, queue, macro_block):
+def test_sklearnex_import_linear(dataframe, queue, dtype, macro_block):
     from sklearnex.linear_model import LinearRegression
 
     X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
     y = np.dot(X, np.array([1, 2])) + 3
+    X = X.astype(dtype=dtype)
+    y = y.astype(dtype=dtype)
     X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
     y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
+
     linreg = LinearRegression()
     if daal_check_version((2024, "P", 0)) and macro_block is not None:
         hparams = linreg.get_hyperparameters("fit")
         hparams.cpu_macro_block = macro_block
         hparams.gpu_macro_block = macro_block
+
     linreg.fit(X, y)
-    if daal_check_version((2023, "P", 100)):
-        assert hasattr(linreg, "_onedal_estimator")
+
+    assert hasattr(linreg, "_onedal_estimator")
     assert "sklearnex" in linreg.__module__
     assert linreg.n_features_in_ == 2
-    assert_allclose(_as_numpy(linreg.intercept_), 3.0)
-    assert_allclose(_as_numpy(linreg.coef_), [1.0, 2.0])
+
+    tol = 1e-5 if X.dtype == np.float32 else 1e-7
+    assert_allclose(_as_numpy(linreg.intercept_), 3.0, rtol=tol)
+    assert_allclose(_as_numpy(linreg.coef_), [1.0, 2.0], rtol=tol)
 
 
 def test_sklearnex_import_ridge():
@@ -80,3 +87,31 @@ def test_sklearnex_import_elastic():
     assert "daal4py" in elasticnet.__module__
     assert_allclose(elasticnet.intercept_, 1.451, atol=1e-3)
     assert_allclose(elasticnet.coef_, [18.838, 64.559], atol=1e-3)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_sklearnex_reconstruct_model(dataframe, queue, dtype):
+    from sklearnex.linear_model import LinearRegression
+
+    seed = 42
+    num_samples = 3500
+    num_features, num_targets = 14, 9
+
+    gen = np.random.default_rng(seed)
+    intercept = gen.random(size=num_targets, dtype=dtype)
+    coef = gen.random(size=(num_targets, num_features), dtype=dtype).T
+
+    X = gen.random(size=(num_samples, num_features), dtype=dtype)
+    gtr = X @ coef + intercept[np.newaxis, :]
+
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+
+    linreg = LinearRegression(fit_intercept=True)
+    linreg.coef_ = coef.T
+    linreg.intercept_ = intercept
+
+    y_pred = linreg.predict(X)
+
+    tol = 1e-5 if X.dtype == np.float32 else 1e-7
+    assert_allclose(gtr, _as_numpy(y_pred), rtol=tol)
@@ -14,15 +14,78 @@
 # limitations under the License.
 # ===============================================================================
 
-import numpy as np
-from numpy.testing import assert_allclose
-from sklearn.datasets import load_iris
+import pytest
+from sklearn.datasets import load_breast_cancer, load_iris
+from sklearn.metrics import accuracy_score
+from sklearn.model_selection import train_test_split
 
+from daal4py.sklearn._utils import daal_check_version
+from onedal.tests.utils._dataframes_support import (
+    _as_numpy,
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
 
-def test_sklearnex_import():
+
+def prepare_input(X, y, dataframe, queue):
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, train_size=0.8, random_state=42
+    )
+    X_train = _convert_to_dataframe(X_train, sycl_queue=queue, target_df=dataframe)
+    y_train = _convert_to_dataframe(y_train, sycl_queue=queue, target_df=dataframe)
+    X_test = _convert_to_dataframe(X_test, sycl_queue=queue, target_df=dataframe)
+    return X_train, X_test, y_train, y_test
+
+
+@pytest.mark.parametrize(
+    "dataframe,queue",
+    get_dataframes_and_queues(device_filter_="cpu"),
+)
+def test_sklearnex_multiclass_classification(dataframe, queue):
     from sklearnex.linear_model import LogisticRegression
 
     X, y = load_iris(return_X_y=True)
-    logreg = LogisticRegression(random_state=0, max_iter=200).fit(X, y)
-    assert "daal4py" in logreg.__module__
-    assert_allclose(logreg.score(X, y), 0.9733, atol=1e-3)
+    X_train, X_test, y_train, y_test = prepare_input(X, y, dataframe, queue)
+
+    logreg = LogisticRegression(fit_intercept=True, solver="lbfgs", max_iter=200).fit(
+        X_train, y_train
+    )
+
+    if daal_check_version((2024, "P", 1)):
+        assert "sklearnex" in logreg.__module__
+    else:
+        assert "daal4py" in logreg.__module__
+
+    y_pred = _as_numpy(logreg.predict(X_test))
+    assert accuracy_score(y_test, y_pred) > 0.99
+
+
+@pytest.mark.parametrize(
+    "dataframe,queue",
+    get_dataframes_and_queues(),
+)
+def test_sklearnex_binary_classification(dataframe, queue):
+    from sklearnex.linear_model import LogisticRegression
+
+    X, y = load_breast_cancer(return_X_y=True)
+    X_train, X_test, y_train, y_test = prepare_input(X, y, dataframe, queue)
+
+    logreg = LogisticRegression(fit_intercept=True, solver="newton-cg", max_iter=100).fit(
+        X_train, y_train
+    )
+
+    if daal_check_version((2024, "P", 1)):
+        assert "sklearnex" in logreg.__module__
+    else:
+        assert "daal4py" in logreg.__module__
+    if (
+        dataframe != "numpy"
+        and queue is not None
+        and queue.sycl_device.is_gpu
+        and daal_check_version((2024, "P", 1))
+    ):
+        # fit was done on gpu
+        assert hasattr(logreg, "_onedal_estimator")
+
+    y_pred = _as_numpy(logreg.predict(X_test))
+    assert accuracy_score(y_test, y_pred) > 0.95
@@ -14,10 +14,10 @@
 # limitations under the License.
 # ===============================================================================
 
+from ._lof import LocalOutlierFactor
 from .knn_classification import KNeighborsClassifier
 from .knn_regression import KNeighborsRegressor
 from .knn_unsupervised import NearestNeighbors
-from .lof import LocalOutlierFactor
 
 __all__ = [
     "KNeighborsClassifier",
@@ -0,0 +1,221 @@
+# ===============================================================================
+# Copyright 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===============================================================================
+
+import warnings
+
+import numpy as np
+from sklearn.neighbors import LocalOutlierFactor as sklearn_LocalOutlierFactor
+from sklearn.utils.metaestimators import available_if
+from sklearn.utils.validation import check_is_fitted
+
+from daal4py.sklearn._n_jobs_support import control_n_jobs
+from daal4py.sklearn._utils import sklearn_check_version
+from sklearnex._device_offload import dispatch, wrap_output_data
+from sklearnex.neighbors.common import KNeighborsDispatchingBase
+from sklearnex.neighbors.knn_unsupervised import NearestNeighbors
+from sklearnex.utils import get_namespace
+
+
+@control_n_jobs(decorated_methods=["fit", "_kneighbors"])
+class LocalOutlierFactor(KNeighborsDispatchingBase, sklearn_LocalOutlierFactor):
+    __doc__ = (
+        sklearn_LocalOutlierFactor.__doc__
+        + "\n NOTE: When X=None, methods kneighbors, kneighbors_graph, and predict will"
+        + "\n only output numpy arrays. In that case, the only way to offload to gpu"
+        + "\n is to use a global queue (e.g. using config_context)"
+    )
+    if sklearn_check_version("1.2"):
+        _parameter_constraints: dict = {
+            **sklearn_LocalOutlierFactor._parameter_constraints
+        }
+
+    # Only certain methods should be taken from knn to prevent code
+    # duplication. Inheriting would yield a complicated inheritance
+    # structure and violate the sklearn inheritance path.
+    _save_attributes = NearestNeighbors._save_attributes
+    _onedal_knn_fit = NearestNeighbors._onedal_fit
+    _onedal_kneighbors = NearestNeighbors._onedal_kneighbors
+
+    def _onedal_fit(self, X, y, queue=None):
+        if sklearn_check_version("1.2"):
+            self._validate_params()
+
+        self._onedal_knn_fit(X, y, queue)
+
+        if self.contamination != "auto":
+            if not (0.0 < self.contamination <= 0.5):
+                raise ValueError(
+                    "contamination must be in (0, 0.5], " "got: %f" % self.contamination
+                )
+
+        n_samples = self.n_samples_fit_
+
+        if self.n_neighbors > n_samples:
+            warnings.warn(
+                "n_neighbors (%s) is greater than the "
+                "total number of samples (%s). n_neighbors "
+                "will be set to (n_samples - 1) for estimation."
+                % (self.n_neighbors, n_samples)
+            )
+        self.n_neighbors_ = max(1, min(self.n_neighbors, n_samples - 1))
+
+        (
+            self._distances_fit_X_,
+            _neighbors_indices_fit_X_,
+        ) = self._onedal_kneighbors(n_neighbors=self.n_neighbors_, queue=queue)
+
+        # Sklearn includes a check for float32 at this point which may not be
+        # necessary for onedal
+
+        self._lrd = self._local_reachability_density(
+            self._distances_fit_X_, _neighbors_indices_fit_X_
+        )
+
+        # Compute lof score over training samples to define offset_:
+        lrd_ratios_array = self._lrd[_neighbors_indices_fit_X_] / self._lrd[:, np.newaxis]
+
+        self.negative_outlier_factor_ = -np.mean(lrd_ratios_array, axis=1)
+
+        if self.contamination == "auto":
+            # inliers score around -1 (the higher, the less abnormal).
+            self.offset_ = -1.5
+        else:
+            self.offset_ = np.percentile(
+                self.negative_outlier_factor_, 100.0 * self.contamination
+            )
+
+        return self
+
+    def fit(self, X, y=None):
+        result = dispatch(
+            self,
+            "fit",
+            {
+                "onedal": self.__class__._onedal_fit,
+                "sklearn": sklearn_LocalOutlierFactor.fit,
+            },
+            X,
+            None,
+        )
+        return result
+
+    def _predict(self, X=None):
+        check_is_fitted(self)
+
+        if X is not None:
+            xp, _ = get_namespace(X)
+            output = self.decision_function(X) < 0
+            is_inlier = xp.ones_like(output, dtype=int)
+            is_inlier[output] = -1
+        else:
+            is_inlier = np.ones(self.n_samples_fit_, dtype=int)
+            is_inlier[self.negative_outlier_factor_ < self.offset_] = -1
+
+        return is_inlier
+
+    # This had to be done because predict loses the queue when no
+    # argument is given and it is a dpctl tensor or dpnp array.
+    # This would cause issues in fit_predict. Also, available_if
+    # is hard to unwrap, and this is the most straighforward way.
+    @available_if(sklearn_LocalOutlierFactor._check_novelty_fit_predict)
+    @wrap_output_data
+    def fit_predict(self, X, y=None):
+        """Fit the model to the training set X and return the labels.
+
+        **Not available for novelty detection (when novelty is set to True).**
+        Label is 1 for an inlier and -1 for an outlier according to the LOF
+        score and the contamination parameter.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features), default=None
+            The query sample or samples to compute the Local Outlier Factor
+            w.r.t. the training samples.
+
+        y : Ignored
+            Not used, present for API consistency by convention.
+
+        Returns
+        -------
+        is_inlier : ndarray of shape (n_samples,)
+            Returns -1 for anomalies/outliers and 1 for inliers.
+        """
+        return self.fit(X)._predict()
+
+    def _kneighbors(self, X=None, n_neighbors=None, return_distance=True):
+        check_is_fitted(self)
+        if sklearn_check_version("1.0") and X is not None:
+            self._check_feature_names(X, reset=False)
+        return dispatch(
+            self,
+            "kneighbors",
+            {
+                "onedal": self.__class__._onedal_kneighbors,
+                "sklearn": sklearn_LocalOutlierFactor.kneighbors,
+            },
+            X,
+            n_neighbors=n_neighbors,
+            return_distance=return_distance,
+        )
+
+    kneighbors = wrap_output_data(_kneighbors)
+
+    @available_if(sklearn_LocalOutlierFactor._check_novelty_score_samples)
+    @wrap_output_data
+    def score_samples(self, X):
+        """Opposite of the Local Outlier Factor of X.
+
+        It is the opposite as bigger is better, i.e. large values correspond
+        to inliers.
+
+        **Only available for novelty detection (when novelty is set to True).**
+        The argument X is supposed to contain *new data*: if X contains a
+        point from training, it considers the later in its own neighborhood.
+        Also, the samples in X are not considered in the neighborhood of any
+        point. Because of this, the scores obtained via ``score_samples`` may
+        differ from the standard LOF scores.
+        The standard LOF scores for the training data is available via the
+        ``negative_outlier_factor_`` attribute.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            The query sample or samples to compute the Local Outlier Factor
+            w.r.t. the training samples.
+
+        Returns
+        -------
+        opposite_lof_scores : ndarray of shape (n_samples,)
+            The opposite of the Local Outlier Factor of each input samples.
+            The lower, the more abnormal.
+        """
+        check_is_fitted(self)
+
+        distances_X, neighbors_indices_X = self._kneighbors(
+            X, n_neighbors=self.n_neighbors_
+        )
+
+        X_lrd = self._local_reachability_density(
+            distances_X,
+            neighbors_indices_X,
+        )
+
+        lrd_ratios_array = self._lrd[neighbors_indices_X] / X_lrd[:, np.newaxis]
+
+        return -np.mean(lrd_ratios_array, axis=1)
+
+    fit.__doc__ = sklearn_LocalOutlierFactor.fit.__doc__
+    kneighbors.__doc__ = sklearn_LocalOutlierFactor.kneighbors.__doc__
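
Not part of the diff: the class above keeps scikit-learn's LocalOutlierFactor semantics, so fit_predict is the outlier-detection entry point (novelty=False, the default), while score_samples and predict on new data require novelty=True. A small sketch under those assumptions, with synthetic data and illustrative parameters.

import numpy as np
from sklearnex.neighbors import LocalOutlierFactor

rng = np.random.default_rng(0)
X_train = rng.normal(size=(200, 2))
X_new = np.array([[0.1, 0.0], [6.0, 6.0]])  # one typical point, one far-away point

lof = LocalOutlierFactor(n_neighbors=20)
labels = lof.fit_predict(X_train)  # 1 for inliers, -1 for outliers, per the docstring above

novelty_lof = LocalOutlierFactor(n_neighbors=20, novelty=True).fit(X_train)
scores = novelty_lof.score_samples(X_new)  # lower values are more abnormal
preds = novelty_lof.predict(X_new)         # expected roughly [1, -1]
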
@@ -137,6 +137,9 @@ class KNeighborsDispatchingBase:
             self.n_features_in_ = X.data.shape[1]
 
     def _onedal_supported(self, device, method_name, *data):
+        if method_name == "fit":
+            self._fit_validation(data[0], data[1])
+
         class_name = self.__class__.__name__
         is_classifier = "Classifier" in class_name
         is_regressor = "Regressor" in class_name
@@ -249,7 +252,7 @@ class KNeighborsDispatchingBase:
                 class_count >= 2, "One-class case is not supported."
             )
            return patching_status
-        if method_name in ["predict", "predict_proba", "kneighbors"]:
+        if method_name in ["predict", "predict_proba", "kneighbors", "score"]:
            patching_status.and_condition(
                 hasattr(self, "_onedal_estimator"), "oneDAL model was not trained."
             )