scikit-learn-intelex 2024.6.0__py311-none-manylinux1_x86_64.whl → 2024.7.0__py311-none-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)
  1. {scikit_learn_intelex-2024.6.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/METADATA +2 -2
  2. {scikit_learn_intelex-2024.6.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/RECORD +55 -41
  3. sklearnex/_config.py +3 -15
  4. sklearnex/_device_offload.py +9 -168
  5. sklearnex/basic_statistics/basic_statistics.py +127 -1
  6. sklearnex/basic_statistics/tests/test_basic_statistics.py +251 -0
  7. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +1 -1
  8. sklearnex/cluster/dbscan.py +0 -1
  9. sklearnex/cluster/k_means.py +8 -0
  10. sklearnex/cluster/tests/test_kmeans.py +15 -3
  11. sklearnex/covariance/incremental_covariance.py +64 -13
  12. sklearnex/covariance/tests/test_incremental_covariance.py +35 -0
  13. sklearnex/decomposition/pca.py +25 -1
  14. sklearnex/dispatcher.py +94 -0
  15. sklearnex/ensemble/_forest.py +8 -35
  16. sklearnex/ensemble/tests/test_forest.py +9 -12
  17. sklearnex/linear_model/coordinate_descent.py +13 -0
  18. sklearnex/linear_model/linear.py +2 -34
  19. sklearnex/linear_model/logistic_regression.py +79 -59
  20. sklearnex/linear_model/ridge.py +7 -0
  21. sklearnex/linear_model/tests/test_linear.py +28 -3
  22. sklearnex/linear_model/tests/test_logreg.py +45 -3
  23. sklearnex/manifold/t_sne.py +4 -0
  24. sklearnex/metrics/pairwise.py +5 -0
  25. sklearnex/metrics/ranking.py +3 -0
  26. sklearnex/model_selection/split.py +3 -0
  27. sklearnex/neighbors/_lof.py +9 -0
  28. sklearnex/neighbors/common.py +45 -1
  29. sklearnex/neighbors/knn_classification.py +1 -20
  30. sklearnex/neighbors/knn_regression.py +1 -20
  31. sklearnex/neighbors/knn_unsupervised.py +31 -7
  32. sklearnex/preview/__init__.py +1 -1
  33. sklearnex/preview/linear_model/__init__.py +19 -0
  34. sklearnex/preview/linear_model/ridge.py +419 -0
  35. sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
  36. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  37. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  38. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
  39. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  40. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  41. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  42. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  43. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +163 -0
  44. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  45. sklearnex/svm/_common.py +19 -21
  46. sklearnex/svm/tests/test_svm.py +12 -20
  47. sklearnex/tests/_utils.py +143 -20
  48. sklearnex/tests/_utils_spmd.py +185 -0
  49. sklearnex/tests/test_config.py +4 -0
  50. sklearnex/tests/test_monkeypatch.py +12 -4
  51. sklearnex/tests/test_patching.py +16 -13
  52. sklearnex/tests/test_run_to_run_stability.py +21 -9
  53. {scikit_learn_intelex-2024.6.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/LICENSE.txt +0 -0
  54. {scikit_learn_intelex-2024.6.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/WHEEL +0 -0
  55. {scikit_learn_intelex-2024.6.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/top_level.txt +0 -0

sklearnex/ensemble/_forest.py

@@ -38,7 +38,12 @@ from sklearn.tree import (
 )
 from sklearn.tree._tree import Tree
 from sklearn.utils import check_random_state, deprecated
-from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
+from sklearn.utils.validation import (
+    _check_sample_weight,
+    check_array,
+    check_is_fitted,
+    check_X_y,
+)

 from daal4py.sklearn._n_jobs_support import control_n_jobs
 from daal4py.sklearn._utils import (
@@ -70,7 +75,7 @@ class BaseForest(ABC):
         X, y = self._validate_data(
             X,
             y,
-            multi_output=False,
+            multi_output=True,
             accept_sparse=False,
             dtype=[np.float64, np.float32],
             force_all_finite=False,
@@ -78,7 +83,7 @@ class BaseForest(ABC):
         )

         if sample_weight is not None:
-            sample_weight = self.check_sample_weight(sample_weight, X)
+            sample_weight = _check_sample_weight(sample_weight, X)

         if y.ndim == 2 and y.shape[1] == 1:
             warnings.warn(
@@ -289,38 +294,6 @@ class BaseForest(ABC):
                 "min_bin_size must be integral number but was " "%r" % self.min_bin_size
             )

-    def check_sample_weight(self, sample_weight, X, dtype=None):
-        n_samples = _num_samples(X)
-
-        if dtype is not None and dtype not in [np.float32, np.float64]:
-            dtype = np.float64
-
-        if sample_weight is None:
-            sample_weight = np.ones(n_samples, dtype=dtype)
-        elif isinstance(sample_weight, numbers.Number):
-            sample_weight = np.full(n_samples, sample_weight, dtype=dtype)
-        else:
-            if dtype is None:
-                dtype = [np.float64, np.float32]
-            sample_weight = check_array(
-                sample_weight,
-                accept_sparse=False,
-                ensure_2d=False,
-                dtype=dtype,
-                order="C",
-                force_all_finite=False,
-            )
-            if sample_weight.ndim != 1:
-                raise ValueError("Sample weights must be 1D array or scalar")
-
-            if sample_weight.shape != (n_samples,):
-                raise ValueError(
-                    "sample_weight.shape == {}, expected {}!".format(
-                        sample_weight.shape, (n_samples,)
-                    )
-                )
-        return sample_weight
-
     @property
     def estimators_(self):
         if hasattr(self, "_cached_estimators_"):
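
The deleted check_sample_weight helper duplicated validation that scikit-learn already ships; the forests now call sklearn's private _check_sample_weight directly. A minimal sketch of what that helper does (the arrays below are made up for illustration):

    import numpy as np
    from sklearn.utils.validation import _check_sample_weight

    X = np.arange(15, dtype=np.float64).reshape(5, 3)

    # None expands to unit weights, one per sample
    print(_check_sample_weight(None, X))                  # [1. 1. 1. 1. 1.]

    # a scalar is broadcast to all samples
    print(_check_sample_weight(2.0, X))                   # [2. 2. 2. 2. 2.]

    # an array-like is validated: it must be 1-D with exactly n_samples entries,
    # otherwise a ValueError is raised
    print(_check_sample_weight([1, 2, 3, 4, 5], X, dtype=np.float32))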

sklearnex/ensemble/tests/test_forest.py

@@ -46,11 +46,10 @@ def test_sklearnex_import_rf_classifier(dataframe, queue):
     assert_allclose([1], _as_numpy(rf.predict([[0, 0, 0, 0]])))


-# TODO: fix RF regressor predict for the GPU sycl_queue.
-@pytest.mark.parametrize(
-    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
-)
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
 def test_sklearnex_import_rf_regression(dataframe, queue):
+    if queue and queue.sycl_device.is_gpu:
+        pytest.skip("RF regressor predict for the GPU sycl_queue is buggy.")
     from sklearnex.ensemble import RandomForestRegressor

     X, y = make_regression(n_features=4, n_informative=2, random_state=0, shuffle=False)
@@ -69,11 +68,10 @@ def test_sklearnex_import_rf_regression(dataframe, queue):
     assert_allclose([-6.839], pred, atol=1e-2)


-# TODO: fix ET classifier predict for the GPU sycl_queue.
-@pytest.mark.parametrize(
-    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
-)
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
 def test_sklearnex_import_et_classifier(dataframe, queue):
+    if queue and queue.sycl_device.is_gpu:
+        pytest.skip("ET classifier predict for the GPU sycl_queue is buggy.")
     from sklearnex.ensemble import ExtraTreesClassifier

     X, y = make_classification(
@@ -93,11 +91,10 @@ def test_sklearnex_import_et_classifier(dataframe, queue):
     assert_allclose([1], _as_numpy(rf.predict([[0, 0, 0, 0]])))


-# TODO: fix ET regressor predict for the GPU sycl_queue.
-@pytest.mark.parametrize(
-    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
-)
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
 def test_sklearnex_import_et_regression(dataframe, queue):
+    if queue and queue.sycl_device.is_gpu:
+        pytest.skip("ET regressor predict for the GPU sycl_queue is buggy.")
     from sklearnex.ensemble import ExtraTreesRegressor

     X, y = make_regression(n_features=1, random_state=0, shuffle=False)

sklearnex/linear_model/coordinate_descent.py

@@ -15,3 +15,16 @@
 # ===============================================================================

 from daal4py.sklearn.linear_model import ElasticNet, Lasso
+from onedal._device_offload import support_usm_ndarray
+
+# Note: `sklearnex.linear_model.ElasticNet` only has functional
+# sycl GPU support. No GPU device will be offloaded.
+ElasticNet.fit = support_usm_ndarray(queue_param=False)(ElasticNet.fit)
+ElasticNet.predict = support_usm_ndarray(queue_param=False)(ElasticNet.predict)
+ElasticNet.score = support_usm_ndarray(queue_param=False)(ElasticNet.score)
+
+# Note: `sklearnex.linear_model.Lasso` only has functional
+# sycl GPU support. No GPU device will be offloaded.
+Lasso.fit = support_usm_ndarray(queue_param=False)(Lasso.fit)
+Lasso.predict = support_usm_ndarray(queue_param=False)(Lasso.predict)
+Lasso.score = support_usm_ndarray(queue_param=False)(Lasso.score)
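
support_usm_ndarray from onedal._device_offload lets these daal4py-backed estimators accept SYCL USM arrays (dpctl/dpnp) by converting them to host data around the call; as the comments above note, no GPU offload happens. A rough usage sketch, assuming dpctl is installed and a SYCL device is available (data values are illustrative, the expected coefficients come from the Lasso test below):

    import numpy as np
    import dpctl.tensor as dpt            # USM ndarray container
    from sklearnex.linear_model import Lasso

    # USM ndarrays allocated on the default SYCL device
    X = dpt.asarray(np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]]))
    y = dpt.asarray(np.array([0.0, 1.0, 2.0]))

    # fit/predict/score now accept USM inputs; the math itself still runs on the
    # CPU daal4py backend, since queue_param=False forwards no GPU queue
    model = Lasso(alpha=0.1).fit(X, y)
    print(model.coef_)                    # approximately [0.85, 0.0]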

sklearnex/linear_model/linear.py

@@ -27,7 +27,6 @@ from daal4py.sklearn._utils import sklearn_check_version

 from .._device_offload import dispatch, wrap_output_data
 from .._utils import PatchingConditionsChain, get_patch_message, register_hyperparameters
-from ..utils.validation import _assert_all_finite

 if sklearn_check_version("1.0") and not sklearn_check_version("1.2"):
     from sklearn.linear_model._base import _deprecate_normalize
@@ -138,19 +137,6 @@ class LinearRegression(sklearn_LinearRegression):
             sample_weight=sample_weight,
         )

-    def _test_type_and_finiteness(self, X_in):
-        X = X_in if isinstance(X_in, np.ndarray) else np.asarray(X_in)
-
-        dtype = X.dtype
-        if "complex" in str(type(dtype)):
-            return False
-
-        try:
-            _assert_all_finite(X)
-        except BaseException:
-            return False
-        return True
-
     def _onedal_fit_supported(self, method_name, *data):
         assert method_name == "fit"
         assert len(data) == 3
@@ -174,7 +160,7 @@
         # Check if equations are well defined
         is_underdetermined = n_samples < (n_features + int(self.fit_intercept))

-        dal_ready = patching_status.and_conditions(
+        patching_status.and_conditions(
             [
                 (sample_weight is None, "Sample weight is not supported."),
                 (
@@ -193,17 +179,6 @@ class LinearRegression(sklearn_LinearRegression):
                 ),
             ]
         )
-        if not dal_ready:
-            return patching_status
-
-        if not patching_status.and_condition(
-            self._test_type_and_finiteness(X), "Input X is not supported."
-        ):
-            return patching_status
-
-        patching_status.and_condition(
-            self._test_type_and_finiteness(y), "Input y is not supported."
-        )

         return patching_status

@@ -217,19 +192,13 @@ class LinearRegression(sklearn_LinearRegression):
         model_is_sparse = issparse(self.coef_) or (
             self.fit_intercept and issparse(self.intercept_)
         )
-        dal_ready = patching_status.and_conditions(
+        patching_status.and_conditions(
             [
                 (n_samples > 0, "Number of samples is less than 1."),
                 (not issparse(data[0]), "Sparse input is not supported."),
                 (not model_is_sparse, "Sparse coefficients are not supported."),
             ]
         )
-        if not dal_ready:
-            return patching_status
-
-        patching_status.and_condition(
-            self._test_type_and_finiteness(data[0]), "Input X is not supported."
-        )

         return patching_status

@@ -257,7 +226,6 @@
             "accept_sparse": ["csr", "csc", "coo"],
             "y_numeric": True,
             "multi_output": True,
-            "force_all_finite": False,
         }
         if sklearn_check_version("1.2"):
             X, y = self._validate_data(**check_params)
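
The removed _test_type_and_finiteness helper pre-screened inputs for complex dtypes and non-finite values before choosing the oneDAL path; with "force_all_finite": False also dropped, that screening now falls to scikit-learn's standard validation. A small illustration using the public check_array (arrays are made up):

    import numpy as np
    from sklearn.utils import check_array

    X_ok = np.array([[1.0, 2.0], [3.0, 4.0]])
    X_bad = np.array([[1.0, np.nan], [3.0, 4.0]])

    check_array(X_ok)        # finite float data passes through unchanged

    try:
        # force_all_finite defaults to True, so NaN/inf raise a ValueError
        check_array(X_bad)
    except ValueError as exc:
        print(exc)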

sklearnex/linear_model/logistic_regression.py

@@ -28,7 +28,7 @@ if daal_check_version((2024, "P", 1)):
     from sklearn.linear_model import LogisticRegression as sklearn_LogisticRegression
     from sklearn.metrics import accuracy_score
     from sklearn.utils.multiclass import type_of_target
-    from sklearn.utils.validation import check_X_y
+    from sklearn.utils.validation import check_array, check_is_fitted, check_X_y

     from daal4py.sklearn._n_jobs_support import control_n_jobs
     from daal4py.sklearn._utils import sklearn_check_version
@@ -38,7 +38,8 @@ if daal_check_version((2024, "P", 1)):

     from .._device_offload import dispatch, wrap_output_data
     from .._utils import PatchingConditionsChain, get_patch_message
-    from ..utils.validation import _assert_all_finite
+
+    _sparsity_enabled = daal_check_version((2024, "P", 700))

     class BaseLogisticRegression(ABC):
         def _save_attributes(self):
@@ -107,8 +108,6 @@
         _onedal_cpu_fit = daal4py_fit

         def fit(self, X, y, sample_weight=None):
-            if sklearn_check_version("1.0"):
-                self._check_feature_names(X, reset=True)
             if sklearn_check_version("1.2"):
                 self._validate_params()
             dispatch(
@@ -126,8 +125,6 @@

         @wrap_output_data
         def predict(self, X):
-            if sklearn_check_version("1.0"):
-                self._check_feature_names(X, reset=False)
             return dispatch(
                 self,
                 "predict",
@@ -140,8 +137,6 @@

         @wrap_output_data
         def predict_proba(self, X):
-            if sklearn_check_version("1.0"):
-                self._check_feature_names(X, reset=False)
             return dispatch(
                 self,
                 "predict_proba",
@@ -154,8 +149,6 @@

         @wrap_output_data
         def predict_log_proba(self, X):
-            if sklearn_check_version("1.0"):
-                self._check_feature_names(X, reset=False)
             return dispatch(
                 self,
                 "predict_log_proba",
@@ -168,8 +161,6 @@

         @wrap_output_data
         def score(self, X, y, sample_weight=None):
-            if sklearn_check_version("1.0"):
-                self._check_feature_names(X, reset=False)
             return dispatch(
                 self,
                 "score",
@@ -187,17 +178,6 @@
                 y, self._onedal_predict(X, queue=queue), sample_weight=sample_weight
             )

-        def _test_type_and_finiteness(self, X_in):
-            X = np.asarray(X_in)
-
-            if np.iscomplexobj(X):
-                return False
-            try:
-                _assert_all_finite(X)
-            except BaseException:
-                return False
-            return True
-
         def _onedal_gpu_fit_supported(self, method_name, *data):
             assert method_name == "fit"
             assert len(data) == 3
@@ -208,7 +188,12 @@
                 f"sklearn.linear_model.{class_name}.fit"
             )

-            dal_ready = patching_status.and_conditions(
+            target_type = (
+                type_of_target(y, input_name="y")
+                if sklearn_check_version("1.1")
+                else type_of_target(y)
+            )
+            patching_status.and_conditions(
                 [
                     (self.penalty == "l2", "Only l2 penalty is supported."),
                     (self.dual == False, "dual=True is not supported."),
@@ -226,24 +211,12 @@
                     (self.l1_ratio is None, "l1 ratio is not supported."),
                     (sample_weight is None, "Sample weight is not supported."),
                     (
-                        type_of_target(y) == "binary",
+                        target_type == "binary",
                         "Only binary classification is supported",
                     ),
                 ]
             )

-            if not dal_ready:
-                return patching_status
-
-            if not patching_status.and_condition(
-                self._test_type_and_finiteness(X), "Input X is not supported."
-            ):
-                return patching_status
-
-            patching_status.and_condition(
-                self._test_type_and_finiteness(y), "Input y is not supported."
-            )
-
             return patching_status

         def _onedal_gpu_predict_supported(self, method_name, *data):
@@ -267,7 +240,7 @@
                 [
                     (n_samples > 0, "Number of samples is less than 1."),
                     (
-                        not any([issparse(i) for i in data]),
+                        (not any([issparse(i) for i in data])) or _sparsity_enabled,
                         "Sparse input is not supported.",
                     ),
                     (not model_is_sparse, "Sparse coefficients are not supported."),
@@ -277,12 +250,6 @@ if daal_check_version((2024, "P", 1)):
                     ),
                 ]
             )
-            if not dal_ready:
-                return patching_status
-
-            patching_status.and_condition(
-                self._test_type_and_finiteness(*data), "Input X is not supported."
-            )

             return patching_status

@@ -313,24 +280,29 @@
             }
             self._onedal_estimator = onedal_LogisticRegression(**onedal_params)

-        def _onedal_fit(self, X, y, sample_weight, queue=None):
+        def _onedal_fit(self, X, y, sample_weight=None, queue=None):
             if queue is None or queue.sycl_device.is_cpu:
                 return self._onedal_cpu_fit(X, y, sample_weight)

             assert sample_weight is None

-            check_params = {
-                "X": X,
-                "y": y,
-                "dtype": [np.float64, np.float32],
-                "accept_sparse": False,
-                "multi_output": False,
-                "force_all_finite": True,
-            }
-            if sklearn_check_version("1.2"):
-                X, y = self._validate_data(**check_params)
+            if sklearn_check_version("1.0"):
+                X, y = self._validate_data(
+                    X,
+                    y,
+                    accept_sparse=_sparsity_enabled,
+                    accept_large_sparse=_sparsity_enabled,
+                    dtype=[np.float64, np.float32],
+                )
             else:
-                X, y = check_X_y(**check_params)
+                X, y = check_X_y(
+                    X,
+                    y,
+                    accept_sparse=_sparsity_enabled,
+                    accept_large_sparse=_sparsity_enabled,
+                    dtype=[np.float64, np.float32],
+                )
+
             self._initialize_onedal_estimator()
             try:
                 self._onedal_estimator.fit(X, y, queue=queue)
@@ -348,7 +320,23 @@
             if queue is None or queue.sycl_device.is_cpu:
                 return daal4py_predict(self, X, "computeClassLabels")

-            X = self._validate_data(X, accept_sparse=False, reset=False)
+            check_is_fitted(self)
+            if sklearn_check_version("1.0"):
+                X = self._validate_data(
+                    X,
+                    reset=False,
+                    accept_sparse=_sparsity_enabled,
+                    accept_large_sparse=_sparsity_enabled,
+                    dtype=[np.float64, np.float32],
+                )
+            else:
+                X = check_array(
+                    X,
+                    accept_sparse=_sparsity_enabled,
+                    accept_large_sparse=_sparsity_enabled,
+                    dtype=[np.float64, np.float32],
+                )
+
             assert hasattr(self, "_onedal_estimator")
             return self._onedal_estimator.predict(X, queue=queue)

@@ -356,7 +344,23 @@
             if queue is None or queue.sycl_device.is_cpu:
                 return daal4py_predict(self, X, "computeClassProbabilities")

-            X = self._validate_data(X, accept_sparse=False, reset=False)
+            check_is_fitted(self)
+            if sklearn_check_version("1.0"):
+                X = self._validate_data(
+                    X,
+                    reset=False,
+                    accept_sparse=_sparsity_enabled,
+                    accept_large_sparse=_sparsity_enabled,
+                    dtype=[np.float64, np.float32],
+                )
+            else:
+                X = check_array(
+                    X,
+                    accept_sparse=_sparsity_enabled,
+                    accept_large_sparse=_sparsity_enabled,
+                    dtype=[np.float64, np.float32],
+                )
+
             assert hasattr(self, "_onedal_estimator")
             return self._onedal_estimator.predict_proba(X, queue=queue)

@@ -364,7 +368,23 @@
             if queue is None or queue.sycl_device.is_cpu:
                 return daal4py_predict(self, X, "computeClassLogProbabilities")

-            X = self._validate_data(X, accept_sparse=False, reset=False)
+            check_is_fitted(self)
+            if sklearn_check_version("1.0"):
+                X = self._validate_data(
+                    X,
+                    reset=False,
+                    accept_sparse=_sparsity_enabled,
+                    accept_large_sparse=_sparsity_enabled,
+                    dtype=[np.float64, np.float32],
+                )
+            else:
+                X = check_array(
+                    X,
+                    accept_sparse=_sparsity_enabled,
+                    accept_large_sparse=_sparsity_enabled,
+                    dtype=[np.float64, np.float32],
+                )
+
             assert hasattr(self, "_onedal_estimator")
             return self._onedal_estimator.predict_log_proba(X, queue=queue)

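
The new _sparsity_enabled flag (oneDAL 2024.7+) threads accept_sparse/accept_large_sparse through the validation calls above so CSR input can reach the GPU solver instead of being rejected up front. A quick sketch of what that validation permits, using scikit-learn's public check_X_y on synthetic data:

    import numpy as np
    from scipy.sparse import csr_matrix
    from sklearn.utils.validation import check_X_y

    _sparsity_enabled = True   # stands in for daal_check_version((2024, "P", 700))

    X = csr_matrix(np.random.rand(10, 4))
    y = np.random.randint(0, 2, size=10)

    # With accept_sparse=True the CSR matrix is validated and passed through;
    # with accept_sparse=False the same call raises a TypeError instead.
    X_checked, y_checked = check_X_y(
        X,
        y,
        accept_sparse=_sparsity_enabled,
        accept_large_sparse=_sparsity_enabled,
        dtype=[np.float64, np.float32],
    )
    print(type(X_checked))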

sklearnex/linear_model/ridge.py

@@ -15,3 +15,10 @@
 # ===============================================================================

 from daal4py.sklearn.linear_model import Ridge
+from onedal._device_offload import support_usm_ndarray
+
+# Note: `sklearnex.linear_model.Ridge` only has functional
+# sycl GPU support. No GPU device will be offloaded.
+Ridge.fit = support_usm_ndarray(queue_param=False)(Ridge.fit)
+Ridge.predict = support_usm_ndarray(queue_param=False)(Ridge.predict)
+Ridge.score = support_usm_ndarray(queue_param=False)(Ridge.score)

sklearnex/linear_model/tests/test_linear.py

@@ -20,6 +20,10 @@ from numpy.testing import assert_allclose
 from sklearn.datasets import make_regression

 from daal4py.sklearn._utils import daal_check_version
+from daal4py.sklearn.linear_model.tests.test_ridge import (
+    _test_multivariate_ridge_alpha_shape,
+    _test_multivariate_ridge_coefficients,
+)
 from onedal.tests.utils._dataframes_support import (
     _as_numpy,
     _convert_to_dataframe,
@@ -57,32 +61,41 @@ def test_sklearnex_import_linear(dataframe, queue, dtype, macro_block):
     assert_allclose(_as_numpy(linreg.coef_), [1.0, 2.0], rtol=tol)


-def test_sklearnex_import_ridge():
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import_ridge(dataframe, queue):
     from sklearnex.linear_model import Ridge

     X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
     y = np.dot(X, np.array([1, 2])) + 3
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
     ridgereg = Ridge().fit(X, y)
     assert "daal4py" in ridgereg.__module__
     assert_allclose(ridgereg.intercept_, 4.5)
     assert_allclose(ridgereg.coef_, [0.8, 1.4])


-def test_sklearnex_import_lasso():
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import_lasso(dataframe, queue):
     from sklearnex.linear_model import Lasso

     X = [[0, 0], [1, 1], [2, 2]]
     y = [0, 1, 2]
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
     lasso = Lasso(alpha=0.1).fit(X, y)
     assert "daal4py" in lasso.__module__
     assert_allclose(lasso.intercept_, 0.15)
     assert_allclose(lasso.coef_, [0.85, 0.0])


-def test_sklearnex_import_elastic():
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import_elastic(dataframe, queue):
     from sklearnex.linear_model import ElasticNet

     X, y = make_regression(n_features=2, random_state=0)
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
     elasticnet = ElasticNet(random_state=0).fit(X, y)
     assert "daal4py" in elasticnet.__module__
     assert_allclose(elasticnet.intercept_, 1.451, atol=1e-3)
@@ -115,3 +128,15 @@ def test_sklearnex_reconstruct_model(dataframe, queue, dtype):

     tol = 1e-5 if _as_numpy(y_pred).dtype == np.float32 else 1e-7
     assert_allclose(gtr, _as_numpy(y_pred), rtol=tol)
+
+
+def test_sklearnex_multivariate_ridge_coefs():
+    from sklearnex.linear_model import Ridge
+
+    _test_multivariate_ridge_coefficients(Ridge, random_state=0)
+
+
+def test_sklearnex_multivariate_ridge_alpha_shape():
+    from sklearnex.linear_model import Ridge
+
+    _test_multivariate_ridge_alpha_shape(Ridge, random_state=0)

sklearnex/linear_model/tests/test_logreg.py

@@ -14,8 +14,11 @@
 # limitations under the License.
 # ===============================================================================

+import numpy as np
 import pytest
-from sklearn.datasets import load_breast_cancer, load_iris
+from numpy.testing import assert_allclose, assert_array_equal
+from scipy.sparse import csr_matrix
+from sklearn.datasets import load_breast_cancer, load_iris, make_classification
 from sklearn.metrics import accuracy_score
 from sklearn.model_selection import train_test_split
@@ -24,7 +27,9 @@ from onedal.tests.utils._dataframes_support import (
     _as_numpy,
     _convert_to_dataframe,
     get_dataframes_and_queues,
+    get_queues,
 )
+from sklearnex import config_context


 def prepare_input(X, y, dataframe, queue):
@@ -38,8 +43,7 @@ def prepare_input(X, y, dataframe, queue):


 @pytest.mark.parametrize(
-    "dataframe,queue",
-    get_dataframes_and_queues(device_filter_="cpu"),
+    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
 )
 def test_sklearnex_multiclass_classification(dataframe, queue):
     from sklearnex.linear_model import LogisticRegression
@@ -89,3 +93,41 @@ def test_sklearnex_binary_classification(dataframe, queue):

     y_pred = _as_numpy(logreg.predict(X_test))
     assert accuracy_score(y_test, y_pred) > 0.95
+
+
+if daal_check_version((2024, "P", 700)):
+
+    @pytest.mark.parametrize("queue", get_queues("gpu"))
+    @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+    @pytest.mark.parametrize(
+        "dims", [(3007, 17, 0.05), (50000, 100, 0.01), (512, 10, 0.5)]
+    )
+    def test_csr(queue, dtype, dims):
+        from sklearnex.linear_model import LogisticRegression
+
+        n, p, density = dims
+
+        # Create sparse dataset for classification
+        X, y = make_classification(n, p, random_state=42)
+        X = X.astype(dtype)
+        y = y.astype(dtype)
+        np.random.seed(2007 + n + p)
+        mask = np.random.binomial(1, density, (n, p))
+        X = X * mask
+        X_sp = csr_matrix(X)
+
+        model = LogisticRegression(fit_intercept=True, solver="newton-cg")
+        model_sp = LogisticRegression(fit_intercept=True, solver="newton-cg")
+
+        with config_context(target_offload="gpu:0"):
+            model.fit(X, y)
+            pred = model.predict(X)
+            prob = model.predict_proba(X)
+            model_sp.fit(X_sp, y)
+            pred_sp = model_sp.predict(X_sp)
+            prob_sp = model_sp.predict_proba(X_sp)
+
+        assert_allclose(pred, pred_sp)
+        assert_allclose(prob, prob_sp)
+        assert_allclose(model.coef_, model_sp.coef_, rtol=1e-4)
+        assert_allclose(model.intercept_, model_sp.intercept_, rtol=1e-4)

sklearnex/manifold/t_sne.py

@@ -15,3 +15,7 @@
 # ===============================================================================

 from daal4py.sklearn.manifold import TSNE
+from onedal._device_offload import support_usm_ndarray
+
+TSNE.fit = support_usm_ndarray(queue_param=False)(TSNE.fit)
+TSNE.fit_transform = support_usm_ndarray(queue_param=False)(TSNE.fit_transform)

sklearnex/metrics/pairwise.py

@@ -15,3 +15,8 @@
 # ===============================================================================

 from daal4py.sklearn.metrics import pairwise_distances
+from onedal._device_offload import support_usm_ndarray
+
+pairwise_distances = support_usm_ndarray(freefunc=True, queue_param=False)(
+    pairwise_distances
+)

sklearnex/metrics/ranking.py

@@ -15,3 +15,6 @@
 # ===============================================================================

 from daal4py.sklearn.metrics import roc_auc_score
+from onedal._device_offload import support_usm_ndarray
+
+roc_auc_score = support_usm_ndarray(freefunc=True, queue_param=False)(roc_auc_score)

sklearnex/model_selection/split.py

@@ -15,3 +15,6 @@
 # ===============================================================================

 from daal4py.sklearn.model_selection import train_test_split
+from onedal._device_offload import support_usm_ndarray
+
+train_test_split = support_usm_ndarray(freefunc=True, queue_param=False)(train_test_split)
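
The freefunc=True form wraps module-level functions rather than bound methods, so the patched train_test_split (and roc_auc_score / pairwise_distances above) accept USM arrays directly. A rough usage sketch, assuming dpctl is installed and a SYCL device is available (shapes and values are illustrative):

    import numpy as np
    import dpctl.tensor as dpt               # USM ndarray container
    from sklearnex.model_selection import train_test_split

    X = dpt.asarray(np.arange(20, dtype=np.float64).reshape(10, 2))
    y = dpt.asarray(np.arange(10, dtype=np.float64))

    # the wrapped free function accepts USM ndarrays; the inputs are converted to
    # host arrays before the daal4py implementation runs
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
    print(X_train.shape, X_test.shape)       # (7, 2) (3, 2)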

sklearnex/neighbors/_lof.py

@@ -97,6 +97,15 @@ class LocalOutlierFactor(KNeighborsDispatchingBase, sklearn_LocalOutlierFactor):
             self.negative_outlier_factor_, 100.0 * self.contamination
         )

+        # adoption of warning for data with duplicated samples from
+        # https://github.com/scikit-learn/scikit-learn/pull/28773
+        if sklearn_check_version("1.6"):
+            if np.min(self.negative_outlier_factor_) < -1e7 and not self.novelty:
+                warnings.warn(
+                    "Duplicate values are leading to incorrect results. "
+                    "Increase the number of neighbors for more accurate results."
+                )
+
         return self

     def fit(self, X, y=None):