scikit-learn-intelex 2024.3.0__py310-none-manylinux1_x86_64.whl → 2024.5.0__py310-none-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. More details are linked from the original diff page.

Files changed (43)
  1. {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/METADATA +2 -2
  2. {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/RECORD +43 -37
  3. sklearnex/_device_offload.py +39 -5
  4. sklearnex/basic_statistics/__init__.py +2 -1
  5. sklearnex/basic_statistics/incremental_basic_statistics.py +288 -0
  6. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +384 -0
  7. sklearnex/covariance/incremental_covariance.py +217 -30
  8. sklearnex/covariance/tests/test_incremental_covariance.py +54 -17
  9. sklearnex/decomposition/pca.py +71 -19
  10. sklearnex/decomposition/tests/test_pca.py +2 -2
  11. sklearnex/dispatcher.py +33 -2
  12. sklearnex/ensemble/_forest.py +73 -79
  13. sklearnex/linear_model/__init__.py +5 -3
  14. sklearnex/linear_model/incremental_linear.py +387 -0
  15. sklearnex/linear_model/linear.py +275 -340
  16. sklearnex/linear_model/logistic_regression.py +50 -9
  17. sklearnex/linear_model/tests/test_incremental_linear.py +200 -0
  18. sklearnex/linear_model/tests/test_linear.py +40 -5
  19. sklearnex/neighbors/_lof.py +53 -36
  20. sklearnex/neighbors/common.py +4 -1
  21. sklearnex/neighbors/knn_classification.py +37 -122
  22. sklearnex/neighbors/knn_regression.py +10 -117
  23. sklearnex/neighbors/knn_unsupervised.py +6 -78
  24. sklearnex/neighbors/tests/test_neighbors.py +2 -2
  25. sklearnex/preview/cluster/k_means.py +5 -73
  26. sklearnex/preview/covariance/covariance.py +6 -5
  27. sklearnex/preview/covariance/tests/test_covariance.py +18 -5
  28. sklearnex/svm/_common.py +4 -7
  29. sklearnex/svm/nusvc.py +66 -50
  30. sklearnex/svm/nusvr.py +3 -49
  31. sklearnex/svm/svc.py +66 -51
  32. sklearnex/svm/svr.py +3 -49
  33. sklearnex/tests/_utils.py +34 -16
  34. sklearnex/tests/test_memory_usage.py +5 -1
  35. sklearnex/tests/test_n_jobs_support.py +12 -2
  36. sklearnex/tests/test_patching.py +87 -58
  37. sklearnex/tests/test_run_to_run_stability_tests.py +1 -1
  38. sklearnex/utils/__init__.py +2 -1
  39. sklearnex/utils/_namespace.py +97 -0
  40. sklearnex/utils/tests/test_finite.py +89 -0
  41. {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/LICENSE.txt +0 -0
  42. {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/WHEEL +0 -0
  43. {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/top_level.txt +0 -0
@@ -14,18 +14,33 @@
14
14
  # limitations under the License.
15
15
  # ===============================================================================
16
16
 
17
+ import numbers
18
+ import warnings
19
+
17
20
  import numpy as np
21
+ from scipy import linalg
22
+ from sklearn.base import BaseEstimator
23
+ from sklearn.covariance import EmpiricalCovariance as sklearn_EmpiricalCovariance
18
24
  from sklearn.utils import check_array, gen_batches
19
25
 
20
26
  from daal4py.sklearn._n_jobs_support import control_n_jobs
27
+ from daal4py.sklearn._utils import daal_check_version, sklearn_check_version
21
28
  from onedal._device_offload import support_usm_ndarray
22
29
  from onedal.covariance import (
23
30
  IncrementalEmpiricalCovariance as onedal_IncrementalEmpiricalCovariance,
24
31
  )
32
+ from sklearnex import config_context
33
+
34
+ from .._device_offload import dispatch, wrap_output_data
35
+ from .._utils import PatchingConditionsChain, register_hyperparameters
36
+ from ..metrics import pairwise_distances
25
37
 
38
+ if sklearn_check_version("1.2"):
39
+ from sklearn.utils._param_validation import Interval
26
40
 
27
- @control_n_jobs(decorated_methods=["partial_fit"])
28
- class IncrementalEmpiricalCovariance:
41
+
42
+ @control_n_jobs(decorated_methods=["partial_fit", "fit", "_onedal_finalize_fit"])
43
+ class IncrementalEmpiricalCovariance(BaseEstimator):
29
44
  """
30
45
  Incremental estimator for covariance.
31
46
  Allows to compute empirical covariance estimated by maximum
@@ -33,12 +48,25 @@ class IncrementalEmpiricalCovariance:
33
48
 
34
49
  Parameters
35
50
  ----------
51
+ store_precision : bool, default=False
52
+ Specifies if the estimated precision is stored.
53
+
54
+ assume_centered : bool, default=False
55
+ If True, data are not centered before computation.
56
+ Useful when working with data whose mean is almost, but not exactly
57
+ zero.
58
+ If False (default), data are centered before computation.
59
+
36
60
  batch_size : int, default=None
37
61
  The number of samples to use for each batch. Only used when calling
38
62
  ``fit``. If ``batch_size`` is ``None``, then ``batch_size``
39
63
  is inferred from the data and set to ``5 * n_features``, to provide a
40
64
  balance between approximation accuracy and memory consumption.
41
65
 
66
+ copy : bool, default=True
67
+ If False, X will be overwritten. ``copy=False`` can be used to
68
+ save memory but is unsafe for general use.
69
+
42
70
  Attributes
43
71
  ----------
44
72
  location_ : ndarray of shape (n_features,)
@@ -46,44 +74,130 @@ class IncrementalEmpiricalCovariance:
46
74
 
47
75
  covariance_ : ndarray of shape (n_features, n_features)
48
76
  Estimated covariance matrix
77
+
78
+ n_samples_seen_ : int
79
+ The number of samples processed by the estimator. Will be reset on
80
+ new calls to fit, but increments across ``partial_fit`` calls.
81
+
82
+ batch_size_ : int
83
+ Inferred batch size from ``batch_size``.
84
+
85
+ n_features_in_ : int
86
+ Number of features seen during :term:`fit` `partial_fit`.
49
87
  """
50
88
 
51
89
  _onedal_incremental_covariance = staticmethod(onedal_IncrementalEmpiricalCovariance)
52
90
 
53
- def __init__(self, batch_size=None):
54
- self._need_to_finalize = False # If True then finalize compute should
55
- # be called to obtain covariance_ or location_ from partial compute data
91
+ if sklearn_check_version("1.2"):
92
+ _parameter_constraints: dict = {
93
+ "store_precision": ["boolean"],
94
+ "assume_centered": ["boolean"],
95
+ "batch_size": [Interval(numbers.Integral, 1, None, closed="left"), None],
96
+ "copy": ["boolean"],
97
+ }
98
+
99
+ get_precision = sklearn_EmpiricalCovariance.get_precision
100
+ error_norm = wrap_output_data(sklearn_EmpiricalCovariance.error_norm)
101
+ score = wrap_output_data(sklearn_EmpiricalCovariance.score)
102
+
103
+ def __init__(
104
+ self, *, store_precision=False, assume_centered=False, batch_size=None, copy=True
105
+ ):
106
+ self.assume_centered = assume_centered
107
+ self.store_precision = store_precision
56
108
  self.batch_size = batch_size
109
+ self.copy = copy
110
+
111
+ def _onedal_supported(self, method_name, *data):
112
+ patching_status = PatchingConditionsChain(
113
+ f"sklearn.covariance.{self.__class__.__name__}.{method_name}"
114
+ )
115
+ return patching_status
57
116
 
58
117
  def _onedal_finalize_fit(self):
59
118
  assert hasattr(self, "_onedal_estimator")
60
119
  self._onedal_estimator.finalize_fit()
61
120
  self._need_to_finalize = False
62
121
 
63
- def _onedal_partial_fit(self, X, queue):
122
+ if not daal_check_version((2024, "P", 400)) and self.assume_centered:
123
+ location = self._onedal_estimator.location_[None, :]
124
+ self._onedal_estimator.covariance_ += np.dot(location.T, location)
125
+ self._onedal_estimator.location_ = np.zeros_like(np.squeeze(location))
126
+ if self.store_precision:
127
+ self.precision_ = linalg.pinvh(
128
+ self._onedal_estimator.covariance_, check_finite=False
129
+ )
130
+ else:
131
+ self.precision_ = None
132
+
133
+ @property
134
+ def covariance_(self):
135
+ if hasattr(self, "_onedal_estimator"):
136
+ if self._need_to_finalize:
137
+ self._onedal_finalize_fit()
138
+ return self._onedal_estimator.covariance_
139
+ else:
140
+ raise AttributeError(
141
+ f"'{self.__class__.__name__}' object has no attribute 'covariance_'"
142
+ )
143
+
144
+ @property
145
+ def location_(self):
146
+ if hasattr(self, "_onedal_estimator"):
147
+ if self._need_to_finalize:
148
+ self._onedal_finalize_fit()
149
+ return self._onedal_estimator.location_
150
+ else:
151
+ raise AttributeError(
152
+ f"'{self.__class__.__name__}' object has no attribute 'location_'"
153
+ )
154
+
155
+ def _onedal_partial_fit(self, X, queue=None, check_input=True):
156
+
157
+ first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0
158
+
159
+ # finite check occurs on onedal side
160
+ if check_input:
161
+ if sklearn_check_version("1.2"):
162
+ self._validate_params()
163
+
164
+ if sklearn_check_version("1.0"):
165
+ X = self._validate_data(
166
+ X,
167
+ dtype=[np.float64, np.float32],
168
+ reset=first_pass,
169
+ copy=self.copy,
170
+ force_all_finite=False,
171
+ )
172
+ else:
173
+ X = check_array(
174
+ X,
175
+ dtype=[np.float64, np.float32],
176
+ copy=self.copy,
177
+ force_all_finite=False,
178
+ )
179
+
64
180
  onedal_params = {
65
181
  "method": "dense",
66
182
  "bias": True,
183
+ "assume_centered": self.assume_centered,
67
184
  }
68
185
  if not hasattr(self, "_onedal_estimator"):
69
186
  self._onedal_estimator = self._onedal_incremental_covariance(**onedal_params)
70
- self._onedal_estimator.partial_fit(X, queue)
71
- self._need_to_finalize = True
187
+ try:
188
+ if first_pass:
189
+ self.n_samples_seen_ = X.shape[0]
190
+ self.n_features_in_ = X.shape[1]
191
+ else:
192
+ self.n_samples_seen_ += X.shape[0]
72
193
 
73
- @property
74
- def covariance_(self):
75
- if self._need_to_finalize:
76
- self._onedal_finalize_fit()
77
- return self._onedal_estimator.covariance_
194
+ self._onedal_estimator.partial_fit(X, queue)
195
+ finally:
196
+ self._need_to_finalize = True
78
197
 
79
- @property
80
- def location_(self):
81
- if self._need_to_finalize:
82
- self._onedal_finalize_fit()
83
- return self._onedal_estimator.location_
198
+ return self
84
199
 
85
- @support_usm_ndarray()
86
- def partial_fit(self, X, queue=None):
200
+ def partial_fit(self, X, y=None, check_input=True):
87
201
  """
88
202
  Incremental fit with X. All of X is processed as a single batch.
89
203
 
@@ -93,16 +207,29 @@ class IncrementalEmpiricalCovariance:
93
207
  Training data, where `n_samples` is the number of samples and
94
208
  `n_features` is the number of features.
95
209
 
210
+ y : Ignored
211
+ Not used, present for API consistency by convention.
212
+
213
+ check_input : bool, default=True
214
+ Run check_array on X.
215
+
96
216
  Returns
97
217
  -------
98
218
  self : object
99
219
  Returns the instance itself.
100
220
  """
101
- X = check_array(X, dtype=[np.float64, np.float32])
102
- self._onedal_partial_fit(X, queue)
103
- return self
221
+ return dispatch(
222
+ self,
223
+ "partial_fit",
224
+ {
225
+ "onedal": self.__class__._onedal_partial_fit,
226
+ "sklearn": None,
227
+ },
228
+ X,
229
+ check_input=check_input,
230
+ )
104
231
 
105
- def fit(self, X, queue=None):
232
+ def fit(self, X, y=None):
106
233
  """
107
234
  Fit the model with X, using minibatches of size batch_size.
108
235
 
@@ -112,19 +239,79 @@ class IncrementalEmpiricalCovariance:
112
239
  Training data, where `n_samples` is the number of samples and
113
240
  `n_features` is the number of features.
114
241
 
242
+ y : Ignored
243
+ Not used, present for API consistency by convention.
244
+
115
245
  Returns
116
246
  -------
117
247
  self : object
118
248
  Returns the instance itself.
119
249
  """
120
- n_samples, n_features = X.shape
121
- if self.batch_size is None:
122
- batch_size_ = 5 * n_features
250
+
251
+ return dispatch(
252
+ self,
253
+ "fit",
254
+ {
255
+ "onedal": self.__class__._onedal_fit,
256
+ "sklearn": None,
257
+ },
258
+ X,
259
+ )
260
+
261
+ def _onedal_fit(self, X, queue=None):
262
+ self.n_samples_seen_ = 0
263
+ if hasattr(self, "_onedal_estimator"):
264
+ self._onedal_estimator._reset()
265
+
266
+ if sklearn_check_version("1.2"):
267
+ self._validate_params()
268
+
269
+ # finite check occurs on onedal side
270
+ if sklearn_check_version("1.0"):
271
+ X = self._validate_data(
272
+ X, dtype=[np.float64, np.float32], copy=self.copy, force_all_finite=False
273
+ )
123
274
  else:
124
- batch_size_ = self.batch_size
125
- for batch in gen_batches(n_samples, batch_size_):
275
+ X = check_array(
276
+ X, dtype=[np.float64, np.float32], copy=self.copy, force_all_finite=False
277
+ )
278
+ self.n_features_in_ = X.shape[1]
279
+
280
+ self.batch_size_ = self.batch_size if self.batch_size else 5 * self.n_features_in_
281
+
282
+ if X.shape[0] == 1:
283
+ warnings.warn(
284
+ "Only one sample available. You may want to reshape your data array"
285
+ )
286
+
287
+ for batch in gen_batches(X.shape[0], self.batch_size_):
126
288
  X_batch = X[batch]
127
- self.partial_fit(X_batch, queue=queue)
289
+ self._onedal_partial_fit(X_batch, queue=queue, check_input=False)
128
290
 
129
291
  self._onedal_finalize_fit()
292
+
130
293
  return self
294
+
295
+ # expose sklearnex pairwise_distances if mahalanobis distance eventually supported
296
+ @wrap_output_data
297
+ def mahalanobis(self, X):
298
+ if sklearn_check_version("1.0"):
299
+ self._validate_data(X, reset=False, copy=self.copy)
300
+ else:
301
+ check_array(X, copy=self.copy)
302
+
303
+ precision = self.get_precision()
304
+ with config_context(assume_finite=True):
305
+ # compute mahalanobis distances
306
+ dist = pairwise_distances(
307
+ X, self.location_[np.newaxis, :], metric="mahalanobis", VI=precision
308
+ )
309
+
310
+ return np.reshape(dist, (len(X),)) ** 2
311
+
312
+ _onedal_cpu_supported = _onedal_supported
313
+ _onedal_gpu_supported = _onedal_supported
314
+
315
+ mahalanobis.__doc__ = sklearn_EmpiricalCovariance.mahalanobis.__doc__
316
+ error_norm.__doc__ = sklearn_EmpiricalCovariance.error_norm.__doc__
317
+ score.__doc__ = sklearn_EmpiricalCovariance.score.__doc__
@@ -17,6 +17,10 @@
17
17
  import numpy as np
18
18
  import pytest
19
19
  from numpy.testing import assert_allclose
20
+ from sklearn.covariance.tests.test_covariance import (
21
+ test_covariance,
22
+ test_EmpiricalCovariance_validates_mahalanobis,
23
+ )
20
24
 
21
25
  from onedal.tests.utils._dataframes_support import (
22
26
  _convert_to_dataframe,
@@ -26,13 +30,14 @@ from onedal.tests.utils._dataframes_support import (
26
30
 
27
31
  @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
28
32
  @pytest.mark.parametrize("dtype", [np.float32, np.float64])
29
- def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype):
33
+ @pytest.mark.parametrize("assume_centered", [True, False])
34
+ def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype, assume_centered):
30
35
  from sklearnex.covariance import IncrementalEmpiricalCovariance
31
36
 
32
37
  X = np.array([[0, 1], [0, 1]])
33
38
  X = X.astype(dtype)
34
39
  X_split = np.array_split(X, 2)
35
- inccov = IncrementalEmpiricalCovariance()
40
+ inccov = IncrementalEmpiricalCovariance(assume_centered=assume_centered)
36
41
 
37
42
  for i in range(2):
38
43
  X_split_df = _convert_to_dataframe(
@@ -40,8 +45,12 @@ def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype):
40
45
  )
41
46
  result = inccov.partial_fit(X_split_df)
42
47
 
43
- expected_covariance = np.array([[0, 0], [0, 0]])
44
- expected_means = np.array([0, 1])
48
+ if assume_centered:
49
+ expected_covariance = np.array([[0, 0], [0, 1]])
50
+ expected_means = np.array([0, 0])
51
+ else:
52
+ expected_covariance = np.array([[0, 0], [0, 0]])
53
+ expected_means = np.array([0, 1])
45
54
 
46
55
  assert_allclose(expected_covariance, result.covariance_)
47
56
  assert_allclose(expected_means, result.location_)
@@ -49,7 +58,7 @@ def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype):
49
58
  X = np.array([[1, 2], [3, 6]])
50
59
  X = X.astype(dtype)
51
60
  X_split = np.array_split(X, 2)
52
- inccov = IncrementalEmpiricalCovariance()
61
+ inccov = IncrementalEmpiricalCovariance(assume_centered=assume_centered)
53
62
 
54
63
  for i in range(2):
55
64
  X_split_df = _convert_to_dataframe(
@@ -57,8 +66,12 @@ def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype):
57
66
  )
58
67
  result = inccov.partial_fit(X_split_df)
59
68
 
60
- expected_covariance = np.array([[1, 2], [2, 4]])
61
- expected_means = np.array([2, 4])
69
+ if assume_centered:
70
+ expected_covariance = np.array([[5, 10], [10, 20]])
71
+ expected_means = np.array([0, 0])
72
+ else:
73
+ expected_covariance = np.array([[1, 2], [2, 4]])
74
+ expected_means = np.array([2, 4])
62
75
 
63
76
  assert_allclose(expected_covariance, result.covariance_)
64
77
  assert_allclose(expected_means, result.location_)
@@ -87,9 +100,9 @@ def test_sklearnex_fit_on_gold_data(dataframe, queue, batch_size, dtype):
87
100
 
88
101
 
89
102
  @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
90
- @pytest.mark.parametrize("num_batches", [2, 4, 6, 8, 10])
91
- @pytest.mark.parametrize("row_count", [100, 1000, 2000])
92
- @pytest.mark.parametrize("column_count", [10, 100, 200])
103
+ @pytest.mark.parametrize("num_batches", [2, 10])
104
+ @pytest.mark.parametrize("row_count", [100, 1000])
105
+ @pytest.mark.parametrize("column_count", [10, 100])
93
106
  @pytest.mark.parametrize("dtype", [np.float32, np.float64])
94
107
  def test_sklearnex_partial_fit_on_random_data(
95
108
  dataframe, queue, num_batches, row_count, column_count, dtype
@@ -117,12 +130,13 @@ def test_sklearnex_partial_fit_on_random_data(
117
130
 
118
131
 
119
132
  @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
120
- @pytest.mark.parametrize("num_batches", [2, 4, 6, 8, 10])
121
- @pytest.mark.parametrize("row_count", [100, 1000, 2000])
122
- @pytest.mark.parametrize("column_count", [10, 100, 200])
133
+ @pytest.mark.parametrize("num_batches", [2, 10])
134
+ @pytest.mark.parametrize("row_count", [100, 1000])
135
+ @pytest.mark.parametrize("column_count", [10, 100])
123
136
  @pytest.mark.parametrize("dtype", [np.float32, np.float64])
137
+ @pytest.mark.parametrize("assume_centered", [True, False])
124
138
  def test_sklearnex_fit_on_random_data(
125
- dataframe, queue, num_batches, row_count, column_count, dtype
139
+ dataframe, queue, num_batches, row_count, column_count, dtype, assume_centered
126
140
  ):
127
141
  from sklearnex.covariance import IncrementalEmpiricalCovariance
128
142
 
@@ -132,12 +146,35 @@ def test_sklearnex_fit_on_random_data(
132
146
  X = X.astype(dtype)
133
147
  X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
134
148
  batch_size = row_count // num_batches
135
- inccov = IncrementalEmpiricalCovariance(batch_size=batch_size)
149
+ inccov = IncrementalEmpiricalCovariance(
150
+ batch_size=batch_size, assume_centered=assume_centered
151
+ )
136
152
 
137
153
  result = inccov.fit(X_df)
138
154
 
139
- expected_covariance = np.cov(X.T, bias=1)
140
- expected_means = np.mean(X, axis=0)
155
+ if assume_centered:
156
+ expected_covariance = np.dot(X.T, X) / X.shape[0]
157
+ expected_means = np.zeros_like(X[0])
158
+ else:
159
+ expected_covariance = np.cov(X.T, bias=1)
160
+ expected_means = np.mean(X, axis=0)
141
161
 
142
162
  assert_allclose(expected_covariance, result.covariance_, atol=1e-6)
143
163
  assert_allclose(expected_means, result.location_, atol=1e-6)
164
+
165
+
166
+ # Monkeypatch IncrementalEmpiricalCovariance into relevant sklearn.covariance tests
167
+ @pytest.mark.allow_sklearn_fallback
168
+ @pytest.mark.parametrize(
169
+ "sklearn_test",
170
+ [
171
+ test_covariance,
172
+ test_EmpiricalCovariance_validates_mahalanobis,
173
+ ],
174
+ )
175
+ def test_IncrementalEmpiricalCovariance_against_sklearn(monkeypatch, sklearn_test):
176
+ from sklearnex.covariance import IncrementalEmpiricalCovariance
177
+
178
+ class_name = ".".join([sklearn_test.__module__, "EmpiricalCovariance"])
179
+ monkeypatch.setattr(class_name, IncrementalEmpiricalCovariance)
180
+ sklearn_test()
@@ -21,6 +21,7 @@ from daal4py.sklearn._utils import daal_check_version
21
21
  if daal_check_version((2024, "P", 100)):
22
22
  import numbers
23
23
  from math import sqrt
24
+ from warnings import warn
24
25
 
25
26
  import numpy as np
26
27
  from scipy.sparse import issparse
@@ -35,9 +36,13 @@ if daal_check_version((2024, "P", 100)):
35
36
  if sklearn_check_version("1.1") and not sklearn_check_version("1.2"):
36
37
  from sklearn.utils import check_scalar
37
38
 
39
+ if sklearn_check_version("1.2"):
40
+ from sklearn.utils._param_validation import StrOptions
41
+
38
42
  from sklearn.decomposition import PCA as sklearn_PCA
39
43
 
40
44
  from onedal.decomposition import PCA as onedal_PCA
45
+ from sklearnex.utils import get_namespace
41
46
 
42
47
  @control_n_jobs(decorated_methods=["fit", "transform", "fit_transform"])
43
48
  class PCA(sklearn_PCA):
@@ -45,6 +50,16 @@ if daal_check_version((2024, "P", 100)):
45
50
 
46
51
  if sklearn_check_version("1.2"):
47
52
  _parameter_constraints: dict = {**sklearn_PCA._parameter_constraints}
53
+ # "onedal_svd" solver uses oneDAL's PCA-SVD algorithm
54
+ # and required for testing purposes to fully enable it in future.
55
+ # "covariance_eigh" solver is added for ability to explicitly request
56
+ # oneDAL's PCA-Covariance algorithm using any sklearn version < 1.5.
57
+ _parameter_constraints["svd_solver"] = [
58
+ StrOptions(
59
+ _parameter_constraints["svd_solver"][0].options
60
+ | {"onedal_svd", "covariance_eigh"}
61
+ )
62
+ ]
48
63
 
49
64
  if sklearn_check_version("1.1"):
50
65
 
@@ -95,6 +110,7 @@ if daal_check_version((2024, "P", 100)):
95
110
  self._fit(X)
96
111
  return self
97
112
 
113
+ @wrap_output_data
98
114
  def _fit(self, X):
99
115
  if sklearn_check_version("1.2"):
100
116
  self._validate_params()
@@ -106,7 +122,7 @@ if daal_check_version((2024, "P", 100)):
106
122
  target_type=numbers.Integral,
107
123
  )
108
124
 
109
- U, S, Vt = dispatch(
125
+ return dispatch(
110
126
  self,
111
127
  "fit",
112
128
  {
@@ -115,7 +131,6 @@ if daal_check_version((2024, "P", 100)):
115
131
  },
116
132
  X,
117
133
  )
118
- return U, S, Vt
119
134
 
120
135
  def _onedal_fit(self, X, queue=None):
121
136
  X = self._validate_data(
@@ -128,7 +143,7 @@ if daal_check_version((2024, "P", 100)):
128
143
  onedal_params = {
129
144
  "n_components": self.n_components,
130
145
  "is_deterministic": True,
131
- "method": "cov",
146
+ "method": "svd" if self._fit_svd_solver == "onedal_svd" else "cov",
132
147
  "whiten": self.whiten,
133
148
  }
134
149
  self._onedal_estimator = onedal_PCA(**onedal_params)
@@ -139,7 +154,13 @@ if daal_check_version((2024, "P", 100)):
139
154
  S = self.singular_values_
140
155
  Vt = self.components_
141
156
 
142
- return U, S, Vt
157
+ if sklearn_check_version("1.5"):
158
+ xp, _ = get_namespace(X)
159
+ x_is_centered = not self.copy
160
+
161
+ return U, S, Vt, X, x_is_centered, xp
162
+ else:
163
+ return U, S, Vt
143
164
 
144
165
  @wrap_output_data
145
166
  def transform(self, X):
@@ -155,34 +176,39 @@ if daal_check_version((2024, "P", 100)):
155
176
 
156
177
  def _onedal_transform(self, X, queue=None):
157
178
  check_is_fitted(self)
179
+ if sklearn_check_version("1.0"):
180
+ self._check_feature_names(X, reset=False)
158
181
  X = self._validate_data(
159
182
  X,
160
183
  dtype=[np.float64, np.float32],
161
184
  reset=False,
162
185
  )
163
186
  self._validate_n_features_in_after_fitting(X)
164
- if sklearn_check_version("1.0"):
165
- self._check_feature_names(X, reset=False)
166
187
 
167
188
  return self._onedal_estimator.predict(X, queue=queue)
168
189
 
169
- @wrap_output_data
170
190
  def fit_transform(self, X, y=None):
171
- U, S, Vt = self._fit(X)
172
- if U is None:
173
- # oneDAL PCA was fit
174
- X_transformed = self._onedal_transform(X)
175
- return X_transformed
191
+ if sklearn_check_version("1.5"):
192
+ U, S, Vt, X_fit, x_is_centered, xp = self._fit(X)
176
193
  else:
194
+ U, S, Vt = self._fit(X)
195
+ X_fit = X
196
+ if hasattr(self, "_onedal_estimator"):
197
+ # oneDAL PCA was fit
198
+ return self.transform(X)
199
+ elif U is not None:
177
200
  # Scikit-learn PCA was fit
178
201
  U = U[:, : self.n_components_]
179
202
 
180
203
  if self.whiten:
181
- U *= sqrt(X.shape[0] - 1)
204
+ U *= sqrt(X_fit.shape[0] - 1)
182
205
  else:
183
206
  U *= S[: self.n_components_]
184
207
 
185
208
  return U
209
+ else:
210
+ # Scikit-learn PCA["covariance_eigh"] was fit
211
+ return self._transform(X_fit, xp, x_is_centered=x_is_centered)
186
212
 
187
213
  def _onedal_supported(self, method_name, X):
188
214
  class_name = self.__class__.__name__
@@ -200,7 +226,13 @@ if daal_check_version((2024, "P", 100)):
200
226
  ),
201
227
  (
202
228
  self._is_solver_compatible_with_onedal(shape_tuple),
203
- f"Only 'full' svd solver is supported.",
229
+ (
230
+ "Only 'covariance_eigh' and 'onedal_svd' "
231
+ "solvers are supported."
232
+ if sklearn_check_version("1.5")
233
+ else "Only 'full', 'covariance_eigh' and 'onedal_svd' "
234
+ "solvers are supported."
235
+ ),
204
236
  ),
205
237
  (not issparse(X), "oneDAL PCA does not support sparse data"),
206
238
  ]
@@ -255,7 +287,13 @@ if daal_check_version((2024, "P", 100)):
255
287
 
256
288
  if self._fit_svd_solver == "auto":
257
289
  if sklearn_check_version("1.1"):
258
- if max(shape_tuple) <= 500 or n_components == "mle":
290
+ if (
291
+ sklearn_check_version("1.5")
292
+ and shape_tuple[1] <= 1_000
293
+ and shape_tuple[0] >= 10 * shape_tuple[1]
294
+ ):
295
+ self._fit_svd_solver = "covariance_eigh"
296
+ elif max(shape_tuple) <= 500 or n_components == "mle":
259
297
  self._fit_svd_solver = "full"
260
298
  elif 1 <= n_components < 0.8 * n_sf_min:
261
299
  self._fit_svd_solver = "randomized"
@@ -289,7 +327,23 @@ if daal_check_version((2024, "P", 100)):
289
327
  else:
290
328
  self._fit_svd_solver = "full"
291
329
 
292
- if self._fit_svd_solver == "full":
330
+ # Use oneDAL in next cases:
331
+ # 1. oneDAL SVD solver is explicitly set
332
+ # 2. solver is set or dispatched to "covariance_eigh"
333
+ # 3. solver is set or dispatched to "full" and sklearn version < 1.5
334
+ # 4. solver is set to "auto" and dispatched to "full"
335
+ if self._fit_svd_solver in ["onedal_svd", "covariance_eigh"]:
336
+ return True
337
+ elif not sklearn_check_version("1.5") and self._fit_svd_solver == "full":
338
+ self._fit_svd_solver = "covariance_eigh"
339
+ return True
340
+ elif self.svd_solver == "auto" and self._fit_svd_solver == "full":
341
+ warn(
342
+ "Sklearnex always uses `covariance_eigh` solver instead of `full` "
343
+ "when `svd_solver` parameter is set to `auto` "
344
+ "for performance purposes."
345
+ )
346
+ self._fit_svd_solver = "covariance_eigh"
293
347
  return True
294
348
  else:
295
349
  return False
@@ -298,11 +352,9 @@ if daal_check_version((2024, "P", 100)):
298
352
  self.n_samples_ = self._onedal_estimator.n_samples_
299
353
  if sklearn_check_version("1.2"):
300
354
  self.n_features_in_ = self._onedal_estimator.n_features_
301
- elif sklearn_check_version("0.24"):
302
- self.n_features_ = self._onedal_estimator.n_features_
303
- self.n_features_in_ = self._onedal_estimator.n_features_
304
355
  else:
305
356
  self.n_features_ = self._onedal_estimator.n_features_
357
+ self.n_features_in_ = self._onedal_estimator.n_features_
306
358
  self.n_components_ = self._onedal_estimator.n_components_
307
359
  self.components_ = self._onedal_estimator.components_
308
360
  self.mean_ = self._onedal_estimator.mean_
@@ -41,10 +41,10 @@ def test_sklearnex_import(dataframe, queue):
41
41
  [3.6053038, 0.04224385],
42
42
  ]
43
43
 
44
- pca = PCA(n_components=2, svd_solver="full")
44
+ pca = PCA(n_components=2, svd_solver="covariance_eigh")
45
45
  pca.fit(X)
46
46
  X_transformed = pca.transform(X)
47
- X_fit_transformed = PCA(n_components=2, svd_solver="full").fit_transform(X)
47
+ X_fit_transformed = PCA(n_components=2, svd_solver="covariance_eigh").fit_transform(X)
48
48
 
49
49
  if daal_check_version((2024, "P", 100)):
50
50
  assert "sklearnex" in pca.__module__