scikit-learn-intelex 2024.3.0__py38-none-manylinux1_x86_64.whl → 2024.5.0__py38-none-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/METADATA +2 -2
- {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/RECORD +43 -37
- sklearnex/_device_offload.py +39 -5
- sklearnex/basic_statistics/__init__.py +2 -1
- sklearnex/basic_statistics/incremental_basic_statistics.py +288 -0
- sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +384 -0
- sklearnex/covariance/incremental_covariance.py +217 -30
- sklearnex/covariance/tests/test_incremental_covariance.py +54 -17
- sklearnex/decomposition/pca.py +71 -19
- sklearnex/decomposition/tests/test_pca.py +2 -2
- sklearnex/dispatcher.py +33 -2
- sklearnex/ensemble/_forest.py +73 -79
- sklearnex/linear_model/__init__.py +5 -3
- sklearnex/linear_model/incremental_linear.py +387 -0
- sklearnex/linear_model/linear.py +275 -340
- sklearnex/linear_model/logistic_regression.py +50 -9
- sklearnex/linear_model/tests/test_incremental_linear.py +200 -0
- sklearnex/linear_model/tests/test_linear.py +40 -5
- sklearnex/neighbors/_lof.py +53 -36
- sklearnex/neighbors/common.py +4 -1
- sklearnex/neighbors/knn_classification.py +37 -122
- sklearnex/neighbors/knn_regression.py +10 -117
- sklearnex/neighbors/knn_unsupervised.py +6 -78
- sklearnex/neighbors/tests/test_neighbors.py +2 -2
- sklearnex/preview/cluster/k_means.py +5 -73
- sklearnex/preview/covariance/covariance.py +6 -5
- sklearnex/preview/covariance/tests/test_covariance.py +18 -5
- sklearnex/svm/_common.py +4 -7
- sklearnex/svm/nusvc.py +66 -50
- sklearnex/svm/nusvr.py +3 -49
- sklearnex/svm/svc.py +66 -51
- sklearnex/svm/svr.py +3 -49
- sklearnex/tests/_utils.py +34 -16
- sklearnex/tests/test_memory_usage.py +5 -1
- sklearnex/tests/test_n_jobs_support.py +12 -2
- sklearnex/tests/test_patching.py +87 -58
- sklearnex/tests/test_run_to_run_stability_tests.py +1 -1
- sklearnex/utils/__init__.py +2 -1
- sklearnex/utils/_namespace.py +97 -0
- sklearnex/utils/tests/test_finite.py +89 -0
- {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/LICENSE.txt +0 -0
- {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/WHEEL +0 -0
- {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/top_level.txt +0 -0

sklearnex/covariance/incremental_covariance.py
CHANGED

@@ -14,18 +14,33 @@
 # limitations under the License.
 # ===============================================================================
 
+import numbers
+import warnings
+
 import numpy as np
+from scipy import linalg
+from sklearn.base import BaseEstimator
+from sklearn.covariance import EmpiricalCovariance as sklearn_EmpiricalCovariance
 from sklearn.utils import check_array, gen_batches
 
 from daal4py.sklearn._n_jobs_support import control_n_jobs
+from daal4py.sklearn._utils import daal_check_version, sklearn_check_version
 from onedal._device_offload import support_usm_ndarray
 from onedal.covariance import (
     IncrementalEmpiricalCovariance as onedal_IncrementalEmpiricalCovariance,
 )
+from sklearnex import config_context
+
+from .._device_offload import dispatch, wrap_output_data
+from .._utils import PatchingConditionsChain, register_hyperparameters
+from ..metrics import pairwise_distances
 
+if sklearn_check_version("1.2"):
+    from sklearn.utils._param_validation import Interval
 
-@control_n_jobs(decorated_methods=["partial_fit", "_onedal_finalize_fit"])
-class IncrementalEmpiricalCovariance:
+
+@control_n_jobs(decorated_methods=["partial_fit", "fit", "_onedal_finalize_fit"])
+class IncrementalEmpiricalCovariance(BaseEstimator):
     """
     Incremental estimator for covariance.
     Allows to compute empirical covariance estimated by maximum
@@ -33,12 +48,25 @@ class IncrementalEmpiricalCovariance:
 
     Parameters
     ----------
+    store_precision : bool, default=False
+        Specifies if the estimated precision is stored.
+
+    assume_centered : bool, default=False
+        If True, data are not centered before computation.
+        Useful when working with data whose mean is almost, but not exactly
+        zero.
+        If False (default), data are centered before computation.
+
     batch_size : int, default=None
        The number of samples to use for each batch. Only used when calling
        ``fit``. If ``batch_size`` is ``None``, then ``batch_size``
        is inferred from the data and set to ``5 * n_features``, to provide a
        balance between approximation accuracy and memory consumption.
 
+    copy : bool, default=True
+        If False, X will be overwritten. ``copy=False`` can be used to
+        save memory but is unsafe for general use.
+
     Attributes
     ----------
     location_ : ndarray of shape (n_features,)
@@ -46,44 +74,130 @@ class IncrementalEmpiricalCovariance:
 
     covariance_ : ndarray of shape (n_features, n_features)
         Estimated covariance matrix
+
+    n_samples_seen_ : int
+        The number of samples processed by the estimator. Will be reset on
+        new calls to fit, but increments across ``partial_fit`` calls.
+
+    batch_size_ : int
+        Inferred batch size from ``batch_size``.
+
+    n_features_in_ : int
+        Number of features seen during :term:`fit` `partial_fit`.
     """
 
     _onedal_incremental_covariance = staticmethod(onedal_IncrementalEmpiricalCovariance)
 
-
-
-
+    if sklearn_check_version("1.2"):
+        _parameter_constraints: dict = {
+            "store_precision": ["boolean"],
+            "assume_centered": ["boolean"],
+            "batch_size": [Interval(numbers.Integral, 1, None, closed="left"), None],
+            "copy": ["boolean"],
+        }
+
+    get_precision = sklearn_EmpiricalCovariance.get_precision
+    error_norm = wrap_output_data(sklearn_EmpiricalCovariance.error_norm)
+    score = wrap_output_data(sklearn_EmpiricalCovariance.score)
+
+    def __init__(
+        self, *, store_precision=False, assume_centered=False, batch_size=None, copy=True
+    ):
+        self.assume_centered = assume_centered
+        self.store_precision = store_precision
         self.batch_size = batch_size
+        self.copy = copy
+
+    def _onedal_supported(self, method_name, *data):
+        patching_status = PatchingConditionsChain(
+            f"sklearn.covariance.{self.__class__.__name__}.{method_name}"
+        )
+        return patching_status
 
     def _onedal_finalize_fit(self):
         assert hasattr(self, "_onedal_estimator")
         self._onedal_estimator.finalize_fit()
         self._need_to_finalize = False
 
-    def _onedal_partial_fit(self, X, queue):
+        if not daal_check_version((2024, "P", 400)) and self.assume_centered:
+            location = self._onedal_estimator.location_[None, :]
+            self._onedal_estimator.covariance_ += np.dot(location.T, location)
+            self._onedal_estimator.location_ = np.zeros_like(np.squeeze(location))
+        if self.store_precision:
+            self.precision_ = linalg.pinvh(
+                self._onedal_estimator.covariance_, check_finite=False
+            )
+        else:
+            self.precision_ = None
+
+    @property
+    def covariance_(self):
+        if hasattr(self, "_onedal_estimator"):
+            if self._need_to_finalize:
+                self._onedal_finalize_fit()
+            return self._onedal_estimator.covariance_
+        else:
+            raise AttributeError(
+                f"'{self.__class__.__name__}' object has no attribute 'covariance_'"
+            )
+
+    @property
+    def location_(self):
+        if hasattr(self, "_onedal_estimator"):
+            if self._need_to_finalize:
+                self._onedal_finalize_fit()
+            return self._onedal_estimator.location_
+        else:
+            raise AttributeError(
+                f"'{self.__class__.__name__}' object has no attribute 'location_'"
+            )
+
+    def _onedal_partial_fit(self, X, queue=None, check_input=True):
+
+        first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0
+
+        # finite check occurs on onedal side
+        if check_input:
+            if sklearn_check_version("1.2"):
+                self._validate_params()
+
+            if sklearn_check_version("1.0"):
+                X = self._validate_data(
+                    X,
+                    dtype=[np.float64, np.float32],
+                    reset=first_pass,
+                    copy=self.copy,
+                    force_all_finite=False,
+                )
+            else:
+                X = check_array(
+                    X,
+                    dtype=[np.float64, np.float32],
+                    copy=self.copy,
+                    force_all_finite=False,
+                )
+
         onedal_params = {
             "method": "dense",
             "bias": True,
+            "assume_centered": self.assume_centered,
         }
         if not hasattr(self, "_onedal_estimator"):
             self._onedal_estimator = self._onedal_incremental_covariance(**onedal_params)
-        self._onedal_estimator.partial_fit(X, queue)
-        self._need_to_finalize = True
+        try:
+            if first_pass:
+                self.n_samples_seen_ = X.shape[0]
+                self.n_features_in_ = X.shape[1]
+            else:
+                self.n_samples_seen_ += X.shape[0]
 
-    @property
-    def covariance_(self):
-        if self._need_to_finalize:
-            self._onedal_finalize_fit()
-        return self._onedal_estimator.covariance_
+            self._onedal_estimator.partial_fit(X, queue)
+        finally:
+            self._need_to_finalize = True
 
-    @property
-    def location_(self):
-        if self._need_to_finalize:
-            self._onedal_finalize_fit()
-        return self._onedal_estimator.location_
+        return self
 
-    @support_usm_ndarray()
-    def partial_fit(self, X, queue=None):
+    def partial_fit(self, X, y=None, check_input=True):
         """
         Incremental fit with X. All of X is processed as a single batch.
 
@@ -93,16 +207,29 @@ class IncrementalEmpiricalCovariance:
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.
 
+        y : Ignored
+            Not used, present for API consistency by convention.
+
+        check_input : bool, default=True
+            Run check_array on X.
+
         Returns
         -------
         self : object
            Returns the instance itself.
        """
-
-
-
+        return dispatch(
+            self,
+            "partial_fit",
+            {
+                "onedal": self.__class__._onedal_partial_fit,
+                "sklearn": None,
+            },
+            X,
+            check_input=check_input,
+        )
 
-    def fit(self, X, queue=None):
+    def fit(self, X, y=None):
        """
        Fit the model with X, using minibatches of size batch_size.
 
@@ -112,19 +239,79 @@ class IncrementalEmpiricalCovariance:
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.
 
+        y : Ignored
+            Not used, present for API consistency by convention.
+
         Returns
         -------
         self : object
            Returns the instance itself.
        """
-
-
-
+
+        return dispatch(
+            self,
+            "fit",
+            {
+                "onedal": self.__class__._onedal_fit,
+                "sklearn": None,
+            },
+            X,
+        )
+
+    def _onedal_fit(self, X, queue=None):
+        self.n_samples_seen_ = 0
+        if hasattr(self, "_onedal_estimator"):
+            self._onedal_estimator._reset()
+
+        if sklearn_check_version("1.2"):
+            self._validate_params()
+
+        # finite check occurs on onedal side
+        if sklearn_check_version("1.0"):
+            X = self._validate_data(
+                X, dtype=[np.float64, np.float32], copy=self.copy, force_all_finite=False
+            )
         else:
-
-
+            X = check_array(
+                X, dtype=[np.float64, np.float32], copy=self.copy, force_all_finite=False
+            )
+        self.n_features_in_ = X.shape[1]
+
+        self.batch_size_ = self.batch_size if self.batch_size else 5 * self.n_features_in_
+
+        if X.shape[0] == 1:
+            warnings.warn(
+                "Only one sample available. You may want to reshape your data array"
+            )
+
+        for batch in gen_batches(X.shape[0], self.batch_size_):
             X_batch = X[batch]
-            self._onedal_partial_fit(X_batch, queue)
+            self._onedal_partial_fit(X_batch, queue=queue, check_input=False)
 
         self._onedal_finalize_fit()
+
         return self
+
+    # expose sklearnex pairwise_distances if mahalanobis distance eventually supported
+    @wrap_output_data
+    def mahalanobis(self, X):
+        if sklearn_check_version("1.0"):
+            self._validate_data(X, reset=False, copy=self.copy)
+        else:
+            check_array(X, copy=self.copy)
+
+        precision = self.get_precision()
+        with config_context(assume_finite=True):
+            # compute mahalanobis distances
+            dist = pairwise_distances(
+                X, self.location_[np.newaxis, :], metric="mahalanobis", VI=precision
+            )
+
+        return np.reshape(dist, (len(X),)) ** 2
+
+    _onedal_cpu_supported = _onedal_supported
+    _onedal_gpu_supported = _onedal_supported
+
+    mahalanobis.__doc__ = sklearn_EmpiricalCovariance.mahalanobis.__doc__
+    error_norm.__doc__ = sklearn_EmpiricalCovariance.error_norm.__doc__
+    score.__doc__ = sklearn_EmpiricalCovariance.score.__doc__
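Taken together, these changes turn IncrementalEmpiricalCovariance into a scikit-learn-style estimator: keyword-only constructor parameters, fit/partial_fit routed through dispatch, lazily finalized covariance_/location_ properties, and mahalanobis/error_norm/score borrowed from sklearn.covariance.EmpiricalCovariance. A minimal usage sketch against the 2024.5.0 API follows; the data and the batch split are illustrative, not taken from the package.

import numpy as np

from sklearnex.covariance import IncrementalEmpiricalCovariance

rng = np.random.default_rng(0)
X = rng.standard_normal((1000, 10))

# Batch-by-batch accumulation; finalization is deferred until
# covariance_ / location_ are first read (the _need_to_finalize flag above).
est = IncrementalEmpiricalCovariance()
for X_batch in np.array_split(X, 4):
    est.partial_fit(X_batch)

# Single-call form: fit() resets the estimator and slices X into
# batches of batch_size (default 5 * n_features) via gen_batches.
est_full = IncrementalEmpiricalCovariance().fit(X)

# Both paths compute the same biased MLE estimate, up to float error.
np.testing.assert_allclose(est.covariance_, est_full.covariance_, rtol=1e-6)
np.testing.assert_allclose(est.location_, X.mean(axis=0), rtol=1e-6)

Because routing goes through dispatch, the same calls can in principle be offloaded to a supported device via sklearnex's config_context(target_offload=...) without code changes.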
sklearnex/covariance/tests/test_incremental_covariance.py
CHANGED

@@ -17,6 +17,10 @@
 import numpy as np
 import pytest
 from numpy.testing import assert_allclose
+from sklearn.covariance.tests.test_covariance import (
+    test_covariance,
+    test_EmpiricalCovariance_validates_mahalanobis,
+)
 
 from onedal.tests.utils._dataframes_support import (
     _convert_to_dataframe,
@@ -26,13 +30,14 @@ from onedal.tests.utils._dataframes_support import (
 
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
 @pytest.mark.parametrize("dtype", [np.float32, np.float64])
-def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype):
+@pytest.mark.parametrize("assume_centered", [True, False])
+def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype, assume_centered):
     from sklearnex.covariance import IncrementalEmpiricalCovariance
 
     X = np.array([[0, 1], [0, 1]])
     X = X.astype(dtype)
     X_split = np.array_split(X, 2)
-    inccov = IncrementalEmpiricalCovariance()
+    inccov = IncrementalEmpiricalCovariance(assume_centered=assume_centered)
 
     for i in range(2):
         X_split_df = _convert_to_dataframe(
@@ -40,8 +45,12 @@ def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype):
         )
         result = inccov.partial_fit(X_split_df)
 
-    expected_covariance = np.array([[0, 0], [0, 0]])
-    expected_means = np.array([0, 1])
+    if assume_centered:
+        expected_covariance = np.array([[0, 0], [0, 1]])
+        expected_means = np.array([0, 0])
+    else:
+        expected_covariance = np.array([[0, 0], [0, 0]])
+        expected_means = np.array([0, 1])
 
     assert_allclose(expected_covariance, result.covariance_)
     assert_allclose(expected_means, result.location_)
@@ -49,7 +58,7 @@ def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype):
     X = np.array([[1, 2], [3, 6]])
     X = X.astype(dtype)
     X_split = np.array_split(X, 2)
-    inccov = IncrementalEmpiricalCovariance()
+    inccov = IncrementalEmpiricalCovariance(assume_centered=assume_centered)
 
     for i in range(2):
         X_split_df = _convert_to_dataframe(
@@ -57,8 +66,12 @@ def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype):
         )
         result = inccov.partial_fit(X_split_df)
 
-    expected_covariance = np.array([[1, 2], [2, 4]])
-    expected_means = np.array([2, 4])
+    if assume_centered:
+        expected_covariance = np.array([[5, 10], [10, 20]])
+        expected_means = np.array([0, 0])
+    else:
+        expected_covariance = np.array([[1, 2], [2, 4]])
+        expected_means = np.array([2, 4])
 
     assert_allclose(expected_covariance, result.covariance_)
     assert_allclose(expected_means, result.location_)
@@ -87,9 +100,9 @@ def test_sklearnex_fit_on_gold_data(dataframe, queue, batch_size, dtype):
 
 
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
-@pytest.mark.parametrize("num_batches", [2,
-@pytest.mark.parametrize("row_count", [100, 1000
-@pytest.mark.parametrize("column_count", [10, 100
+@pytest.mark.parametrize("num_batches", [2, 10])
+@pytest.mark.parametrize("row_count", [100, 1000])
+@pytest.mark.parametrize("column_count", [10, 100])
 @pytest.mark.parametrize("dtype", [np.float32, np.float64])
 def test_sklearnex_partial_fit_on_random_data(
     dataframe, queue, num_batches, row_count, column_count, dtype
@@ -117,12 +130,13 @@ def test_sklearnex_partial_fit_on_random_data(
 
 
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
-@pytest.mark.parametrize("num_batches", [2,
-@pytest.mark.parametrize("row_count", [100, 1000
-@pytest.mark.parametrize("column_count", [10, 100
+@pytest.mark.parametrize("num_batches", [2, 10])
+@pytest.mark.parametrize("row_count", [100, 1000])
+@pytest.mark.parametrize("column_count", [10, 100])
 @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+@pytest.mark.parametrize("assume_centered", [True, False])
 def test_sklearnex_fit_on_random_data(
-    dataframe, queue, num_batches, row_count, column_count, dtype
+    dataframe, queue, num_batches, row_count, column_count, dtype, assume_centered
 ):
     from sklearnex.covariance import IncrementalEmpiricalCovariance
 
@@ -132,12 +146,35 @@ def test_sklearnex_fit_on_random_data(
     X = X.astype(dtype)
     X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
     batch_size = row_count // num_batches
-    inccov = IncrementalEmpiricalCovariance(batch_size=batch_size)
+    inccov = IncrementalEmpiricalCovariance(
+        batch_size=batch_size, assume_centered=assume_centered
+    )
 
     result = inccov.fit(X_df)
 
-    expected_covariance = np.cov(X.T, bias=1)
-    expected_means = np.mean(X, axis=0)
+    if assume_centered:
+        expected_covariance = np.dot(X.T, X) / X.shape[0]
+        expected_means = np.zeros_like(X[0])
+    else:
+        expected_covariance = np.cov(X.T, bias=1)
+        expected_means = np.mean(X, axis=0)
 
     assert_allclose(expected_covariance, result.covariance_, atol=1e-6)
     assert_allclose(expected_means, result.location_, atol=1e-6)
+
+
+# Monkeypatch IncrementalEmpiricalCovariance into relevant sklearn.covariance tests
+@pytest.mark.allow_sklearn_fallback
+@pytest.mark.parametrize(
+    "sklearn_test",
+    [
+        test_covariance,
+        test_EmpiricalCovariance_validates_mahalanobis,
+    ],
+)
+def test_IncrementalEmpiricalCovariance_against_sklearn(monkeypatch, sklearn_test):
+    from sklearnex.covariance import IncrementalEmpiricalCovariance
+
+    class_name = ".".join([sklearn_test.__module__, "EmpiricalCovariance"])
+    monkeypatch.setattr(class_name, IncrementalEmpiricalCovariance)
+    sklearn_test()
sklearnex/decomposition/pca.py
CHANGED
@@ -21,6 +21,7 @@ from daal4py.sklearn._utils import daal_check_version
 if daal_check_version((2024, "P", 100)):
     import numbers
     from math import sqrt
+    from warnings import warn
 
     import numpy as np
     from scipy.sparse import issparse
@@ -35,9 +36,13 @@ if daal_check_version((2024, "P", 100)):
     if sklearn_check_version("1.1") and not sklearn_check_version("1.2"):
         from sklearn.utils import check_scalar
 
+    if sklearn_check_version("1.2"):
+        from sklearn.utils._param_validation import StrOptions
+
     from sklearn.decomposition import PCA as sklearn_PCA
 
     from onedal.decomposition import PCA as onedal_PCA
+    from sklearnex.utils import get_namespace
 
     @control_n_jobs(decorated_methods=["fit", "transform", "fit_transform"])
     class PCA(sklearn_PCA):
@@ -45,6 +50,16 @@ if daal_check_version((2024, "P", 100)):
 
         if sklearn_check_version("1.2"):
             _parameter_constraints: dict = {**sklearn_PCA._parameter_constraints}
+            # "onedal_svd" solver uses oneDAL's PCA-SVD algorithm
+            # and required for testing purposes to fully enable it in future.
+            # "covariance_eigh" solver is added for ability to explicitly request
+            # oneDAL's PCA-Covariance algorithm using any sklearn version < 1.5.
+            _parameter_constraints["svd_solver"] = [
+                StrOptions(
+                    _parameter_constraints["svd_solver"][0].options
+                    | {"onedal_svd", "covariance_eigh"}
+                )
+            ]
 
         if sklearn_check_version("1.1"):
 
@@ -95,6 +110,7 @@ if daal_check_version((2024, "P", 100)):
             self._fit(X)
             return self
 
+        @wrap_output_data
         def _fit(self, X):
             if sklearn_check_version("1.2"):
                 self._validate_params()
@@ -106,7 +122,7 @@ if daal_check_version((2024, "P", 100)):
                     target_type=numbers.Integral,
                 )
 
-            U, S, Vt = dispatch(
+            return dispatch(
                 self,
                 "fit",
                 {
@@ -115,7 +131,6 @@ if daal_check_version((2024, "P", 100)):
                 },
                 X,
             )
-            return U, S, Vt
 
         def _onedal_fit(self, X, queue=None):
             X = self._validate_data(
@@ -128,7 +143,7 @@ if daal_check_version((2024, "P", 100)):
             onedal_params = {
                 "n_components": self.n_components,
                 "is_deterministic": True,
-                "method": "cov",
+                "method": "svd" if self._fit_svd_solver == "onedal_svd" else "cov",
                 "whiten": self.whiten,
             }
             self._onedal_estimator = onedal_PCA(**onedal_params)
@@ -139,7 +154,13 @@ if daal_check_version((2024, "P", 100)):
             S = self.singular_values_
             Vt = self.components_
 
-            return U, S, Vt
+            if sklearn_check_version("1.5"):
+                xp, _ = get_namespace(X)
+                x_is_centered = not self.copy
+
+                return U, S, Vt, X, x_is_centered, xp
+            else:
+                return U, S, Vt
 
         @wrap_output_data
         def transform(self, X):
@@ -155,34 +176,39 @@ if daal_check_version((2024, "P", 100)):
 
         def _onedal_transform(self, X, queue=None):
             check_is_fitted(self)
+            if sklearn_check_version("1.0"):
+                self._check_feature_names(X, reset=False)
             X = self._validate_data(
                 X,
                 dtype=[np.float64, np.float32],
                 reset=False,
             )
             self._validate_n_features_in_after_fitting(X)
-            if sklearn_check_version("1.0"):
-                self._check_feature_names(X, reset=False)
 
             return self._onedal_estimator.predict(X, queue=queue)
 
-        @wrap_output_data
         def fit_transform(self, X, y=None):
-            U, S, Vt = self._fit(X)
-            if hasattr(self, "_onedal_estimator"):
-                # oneDAL PCA was fit
-                X_transformed = self._onedal_transform(X)
-                return X_transformed
+            if sklearn_check_version("1.5"):
+                U, S, Vt, X_fit, x_is_centered, xp = self._fit(X)
             else:
+                U, S, Vt = self._fit(X)
+                X_fit = X
+            if hasattr(self, "_onedal_estimator"):
+                # oneDAL PCA was fit
+                return self.transform(X)
+            elif U is not None:
                 # Scikit-learn PCA was fit
                 U = U[:, : self.n_components_]
 
                 if self.whiten:
-                    U *= sqrt(X.shape[0] - 1)
+                    U *= sqrt(X_fit.shape[0] - 1)
                 else:
                     U *= S[: self.n_components_]
 
                 return U
+            else:
+                # Scikit-learn PCA["covariance_eigh"] was fit
+                return self._transform(X_fit, xp, x_is_centered=x_is_centered)
 
         def _onedal_supported(self, method_name, X):
             class_name = self.__class__.__name__
@@ -200,7 +226,13 @@ if daal_check_version((2024, "P", 100)):
                     ),
                     (
                         self._is_solver_compatible_with_onedal(shape_tuple),
-
+                        (
+                            "Only 'covariance_eigh' and 'onedal_svd' "
+                            "solvers are supported."
+                            if sklearn_check_version("1.5")
+                            else "Only 'full', 'covariance_eigh' and 'onedal_svd' "
+                            "solvers are supported."
+                        ),
                     ),
                     (not issparse(X), "oneDAL PCA does not support sparse data"),
                 ]
@@ -255,7 +287,13 @@ if daal_check_version((2024, "P", 100)):
 
             if self._fit_svd_solver == "auto":
                 if sklearn_check_version("1.1"):
-                    if max(shape_tuple) <= 500 or n_components == "mle":
+                    if (
+                        sklearn_check_version("1.5")
+                        and shape_tuple[1] <= 1_000
+                        and shape_tuple[0] >= 10 * shape_tuple[1]
+                    ):
+                        self._fit_svd_solver = "covariance_eigh"
+                    elif max(shape_tuple) <= 500 or n_components == "mle":
                         self._fit_svd_solver = "full"
                     elif 1 <= n_components < 0.8 * n_sf_min:
                         self._fit_svd_solver = "randomized"
@@ -289,7 +327,23 @@ if daal_check_version((2024, "P", 100)):
             else:
                 self._fit_svd_solver = "full"
 
-            if self._fit_svd_solver == "full":
+            # Use oneDAL in next cases:
+            # 1. oneDAL SVD solver is explicitly set
+            # 2. solver is set or dispatched to "covariance_eigh"
+            # 3. solver is set or dispatched to "full" and sklearn version < 1.5
+            # 4. solver is set to "auto" and dispatched to "full"
+            if self._fit_svd_solver in ["onedal_svd", "covariance_eigh"]:
+                return True
+            elif not sklearn_check_version("1.5") and self._fit_svd_solver == "full":
+                self._fit_svd_solver = "covariance_eigh"
+                return True
+            elif self.svd_solver == "auto" and self._fit_svd_solver == "full":
+                warn(
+                    "Sklearnex always uses `covariance_eigh` solver instead of `full` "
+                    "when `svd_solver` parameter is set to `auto` "
+                    "for performance purposes."
+                )
+                self._fit_svd_solver = "covariance_eigh"
                 return True
             else:
                 return False
@@ -298,11 +352,9 @@ if daal_check_version((2024, "P", 100)):
             self.n_samples_ = self._onedal_estimator.n_samples_
             if sklearn_check_version("1.2"):
                 self.n_features_in_ = self._onedal_estimator.n_features_
-            elif sklearn_check_version("0.24"):
-                self.n_features_ = self._onedal_estimator.n_features_
-                self.n_features_in_ = self._onedal_estimator.n_features_
             else:
                 self.n_features_ = self._onedal_estimator.n_features_
+                self.n_features_in_ = self._onedal_estimator.n_features_
             self.n_components_ = self._onedal_estimator.n_components_
             self.components_ = self._onedal_estimator.components_
             self.mean_ = self._onedal_estimator.mean_
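Net effect on the public API: svd_solver now additionally accepts "covariance_eigh" on any supported sklearn version (stock scikit-learn only gained it in 1.5) and the testing-oriented "onedal_svd", which flips the oneDAL method from "cov" to "svd" in onedal_params. A sketch of the three entry points, assuming the 2024.5.0 wheel is installed; the data is illustrative.

import numpy as np

from sklearnex.decomposition import PCA

X = np.random.default_rng(0).standard_normal((200, 5))

# Explicitly request oneDAL's PCA-Covariance algorithm, including on
# sklearn < 1.5 where stock PCA has no "covariance_eigh" option.
pca_cov = PCA(n_components=2, svd_solver="covariance_eigh").fit(X)

# Explicitly request oneDAL's PCA-SVD algorithm.
pca_svd = PCA(n_components=2, svd_solver="onedal_svd").fit(X)

# With the default svd_solver="auto", a dispatch that lands on "full"
# is rerouted to "covariance_eigh" (with a performance warning); on
# sklearn >= 1.5 this shape already resolves to "covariance_eigh".
pca_auto = PCA(n_components=2).fit(X)

# The oneDAL paths span the same principal subspace; component signs
# are not guaranteed to match across methods.
print(np.abs(pca_cov.components_) - np.abs(pca_svd.components_))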
sklearnex/decomposition/tests/test_pca.py
CHANGED

@@ -41,10 +41,10 @@ def test_sklearnex_import(dataframe, queue):
         [3.6053038, 0.04224385],
     ]
 
-    pca = PCA(n_components=2, svd_solver="full")
+    pca = PCA(n_components=2, svd_solver="covariance_eigh")
     pca.fit(X)
     X_transformed = pca.transform(X)
-    X_fit_transformed = PCA(n_components=2, svd_solver="full").fit_transform(X)
+    X_fit_transformed = PCA(n_components=2, svd_solver="covariance_eigh").fit_transform(X)
 
     if daal_check_version((2024, "P", 100)):
         assert "sklearnex" in pca.__module__