scikit-learn-intelex 2024.4.0__py39-none-manylinux1_x86_64.whl → 2024.5.0__py39-none-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-learn-intelex might be problematic. Click here for more details.
- {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/METADATA +2 -2
- {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/RECORD +25 -22
- sklearnex/_device_offload.py +8 -1
- sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +2 -4
- sklearnex/covariance/incremental_covariance.py +217 -30
- sklearnex/covariance/tests/test_incremental_covariance.py +54 -17
- sklearnex/decomposition/pca.py +68 -13
- sklearnex/decomposition/tests/test_pca.py +2 -2
- sklearnex/dispatcher.py +31 -0
- sklearnex/ensemble/_forest.py +5 -4
- sklearnex/linear_model/__init__.py +5 -3
- sklearnex/linear_model/incremental_linear.py +387 -0
- sklearnex/linear_model/linear.py +2 -2
- sklearnex/linear_model/tests/test_incremental_linear.py +200 -0
- sklearnex/linear_model/tests/test_linear.py +2 -2
- sklearnex/neighbors/tests/test_neighbors.py +2 -2
- sklearnex/tests/_utils.py +21 -12
- sklearnex/tests/test_memory_usage.py +5 -1
- sklearnex/tests/test_n_jobs_support.py +4 -0
- sklearnex/tests/test_patching.py +27 -8
- sklearnex/tests/test_run_to_run_stability_tests.py +1 -1
- sklearnex/utils/tests/test_finite.py +89 -0
- {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/LICENSE.txt +0 -0
- {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/WHEEL +0 -0
- {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/top_level.txt +0 -0
sklearnex/decomposition/pca.py
CHANGED
|
@@ -21,6 +21,7 @@ from daal4py.sklearn._utils import daal_check_version
|
|
|
21
21
|
if daal_check_version((2024, "P", 100)):
|
|
22
22
|
import numbers
|
|
23
23
|
from math import sqrt
|
|
24
|
+
from warnings import warn
|
|
24
25
|
|
|
25
26
|
import numpy as np
|
|
26
27
|
from scipy.sparse import issparse
|
|
@@ -35,9 +36,13 @@ if daal_check_version((2024, "P", 100)):
|
|
|
35
36
|
if sklearn_check_version("1.1") and not sklearn_check_version("1.2"):
|
|
36
37
|
from sklearn.utils import check_scalar
|
|
37
38
|
|
|
39
|
+
if sklearn_check_version("1.2"):
|
|
40
|
+
from sklearn.utils._param_validation import StrOptions
|
|
41
|
+
|
|
38
42
|
from sklearn.decomposition import PCA as sklearn_PCA
|
|
39
43
|
|
|
40
44
|
from onedal.decomposition import PCA as onedal_PCA
|
|
45
|
+
from sklearnex.utils import get_namespace
|
|
41
46
|
|
|
42
47
|
@control_n_jobs(decorated_methods=["fit", "transform", "fit_transform"])
|
|
43
48
|
class PCA(sklearn_PCA):
|
|
@@ -45,6 +50,16 @@ if daal_check_version((2024, "P", 100)):
|
|
|
45
50
|
|
|
46
51
|
if sklearn_check_version("1.2"):
|
|
47
52
|
_parameter_constraints: dict = {**sklearn_PCA._parameter_constraints}
|
|
53
|
+
# "onedal_svd" solver uses oneDAL's PCA-SVD algorithm
|
|
54
|
+
# and is required for testing purposes so that it can be fully enabled in the future.
|
|
55
|
+
# "covariance_eigh" solver is added for ability to explicitly request
|
|
56
|
+
# oneDAL's PCA-Covariance algorithm using any sklearn version < 1.5.
|
|
57
|
+
_parameter_constraints["svd_solver"] = [
|
|
58
|
+
StrOptions(
|
|
59
|
+
_parameter_constraints["svd_solver"][0].options
|
|
60
|
+
| {"onedal_svd", "covariance_eigh"}
|
|
61
|
+
)
|
|
62
|
+
]
|
|
48
63
|
|
|
49
64
|
if sklearn_check_version("1.1"):
|
|
50
65
|
|
|
@@ -107,7 +122,7 @@ if daal_check_version((2024, "P", 100)):
|
|
|
107
122
|
target_type=numbers.Integral,
|
|
108
123
|
)
|
|
109
124
|
|
|
110
|
-
|
|
125
|
+
return dispatch(
|
|
111
126
|
self,
|
|
112
127
|
"fit",
|
|
113
128
|
{
|
|
@@ -116,7 +131,6 @@ if daal_check_version((2024, "P", 100)):
|
|
|
116
131
|
},
|
|
117
132
|
X,
|
|
118
133
|
)
|
|
119
|
-
return U, S, Vt
|
|
120
134
|
|
|
121
135
|
def _onedal_fit(self, X, queue=None):
|
|
122
136
|
X = self._validate_data(
|
|
@@ -129,7 +143,7 @@ if daal_check_version((2024, "P", 100)):
|
|
|
129
143
|
onedal_params = {
|
|
130
144
|
"n_components": self.n_components,
|
|
131
145
|
"is_deterministic": True,
|
|
132
|
-
"method": "cov",
|
|
146
|
+
"method": "svd" if self._fit_svd_solver == "onedal_svd" else "cov",
|
|
133
147
|
"whiten": self.whiten,
|
|
134
148
|
}
|
|
135
149
|
self._onedal_estimator = onedal_PCA(**onedal_params)
|
|
@@ -140,7 +154,13 @@ if daal_check_version((2024, "P", 100)):
|
|
|
140
154
|
S = self.singular_values_
|
|
141
155
|
Vt = self.components_
|
|
142
156
|
|
|
143
|
-
|
|
157
|
+
if sklearn_check_version("1.5"):
|
|
158
|
+
xp, _ = get_namespace(X)
|
|
159
|
+
x_is_centered = not self.copy
|
|
160
|
+
|
|
161
|
+
return U, S, Vt, X, x_is_centered, xp
|
|
162
|
+
else:
|
|
163
|
+
return U, S, Vt
|
|
144
164
|
|
|
145
165
|
@wrap_output_data
|
|
146
166
|
def transform(self, X):
|
|
@@ -156,32 +176,39 @@ if daal_check_version((2024, "P", 100)):
|
|
|
156
176
|
|
|
157
177
|
def _onedal_transform(self, X, queue=None):
|
|
158
178
|
check_is_fitted(self)
|
|
179
|
+
if sklearn_check_version("1.0"):
|
|
180
|
+
self._check_feature_names(X, reset=False)
|
|
159
181
|
X = self._validate_data(
|
|
160
182
|
X,
|
|
161
183
|
dtype=[np.float64, np.float32],
|
|
162
184
|
reset=False,
|
|
163
185
|
)
|
|
164
186
|
self._validate_n_features_in_after_fitting(X)
|
|
165
|
-
if sklearn_check_version("1.0"):
|
|
166
|
-
self._check_feature_names(X, reset=False)
|
|
167
187
|
|
|
168
188
|
return self._onedal_estimator.predict(X, queue=queue)
|
|
169
189
|
|
|
170
190
|
def fit_transform(self, X, y=None):
|
|
171
|
-
|
|
172
|
-
|
|
191
|
+
if sklearn_check_version("1.5"):
|
|
192
|
+
U, S, Vt, X_fit, x_is_centered, xp = self._fit(X)
|
|
193
|
+
else:
|
|
194
|
+
U, S, Vt = self._fit(X)
|
|
195
|
+
X_fit = X
|
|
196
|
+
if hasattr(self, "_onedal_estimator"):
|
|
173
197
|
# oneDAL PCA was fit
|
|
174
198
|
return self.transform(X)
|
|
175
|
-
|
|
199
|
+
elif U is not None:
|
|
176
200
|
# Scikit-learn PCA was fit
|
|
177
201
|
U = U[:, : self.n_components_]
|
|
178
202
|
|
|
179
203
|
if self.whiten:
|
|
180
|
-
U *= sqrt(
|
|
204
|
+
U *= sqrt(X_fit.shape[0] - 1)
|
|
181
205
|
else:
|
|
182
206
|
U *= S[: self.n_components_]
|
|
183
207
|
|
|
184
208
|
return U
|
|
209
|
+
else:
|
|
210
|
+
# Scikit-learn PCA["covariance_eigh"] was fit
|
|
211
|
+
return self._transform(X_fit, xp, x_is_centered=x_is_centered)
|
|
185
212
|
|
|
186
213
|
def _onedal_supported(self, method_name, X):
|
|
187
214
|
class_name = self.__class__.__name__
|
|
@@ -199,7 +226,13 @@ if daal_check_version((2024, "P", 100)):
|
|
|
199
226
|
),
|
|
200
227
|
(
|
|
201
228
|
self._is_solver_compatible_with_onedal(shape_tuple),
|
|
202
|
-
|
|
229
|
+
(
|
|
230
|
+
"Only 'covariance_eigh' and 'onedal_svd' "
|
|
231
|
+
"solvers are supported."
|
|
232
|
+
if sklearn_check_version("1.5")
|
|
233
|
+
else "Only 'full', 'covariance_eigh' and 'onedal_svd' "
|
|
234
|
+
"solvers are supported."
|
|
235
|
+
),
|
|
203
236
|
),
|
|
204
237
|
(not issparse(X), "oneDAL PCA does not support sparse data"),
|
|
205
238
|
]
|
|
@@ -254,7 +287,13 @@ if daal_check_version((2024, "P", 100)):
|
|
|
254
287
|
|
|
255
288
|
if self._fit_svd_solver == "auto":
|
|
256
289
|
if sklearn_check_version("1.1"):
|
|
257
|
-
if
|
|
290
|
+
if (
|
|
291
|
+
sklearn_check_version("1.5")
|
|
292
|
+
and shape_tuple[1] <= 1_000
|
|
293
|
+
and shape_tuple[0] >= 10 * shape_tuple[1]
|
|
294
|
+
):
|
|
295
|
+
self._fit_svd_solver = "covariance_eigh"
|
|
296
|
+
elif max(shape_tuple) <= 500 or n_components == "mle":
|
|
258
297
|
self._fit_svd_solver = "full"
|
|
259
298
|
elif 1 <= n_components < 0.8 * n_sf_min:
|
|
260
299
|
self._fit_svd_solver = "randomized"
|
|
@@ -288,7 +327,23 @@ if daal_check_version((2024, "P", 100)):
|
|
|
288
327
|
else:
|
|
289
328
|
self._fit_svd_solver = "full"
|
|
290
329
|
|
|
291
|
-
|
|
330
|
+
# Use oneDAL in the following cases:
|
|
331
|
+
# 1. oneDAL SVD solver is explicitly set
|
|
332
|
+
# 2. solver is set or dispatched to "covariance_eigh"
|
|
333
|
+
# 3. solver is set or dispatched to "full" and sklearn version < 1.5
|
|
334
|
+
# 4. solver is set to "auto" and dispatched to "full"
|
|
335
|
+
if self._fit_svd_solver in ["onedal_svd", "covariance_eigh"]:
|
|
336
|
+
return True
|
|
337
|
+
elif not sklearn_check_version("1.5") and self._fit_svd_solver == "full":
|
|
338
|
+
self._fit_svd_solver = "covariance_eigh"
|
|
339
|
+
return True
|
|
340
|
+
elif self.svd_solver == "auto" and self._fit_svd_solver == "full":
|
|
341
|
+
warn(
|
|
342
|
+
"Sklearnex always uses `covariance_eigh` solver instead of `full` "
|
|
343
|
+
"when `svd_solver` parameter is set to `auto` "
|
|
344
|
+
"for performance purposes."
|
|
345
|
+
)
|
|
346
|
+
self._fit_svd_solver = "covariance_eigh"
|
|
292
347
|
return True
|
|
293
348
|
else:
|
|
294
349
|
return False
|
|
@@ -41,10 +41,10 @@ def test_sklearnex_import(dataframe, queue):
|
|
|
41
41
|
[3.6053038, 0.04224385],
|
|
42
42
|
]
|
|
43
43
|
|
|
44
|
-
pca = PCA(n_components=2, svd_solver="
|
|
44
|
+
pca = PCA(n_components=2, svd_solver="covariance_eigh")
|
|
45
45
|
pca.fit(X)
|
|
46
46
|
X_transformed = pca.transform(X)
|
|
47
|
-
X_fit_transformed = PCA(n_components=2, svd_solver="
|
|
47
|
+
X_fit_transformed = PCA(n_components=2, svd_solver="covariance_eigh").fit_transform(X)
|
|
48
48
|
|
|
49
49
|
if daal_check_version((2024, "P", 100)):
|
|
50
50
|
assert "sklearnex" in pca.__module__
|
sklearnex/dispatcher.py
CHANGED
|
@@ -93,6 +93,7 @@ def get_patch_map_core(preview=False):
|
|
|
93
93
|
# Scikit-learn* modules
|
|
94
94
|
import sklearn as base_module
|
|
95
95
|
import sklearn.cluster as cluster_module
|
|
96
|
+
import sklearn.covariance as covariance_module
|
|
96
97
|
import sklearn.decomposition as decomposition_module
|
|
97
98
|
import sklearn.ensemble as ensemble_module
|
|
98
99
|
import sklearn.linear_model as linear_model_module
|
|
@@ -115,11 +116,17 @@ def get_patch_map_core(preview=False):
|
|
|
115
116
|
from .utils.parallel import _FuncWrapperOld as _FuncWrapper_sklearnex
|
|
116
117
|
|
|
117
118
|
from .cluster import DBSCAN as DBSCAN_sklearnex
|
|
119
|
+
from .covariance import (
|
|
120
|
+
IncrementalEmpiricalCovariance as IncrementalEmpiricalCovariance_sklearnex,
|
|
121
|
+
)
|
|
118
122
|
from .decomposition import PCA as PCA_sklearnex
|
|
119
123
|
from .ensemble import ExtraTreesClassifier as ExtraTreesClassifier_sklearnex
|
|
120
124
|
from .ensemble import ExtraTreesRegressor as ExtraTreesRegressor_sklearnex
|
|
121
125
|
from .ensemble import RandomForestClassifier as RandomForestClassifier_sklearnex
|
|
122
126
|
from .ensemble import RandomForestRegressor as RandomForestRegressor_sklearnex
|
|
127
|
+
from .linear_model import (
|
|
128
|
+
IncrementalLinearRegression as IncrementalLinearRegression_sklearnex,
|
|
129
|
+
)
|
|
123
130
|
from .linear_model import LinearRegression as LinearRegression_sklearnex
|
|
124
131
|
from .linear_model import LogisticRegression as LogisticRegression_sklearnex
|
|
125
132
|
from .neighbors import KNeighborsClassifier as KNeighborsClassifier_sklearnex
|
|
@@ -273,6 +280,30 @@ def get_patch_map_core(preview=False):
|
|
|
273
280
|
]
|
|
274
281
|
mapping["localoutlierfactor"] = mapping["lof"]
|
|
275
282
|
|
|
283
|
+
# IncrementalEmpiricalCovariance
|
|
284
|
+
mapping["incrementalempiricalcovariance"] = [
|
|
285
|
+
[
|
|
286
|
+
(
|
|
287
|
+
covariance_module,
|
|
288
|
+
"IncrementalEmpiricalCovariance",
|
|
289
|
+
IncrementalEmpiricalCovariance_sklearnex,
|
|
290
|
+
),
|
|
291
|
+
None,
|
|
292
|
+
]
|
|
293
|
+
]
|
|
294
|
+
|
|
295
|
+
# IncrementalLinearRegression
|
|
296
|
+
mapping["incrementallinearregression"] = [
|
|
297
|
+
[
|
|
298
|
+
(
|
|
299
|
+
linear_model_module,
|
|
300
|
+
"IncrementalLinearRegression",
|
|
301
|
+
IncrementalLinearRegression_sklearnex,
|
|
302
|
+
),
|
|
303
|
+
None,
|
|
304
|
+
]
|
|
305
|
+
]
|
|
306
|
+
|
|
276
307
|
# Configs
|
|
277
308
|
mapping["set_config"] = [
|
|
278
309
|
[(base_module, "set_config", set_config_sklearnex), None]
|
sklearnex/ensemble/_forest.py
CHANGED
|
@@ -777,15 +777,16 @@ class ForestClassifier(sklearn_ForestClassifier, BaseForest):
|
|
|
777
777
|
return patching_status
|
|
778
778
|
|
|
779
779
|
def _onedal_predict(self, X, queue=None):
|
|
780
|
+
check_is_fitted(self, "_onedal_estimator")
|
|
781
|
+
|
|
782
|
+
if sklearn_check_version("1.0"):
|
|
783
|
+
self._check_feature_names(X, reset=False)
|
|
784
|
+
|
|
780
785
|
X = check_array(
|
|
781
786
|
X,
|
|
782
787
|
dtype=[np.float64, np.float32],
|
|
783
788
|
force_all_finite=False,
|
|
784
789
|
) # Warning, order of dtype matters
|
|
785
|
-
check_is_fitted(self, "_onedal_estimator")
|
|
786
|
-
|
|
787
|
-
if sklearn_check_version("1.0"):
|
|
788
|
-
self._check_feature_names(X, reset=False)
|
|
789
790
|
|
|
790
791
|
res = self._onedal_estimator.predict(X, queue=queue)
|
|
791
792
|
return np.take(self.classes_, res.ravel().astype(np.int64, casting="unsafe"))
|
|
@@ -15,14 +15,16 @@
|
|
|
15
15
|
# ===============================================================================
|
|
16
16
|
|
|
17
17
|
from .coordinate_descent import ElasticNet, Lasso
|
|
18
|
+
from .incremental_linear import IncrementalLinearRegression
|
|
18
19
|
from .linear import LinearRegression
|
|
19
20
|
from .logistic_regression import LogisticRegression
|
|
20
21
|
from .ridge import Ridge
|
|
21
22
|
|
|
22
23
|
__all__ = [
|
|
23
|
-
"Ridge",
|
|
24
|
-
"LinearRegression",
|
|
25
|
-
"LogisticRegression",
|
|
26
24
|
"ElasticNet",
|
|
25
|
+
"IncrementalLinearRegression",
|
|
27
26
|
"Lasso",
|
|
27
|
+
"LinearRegression",
|
|
28
|
+
"LogisticRegression",
|
|
29
|
+
"Ridge",
|
|
28
30
|
]
|
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
# ===============================================================================
|
|
2
|
+
# Copyright 2024 Intel Corporation
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# ===============================================================================
|
|
16
|
+
|
|
17
|
+
import numbers
|
|
18
|
+
import warnings
|
|
19
|
+
|
|
20
|
+
import numpy as np
|
|
21
|
+
from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin
|
|
22
|
+
from sklearn.exceptions import NotFittedError
|
|
23
|
+
from sklearn.utils import check_array, gen_batches
|
|
24
|
+
|
|
25
|
+
from daal4py.sklearn._n_jobs_support import control_n_jobs
|
|
26
|
+
from daal4py.sklearn._utils import sklearn_check_version
|
|
27
|
+
from onedal.linear_model import (
|
|
28
|
+
IncrementalLinearRegression as onedal_IncrementalLinearRegression,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
if sklearn_check_version("1.2"):
|
|
32
|
+
from sklearn.utils._param_validation import Interval
|
|
33
|
+
|
|
34
|
+
from onedal.common.hyperparameters import get_hyperparameters
|
|
35
|
+
|
|
36
|
+
from .._device_offload import dispatch, wrap_output_data
|
|
37
|
+
from .._utils import PatchingConditionsChain, register_hyperparameters
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@register_hyperparameters(
    {
        "fit": get_hyperparameters("linear_regression", "train"),
        "partial_fit": get_hyperparameters("linear_regression", "train"),
    }
)
@control_n_jobs(
    decorated_methods=["fit", "partial_fit", "predict", "_onedal_finalize_fit"]
)
class IncrementalLinearRegression(MultiOutputMixin, RegressorMixin, BaseEstimator):
    """
    Incremental estimator for linear regression.

    Allows training a linear regression model when the data are split into
    batches: every ``partial_fit`` call feeds one batch to the backend, and the
    coefficients are finalized lazily the first time they are needed.

    Parameters
    ----------
    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to False, no intercept will be used in calculations
        (i.e. data is expected to be centered).

    copy_X : bool, default=True
        If True, X will be copied; else, it may be overwritten.

    n_jobs : int, default=None
        The number of jobs to use for the computation.

    batch_size : int, default=None
        The number of samples to use for each batch. Only used when calling
        ``fit``. If ``batch_size`` is ``None``, then ``batch_size``
        is inferred from the data and set to ``5 * n_features``, to provide a
        balance between approximation accuracy and memory consumption.

    Attributes
    ----------
    coef_ : array of shape (n_features, ) or (n_targets, n_features)
        Estimated coefficients for the linear regression problem.
        If multiple targets are passed during the fit (y 2D), this
        is a 2D array of shape (n_targets, n_features), while if only
        one target is passed, this is a 1D array of length n_features.

    intercept_ : float or array of shape (n_targets,)
        Independent term in the linear model. Set to 0.0 if
        `fit_intercept = False`.

    n_features_in_ : int
        Number of features seen during :term:`fit` or ``partial_fit``.

    n_samples_seen_ : int
        The number of samples processed by the estimator. Will be reset on
        new calls to fit, but increments across ``partial_fit`` calls.
        It should be not less than `n_features_in_` if `fit_intercept`
        is False and not less than `n_features_in_` + 1 if `fit_intercept`
        is True to obtain regression coefficients.

    batch_size_ : int
        Inferred batch size from ``batch_size``. Only set by ``fit``.
    """

    # Backend estimator factory; wrapped in ``staticmethod`` so that accessing
    # it through an instance does not bind ``self`` as the first argument.
    _onedal_incremental_linear = staticmethod(onedal_IncrementalLinearRegression)

    if sklearn_check_version("1.2"):
        _parameter_constraints: dict = {
            "fit_intercept": ["boolean"],
            "copy_X": ["boolean"],
            "n_jobs": [Interval(numbers.Integral, -1, None, closed="left"), None],
            "batch_size": [Interval(numbers.Integral, 1, None, closed="left"), None],
        }

    def __init__(self, *, fit_intercept=True, copy_X=True, n_jobs=None, batch_size=None):
        self.fit_intercept = fit_intercept
        self.copy_X = copy_X
        self.n_jobs = n_jobs
        self.batch_size = batch_size

    def _onedal_supported(self, method_name, *data):
        """Return the patching status for ``method_name``.

        No conditions are attached to the chain here; the same check is
        reused for both the CPU and the GPU dispatch paths.
        """
        patching_status = PatchingConditionsChain(
            f"sklearn.linear_model.{self.__class__.__name__}.{method_name}"
        )
        return patching_status

    _onedal_cpu_supported = _onedal_supported
    _onedal_gpu_supported = _onedal_supported

    def _validate_fit_inputs(self, X, y, reset=True):
        """Validate a training batch and return it as ``(X, y)``.

        Shared by ``_onedal_fit`` and ``_onedal_partial_fit``. ``reset`` is
        forwarded to ``_validate_data`` and is only meaningful on
        scikit-learn >= 1.0; older versions fall back to plain ``check_array``
        calls.
        """
        if sklearn_check_version("1.0"):
            return self._validate_data(
                X,
                y,
                dtype=[np.float64, np.float32],
                reset=reset,
                copy=self.copy_X,
                multi_output=True,
            )

        X = check_array(
            X,
            dtype=[np.float64, np.float32],
            copy=self.copy_X,
        )
        y = check_array(
            y,
            dtype=[np.float64, np.float32],
            copy=False,
            ensure_2d=False,
        )
        return X, y

    def _onedal_predict(self, X, queue=None):
        """Validate ``X`` and run prediction on the backend estimator.

        Pending partial results are finalized first, so this may raise the
        ``ValueError`` of ``_onedal_finalize_fit`` when too few samples have
        been seen.
        """
        if sklearn_check_version("1.2"):
            self._validate_params()

        if sklearn_check_version("1.0"):
            X = self._validate_data(
                X,
                dtype=[np.float64, np.float32],
                copy=self.copy_X,
                # Prediction must check X against the fitted state rather than
                # overwrite ``n_features_in_`` / feature names with whatever
                # is passed in.
                reset=False,
            )
        else:
            X = check_array(
                X,
                dtype=[np.float64, np.float32],
                copy=self.copy_X,
            )

        # Internal invariant: ``predict`` only dispatches here once ``coef_``
        # is readable, which requires the backend estimator to exist.
        assert hasattr(self, "_onedal_estimator")
        if self._need_to_finalize:
            self._onedal_finalize_fit()
        return self._onedal_estimator.predict(X, queue)

    def _onedal_partial_fit(self, X, y, queue=None):
        """Feed one batch to the backend and update the sample counters.

        The first batch (no samples seen yet) resets the recorded input
        metadata; later batches are validated against it.
        """
        first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0

        if sklearn_check_version("1.2"):
            self._validate_params()

        X, y = self._validate_fit_inputs(X, y, reset=first_pass)

        if first_pass:
            self.n_samples_seen_ = X.shape[0]
            self.n_features_in_ = X.shape[1]
        else:
            self.n_samples_seen_ += X.shape[0]

        onedal_params = {"fit_intercept": self.fit_intercept, "copy_X": self.copy_X}
        if not hasattr(self, "_onedal_estimator"):
            self._onedal_estimator = self._onedal_incremental_linear(**onedal_params)
        self._onedal_estimator.partial_fit(X, y, queue)
        # Coefficients are computed lazily; mark the partial results as pending.
        self._need_to_finalize = True

    def _onedal_finalize_fit(self):
        """Turn the accumulated partial results into final coefficients.

        Raises
        ------
        ValueError
            If fewer samples have been seen than there are unknowns
            (``n_features_in_``, plus one when an intercept is fitted).
        """
        assert hasattr(self, "_onedal_estimator")
        is_underdetermined = self.n_samples_seen_ < self.n_features_in_ + int(
            self.fit_intercept
        )
        if is_underdetermined:
            raise ValueError("Not enough samples to finalize")
        self._onedal_estimator.finalize_fit()
        self._need_to_finalize = False

    def _onedal_fit(self, X, y, queue=None):
        """Fit from scratch by streaming ``X``/``y`` through ``partial_fit``.

        Raises
        ------
        ValueError
            If ``X`` has fewer rows than unknowns (``n_features``, plus one
            when an intercept is fitted).
        """
        if sklearn_check_version("1.2"):
            self._validate_params()

        X, y = self._validate_fit_inputs(X, y)

        n_samples, n_features = X.shape

        is_underdetermined = n_samples < n_features + int(self.fit_intercept)
        if is_underdetermined:
            raise ValueError("Not enough samples to run oneDAL backend")

        if self.batch_size is None:
            self.batch_size_ = 5 * n_features
        else:
            self.batch_size_ = self.batch_size

        # Start over: zeroing the counter makes the first batch a "first pass",
        # and any previous backend state is discarded.
        self.n_samples_seen_ = 0
        if hasattr(self, "_onedal_estimator"):
            self._onedal_estimator._reset()

        for batch in gen_batches(n_samples, self.batch_size_):
            X_batch, y_batch = X[batch], y[batch]
            self._onedal_partial_fit(X_batch, y_batch, queue=queue)

        # finite check occurs on onedal side
        self.n_features_in_ = n_features

        if n_samples == 1:
            warnings.warn(
                "Only one sample available. You may want to reshape your data array"
            )

        self._onedal_finalize_fit()

        return self

    def get_intercept_(self):
        """Return the fitted intercept, finalizing pending batches first.

        Raises ``AttributeError`` while no backend estimator exists, so that
        ``hasattr(est, "intercept_")`` is False on an unfitted instance.
        """
        if hasattr(self, "_onedal_estimator"):
            if self._need_to_finalize:
                self._onedal_finalize_fit()

            return self._onedal_estimator.intercept_
        else:
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute 'intercept_'"
            )

    def set_intercept_(self, value):
        """Store ``value`` and mirror it onto the backend estimator if present."""
        self.__dict__["intercept_"] = value
        if hasattr(self, "_onedal_estimator"):
            self._onedal_estimator.intercept_ = value
            # NOTE(review): presumably drops the cached backend model so it is
            # rebuilt from the new value - confirm against the onedal estimator.
            del self._onedal_estimator._onedal_model

    def get_coef_(self):
        """Return the fitted coefficients, finalizing pending batches first.

        Raises ``AttributeError`` while no backend estimator exists, so that
        ``hasattr(est, "coef_")`` is False on an unfitted instance.
        """
        if hasattr(self, "_onedal_estimator"):
            if self._need_to_finalize:
                self._onedal_finalize_fit()

            return self._onedal_estimator.coef_
        else:
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute 'coef_'"
            )

    def set_coef_(self, value):
        """Store ``value`` and mirror it onto the backend estimator if present."""
        self.__dict__["coef_"] = value
        if hasattr(self, "_onedal_estimator"):
            self._onedal_estimator.coef_ = value
            # NOTE(review): presumably drops the cached backend model so it is
            # rebuilt from the new value - confirm against the onedal estimator.
            del self._onedal_estimator._onedal_model

    coef_ = property(get_coef_, set_coef_)
    intercept_ = property(get_intercept_, set_intercept_)

    def partial_fit(self, X, y):
        """
        Incremental fit linear model with X and y. All of X and y is
        processed as a single batch.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values, where `n_samples` is the number of samples and
            `n_targets` is the number of targets.

        Returns
        -------
        self : object
            Returns the instance itself.
        """

        dispatch(
            self,
            "partial_fit",
            {
                "onedal": self.__class__._onedal_partial_fit,
                "sklearn": None,
            },
            X,
            y,
        )
        return self

    def fit(self, X, y):
        """
        Fit the model with X and y, using minibatches of size batch_size.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features. It is necessary for
            `n_samples` to be not less than `n_features` if `fit_intercept`
            is False and not less than `n_features` + 1 if `fit_intercept`
            is True.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values, where `n_samples` is the number of samples and
            `n_targets` is the number of targets.

        Returns
        -------
        self : object
            Returns the instance itself.
        """

        dispatch(
            self,
            "fit",
            {
                "onedal": self.__class__._onedal_fit,
                "sklearn": None,
            },
            X,
            y,
        )
        return self

    @wrap_output_data
    def predict(self, X, y=None):
        """
        Predict using the linear model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples.

        y : Ignored
            Not used by this method.

        Returns
        -------
        C : array, shape (n_samples, n_targets)
            Returns predicted values.

        Raises
        ------
        NotFittedError
            If neither ``fit`` nor ``partial_fit`` has been called yet.
        """
        if not hasattr(self, "coef_"):
            msg = (
                "This %(name)s instance is not fitted yet. Call 'fit' or 'partial_fit' "
                "with appropriate arguments before using this estimator."
            )
            raise NotFittedError(msg % {"name": self.__class__.__name__})

        return dispatch(
            self,
            "predict",
            {
                "onedal": self.__class__._onedal_predict,
                "sklearn": None,
            },
            X,
        )
|
sklearnex/linear_model/linear.py
CHANGED
|
@@ -157,7 +157,7 @@ class LinearRegression(sklearn_LinearRegression):
|
|
|
157
157
|
n_features = _num_features(X, fallback_1d=True)
|
|
158
158
|
|
|
159
159
|
# Check if equations are well defined
|
|
160
|
-
|
|
160
|
+
is_underdetermined = n_samples < (n_features + int(self.fit_intercept))
|
|
161
161
|
|
|
162
162
|
dal_ready = patching_status.and_conditions(
|
|
163
163
|
[
|
|
@@ -172,7 +172,7 @@ class LinearRegression(sklearn_LinearRegression):
|
|
|
172
172
|
"Forced positive coefficients are not supported.",
|
|
173
173
|
),
|
|
174
174
|
(
|
|
175
|
-
|
|
175
|
+
not is_underdetermined,
|
|
176
176
|
"The shape of X (fitting) does not satisfy oneDAL requirements:"
|
|
177
177
|
"Number of features + 1 >= number of samples.",
|
|
178
178
|
),
|