scikit-learn-intelex 2024.6.0__py310-none-manylinux1_x86_64.whl → 2024.7.0__py310-none-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-learn-intelex might be problematic.
- {scikit_learn_intelex-2024.6.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/METADATA +2 -2
- {scikit_learn_intelex-2024.6.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/RECORD +55 -41
- sklearnex/_config.py +3 -15
- sklearnex/_device_offload.py +9 -168
- sklearnex/basic_statistics/basic_statistics.py +127 -1
- sklearnex/basic_statistics/tests/test_basic_statistics.py +251 -0
- sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +1 -1
- sklearnex/cluster/dbscan.py +0 -1
- sklearnex/cluster/k_means.py +8 -0
- sklearnex/cluster/tests/test_kmeans.py +15 -3
- sklearnex/covariance/incremental_covariance.py +64 -13
- sklearnex/covariance/tests/test_incremental_covariance.py +35 -0
- sklearnex/decomposition/pca.py +25 -1
- sklearnex/dispatcher.py +94 -0
- sklearnex/ensemble/_forest.py +8 -35
- sklearnex/ensemble/tests/test_forest.py +9 -12
- sklearnex/linear_model/coordinate_descent.py +13 -0
- sklearnex/linear_model/linear.py +2 -34
- sklearnex/linear_model/logistic_regression.py +79 -59
- sklearnex/linear_model/ridge.py +7 -0
- sklearnex/linear_model/tests/test_linear.py +28 -3
- sklearnex/linear_model/tests/test_logreg.py +45 -3
- sklearnex/manifold/t_sne.py +4 -0
- sklearnex/metrics/pairwise.py +5 -0
- sklearnex/metrics/ranking.py +3 -0
- sklearnex/model_selection/split.py +3 -0
- sklearnex/neighbors/_lof.py +9 -0
- sklearnex/neighbors/common.py +45 -1
- sklearnex/neighbors/knn_classification.py +1 -20
- sklearnex/neighbors/knn_regression.py +1 -20
- sklearnex/neighbors/knn_unsupervised.py +31 -7
- sklearnex/preview/__init__.py +1 -1
- sklearnex/preview/linear_model/__init__.py +19 -0
- sklearnex/preview/linear_model/ridge.py +419 -0
- sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
- sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
- sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
- sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
- sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
- sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
- sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
- sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
- sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +163 -0
- sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
- sklearnex/svm/_common.py +19 -21
- sklearnex/svm/tests/test_svm.py +12 -20
- sklearnex/tests/_utils.py +143 -20
- sklearnex/tests/_utils_spmd.py +185 -0
- sklearnex/tests/test_config.py +4 -0
- sklearnex/tests/test_monkeypatch.py +12 -4
- sklearnex/tests/test_patching.py +16 -13
- sklearnex/tests/test_run_to_run_stability.py +21 -9
- {scikit_learn_intelex-2024.6.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/LICENSE.txt +0 -0
- {scikit_learn_intelex-2024.6.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/WHEEL +0 -0
- {scikit_learn_intelex-2024.6.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/top_level.txt +0 -0
sklearnex/ensemble/_forest.py
CHANGED
@@ -38,7 +38,12 @@ from sklearn.tree import (
 )
 from sklearn.tree._tree import Tree
 from sklearn.utils import check_random_state, deprecated
-from sklearn.utils.validation import
+from sklearn.utils.validation import (
+    _check_sample_weight,
+    check_array,
+    check_is_fitted,
+    check_X_y,
+)
 
 from daal4py.sklearn._n_jobs_support import control_n_jobs
 from daal4py.sklearn._utils import (
@@ -70,7 +75,7 @@ class BaseForest(ABC):
         X, y = self._validate_data(
             X,
             y,
-            multi_output=
+            multi_output=True,
             accept_sparse=False,
             dtype=[np.float64, np.float32],
             force_all_finite=False,
@@ -78,7 +83,7 @@ class BaseForest(ABC):
         )
 
         if sample_weight is not None:
-            sample_weight =
+            sample_weight = _check_sample_weight(sample_weight, X)
 
         if y.ndim == 2 and y.shape[1] == 1:
             warnings.warn(
@@ -289,38 +294,6 @@ class BaseForest(ABC):
                 "min_bin_size must be integral number but was " "%r" % self.min_bin_size
             )
 
-    def check_sample_weight(self, sample_weight, X, dtype=None):
-        n_samples = _num_samples(X)
-
-        if dtype is not None and dtype not in [np.float32, np.float64]:
-            dtype = np.float64
-
-        if sample_weight is None:
-            sample_weight = np.ones(n_samples, dtype=dtype)
-        elif isinstance(sample_weight, numbers.Number):
-            sample_weight = np.full(n_samples, sample_weight, dtype=dtype)
-        else:
-            if dtype is None:
-                dtype = [np.float64, np.float32]
-            sample_weight = check_array(
-                sample_weight,
-                accept_sparse=False,
-                ensure_2d=False,
-                dtype=dtype,
-                order="C",
-                force_all_finite=False,
-            )
-            if sample_weight.ndim != 1:
-                raise ValueError("Sample weights must be 1D array or scalar")
-
-            if sample_weight.shape != (n_samples,):
-                raise ValueError(
-                    "sample_weight.shape == {}, expected {}!".format(
-                        sample_weight.shape, (n_samples,)
-                    )
-                )
-        return sample_weight
-
     @property
     def estimators_(self):
         if hasattr(self, "_cached_estimators_"):
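The deleted check_sample_weight helper duplicated logic scikit-learn already ships; the new import block pulls in _check_sample_weight instead. A rough sketch of the equivalent behavior for the cases the old method handled (hedged; based on scikit-learn's documented semantics, not on anything added in this diff):

    import numpy as np
    from sklearn.utils.validation import _check_sample_weight

    X = np.zeros((4, 2))
    print(_check_sample_weight(None, X))          # None -> ones: [1. 1. 1. 1.]
    print(_check_sample_weight(2.0, X))           # scalar -> broadcast: [2. 2. 2. 2.]
    print(_check_sample_weight([1, 2, 3, 4], X))  # array -> validated 1D float array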
sklearnex/ensemble/tests/test_forest.py
CHANGED
@@ -46,11 +46,10 @@ def test_sklearnex_import_rf_classifier(dataframe, queue):
     assert_allclose([1], _as_numpy(rf.predict([[0, 0, 0, 0]])))
 
 
-
-@pytest.mark.parametrize(
-    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
-)
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
 def test_sklearnex_import_rf_regression(dataframe, queue):
+    if queue and queue.sycl_device.is_gpu:
+        pytest.skip("RF regressor predict for the GPU sycl_queue is buggy.")
     from sklearnex.ensemble import RandomForestRegressor
 
     X, y = make_regression(n_features=4, n_informative=2, random_state=0, shuffle=False)
@@ -69,11 +68,10 @@ def test_sklearnex_import_rf_regression(dataframe, queue):
     assert_allclose([-6.839], pred, atol=1e-2)
 
 
-
-@pytest.mark.parametrize(
-    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
-)
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
 def test_sklearnex_import_et_classifier(dataframe, queue):
+    if queue and queue.sycl_device.is_gpu:
+        pytest.skip("ET classifier predict for the GPU sycl_queue is buggy.")
     from sklearnex.ensemble import ExtraTreesClassifier
 
     X, y = make_classification(
@@ -93,11 +91,10 @@ def test_sklearnex_import_et_classifier(dataframe, queue):
     assert_allclose([1], _as_numpy(rf.predict([[0, 0, 0, 0]])))
 
 
-
-@pytest.mark.parametrize(
-    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
-)
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
 def test_sklearnex_import_et_regression(dataframe, queue):
+    if queue and queue.sycl_device.is_gpu:
+        pytest.skip("ET regressor predict for the GPU sycl_queue is buggy.")
     from sklearnex.ensemble import ExtraTreesRegressor
 
     X, y = make_regression(n_features=1, random_state=0, shuffle=False)
sklearnex/linear_model/coordinate_descent.py
CHANGED
@@ -15,3 +15,16 @@
 # ===============================================================================
 
 from daal4py.sklearn.linear_model import ElasticNet, Lasso
+from onedal._device_offload import support_usm_ndarray
+
+# Note: `sklearnex.linear_model.ElasticNet` only has functional
+# sycl GPU support. No GPU device will be offloaded.
+ElasticNet.fit = support_usm_ndarray(queue_param=False)(ElasticNet.fit)
+ElasticNet.predict = support_usm_ndarray(queue_param=False)(ElasticNet.predict)
+ElasticNet.score = support_usm_ndarray(queue_param=False)(ElasticNet.score)
+
+# Note: `sklearnex.linear_model.Lasso` only has functional
+# sycl GPU support. No GPU device will be offloaded.
+Lasso.fit = support_usm_ndarray(queue_param=False)(Lasso.fit)
+Lasso.predict = support_usm_ndarray(queue_param=False)(Lasso.predict)
+Lasso.score = support_usm_ndarray(queue_param=False)(Lasso.score)
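support_usm_ndarray(queue_param=False) is a decorator factory: it returns a wrapper that lets these daal4py-backed methods accept SYCL USM arrays (for example dpctl.tensor tensors), while, per the notes in the diff, the computation itself is not offloaded to a GPU. A minimal usage sketch, assuming dpctl and a SYCL runtime are available (the toy data is illustrative, not taken from this release):

    import dpctl.tensor as dpt
    from sklearnex.linear_model import Lasso

    # USM inputs are accepted after the patching above;
    # execution still happens on the CPU backend.
    X = dpt.asarray([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])
    y = dpt.asarray([0.0, 1.0, 2.0])
    lasso = Lasso(alpha=0.1).fit(X, y)
    pred = lasso.predict(X)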
sklearnex/linear_model/linear.py
CHANGED
@@ -27,7 +27,6 @@ from daal4py.sklearn._utils import sklearn_check_version
 
 from .._device_offload import dispatch, wrap_output_data
 from .._utils import PatchingConditionsChain, get_patch_message, register_hyperparameters
-from ..utils.validation import _assert_all_finite
 
 if sklearn_check_version("1.0") and not sklearn_check_version("1.2"):
     from sklearn.linear_model._base import _deprecate_normalize
@@ -138,19 +137,6 @@ class LinearRegression(sklearn_LinearRegression):
             sample_weight=sample_weight,
         )
 
-    def _test_type_and_finiteness(self, X_in):
-        X = X_in if isinstance(X_in, np.ndarray) else np.asarray(X_in)
-
-        dtype = X.dtype
-        if "complex" in str(type(dtype)):
-            return False
-
-        try:
-            _assert_all_finite(X)
-        except BaseException:
-            return False
-        return True
-
     def _onedal_fit_supported(self, method_name, *data):
         assert method_name == "fit"
         assert len(data) == 3
@@ -174,7 +160,7 @@ class LinearRegression(sklearn_LinearRegression):
         # Check if equations are well defined
         is_underdetermined = n_samples < (n_features + int(self.fit_intercept))
 
-
+        patching_status.and_conditions(
             [
                 (sample_weight is None, "Sample weight is not supported."),
                 (
@@ -193,17 +179,6 @@ class LinearRegression(sklearn_LinearRegression):
                 ),
             ]
         )
-        if not dal_ready:
-            return patching_status
-
-        if not patching_status.and_condition(
-            self._test_type_and_finiteness(X), "Input X is not supported."
-        ):
-            return patching_status
-
-        patching_status.and_condition(
-            self._test_type_and_finiteness(y), "Input y is not supported."
-        )
 
         return patching_status
 
@@ -217,19 +192,13 @@ class LinearRegression(sklearn_LinearRegression):
         model_is_sparse = issparse(self.coef_) or (
             self.fit_intercept and issparse(self.intercept_)
         )
-
+        patching_status.and_conditions(
             [
                 (n_samples > 0, "Number of samples is less than 1."),
                 (not issparse(data[0]), "Sparse input is not supported."),
                 (not model_is_sparse, "Sparse coefficients are not supported."),
             ]
         )
-        if not dal_ready:
-            return patching_status
-
-        patching_status.and_condition(
-            self._test_type_and_finiteness(data[0]), "Input X is not supported."
-        )
 
         return patching_status
 
@@ -257,7 +226,6 @@ class LinearRegression(sklearn_LinearRegression):
             "accept_sparse": ["csr", "csc", "coo"],
             "y_numeric": True,
             "multi_output": True,
-            "force_all_finite": False,
         }
         if sklearn_check_version("1.2"):
            X, y = self._validate_data(**check_params)
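With the _test_type_and_finiteness checks gone, the support decisions rest entirely on the PatchingConditionsChain conditions above, and input validation happens in the standard _validate_data path. One way to observe which backend actually ran is the "sklearnex" logger (a hedged sketch; logger-based verbosity is the documented sklearn-intelex mechanism, though message wording may differ by version):

    import logging
    import numpy as np
    from sklearnex.linear_model import LinearRegression

    logging.basicConfig()
    logging.getLogger("sklearnex").setLevel(logging.INFO)  # print dispatch decisions

    X = np.array([[1.0, 1.0], [1.0, 2.0], [2.0, 2.0], [2.0, 3.0]])
    y = X @ np.array([1.0, 2.0]) + 3.0
    LinearRegression().fit(X, y)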
sklearnex/linear_model/logistic_regression.py
CHANGED
@@ -28,7 +28,7 @@ if daal_check_version((2024, "P", 1)):
     from sklearn.linear_model import LogisticRegression as sklearn_LogisticRegression
     from sklearn.metrics import accuracy_score
     from sklearn.utils.multiclass import type_of_target
-    from sklearn.utils.validation import check_X_y
+    from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
 
     from daal4py.sklearn._n_jobs_support import control_n_jobs
     from daal4py.sklearn._utils import sklearn_check_version
@@ -38,7 +38,8 @@ if daal_check_version((2024, "P", 1)):
 
     from .._device_offload import dispatch, wrap_output_data
     from .._utils import PatchingConditionsChain, get_patch_message
-
+
+    _sparsity_enabled = daal_check_version((2024, "P", 700))
 
     class BaseLogisticRegression(ABC):
         def _save_attributes(self):
@@ -107,8 +108,6 @@ if daal_check_version((2024, "P", 1)):
         _onedal_cpu_fit = daal4py_fit
 
         def fit(self, X, y, sample_weight=None):
-            if sklearn_check_version("1.0"):
-                self._check_feature_names(X, reset=True)
             if sklearn_check_version("1.2"):
                 self._validate_params()
             dispatch(
@@ -126,8 +125,6 @@ if daal_check_version((2024, "P", 1)):
 
         @wrap_output_data
         def predict(self, X):
-            if sklearn_check_version("1.0"):
-                self._check_feature_names(X, reset=False)
             return dispatch(
                 self,
                 "predict",
@@ -140,8 +137,6 @@ if daal_check_version((2024, "P", 1)):
 
         @wrap_output_data
         def predict_proba(self, X):
-            if sklearn_check_version("1.0"):
-                self._check_feature_names(X, reset=False)
             return dispatch(
                 self,
                 "predict_proba",
@@ -154,8 +149,6 @@ if daal_check_version((2024, "P", 1)):
 
         @wrap_output_data
         def predict_log_proba(self, X):
-            if sklearn_check_version("1.0"):
-                self._check_feature_names(X, reset=False)
             return dispatch(
                 self,
                 "predict_log_proba",
@@ -168,8 +161,6 @@ if daal_check_version((2024, "P", 1)):
 
         @wrap_output_data
         def score(self, X, y, sample_weight=None):
-            if sklearn_check_version("1.0"):
-                self._check_feature_names(X, reset=False)
             return dispatch(
                 self,
                 "score",
@@ -187,17 +178,6 @@ if daal_check_version((2024, "P", 1)):
                 y, self._onedal_predict(X, queue=queue), sample_weight=sample_weight
             )
 
-        def _test_type_and_finiteness(self, X_in):
-            X = np.asarray(X_in)
-
-            if np.iscomplexobj(X):
-                return False
-            try:
-                _assert_all_finite(X)
-            except BaseException:
-                return False
-            return True
-
         def _onedal_gpu_fit_supported(self, method_name, *data):
             assert method_name == "fit"
             assert len(data) == 3
@@ -208,7 +188,12 @@ if daal_check_version((2024, "P", 1)):
                 f"sklearn.linear_model.{class_name}.fit"
             )
 
-
+            target_type = (
+                type_of_target(y, input_name="y")
+                if sklearn_check_version("1.1")
+                else type_of_target(y)
+            )
+            patching_status.and_conditions(
                 [
                     (self.penalty == "l2", "Only l2 penalty is supported."),
                     (self.dual == False, "dual=True is not supported."),
@@ -226,24 +211,12 @@ if daal_check_version((2024, "P", 1)):
                     (self.l1_ratio is None, "l1 ratio is not supported."),
                     (sample_weight is None, "Sample weight is not supported."),
                     (
-
+                        target_type == "binary",
                         "Only binary classification is supported",
                     ),
                 ]
             )
 
-            if not dal_ready:
-                return patching_status
-
-            if not patching_status.and_condition(
-                self._test_type_and_finiteness(X), "Input X is not supported."
-            ):
-                return patching_status
-
-            patching_status.and_condition(
-                self._test_type_and_finiteness(y), "Input y is not supported."
-            )
-
             return patching_status
 
         def _onedal_gpu_predict_supported(self, method_name, *data):
@@ -267,7 +240,7 @@ if daal_check_version((2024, "P", 1)):
                 [
                     (n_samples > 0, "Number of samples is less than 1."),
                     (
-                        not any([issparse(i) for i in data]),
+                        (not any([issparse(i) for i in data])) or _sparsity_enabled,
                         "Sparse input is not supported.",
                     ),
                     (not model_is_sparse, "Sparse coefficients are not supported."),
@@ -277,12 +250,6 @@ if daal_check_version((2024, "P", 1)):
                     ),
                 ]
             )
-            if not dal_ready:
-                return patching_status
-
-            patching_status.and_condition(
-                self._test_type_and_finiteness(*data), "Input X is not supported."
-            )
 
             return patching_status
 
@@ -313,24 +280,29 @@ if daal_check_version((2024, "P", 1)):
             }
             self._onedal_estimator = onedal_LogisticRegression(**onedal_params)
 
-        def _onedal_fit(self, X, y, sample_weight, queue=None):
+        def _onedal_fit(self, X, y, sample_weight=None, queue=None):
             if queue is None or queue.sycl_device.is_cpu:
                 return self._onedal_cpu_fit(X, y, sample_weight)
 
             assert sample_weight is None
 
-
-
-
-
-
-
-
-
-            if sklearn_check_version("1.2"):
-                X, y = self._validate_data(**check_params)
+            if sklearn_check_version("1.0"):
+                X, y = self._validate_data(
+                    X,
+                    y,
+                    accept_sparse=_sparsity_enabled,
+                    accept_large_sparse=_sparsity_enabled,
+                    dtype=[np.float64, np.float32],
+                )
             else:
-                X, y = check_X_y(
+                X, y = check_X_y(
+                    X,
+                    y,
+                    accept_sparse=_sparsity_enabled,
+                    accept_large_sparse=_sparsity_enabled,
+                    dtype=[np.float64, np.float32],
+                )
+
             self._initialize_onedal_estimator()
             try:
                 self._onedal_estimator.fit(X, y, queue=queue)
@@ -348,7 +320,23 @@ if daal_check_version((2024, "P", 1)):
             if queue is None or queue.sycl_device.is_cpu:
                 return daal4py_predict(self, X, "computeClassLabels")
 
-
+            check_is_fitted(self)
+            if sklearn_check_version("1.0"):
+                X = self._validate_data(
+                    X,
+                    reset=False,
+                    accept_sparse=_sparsity_enabled,
+                    accept_large_sparse=_sparsity_enabled,
+                    dtype=[np.float64, np.float32],
+                )
+            else:
+                X = check_array(
+                    X,
+                    accept_sparse=_sparsity_enabled,
+                    accept_large_sparse=_sparsity_enabled,
+                    dtype=[np.float64, np.float32],
+                )
+
             assert hasattr(self, "_onedal_estimator")
             return self._onedal_estimator.predict(X, queue=queue)
 
@@ -356,7 +344,23 @@ if daal_check_version((2024, "P", 1)):
             if queue is None or queue.sycl_device.is_cpu:
                 return daal4py_predict(self, X, "computeClassProbabilities")
 
-
+            check_is_fitted(self)
+            if sklearn_check_version("1.0"):
+                X = self._validate_data(
+                    X,
+                    reset=False,
+                    accept_sparse=_sparsity_enabled,
+                    accept_large_sparse=_sparsity_enabled,
+                    dtype=[np.float64, np.float32],
+                )
+            else:
+                X = check_array(
+                    X,
+                    accept_sparse=_sparsity_enabled,
+                    accept_large_sparse=_sparsity_enabled,
+                    dtype=[np.float64, np.float32],
+                )
+
             assert hasattr(self, "_onedal_estimator")
             return self._onedal_estimator.predict_proba(X, queue=queue)
 
@@ -364,7 +368,23 @@ if daal_check_version((2024, "P", 1)):
             if queue is None or queue.sycl_device.is_cpu:
                 return daal4py_predict(self, X, "computeClassLogProbabilities")
 
-
+            check_is_fitted(self)
+            if sklearn_check_version("1.0"):
+                X = self._validate_data(
+                    X,
+                    reset=False,
+                    accept_sparse=_sparsity_enabled,
+                    accept_large_sparse=_sparsity_enabled,
+                    dtype=[np.float64, np.float32],
+                )
+            else:
+                X = check_array(
+                    X,
+                    accept_sparse=_sparsity_enabled,
+                    accept_large_sparse=_sparsity_enabled,
+                    dtype=[np.float64, np.float32],
+                )
+
             assert hasattr(self, "_onedal_estimator")
             return self._onedal_estimator.predict_log_proba(X, queue=queue)
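Net effect of these hunks: CSR input to LogisticRegression is accepted whenever _sparsity_enabled is true, i.e. the installed oneDAL is at least 2024.7 (daal_check_version((2024, "P", 700))), and validation passes accept_sparse/accept_large_sparse through accordingly. A hedged usage sketch modeled on the CSR test added further below (synthetic data; requires a SYCL GPU):

    import numpy as np
    from scipy.sparse import csr_matrix
    from sklearnex import config_context
    from sklearnex.linear_model import LogisticRegression

    rng = np.random.default_rng(0)
    X = csr_matrix(rng.binomial(1, 0.05, (1000, 20)).astype(np.float64))
    y = rng.integers(0, 2, 1000)

    clf = LogisticRegression(solver="newton-cg")
    with config_context(target_offload="gpu:0"):
        clf.fit(X, y)                # sparse fit dispatched to the GPU backend
        proba = clf.predict_proba(X)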
sklearnex/linear_model/ridge.py
CHANGED
@@ -15,3 +15,10 @@
 # ===============================================================================
 
 from daal4py.sklearn.linear_model import Ridge
+from onedal._device_offload import support_usm_ndarray
+
+# Note: `sklearnex.linear_model.Ridge` only has functional
+# sycl GPU support. No GPU device will be offloaded.
+Ridge.fit = support_usm_ndarray(queue_param=False)(Ridge.fit)
+Ridge.predict = support_usm_ndarray(queue_param=False)(Ridge.predict)
+Ridge.score = support_usm_ndarray(queue_param=False)(Ridge.score)
sklearnex/linear_model/tests/test_linear.py
CHANGED
@@ -20,6 +20,10 @@ from numpy.testing import assert_allclose
 from sklearn.datasets import make_regression
 
 from daal4py.sklearn._utils import daal_check_version
+from daal4py.sklearn.linear_model.tests.test_ridge import (
+    _test_multivariate_ridge_alpha_shape,
+    _test_multivariate_ridge_coefficients,
+)
 from onedal.tests.utils._dataframes_support import (
     _as_numpy,
     _convert_to_dataframe,
@@ -57,32 +61,41 @@ def test_sklearnex_import_linear(dataframe, queue, dtype, macro_block):
     assert_allclose(_as_numpy(linreg.coef_), [1.0, 2.0], rtol=tol)
 
 
-
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import_ridge(dataframe, queue):
     from sklearnex.linear_model import Ridge
 
     X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
     y = np.dot(X, np.array([1, 2])) + 3
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
     ridgereg = Ridge().fit(X, y)
     assert "daal4py" in ridgereg.__module__
     assert_allclose(ridgereg.intercept_, 4.5)
     assert_allclose(ridgereg.coef_, [0.8, 1.4])
 
 
-
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import_lasso(dataframe, queue):
     from sklearnex.linear_model import Lasso
 
     X = [[0, 0], [1, 1], [2, 2]]
     y = [0, 1, 2]
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
     lasso = Lasso(alpha=0.1).fit(X, y)
     assert "daal4py" in lasso.__module__
     assert_allclose(lasso.intercept_, 0.15)
     assert_allclose(lasso.coef_, [0.85, 0.0])
 
 
-
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import_elastic(dataframe, queue):
     from sklearnex.linear_model import ElasticNet
 
     X, y = make_regression(n_features=2, random_state=0)
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
     elasticnet = ElasticNet(random_state=0).fit(X, y)
     assert "daal4py" in elasticnet.__module__
     assert_allclose(elasticnet.intercept_, 1.451, atol=1e-3)
@@ -115,3 +128,15 @@ def test_sklearnex_reconstruct_model(dataframe, queue, dtype):
 
     tol = 1e-5 if _as_numpy(y_pred).dtype == np.float32 else 1e-7
     assert_allclose(gtr, _as_numpy(y_pred), rtol=tol)
+
+
+def test_sklearnex_multivariate_ridge_coefs():
+    from sklearnex.linear_model import Ridge
+
+    _test_multivariate_ridge_coefficients(Ridge, random_state=0)
+
+
+def test_sklearnex_multivariate_ridge_alpha_shape():
+    from sklearnex.linear_model import Ridge
+
+    _test_multivariate_ridge_alpha_shape(Ridge, random_state=0)
sklearnex/linear_model/tests/test_logreg.py
CHANGED
@@ -14,8 +14,11 @@
 # limitations under the License.
 # ===============================================================================
 
+import numpy as np
 import pytest
-from
+from numpy.testing import assert_allclose, assert_array_equal
+from scipy.sparse import csr_matrix
+from sklearn.datasets import load_breast_cancer, load_iris, make_classification
 from sklearn.metrics import accuracy_score
 from sklearn.model_selection import train_test_split
 
@@ -24,7 +27,9 @@ from onedal.tests.utils._dataframes_support import (
     _as_numpy,
     _convert_to_dataframe,
     get_dataframes_and_queues,
+    get_queues,
 )
+from sklearnex import config_context
 
 
 def prepare_input(X, y, dataframe, queue):
@@ -38,8 +43,7 @@ def prepare_input(X, y, dataframe, queue):
 
 
 @pytest.mark.parametrize(
-    "dataframe,queue",
-    get_dataframes_and_queues(device_filter_="cpu"),
+    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
 )
 def test_sklearnex_multiclass_classification(dataframe, queue):
     from sklearnex.linear_model import LogisticRegression
@@ -89,3 +93,41 @@ def test_sklearnex_binary_classification(dataframe, queue):
 
     y_pred = _as_numpy(logreg.predict(X_test))
     assert accuracy_score(y_test, y_pred) > 0.95
+
+
+if daal_check_version((2024, "P", 700)):
+
+    @pytest.mark.parametrize("queue", get_queues("gpu"))
+    @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+    @pytest.mark.parametrize(
+        "dims", [(3007, 17, 0.05), (50000, 100, 0.01), (512, 10, 0.5)]
+    )
+    def test_csr(queue, dtype, dims):
+        from sklearnex.linear_model import LogisticRegression
+
+        n, p, density = dims
+
+        # Create sparse dataset for classification
+        X, y = make_classification(n, p, random_state=42)
+        X = X.astype(dtype)
+        y = y.astype(dtype)
+        np.random.seed(2007 + n + p)
+        mask = np.random.binomial(1, density, (n, p))
+        X = X * mask
+        X_sp = csr_matrix(X)
+
+        model = LogisticRegression(fit_intercept=True, solver="newton-cg")
+        model_sp = LogisticRegression(fit_intercept=True, solver="newton-cg")
+
+        with config_context(target_offload="gpu:0"):
+            model.fit(X, y)
+            pred = model.predict(X)
+            prob = model.predict_proba(X)
+            model_sp.fit(X_sp, y)
+            pred_sp = model_sp.predict(X_sp)
+            prob_sp = model_sp.predict_proba(X_sp)
+
+        assert_allclose(pred, pred_sp)
+        assert_allclose(prob, prob_sp)
+        assert_allclose(model.coef_, model_sp.coef_, rtol=1e-4)
+        assert_allclose(model.intercept_, model_sp.intercept_, rtol=1e-4)
sklearnex/manifold/t_sne.py
CHANGED
@@ -15,3 +15,7 @@
 # ===============================================================================
 
 from daal4py.sklearn.manifold import TSNE
+from onedal._device_offload import support_usm_ndarray
+
+TSNE.fit = support_usm_ndarray(queue_param=False)(TSNE.fit)
+TSNE.fit_transform = support_usm_ndarray(queue_param=False)(TSNE.fit_transform)
sklearnex/metrics/pairwise.py
CHANGED
@@ -15,3 +15,8 @@
 # ===============================================================================
 
 from daal4py.sklearn.metrics import pairwise_distances
+from onedal._device_offload import support_usm_ndarray
+
+pairwise_distances = support_usm_ndarray(freefunc=True, queue_param=False)(
+    pairwise_distances
+)
sklearnex/metrics/ranking.py
CHANGED
@@ -15,3 +15,6 @@
 # ===============================================================================
 
 from daal4py.sklearn.metrics import roc_auc_score
+from onedal._device_offload import support_usm_ndarray
+
+roc_auc_score = support_usm_ndarray(freefunc=True, queue_param=False)(roc_auc_score)
sklearnex/model_selection/split.py
CHANGED
@@ -15,3 +15,6 @@
 # ===============================================================================
 
 from daal4py.sklearn.model_selection import train_test_split
+from onedal._device_offload import support_usm_ndarray
+
+train_test_split = support_usm_ndarray(freefunc=True, queue_param=False)(train_test_split)
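Here freefunc=True tells the wrapper it is patching a free function rather than a bound method; pairwise_distances and roc_auc_score above get the identical treatment. An illustrative sketch (assuming dpctl is installed; the arrays are made up):

    import dpctl.tensor as dpt
    from sklearnex.model_selection import train_test_split

    X = dpt.reshape(dpt.arange(20.0), (10, 2))
    y = dpt.arange(10.0)
    # USM inputs are handled by the wrapper around the daal4py implementation.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)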
sklearnex/neighbors/_lof.py
CHANGED
@@ -97,6 +97,15 @@ class LocalOutlierFactor(KNeighborsDispatchingBase, sklearn_LocalOutlierFactor):
             self.negative_outlier_factor_, 100.0 * self.contamination
         )
 
+        # adoption of warning for data with duplicated samples from
+        # https://github.com/scikit-learn/scikit-learn/pull/28773
+        if sklearn_check_version("1.6"):
+            if np.min(self.negative_outlier_factor_) < -1e7 and not self.novelty:
+                warnings.warn(
+                    "Duplicate values are leading to incorrect results. "
+                    "Increase the number of neighbors for more accurate results."
+                )
+
         return self
 
     def fit(self, X, y=None):
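The -1e7 cutoff mirrors the upstream scikit-learn check: when many samples are exact duplicates, nearest-neighbor distances collapse to zero, local reachability densities saturate, and negative_outlier_factor_ degenerates to huge negative values. A hedged illustration of the degenerate case the warning targets (synthetic data; per the gate above, the warning itself only fires with scikit-learn >= 1.6):

    import numpy as np
    from sklearnex.neighbors import LocalOutlierFactor

    X = np.zeros((61, 2))   # 60 duplicated points ...
    X[-1] = [10.0, 10.0]    # ... plus one outlier
    lof = LocalOutlierFactor(n_neighbors=5).fit(X)
    print(np.min(lof.negative_outlier_factor_))  # can fall below -1e7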