scikit-learn-intelex 2024.4.0__py312-none-manylinux1_x86_64.whl → 2024.6.0__py312-none-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/METADATA +2 -2
- {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/RECORD +43 -36
- sklearnex/_device_offload.py +8 -1
- sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +2 -4
- sklearnex/cluster/dbscan.py +3 -0
- sklearnex/cluster/tests/test_dbscan.py +8 -6
- sklearnex/conftest.py +11 -1
- sklearnex/covariance/incremental_covariance.py +217 -30
- sklearnex/covariance/tests/test_incremental_covariance.py +54 -17
- sklearnex/decomposition/pca.py +68 -13
- sklearnex/decomposition/tests/test_pca.py +6 -4
- sklearnex/dispatcher.py +46 -1
- sklearnex/ensemble/_forest.py +114 -22
- sklearnex/ensemble/tests/test_forest.py +13 -3
- sklearnex/glob/dispatcher.py +16 -2
- sklearnex/linear_model/__init__.py +5 -3
- sklearnex/linear_model/incremental_linear.py +464 -0
- sklearnex/linear_model/linear.py +27 -9
- sklearnex/linear_model/logistic_regression.py +13 -15
- sklearnex/linear_model/tests/test_incremental_linear.py +200 -0
- sklearnex/linear_model/tests/test_linear.py +2 -2
- sklearnex/neighbors/knn_regression.py +24 -0
- sklearnex/neighbors/tests/test_neighbors.py +2 -2
- sklearnex/preview/__init__.py +1 -1
- sklearnex/preview/decomposition/__init__.py +19 -0
- sklearnex/preview/decomposition/incremental_pca.py +228 -0
- sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
- sklearnex/svm/_common.py +165 -20
- sklearnex/svm/nusvc.py +40 -4
- sklearnex/svm/nusvr.py +31 -2
- sklearnex/svm/svc.py +40 -4
- sklearnex/svm/svr.py +31 -2
- sklearnex/tests/_utils.py +70 -29
- sklearnex/tests/test_common.py +54 -0
- sklearnex/tests/test_memory_usage.py +195 -132
- sklearnex/tests/test_n_jobs_support.py +4 -0
- sklearnex/tests/test_patching.py +22 -10
- sklearnex/tests/test_run_to_run_stability.py +283 -0
- sklearnex/utils/_namespace.py +1 -1
- sklearnex/utils/tests/test_finite.py +89 -0
- sklearnex/tests/test_run_to_run_stability_tests.py +0 -428
- {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/LICENSE.txt +0 -0
- {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/WHEEL +0 -0
- {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/top_level.txt +0 -0
|
@@ -14,122 +14,113 @@
|
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
# ==============================================================================
|
|
16
16
|
|
|
17
|
-
|
|
18
17
|
import gc
|
|
19
18
|
import logging
|
|
19
|
+
import os
|
|
20
20
|
import tracemalloc
|
|
21
21
|
import types
|
|
22
|
+
import warnings
|
|
23
|
+
from inspect import isclass
|
|
22
24
|
|
|
23
25
|
import numpy as np
|
|
24
26
|
import pandas as pd
|
|
25
27
|
import pytest
|
|
26
28
|
from scipy.stats import pearsonr
|
|
27
|
-
from sklearn.base import BaseEstimator
|
|
29
|
+
from sklearn.base import BaseEstimator, clone
|
|
28
30
|
from sklearn.datasets import make_classification
|
|
29
31
|
from sklearn.model_selection import KFold
|
|
30
32
|
|
|
31
|
-
from
|
|
32
|
-
from
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
33
|
+
from onedal import _is_dpc_backend
|
|
34
|
+
from onedal.tests.utils._dataframes_support import (
|
|
35
|
+
_convert_to_dataframe,
|
|
36
|
+
get_dataframes_and_queues,
|
|
37
|
+
)
|
|
38
|
+
from onedal.tests.utils._device_selection import get_queues, is_dpctl_available
|
|
39
|
+
from sklearnex import config_context
|
|
40
|
+
from sklearnex.tests._utils import PATCHED_FUNCTIONS, PATCHED_MODELS, SPECIAL_INSTANCES
|
|
41
|
+
from sklearnex.utils import get_namespace
|
|
42
|
+
|
|
43
|
+
if _is_dpc_backend:
|
|
44
|
+
from onedal import _backend
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Names (keys of the estimator/function mappings) that are excluded from the
# CPU memory-leak test, grouped by the reason for the exclusion.
CPU_SKIP_LIST = (
    # too slow for using in testing on common data size
    "TSNE",
    # does not malloc
    "config_context",
    "get_config",
    "set_config",
    # memory leak fortran numpy (investigate _fit_proba)
    "SVC(probability=True)",
    "NuSVC(probability=True)",
    # dataframe_f issues
    "IncrementalEmpiricalCovariance",
    # TODO fix memory leak issue in private CI for
    # data_shape = (1000, 100), data_transform_function = dataframe_f
    "IncrementalLinearRegression",
    "IncrementalPCA",
    # memory leak fortran (1000, 100)
    "LogisticRegression(solver='newton-cg')",
)
|
|
59
|
+
|
|
60
|
+
# Names (keys of the estimator mapping) that are excluded from the GPU
# memory-leak test, grouped by the reason for the exclusion.
GPU_SKIP_LIST = (
    # too slow for using in testing on common data size
    "TSNE",
    "RandomForestRegressor",
    # does not support GPU offloading
    "KMeans",
    # does not malloc
    "config_context",
    "get_config",
    "set_config",
    # does not support GPU offloading (fails silently)
    "Ridge",
    "ElasticNet",
    "Lasso",
    "SVR",
    "NuSVR",
    "NuSVC",
    # default parameters not supported, see solver=newton-cg
    "LogisticRegression",
    # does not support GPU offloading (fails silently)
    "NuSVC(probability=True)",
    # issue with potrf with the specific dataset
    "IncrementalLinearRegression",
    "LinearRegression",
)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def gen_functions(functions):
    """Adapt patched functions to the two-argument ``func(x, y)`` call form.

    Parameters
    ----------
    functions : dict
        Mapping of function name to callable. It must contain the keys
        ``"roc_auc_score"``, ``"pairwise_distances"`` and
        ``"_assert_all_finite"``. The mapping itself is not modified.

    Returns
    -------
    dict
        Shallow copy of ``functions`` in which the three entries above are
        replaced by ``(x, y)`` wrappers:

        * ``"roc_auc_score"`` ignores ``x`` and calls the original with
          ``(y, y)``;
        * ``"pairwise_distances"`` is replaced by two entries, one per metric
          (``cosine`` and ``correlation``), each called on ``x`` only;
        * ``"_assert_all_finite"`` calls the original on ``x`` and then on
          ``y`` and returns both results in a list.

        Every wrapper carries its dictionary key as ``__name__``.

    Raises
    ------
    KeyError
        If one of the three required keys is missing from ``functions``.
    """
    func_dict = functions.copy()

    def _register(name, wrapper):
        # Every lambda is named "<lambda>"; label the wrapper with its key so
        # that code reporting ``__name__`` identifies the wrapped function.
        wrapper.__name__ = wrapper.__qualname__ = name
        func_dict[name] = wrapper

    roc_auc_score = func_dict.pop("roc_auc_score")
    _register("roc_auc_score", lambda x, y: roc_auc_score(y, y))

    pairwise_distances = func_dict.pop("pairwise_distances")
    _register(
        "pairwise_distances(metric='cosine')",
        lambda x, y: pairwise_distances(x, metric="cosine"),
    )
    _register(
        "pairwise_distances(metric='correlation')",
        lambda x, y: pairwise_distances(x, metric="correlation"),
    )

    _assert_all_finite = func_dict.pop("_assert_all_finite")
    _register(
        "_assert_all_finite",
        lambda x, y: [_assert_all_finite(x), _assert_all_finite(y)],
    )
    return func_dict
|
|
79
100
|
|
|
80
|
-
def get_patched_estimators(ban_list, output_list):
|
|
81
|
-
patched_estimators = get_patch_map().values()
|
|
82
|
-
for listing in patched_estimators:
|
|
83
|
-
estimator, name = listing[0][0][2], listing[0][0][1]
|
|
84
|
-
if not isinstance(estimator, types.FunctionType):
|
|
85
|
-
if name not in ban_list:
|
|
86
|
-
if issubclass(estimator, BaseEstimator):
|
|
87
|
-
if hasattr(estimator, "fit"):
|
|
88
|
-
output_list.append(estimator)
|
|
89
101
|
|
|
102
|
+
FUNCTIONS = gen_functions(PATCHED_FUNCTIONS)
|
|
90
103
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
for
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
return estimators_map.values()
|
|
104
|
+
# Everything exercised by the CPU memory-leak test: the merged contents of
# PATCHED_MODELS, SPECIAL_INSTANCES and FUNCTIONS (later mappings win on
# duplicate keys), minus the names listed in CPU_SKIP_LIST.
CPU_ESTIMATORS = {
    k: v
    for k, v in {**PATCHED_MODELS, **SPECIAL_INSTANCES, **FUNCTIONS}.items()
    if k not in CPU_SKIP_LIST
}
|
|
97
109
|
|
|
110
|
+
# Everything exercised by the GPU memory-leak test: the merged contents of
# PATCHED_MODELS and SPECIAL_INSTANCES (the latter wins on duplicate keys),
# minus the names listed in GPU_SKIP_LIST. Plain functions are not included.
GPU_ESTIMATORS = {
    k: v
    for k, v in {**PATCHED_MODELS, **SPECIAL_INSTANCES}.items()
    if k not in GPU_SKIP_LIST
}
|
|
98
115
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
FiniteCheckEstimator,
|
|
103
|
-
CosineDistancesEstimator,
|
|
104
|
-
CorrelationDistancesEstimator,
|
|
105
|
-
RocAucEstimator,
|
|
116
|
+
# (n_samples, n_features) pairs used to parametrize the leak tests; the test
# id of each entry is the textual form of the tuple, e.g. "(1000, 100)".
data_shapes = [
    pytest.param(shape, id=str(shape)) for shape in ((1000, 100), (2000, 50))
]
|
|
107
|
-
get_patched_estimators(BANNED_ESTIMATORS, estimators)
|
|
108
|
-
estimators = remove_duplicated_estimators(estimators)
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
def ndarray_c(x, y):
|
|
112
|
-
return np.ascontiguousarray(x), y
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
def ndarray_f(x, y):
|
|
116
|
-
return np.asfortranarray(x), y
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
def dataframe_c(x, y):
|
|
120
|
-
return pd.DataFrame(np.ascontiguousarray(x)), pd.Series(y)
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
def dataframe_f(x, y):
|
|
124
|
-
return pd.DataFrame(np.asfortranarray(x)), pd.Series(y)
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
data_transforms = [ndarray_c, ndarray_f, dataframe_c, dataframe_f]
|
|
128
|
-
|
|
129
|
-
data_shapes = [(1000, 100), (2000, 50)]
|
|
130
120
|
|
|
131
121
|
# Largest tolerated growth of traced memory, as a fraction of the input size.
EXTRA_MEMORY_THRESHOLD = 0.15
# Number of folds passed to KFold, i.e. fit/inference rounds per test.
N_SPLITS = 10
# Maps the "order" test parameter to the numpy function enforcing that layout.
ORDER_DICT = dict(F=np.asfortranarray, C=np.ascontiguousarray)
|
|
133
124
|
|
|
134
125
|
|
|
135
126
|
def gen_clsf_data(n_samples, n_features):
|
|
@@ -143,45 +134,82 @@ def gen_clsf_data(n_samples, n_features):
|
|
|
143
134
|
)
|
|
144
135
|
|
|
145
136
|
|
|
146
|
-
def
|
|
137
|
+
def get_traced_memory(queue=None):
    """Return the amount of memory currently in use for the given target.

    Parameters
    ----------
    queue : object, optional
        Queue exposing ``sycl_device.is_gpu``. When the DPC++ backend is
        enabled and the queue's device is a GPU, the value comes from
        ``_backend.get_used_memory(queue)``.

    Returns
    -------
    The backend-reported used memory for a GPU queue; otherwise the first
    element of ``tracemalloc.get_traced_memory()`` (the current traced size).
    """
    measure_on_device = _is_dpc_backend and queue and queue.sycl_device.is_gpu
    if not measure_on_device:
        return tracemalloc.get_traced_memory()[0]
    return _backend.get_used_memory(queue)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def take(x, index, axis=0, queue=None):
    """Select the entries of ``x`` at ``index`` along ``axis``.

    Parameters
    ----------
    x : array-like
        Container to select from. It must either be recognised by
        ``get_namespace`` as array API input or provide a ``take`` method.
    index : sequence of int
        Positions to select.
    axis : int, default=0
        Axis along which to select.
    queue : object, optional
        Passed as ``device`` when the index is converted for array API input;
        unused otherwise.

    Returns
    -------
    The selection, produced by ``xp.take`` for array API input and by
    ``x.take`` otherwise.
    """
    xp, uses_array_api = get_namespace(x)
    if not uses_array_api:
        return x.take(index, axis=axis)

    # The index is converted with the namespace's own asarray on ``queue``.
    device_index = xp.asarray(index, device=queue)
    return xp.take(x, device_index, axis=axis)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def split_train_inference(kf, x, y, estimator, queue=None):
    """Run ``estimator`` on every fold of ``kf`` and record memory after each.

    Parameters
    ----------
    kf : object
        Splitter whose ``split(x)`` yields ``(train_index, test_index)`` pairs.
    x, y : array-like
        Data and targets; folds are extracted with ``take``.
    estimator : class, instance or callable
        A ``BaseEstimator`` subclass (instantiated with default arguments), a
        ``BaseEstimator`` instance (cloned for every fold), or any other
        callable, which is invoked as ``estimator(x_train, y_train)``.
    queue : object, optional
        Forwarded to ``take`` and ``get_traced_memory``.

    Returns
    -------
    list
        One ``get_traced_memory(queue)`` reading per fold, in fold order.
    """
    mem_tracks = []
    for train_index, test_index in kf.split(x):
        x_train = take(x, train_index, queue=queue)
        y_train = take(y, train_index, queue=queue)
        x_test = take(x, test_index, queue=queue)
        y_test = take(y, test_index, queue=queue)

        # ``flag`` records whether ``alg`` is an estimator object (True) or
        # ``estimator`` is a plain callable (False).
        if isclass(estimator) and issubclass(estimator, BaseEstimator):
            alg = estimator()
            flag = True
        elif isinstance(estimator, BaseEstimator):
            alg = clone(estimator)
            flag = True
        else:
            flag = False

        if flag:
            alg.fit(x_train, y_train)
            # Exercise the first available inference-style method only.
            if hasattr(alg, "predict"):
                alg.predict(x_test)
            elif hasattr(alg, "transform"):
                alg.transform(x_test)
            elif hasattr(alg, "kneighbors"):
                alg.kneighbors(x_test)
            del alg
        else:
            estimator(x_train, y_train)

        # Drop the per-fold references before sampling memory usage.
        del x_train, x_test, y_train, y_test, flag
        mem_tracks.append(get_traced_memory(queue))
    return mem_tracks
|
|
169
184
|
|
|
170
185
|
|
|
171
|
-
def _kfold_function_template(estimator,
|
|
186
|
+
def _kfold_function_template(estimator, dataframe, data_shape, queue=None, func=None):
|
|
172
187
|
tracemalloc.start()
|
|
173
188
|
|
|
174
189
|
n_samples, n_features = data_shape
|
|
175
|
-
|
|
190
|
+
X, y, data_memory_size = gen_clsf_data(n_samples, n_features)
|
|
176
191
|
kf = KFold(n_splits=N_SPLITS)
|
|
177
|
-
|
|
192
|
+
if func:
|
|
193
|
+
X = func(X)
|
|
194
|
+
|
|
195
|
+
X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
|
|
196
|
+
y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
|
|
178
197
|
|
|
179
|
-
mem_before
|
|
180
|
-
mem_tracks = split_train_inference(kf,
|
|
198
|
+
mem_before = get_traced_memory(queue)
|
|
199
|
+
mem_tracks = split_train_inference(kf, X, y, estimator, queue=queue)
|
|
181
200
|
mem_iter_diffs = np.array(mem_tracks[1:]) - np.array(mem_tracks[:-1])
|
|
182
201
|
mem_incr_mean, mem_incr_std = mem_iter_diffs.mean(), mem_iter_diffs.std()
|
|
183
202
|
mem_incr_mean, mem_incr_std = round(mem_incr_mean), round(mem_incr_std)
|
|
184
|
-
|
|
203
|
+
with warnings.catch_warnings():
|
|
204
|
+
# In the case that the memory usage is constant, this will raise
|
|
205
|
+
# a ConstantInputWarning error in pearsonr from scipy, this can
|
|
206
|
+
# be ignored.
|
|
207
|
+
warnings.filterwarnings(
|
|
208
|
+
"ignore",
|
|
209
|
+
message="An input array is constant; the correlation coefficient is not defined",
|
|
210
|
+
)
|
|
211
|
+
mem_iter_corr, _ = pearsonr(mem_tracks, list(range(len(mem_tracks))))
|
|
212
|
+
|
|
185
213
|
if mem_iter_corr > 0.95:
|
|
186
214
|
logging.warning(
|
|
187
215
|
"Memory usage is steadily increasing with iterations "
|
|
@@ -190,12 +218,17 @@ def _kfold_function_template(estimator, data_transform_function, data_shape):
|
|
|
190
218
|
"Memory usage increase per iteration: "
|
|
191
219
|
f"{mem_incr_mean}±{mem_incr_std} bytes"
|
|
192
220
|
)
|
|
193
|
-
mem_before_gc
|
|
221
|
+
mem_before_gc = get_traced_memory(queue)
|
|
194
222
|
mem_diff = mem_before_gc - mem_before
|
|
223
|
+
if isinstance(estimator, BaseEstimator):
|
|
224
|
+
name = str(estimator)
|
|
225
|
+
else:
|
|
226
|
+
name = estimator.__name__
|
|
227
|
+
|
|
195
228
|
message = (
|
|
196
229
|
"Size of extra allocated memory {} using garbage collector "
|
|
197
230
|
f"is greater than {EXTRA_MEMORY_THRESHOLD * 100}% of input data"
|
|
198
|
-
f"\n\tAlgorithm: {
|
|
231
|
+
f"\n\tAlgorithm: {name}"
|
|
199
232
|
f"\n\tInput data size: {data_memory_size} bytes"
|
|
200
233
|
"\n\tExtra allocated memory size: {} bytes"
|
|
201
234
|
" / {} %"
|
|
@@ -207,21 +240,51 @@ def _kfold_function_template(estimator, data_transform_function, data_shape):
|
|
|
207
240
|
)
|
|
208
241
|
)
|
|
209
242
|
gc.collect()
|
|
210
|
-
mem_after
|
|
243
|
+
mem_after = get_traced_memory(queue)
|
|
211
244
|
tracemalloc.stop()
|
|
212
245
|
mem_diff = mem_after - mem_before
|
|
213
246
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
247
|
+
# GPU offloading with SYCL contains a program/kernel cache which should
|
|
248
|
+
# be controllable via a KernelProgramCache object in the SYCL context.
|
|
249
|
+
# The programs and kernels are stored on the GPU, but cannot be cleared
|
|
250
|
+
# as this class is not available for access in all oneDAL DPC++ runtimes.
|
|
251
|
+
# Therefore, until this is implemented this test must be skipped for gpu
|
|
252
|
+
# as it looks like a memory leak (at least there is no way to discern a
|
|
253
|
+
# leak on the first run).
|
|
254
|
+
if queue is None or queue.sycl_device.is_cpu:
|
|
255
|
+
assert mem_diff < EXTRA_MEMORY_THRESHOLD * data_memory_size, message.format(
|
|
256
|
+
"after", mem_diff, round((mem_diff) / data_memory_size * 100, 2)
|
|
257
|
+
)
|
|
217
258
|
|
|
218
259
|
|
|
219
|
-
|
|
260
|
+
@pytest.mark.parametrize("order", ["F", "C"])
@pytest.mark.parametrize(
    "dataframe,queue", get_dataframes_and_queues("numpy,pandas,dpctl", "cpu")
)
@pytest.mark.parametrize("estimator", CPU_ESTIMATORS.keys())
@pytest.mark.parametrize("data_shape", data_shapes)
def test_memory_leaks(estimator, dataframe, queue, order, data_shape):
    """Check every CPU estimator/function for memory growth across folds.

    ``estimator`` is a key of ``CPU_ESTIMATORS``; ``order`` selects the
    memory-layout function from ``ORDER_DICT`` that is applied to the data.
    """
    runs_on_device = queue is not None
    if runs_on_device and estimator == "_assert_all_finite":
        pytest.skip(f"{estimator} is not designed for device offloading")

    target = CPU_ESTIMATORS[estimator]
    layout = ORDER_DICT[order]
    _kfold_function_template(target, dataframe, data_shape, queue, layout)
|
|
221
274
|
|
|
222
|
-
|
|
223
|
-
@pytest.mark.
|
|
224
|
-
|
|
275
|
+
|
|
276
|
+
@pytest.mark.skipif(
    "ZES_ENABLE_SYSMAN" not in os.environ or not is_dpctl_available("gpu"),
    reason="SYCL device memory leak check requires the level zero sysman",
)
@pytest.mark.parametrize("queue", get_queues("gpu"))
@pytest.mark.parametrize("estimator", GPU_ESTIMATORS.keys())
@pytest.mark.parametrize("order", ["F", "C"])
@pytest.mark.parametrize("data_shape", data_shapes)
def test_gpu_memory_leaks(estimator, queue, order, data_shape):
    """Check every GPU estimator for memory growth while offloaded to ``queue``.

    The test is skipped unless the ``ZES_ENABLE_SYSMAN`` environment variable
    is set and ``is_dpctl_available("gpu")`` is true. ``estimator`` is a key
    of ``GPU_ESTIMATORS``; ``order`` selects the layout function from
    ``ORDER_DICT``.
    """
    if data_shape == (2000, 50) and "ExtraTrees" in estimator:
        pytest.skip("Avoid a segmentation fault in Extra Trees algorithms")

    layout = ORDER_DICT[order]
    target = GPU_ESTIMATORS[estimator]
    # No dataframe conversion target is passed (None); the run is wrapped in
    # config_context(target_offload=queue).
    with config_context(target_offload=queue):
        _kfold_function_template(target, None, data_shape, queue, layout)
|
|
@@ -22,6 +22,7 @@ import pytest
|
|
|
22
22
|
from sklearn.base import BaseEstimator
|
|
23
23
|
from sklearn.datasets import make_classification
|
|
24
24
|
|
|
25
|
+
from sklearnex.decomposition import PCA
|
|
25
26
|
from sklearnex.dispatcher import get_patch_map
|
|
26
27
|
from sklearnex.svm import SVC, NuSVC
|
|
27
28
|
|
|
@@ -73,6 +74,9 @@ def test_n_jobs_support(caplog, estimator_class, n_jobs):
|
|
|
73
74
|
# by default, [Nu]SVC.predict_proba is restricted by @available_if decorator
|
|
74
75
|
if estimator_class in [SVC, NuSVC]:
|
|
75
76
|
estimator_kwargs["probability"] = True
|
|
77
|
+
# explicitly request oneDAL's PCA-Covariance algorithm
|
|
78
|
+
if estimator_class == PCA:
|
|
79
|
+
estimator_kwargs["svd_solver"] = "covariance_eigh"
|
|
76
80
|
estimator_instance = estimator_class(**estimator_kwargs)
|
|
77
81
|
# check `n_jobs` parameter doc entry
|
|
78
82
|
check_estimator_doc(estimator_class)
|
sklearnex/tests/test_patching.py
CHANGED
|
@@ -61,12 +61,15 @@ def test_pairwise_distances_patching(caplog, dataframe, queue, dtype, metric):
|
|
|
61
61
|
pytest.skip("pairwise_distances does not support GPU queues")
|
|
62
62
|
|
|
63
63
|
rng = nprnd.default_rng()
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
64
|
+
if dataframe == "pandas":
|
|
65
|
+
X = _convert_to_dataframe(
|
|
66
|
+
rng.random(size=1000).astype(dtype).reshape(1, -1),
|
|
67
|
+
target_df=dataframe,
|
|
68
|
+
)
|
|
69
|
+
else:
|
|
70
|
+
X = _convert_to_dataframe(
|
|
71
|
+
rng.random(size=1000), sycl_queue=queue, target_df=dataframe, dtype=dtype
|
|
72
|
+
)[None, :]
|
|
70
73
|
|
|
71
74
|
_ = pairwise_distances(X, metric=metric)
|
|
72
75
|
assert all(
|
|
@@ -90,14 +93,17 @@ def test_roc_auc_score_patching(caplog, dataframe, queue, dtype):
|
|
|
90
93
|
|
|
91
94
|
with caplog.at_level(logging.WARNING, logger="sklearnex"):
|
|
92
95
|
rng = nprnd.default_rng()
|
|
96
|
+
X = rng.integers(2, size=1000)
|
|
97
|
+
y = rng.integers(2, size=1000)
|
|
98
|
+
|
|
93
99
|
X = _convert_to_dataframe(
|
|
94
|
-
|
|
100
|
+
X,
|
|
95
101
|
sycl_queue=queue,
|
|
96
102
|
target_df=dataframe,
|
|
97
103
|
dtype=dtype,
|
|
98
104
|
)
|
|
99
105
|
y = _convert_to_dataframe(
|
|
100
|
-
|
|
106
|
+
y,
|
|
101
107
|
sycl_queue=queue,
|
|
102
108
|
target_df=dataframe,
|
|
103
109
|
dtype=dtype,
|
|
@@ -142,10 +148,16 @@ def test_standard_estimator_patching(caplog, dataframe, queue, dtype, estimator,
|
|
|
142
148
|
and dtype in [np.uint32, np.uint64]
|
|
143
149
|
):
|
|
144
150
|
pytest.skip("Windows segmentation fault for Ridge.predict for unsigned ints")
|
|
151
|
+
elif estimator == "IncrementalLinearRegression" and np.issubdtype(
|
|
152
|
+
dtype, np.integer
|
|
153
|
+
):
|
|
154
|
+
pytest.skip(
|
|
155
|
+
"IncrementalLinearRegression fails on oneDAL side with int types because dataset is filled by zeroes"
|
|
156
|
+
)
|
|
145
157
|
elif method and not hasattr(est, method):
|
|
146
158
|
pytest.skip(f"sklearn available_if prevents testing {estimator}.{method}")
|
|
147
159
|
|
|
148
|
-
X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)
|
|
160
|
+
X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)[0]
|
|
149
161
|
est.fit(X, y)
|
|
150
162
|
|
|
151
163
|
if method:
|
|
@@ -177,7 +189,7 @@ def test_special_estimator_patching(caplog, dataframe, queue, dtype, estimator,
|
|
|
177
189
|
elif dtype == np.float64 and queue and not queue.sycl_device.has_aspect_fp64:
|
|
178
190
|
pytest.skip("Hardware does not support fp64 SYCL testing")
|
|
179
191
|
|
|
180
|
-
X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)
|
|
192
|
+
X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)[0]
|
|
181
193
|
est.fit(X, y)
|
|
182
194
|
|
|
183
195
|
if method and not hasattr(est, method):
|