scikit-learn-intelex 2024.4.0__py310-none-manylinux1_x86_64.whl → 2024.6.0__py310-none-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of scikit-learn-intelex might be problematic.

Files changed (44)
  1. {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/METADATA +2 -2
  2. {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/RECORD +43 -36
  3. sklearnex/_device_offload.py +8 -1
  4. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +2 -4
  5. sklearnex/cluster/dbscan.py +3 -0
  6. sklearnex/cluster/tests/test_dbscan.py +8 -6
  7. sklearnex/conftest.py +11 -1
  8. sklearnex/covariance/incremental_covariance.py +217 -30
  9. sklearnex/covariance/tests/test_incremental_covariance.py +54 -17
  10. sklearnex/decomposition/pca.py +68 -13
  11. sklearnex/decomposition/tests/test_pca.py +6 -4
  12. sklearnex/dispatcher.py +46 -1
  13. sklearnex/ensemble/_forest.py +114 -22
  14. sklearnex/ensemble/tests/test_forest.py +13 -3
  15. sklearnex/glob/dispatcher.py +16 -2
  16. sklearnex/linear_model/__init__.py +5 -3
  17. sklearnex/linear_model/incremental_linear.py +464 -0
  18. sklearnex/linear_model/linear.py +27 -9
  19. sklearnex/linear_model/logistic_regression.py +13 -15
  20. sklearnex/linear_model/tests/test_incremental_linear.py +200 -0
  21. sklearnex/linear_model/tests/test_linear.py +2 -2
  22. sklearnex/neighbors/knn_regression.py +24 -0
  23. sklearnex/neighbors/tests/test_neighbors.py +2 -2
  24. sklearnex/preview/__init__.py +1 -1
  25. sklearnex/preview/decomposition/__init__.py +19 -0
  26. sklearnex/preview/decomposition/incremental_pca.py +228 -0
  27. sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
  28. sklearnex/svm/_common.py +165 -20
  29. sklearnex/svm/nusvc.py +40 -4
  30. sklearnex/svm/nusvr.py +31 -2
  31. sklearnex/svm/svc.py +40 -4
  32. sklearnex/svm/svr.py +31 -2
  33. sklearnex/tests/_utils.py +70 -29
  34. sklearnex/tests/test_common.py +54 -0
  35. sklearnex/tests/test_memory_usage.py +195 -132
  36. sklearnex/tests/test_n_jobs_support.py +4 -0
  37. sklearnex/tests/test_patching.py +22 -10
  38. sklearnex/tests/test_run_to_run_stability.py +283 -0
  39. sklearnex/utils/_namespace.py +1 -1
  40. sklearnex/utils/tests/test_finite.py +89 -0
  41. sklearnex/tests/test_run_to_run_stability_tests.py +0 -428
  42. {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/LICENSE.txt +0 -0
  43. {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/WHEEL +0 -0
  44. {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/top_level.txt +0 -0
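
The headline additions in this release are the new out-of-core estimators: IncrementalLinearRegression (item 17) and the preview IncrementalPCA (items 26-27). A minimal usage sketch, assuming these estimators are exported from the new modules under the paths shown above and follow scikit-learn's partial_fit convention:

    import numpy as np
    from sklearnex.linear_model import IncrementalLinearRegression
    from sklearnex.preview.decomposition import IncrementalPCA

    X = np.random.rand(1000, 10)
    y = np.random.rand(1000)

    # Feed the data in batches; partial_fit accumulates partial results,
    # so the full dataset never has to be held in memory at once.
    linreg = IncrementalLinearRegression()
    pca = IncrementalPCA(n_components=3)
    for X_batch, y_batch in zip(np.array_split(X, 4), np.array_split(y, 4)):
        linreg.partial_fit(X_batch, y_batch)
        pca.partial_fit(X_batch)

    print(linreg.coef_.shape, pca.explained_variance_ratio_)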
The largest change is the rewrite of sklearnex/tests/test_memory_usage.py (item 35):

@@ -14,122 +14,113 @@
  # limitations under the License.
  # ==============================================================================

-
  import gc
  import logging
+ import os
  import tracemalloc
  import types
+ import warnings
+ from inspect import isclass

  import numpy as np
  import pandas as pd
  import pytest
  from scipy.stats import pearsonr
- from sklearn.base import BaseEstimator
+ from sklearn.base import BaseEstimator, clone
  from sklearn.datasets import make_classification
  from sklearn.model_selection import KFold

- from sklearnex import get_patch_map
- from sklearnex.metrics import pairwise_distances, roc_auc_score
- from sklearnex.model_selection import train_test_split
- from sklearnex.utils import _assert_all_finite
-
-
- class TrainTestSplitEstimator:
-     def __init__(self):
-         pass
-
-     def fit(self, x, y):
-         train_test_split(x, y)
-
-
- class FiniteCheckEstimator:
-     def __init__(self):
-         pass
-
-     def fit(self, x, y):
-         _assert_all_finite(x)
-         _assert_all_finite(y)
-
-
- class PairwiseDistancesEstimator:
-     def fit(self, x, y):
-         pairwise_distances(x, metric=self.metric)
-
-
- class CosineDistancesEstimator(PairwiseDistancesEstimator):
-     def __init__(self):
-         self.metric = "cosine"
-
-
- class CorrelationDistancesEstimator(PairwiseDistancesEstimator):
-     def __init__(self):
-         self.metric = "correlation"
-
-
- class RocAucEstimator:
-     def __init__(self):
-         pass
-
-     def fit(self, x, y):
-         print(roc_auc_score(y, np.zeros(shape=y.shape, dtype=np.int32)))
-
-
- # add all daal4py estimators enabled in patching (except banned)
+ from onedal import _is_dpc_backend
+ from onedal.tests.utils._dataframes_support import (
+     _convert_to_dataframe,
+     get_dataframes_and_queues,
+ )
+ from onedal.tests.utils._device_selection import get_queues, is_dpctl_available
+ from sklearnex import config_context
+ from sklearnex.tests._utils import PATCHED_FUNCTIONS, PATCHED_MODELS, SPECIAL_INSTANCES
+ from sklearnex.utils import get_namespace
+
+ if _is_dpc_backend:
+     from onedal import _backend
+
+
+ CPU_SKIP_LIST = (
+     "TSNE",  # too slow for using in testing on common data size
+     "config_context",  # does not malloc
+     "get_config",  # does not malloc
+     "set_config",  # does not malloc
+     "SVC(probability=True)",  # memory leak fortran numpy (investigate _fit_proba)
+     "NuSVC(probability=True)",  # memory leak fortran numpy (investigate _fit_proba)
+     "IncrementalEmpiricalCovariance",  # dataframe_f issues
+     "IncrementalLinearRegression",  # TODO fix memory leak issue in private CI for data_shape = (1000, 100), data_transform_function = dataframe_f
+     "IncrementalPCA",  # TODO fix memory leak issue in private CI for data_shape = (1000, 100), data_transform_function = dataframe_f
+     "LogisticRegression(solver='newton-cg')",  # memory leak fortran (1000, 100)
+ )
+
+ GPU_SKIP_LIST = (
+     "TSNE",  # too slow for using in testing on common data size
+     "RandomForestRegressor",  # too slow for using in testing on common data size
+     "KMeans",  # does not support GPU offloading
+     "config_context",  # does not malloc
+     "get_config",  # does not malloc
+     "set_config",  # does not malloc
+     "Ridge",  # does not support GPU offloading (fails silently)
+     "ElasticNet",  # does not support GPU offloading (fails silently)
+     "Lasso",  # does not support GPU offloading (fails silently)
+     "SVR",  # does not support GPU offloading (fails silently)
+     "NuSVR",  # does not support GPU offloading (fails silently)
+     "NuSVC",  # does not support GPU offloading (fails silently)
+     "LogisticRegression",  # default parameters not supported, see solver=newton-cg
+     "NuSVC(probability=True)",  # does not support GPU offloading (fails silently)
+     "IncrementalLinearRegression",  # issue with potrf with the specific dataset
+     "LinearRegression",  # issue with potrf with the specific dataset
+ )
+
+
+ def gen_functions(functions):
+     func_dict = functions.copy()
+
+     roc_auc_score = func_dict.pop("roc_auc_score")
+     func_dict["roc_auc_score"] = lambda x, y: roc_auc_score(y, y)
+
+     pairwise_distances = func_dict.pop("pairwise_distances")
+     func_dict["pairwise_distances(metric='cosine')"] = lambda x, y: pairwise_distances(
+         x, metric="cosine"
+     )
+     func_dict["pairwise_distances(metric='correlation')"] = (
+         lambda x, y: pairwise_distances(x, metric="correlation")
+     )

+     _assert_all_finite = func_dict.pop("_assert_all_finite")
+     func_dict["_assert_all_finite"] = lambda x, y: [
+         _assert_all_finite(x),
+         _assert_all_finite(y),
+     ]
+     return func_dict

- def get_patched_estimators(ban_list, output_list):
-     patched_estimators = get_patch_map().values()
-     for listing in patched_estimators:
-         estimator, name = listing[0][0][2], listing[0][0][1]
-         if not isinstance(estimator, types.FunctionType):
-             if name not in ban_list:
-                 if issubclass(estimator, BaseEstimator):
-                     if hasattr(estimator, "fit"):
-                         output_list.append(estimator)

+ FUNCTIONS = gen_functions(PATCHED_FUNCTIONS)

- def remove_duplicated_estimators(estimators_list):
-     estimators_map = {}
-     for estimator in estimators_list:
-         full_name = f"{estimator.__module__}.{estimator.__name__}"
-         estimators_map[full_name] = estimator
-     return estimators_map.values()
+ CPU_ESTIMATORS = {
+     k: v
+     for k, v in {**PATCHED_MODELS, **SPECIAL_INSTANCES, **FUNCTIONS}.items()
+     if not k in CPU_SKIP_LIST
+ }

+ GPU_ESTIMATORS = {
+     k: v
+     for k, v in {**PATCHED_MODELS, **SPECIAL_INSTANCES}.items()
+     if not k in GPU_SKIP_LIST
+ }

- BANNED_ESTIMATORS = ("TSNE",)  # too slow for using in testing on common data size
- estimators = [
-     TrainTestSplitEstimator,
-     FiniteCheckEstimator,
-     CosineDistancesEstimator,
-     CorrelationDistancesEstimator,
-     RocAucEstimator,
+ data_shapes = [
+     pytest.param((1000, 100), id="(1000, 100)"),
+     pytest.param((2000, 50), id="(2000, 50)"),
  ]
- get_patched_estimators(BANNED_ESTIMATORS, estimators)
- estimators = remove_duplicated_estimators(estimators)
-
-
- def ndarray_c(x, y):
-     return np.ascontiguousarray(x), y
-
-
- def ndarray_f(x, y):
-     return np.asfortranarray(x), y
-
-
- def dataframe_c(x, y):
-     return pd.DataFrame(np.ascontiguousarray(x)), pd.Series(y)
-
-
- def dataframe_f(x, y):
-     return pd.DataFrame(np.asfortranarray(x)), pd.Series(y)
-
-
- data_transforms = [ndarray_c, ndarray_f, dataframe_c, dataframe_f]
-
- data_shapes = [(1000, 100), (2000, 50)]

  EXTRA_MEMORY_THRESHOLD = 0.15
  N_SPLITS = 10
+ ORDER_DICT = {"F": np.asfortranarray, "C": np.ascontiguousarray}


  def gen_clsf_data(n_samples, n_features):
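
The rewrite replaces the hand-written wrapper classes with dictionary pools keyed by human-readable names, so the skip lists above match dict keys, including reprs of preconfigured instances such as "SVC(probability=True)". A standalone sketch of the idiom, with placeholder entries in place of the real PATCHED_* dicts:

    # Placeholder pools standing in for PATCHED_MODELS / SPECIAL_INSTANCES.
    PATCHED_MODELS = {"KMeans": object(), "TSNE": object()}
    SPECIAL_INSTANCES = {"SVC(probability=True)": object()}
    CPU_SKIP_LIST = ("TSNE",)

    # Merge the pools, then drop anything named in the skip list.
    CPU_ESTIMATORS = {
        k: v
        for k, v in {**PATCHED_MODELS, **SPECIAL_INSTANCES}.items()
        if k not in CPU_SKIP_LIST
    }
    assert set(CPU_ESTIMATORS) == {"KMeans", "SVC(probability=True)"}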
@@ -143,45 +134,82 @@ def gen_clsf_data(n_samples, n_features):
      )


- def split_train_inference(kf, x, y, estimator):
+ def get_traced_memory(queue=None):
+     if _is_dpc_backend and queue and queue.sycl_device.is_gpu:
+         return _backend.get_used_memory(queue)
+     else:
+         return tracemalloc.get_traced_memory()[0]
+
+
+ def take(x, index, axis=0, queue=None):
+     xp, array_api = get_namespace(x)
+     if array_api:
+         return xp.take(x, xp.asarray(index, device=queue), axis=axis)
+     else:
+         return x.take(index, axis=axis)
+
+
+ def split_train_inference(kf, x, y, estimator, queue=None):
      mem_tracks = []
      for train_index, test_index in kf.split(x):
-         if isinstance(x, np.ndarray):
-             x_train, x_test = x[train_index], x[test_index]
-             y_train, y_test = y[train_index], y[test_index]
-         elif isinstance(x, pd.core.frame.DataFrame):
-             x_train, x_test = x.iloc[train_index], x.iloc[test_index]
-             y_train, y_test = y.iloc[train_index], y.iloc[test_index]
-         # TODO: add parameters for all estimators to prevent
-         # fallback to stock scikit-learn with default parameters
-
-         alg = estimator()
-         alg.fit(x_train, y_train)
-         if hasattr(alg, "predict"):
-             alg.predict(x_test)
-         elif hasattr(alg, "transform"):
-             alg.transform(x_test)
-         elif hasattr(alg, "kneighbors"):
-             alg.kneighbors(x_test)
-         del alg, x_train, x_test, y_train, y_test
-         mem_tracks.append(tracemalloc.get_traced_memory()[0])
+         x_train = take(x, train_index, queue=queue)
+         y_train = take(y, train_index, queue=queue)
+         x_test = take(x, test_index, queue=queue)
+         y_test = take(y, test_index, queue=queue)
+
+         if isclass(estimator) and issubclass(estimator, BaseEstimator):
+             alg = estimator()
+             flag = True
+         elif isinstance(estimator, BaseEstimator):
+             alg = clone(estimator)
+             flag = True
+         else:
+             flag = False
+
+         if flag:
+             alg.fit(x_train, y_train)
+             if hasattr(alg, "predict"):
+                 alg.predict(x_test)
+             elif hasattr(alg, "transform"):
+                 alg.transform(x_test)
+             elif hasattr(alg, "kneighbors"):
+                 alg.kneighbors(x_test)
+             del alg
+         else:
+             estimator(x_train, y_train)
+
+         del x_train, x_test, y_train, y_test, flag
+         mem_tracks.append(get_traced_memory(queue))
      return mem_tracks


- def _kfold_function_template(estimator, data_transform_function, data_shape):
+ def _kfold_function_template(estimator, dataframe, data_shape, queue=None, func=None):
      tracemalloc.start()

      n_samples, n_features = data_shape
-     x, y, data_memory_size = gen_clsf_data(n_samples, n_features)
+     X, y, data_memory_size = gen_clsf_data(n_samples, n_features)
      kf = KFold(n_splits=N_SPLITS)
-     x, y = data_transform_function(x, y)
+     if func:
+         X = func(X)
+
+     X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+     y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)

-     mem_before, _ = tracemalloc.get_traced_memory()
-     mem_tracks = split_train_inference(kf, x, y, estimator)
+     mem_before = get_traced_memory(queue)
+     mem_tracks = split_train_inference(kf, X, y, estimator, queue=queue)
      mem_iter_diffs = np.array(mem_tracks[1:]) - np.array(mem_tracks[:-1])
      mem_incr_mean, mem_incr_std = mem_iter_diffs.mean(), mem_iter_diffs.std()
      mem_incr_mean, mem_incr_std = round(mem_incr_mean), round(mem_incr_std)
-     mem_iter_corr, _ = pearsonr(mem_tracks, list(range(len(mem_tracks))))
+     with warnings.catch_warnings():
+         # In the case that the memory usage is constant, this will raise
+         # a ConstantInputWarning error in pearsonr from scipy, this can
+         # be ignored.
+         warnings.filterwarnings(
+             "ignore",
+             message="An input array is constant; the correlation coefficient is not defined",
+         )
+         mem_iter_corr, _ = pearsonr(mem_tracks, list(range(len(mem_tracks))))
+
      if mem_iter_corr > 0.95:
          logging.warning(
              "Memory usage is steadily increasing with iterations "
@@ -190,12 +218,17 @@ def _kfold_function_template(estimator, data_transform_function, data_shape):
          "Memory usage increase per iteration: "
          f"{mem_incr_mean}±{mem_incr_std} bytes"
      )
-     mem_before_gc, _ = tracemalloc.get_traced_memory()
+     mem_before_gc = get_traced_memory(queue)
      mem_diff = mem_before_gc - mem_before
+     if isinstance(estimator, BaseEstimator):
+         name = str(estimator)
+     else:
+         name = estimator.__name__
+
      message = (
          "Size of extra allocated memory {} using garbage collector "
          f"is greater than {EXTRA_MEMORY_THRESHOLD * 100}% of input data"
-         f"\n\tAlgorithm: {estimator.__name__}"
+         f"\n\tAlgorithm: {name}"
          f"\n\tInput data size: {data_memory_size} bytes"
          "\n\tExtra allocated memory size: {} bytes"
          " / {} %"
@@ -207,21 +240,51 @@
          )
      )
      gc.collect()
-     mem_after, _ = tracemalloc.get_traced_memory()
+     mem_after = get_traced_memory(queue)
      tracemalloc.stop()
      mem_diff = mem_after - mem_before

-     assert mem_diff < EXTRA_MEMORY_THRESHOLD * data_memory_size, message.format(
-         "after", mem_diff, round((mem_diff) / data_memory_size * 100, 2)
-     )
+     # GPU offloading with SYCL contains a program/kernel cache which should
+     # be controllable via a KernelProgramCache object in the SYCL context.
+     # The programs and kernels are stored on the GPU, but cannot be cleared
+     # as this class is not available for access in all oneDAL DPC++ runtimes.
+     # Therefore, until this is implemented this test must be skipped for gpu
+     # as it looks like a memory leak (at least there is no way to discern a
+     # leak on the first run).
+     if queue is None or queue.sycl_device.is_cpu:
+         assert mem_diff < EXTRA_MEMORY_THRESHOLD * data_memory_size, message.format(
+             "after", mem_diff, round((mem_diff) / data_memory_size * 100, 2)
+         )


- # disable fallback check as logging impacts memory use
+ @pytest.mark.parametrize("order", ["F", "C"])
+ @pytest.mark.parametrize(
+     "dataframe,queue", get_dataframes_and_queues("numpy,pandas,dpctl", "cpu")
+ )
+ @pytest.mark.parametrize("estimator", CPU_ESTIMATORS.keys())
+ @pytest.mark.parametrize("data_shape", data_shapes)
+ def test_memory_leaks(estimator, dataframe, queue, order, data_shape):
+     func = ORDER_DICT[order]
+     if estimator == "_assert_all_finite" and queue is not None:
+         pytest.skip(f"{estimator} is not designed for device offloading")

+     _kfold_function_template(
+         CPU_ESTIMATORS[estimator], dataframe, data_shape, queue, func
+     )

- @pytest.mark.allow_sklearn_fallback
- @pytest.mark.parametrize("data_transform_function", data_transforms)
- @pytest.mark.parametrize("estimator", estimators)
+
+ @pytest.mark.skipif(
+     os.getenv("ZES_ENABLE_SYSMAN") is None or not is_dpctl_available("gpu"),
+     reason="SYCL device memory leak check requires the level zero sysman",
+ )
+ @pytest.mark.parametrize("queue", get_queues("gpu"))
+ @pytest.mark.parametrize("estimator", GPU_ESTIMATORS.keys())
+ @pytest.mark.parametrize("order", ["F", "C"])
  @pytest.mark.parametrize("data_shape", data_shapes)
- def test_memory_leaks(estimator, data_transform_function, data_shape):
-     _kfold_function_template(estimator, data_transform_function, data_shape)
+ def test_gpu_memory_leaks(estimator, queue, order, data_shape):
+     func = ORDER_DICT[order]
+     if "ExtraTrees" in estimator and data_shape == (2000, 50):
+         pytest.skip("Avoid a segmentation fault in Extra Trees algorithms")
+
+     with config_context(target_offload=queue):
+         _kfold_function_template(GPU_ESTIMATORS[estimator], None, data_shape, queue, func)
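
The GPU variant relies on config_context(target_offload=...) rather than explicit device arrays, so every estimator call inside the template is dispatched to the queue's device. A minimal sketch of the same pattern (assumes dpctl and a SYCL-capable GPU runtime are available):

    import dpctl
    from sklearnex import config_context
    from sklearnex.cluster import DBSCAN

    queue = dpctl.SyclQueue("gpu")  # requires a SYCL-capable GPU
    with config_context(target_offload=queue):
        # fit calls inside the context run on the queue's device
        DBSCAN(eps=1.0).fit([[0.0, 0.0], [0.1, 0.1], [9.0, 9.0]])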
sklearnex/tests/test_n_jobs_support.py (item 36):

@@ -22,6 +22,7 @@ import pytest
  from sklearn.base import BaseEstimator
  from sklearn.datasets import make_classification

+ from sklearnex.decomposition import PCA
  from sklearnex.dispatcher import get_patch_map
  from sklearnex.svm import SVC, NuSVC

@@ -73,6 +74,9 @@ def test_n_jobs_support(caplog, estimator_class, n_jobs):
      # by default, [Nu]SVC.predict_proba is restricted by @available_if decorator
      if estimator_class in [SVC, NuSVC]:
          estimator_kwargs["probability"] = True
+     # explicitly request oneDAL's PCA-Covariance algorithm
+     if estimator_class == PCA:
+         estimator_kwargs["svd_solver"] = "covariance_eigh"
      estimator_instance = estimator_class(**estimator_kwargs)
      # check `n_jobs` parameter doc entry
      check_estimator_doc(estimator_class)
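
"covariance_eigh" routes PCA through the covariance-matrix eigendecomposition that oneDAL accelerates, rather than a full SVD. A sketch of the construction the test ends up making (the n_jobs value is illustrative; the n_jobs keyword itself is the sklearnex extension this test verifies):

    import numpy as np
    from sklearnex.decomposition import PCA

    X = np.random.rand(100, 5)
    PCA(svd_solver="covariance_eigh", n_jobs=2).fit(X)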
sklearnex/tests/test_patching.py (item 37):

@@ -61,12 +61,15 @@ def test_pairwise_distances_patching(caplog, dataframe, queue, dtype, metric):
          pytest.skip("pairwise_distances does not support GPU queues")

      rng = nprnd.default_rng()
-     X = _convert_to_dataframe(
-         rng.random(size=1000).reshape(1, -1),
-         sycl_queue=queue,
-         target_df=dataframe,
-         dtype=dtype,
-     )
+     if dataframe == "pandas":
+         X = _convert_to_dataframe(
+             rng.random(size=1000).astype(dtype).reshape(1, -1),
+             target_df=dataframe,
+         )
+     else:
+         X = _convert_to_dataframe(
+             rng.random(size=1000), sycl_queue=queue, target_df=dataframe, dtype=dtype
+         )[None, :]

      _ = pairwise_distances(X, metric=metric)
      assert all(
@@ -90,14 +93,17 @@ def test_roc_auc_score_patching(caplog, dataframe, queue, dtype):

      with caplog.at_level(logging.WARNING, logger="sklearnex"):
          rng = nprnd.default_rng()
+         X = rng.integers(2, size=1000)
+         y = rng.integers(2, size=1000)
+
          X = _convert_to_dataframe(
-             rng.integers(2, size=1000),
+             X,
              sycl_queue=queue,
              target_df=dataframe,
              dtype=dtype,
          )
          y = _convert_to_dataframe(
-             rng.integers(2, size=1000),
+             y,
              sycl_queue=queue,
              target_df=dataframe,
              dtype=dtype,
@@ -142,10 +148,16 @@ def test_standard_estimator_patching(caplog, dataframe, queue, dtype, estimator,
          and dtype in [np.uint32, np.uint64]
      ):
          pytest.skip("Windows segmentation fault for Ridge.predict for unsigned ints")
+     elif estimator == "IncrementalLinearRegression" and np.issubdtype(
+         dtype, np.integer
+     ):
+         pytest.skip(
+             "IncrementalLinearRegression fails on oneDAL side with int types because dataset is filled by zeroes"
+         )
      elif method and not hasattr(est, method):
          pytest.skip(f"sklearn available_if prevents testing {estimator}.{method}")

-     X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)
+     X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)[0]
      est.fit(X, y)

      if method:
@@ -177,7 +189,7 @@ def test_special_estimator_patching(caplog, dataframe, queue, dtype, estimator,
      elif dtype == np.float64 and queue and not queue.sycl_device.has_aspect_fp64:
          pytest.skip("Hardware does not support fp64 SYCL testing")

-     X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)
+     X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)[0]
      est.fit(X, y)

      if method and not hasattr(est, method):