scikit-learn-intelex 2024.5.0__py310-none-manylinux1_x86_64.whl → 2024.7.0__py310-none-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of scikit-learn-intelex has been flagged as a potentially problematic release; consult the registry's advisory for the flagged details.

Files changed (73)
  1. {scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/METADATA +2 -2
  2. scikit_learn_intelex-2024.7.0.dist-info/RECORD +122 -0
  3. sklearnex/_config.py +3 -15
  4. sklearnex/_device_offload.py +9 -168
  5. sklearnex/basic_statistics/basic_statistics.py +127 -1
  6. sklearnex/basic_statistics/tests/test_basic_statistics.py +251 -0
  7. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +1 -1
  8. sklearnex/cluster/dbscan.py +3 -1
  9. sklearnex/cluster/k_means.py +8 -0
  10. sklearnex/cluster/tests/test_dbscan.py +8 -6
  11. sklearnex/cluster/tests/test_kmeans.py +15 -3
  12. sklearnex/conftest.py +11 -1
  13. sklearnex/covariance/incremental_covariance.py +64 -13
  14. sklearnex/covariance/tests/test_incremental_covariance.py +35 -0
  15. sklearnex/decomposition/pca.py +25 -1
  16. sklearnex/decomposition/tests/test_pca.py +4 -2
  17. sklearnex/dispatcher.py +109 -1
  18. sklearnex/ensemble/_forest.py +121 -57
  19. sklearnex/ensemble/tests/test_forest.py +7 -0
  20. sklearnex/glob/dispatcher.py +16 -2
  21. sklearnex/linear_model/coordinate_descent.py +13 -0
  22. sklearnex/linear_model/incremental_linear.py +102 -25
  23. sklearnex/linear_model/linear.py +25 -39
  24. sklearnex/linear_model/logistic_regression.py +92 -74
  25. sklearnex/linear_model/ridge.py +7 -0
  26. sklearnex/linear_model/tests/test_incremental_linear.py +10 -10
  27. sklearnex/linear_model/tests/test_linear.py +30 -5
  28. sklearnex/linear_model/tests/test_logreg.py +45 -3
  29. sklearnex/manifold/t_sne.py +4 -0
  30. sklearnex/metrics/pairwise.py +5 -0
  31. sklearnex/metrics/ranking.py +3 -0
  32. sklearnex/model_selection/split.py +3 -0
  33. sklearnex/neighbors/_lof.py +9 -0
  34. sklearnex/neighbors/common.py +45 -1
  35. sklearnex/neighbors/knn_classification.py +1 -20
  36. sklearnex/neighbors/knn_regression.py +25 -20
  37. sklearnex/neighbors/knn_unsupervised.py +31 -7
  38. sklearnex/preview/__init__.py +1 -1
  39. sklearnex/preview/decomposition/__init__.py +19 -0
  40. sklearnex/preview/decomposition/incremental_pca.py +228 -0
  41. sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
  42. sklearnex/preview/linear_model/__init__.py +19 -0
  43. sklearnex/preview/linear_model/ridge.py +419 -0
  44. sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
  45. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  46. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  47. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
  48. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  49. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  50. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  51. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  52. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +163 -0
  53. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  54. sklearnex/svm/_common.py +163 -20
  55. sklearnex/svm/nusvc.py +40 -4
  56. sklearnex/svm/nusvr.py +31 -2
  57. sklearnex/svm/svc.py +40 -4
  58. sklearnex/svm/svr.py +31 -2
  59. sklearnex/svm/tests/test_svm.py +12 -20
  60. sklearnex/tests/_utils.py +185 -30
  61. sklearnex/tests/_utils_spmd.py +185 -0
  62. sklearnex/tests/test_common.py +54 -0
  63. sklearnex/tests/test_config.py +4 -0
  64. sklearnex/tests/test_memory_usage.py +185 -126
  65. sklearnex/tests/test_monkeypatch.py +12 -4
  66. sklearnex/tests/test_patching.py +21 -25
  67. sklearnex/tests/test_run_to_run_stability.py +295 -0
  68. sklearnex/utils/_namespace.py +1 -1
  69. scikit_learn_intelex-2024.5.0.dist-info/RECORD +0 -104
  70. sklearnex/tests/test_run_to_run_stability_tests.py +0 -428
  71. {scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/LICENSE.txt +0 -0
  72. {scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/WHEEL +0 -0
  73. {scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,54 @@
+ # ==============================================================================
+ # Copyright 2024 Intel Corporation
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+ import os
+ from glob import glob
+
+ import pytest
+
+ ALLOWED_LOCATIONS = [
+     "_config.py",
+     "_device_offload.py",
+     "test",
+     "svc.py",
+     "svm" + os.sep + "_common.py",
+ ]
+
+
+ def test_target_offload_ban():
+     """This test blocks the use of target_offload in
+     in sklearnex files. Offloading computation to devices
+     via target_offload should only occur externally, and not
+     within the architecture of the sklearnex classes. This
+     is for clarity, traceability and maintainability.
+     """
+     from sklearnex import __file__ as loc
+
+     path = loc.replace("__init__.py", "")
+     files = [y for x in os.walk(path) for y in glob(os.path.join(x[0], "*.py"))]
+
+     output = []
+
+     for f in files:
+         if open(f, "r").read().find("target_offload") != -1:
+             output += [f.replace(path, "sklearnex" + os.sep)]
+
+     # remove this file from the list
+     for allowed in ALLOWED_LOCATIONS:
+         output = [i for i in output if allowed not in i]
+
+     output = "\n".join(output)
+     assert output == "", f"sklearn versioning is occuring in: \n{output}"
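
The new test_target_offload_ban test enforces that target_offload is applied only at call sites, never inside sklearnex internals. A minimal sketch of the intended external usage pattern, assuming a SYCL GPU is available and addressed as "gpu:0" (the device string and estimator choice are illustrative, not part of the diff):

    import numpy as np

    from sklearnex import config_context, patch_sklearn

    patch_sklearn()
    from sklearn.cluster import DBSCAN  # resolves to the patched estimator

    X = np.random.rand(100, 2)
    # Offloading is requested by the caller through config_context,
    # outside of the sklearnex class implementations.
    with config_context(target_offload="gpu:0", allow_fallback_to_host=True):
        DBSCAN(eps=0.3).fit(X)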
@@ -16,6 +16,7 @@
  
  import sklearn
  
+ import onedal
  import sklearnex
  
  
@@ -33,7 +34,10 @@ def test_set_config_works():
      )
  
      config = sklearnex.get_config()
+     onedal_config = onedal._config._get_config()
      assert config["target_offload"] == "cpu:0"
      assert config["allow_fallback_to_host"]
      assert config["assume_finite"]
+     assert onedal_config["target_offload"] == "cpu:0"
+     assert onedal_config["allow_fallback_to_host"]
      sklearnex.set_config(**default_config)
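
The added assertions verify that settings made through sklearnex.set_config are mirrored into the onedal backend configuration. A short sketch of that round trip (onedal._config._get_config is a private helper, shown here only because the test above relies on it):

    import onedal
    import sklearnex

    default_config = sklearnex.get_config()
    try:
        sklearnex.set_config(target_offload="cpu:0", allow_fallback_to_host=True)
        # the sklearnex-level view of the configuration
        assert sklearnex.get_config()["target_offload"] == "cpu:0"
        # ...and the same values propagated to the oneDAL backend
        assert onedal._config._get_config()["target_offload"] == "cpu:0"
    finally:
        sklearnex.set_config(**default_config)  # always restore the defaults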
@@ -14,126 +14,113 @@
  # limitations under the License.
  # ==============================================================================
  
-
  import gc
  import logging
+ import os
  import tracemalloc
  import types
+ import warnings
+ from inspect import isclass
  
  import numpy as np
  import pandas as pd
  import pytest
  from scipy.stats import pearsonr
- from sklearn.base import BaseEstimator
+ from sklearn.base import BaseEstimator, clone
  from sklearn.datasets import make_classification
  from sklearn.model_selection import KFold
  
- from sklearnex import get_patch_map
- from sklearnex.metrics import pairwise_distances, roc_auc_score
- from sklearnex.model_selection import train_test_split
- from sklearnex.utils import _assert_all_finite
-
-
- class TrainTestSplitEstimator:
-     def __init__(self):
-         pass
-
-     def fit(self, x, y):
-         train_test_split(x, y)
-
-
- class FiniteCheckEstimator:
-     def __init__(self):
-         pass
-
-     def fit(self, x, y):
-         _assert_all_finite(x)
-         _assert_all_finite(y)
-
-
- class PairwiseDistancesEstimator:
-     def fit(self, x, y):
-         pairwise_distances(x, metric=self.metric)
-
-
- class CosineDistancesEstimator(PairwiseDistancesEstimator):
-     def __init__(self):
-         self.metric = "cosine"
-
-
- class CorrelationDistancesEstimator(PairwiseDistancesEstimator):
-     def __init__(self):
-         self.metric = "correlation"
-
-
- class RocAucEstimator:
-     def __init__(self):
-         pass
-
-     def fit(self, x, y):
-         print(roc_auc_score(y, np.zeros(shape=y.shape, dtype=np.int32)))
-
-
- # add all daal4py estimators enabled in patching (except banned)
-
-
- def get_patched_estimators(ban_list, output_list):
-     patched_estimators = get_patch_map().values()
-     for listing in patched_estimators:
-         estimator, name = listing[0][0][2], listing[0][0][1]
-         if not isinstance(estimator, types.FunctionType):
-             if name not in ban_list:
-                 if issubclass(estimator, BaseEstimator):
-                     if hasattr(estimator, "fit"):
-                         output_list.append(estimator)
-
+ from onedal import _is_dpc_backend
+ from onedal.tests.utils._dataframes_support import (
+     _convert_to_dataframe,
+     get_dataframes_and_queues,
+ )
+ from onedal.tests.utils._device_selection import get_queues, is_dpctl_available
+ from sklearnex import config_context
+ from sklearnex.tests._utils import PATCHED_FUNCTIONS, PATCHED_MODELS, SPECIAL_INSTANCES
+ from sklearnex.utils import get_namespace
  
- def remove_duplicated_estimators(estimators_list):
-     estimators_map = {}
-     for estimator in estimators_list:
-         full_name = f"{estimator.__module__}.{estimator.__name__}"
-         estimators_map[full_name] = estimator
-     return estimators_map.values()
+ if _is_dpc_backend:
+     from onedal import _backend
  
  
- BANNED_ESTIMATORS = (
+ CPU_SKIP_LIST = (
+     "TSNE",  # too slow for using in testing on common data size
+     "config_context",  # does not malloc
+     "get_config",  # does not malloc
+     "set_config",  # does not malloc
+     "SVC(probability=True)",  # memory leak fortran numpy (investigate _fit_proba)
+     "NuSVC(probability=True)",  # memory leak fortran numpy (investigate _fit_proba)
      "IncrementalEmpiricalCovariance",  # dataframe_f issues
      "IncrementalLinearRegression",  # TODO fix memory leak issue in private CI for data_shape = (1000, 100), data_transform_function = dataframe_f
-     "TSNE",  # too slow for using in testing on common data size
+     "IncrementalPCA",  # TODO fix memory leak issue in private CI for data_shape = (1000, 100), data_transform_function = dataframe_f
+     "LogisticRegression(solver='newton-cg')",  # memory leak fortran (1000, 100)
  )
- estimators = [
-     TrainTestSplitEstimator,
-     FiniteCheckEstimator,
-     CosineDistancesEstimator,
-     CorrelationDistancesEstimator,
-     RocAucEstimator,
- ]
- get_patched_estimators(BANNED_ESTIMATORS, estimators)
- estimators = remove_duplicated_estimators(estimators)
  
+ GPU_SKIP_LIST = (
+     "TSNE",  # too slow for using in testing on common data size
+     "RandomForestRegressor",  # too slow for using in testing on common data size
+     "KMeans",  # does not support GPU offloading
+     "config_context",  # does not malloc
+     "get_config",  # does not malloc
+     "set_config",  # does not malloc
+     "Ridge",  # does not support GPU offloading (fails silently)
+     "ElasticNet",  # does not support GPU offloading (fails silently)
+     "Lasso",  # does not support GPU offloading (fails silently)
+     "SVR",  # does not support GPU offloading (fails silently)
+     "NuSVR",  # does not support GPU offloading (fails silently)
+     "NuSVC",  # does not support GPU offloading (fails silently)
+     "LogisticRegression",  # default parameters not supported, see solver=newton-cg
+     "NuSVC(probability=True)",  # does not support GPU offloading (fails silently)
+     "IncrementalLinearRegression",  # issue with potrf with the specific dataset
+     "LinearRegression",  # issue with potrf with the specific dataset
+ )
  
- def ndarray_c(x, y):
-     return np.ascontiguousarray(x), y
  
+ def gen_functions(functions):
+     func_dict = functions.copy()
  
- def ndarray_f(x, y):
-     return np.asfortranarray(x), y
+     roc_auc_score = func_dict.pop("roc_auc_score")
+     func_dict["roc_auc_score"] = lambda x, y: roc_auc_score(y, y)
  
+     pairwise_distances = func_dict.pop("pairwise_distances")
+     func_dict["pairwise_distances(metric='cosine')"] = lambda x, y: pairwise_distances(
+         x, metric="cosine"
+     )
+     func_dict["pairwise_distances(metric='correlation')"] = (
+         lambda x, y: pairwise_distances(x, metric="correlation")
+     )
  
- def dataframe_c(x, y):
-     return pd.DataFrame(np.ascontiguousarray(x)), pd.Series(y)
+     _assert_all_finite = func_dict.pop("_assert_all_finite")
+     func_dict["_assert_all_finite"] = lambda x, y: [
+         _assert_all_finite(x),
+         _assert_all_finite(y),
+     ]
+     return func_dict
  
  
- def dataframe_f(x, y):
-     return pd.DataFrame(np.asfortranarray(x)), pd.Series(y)
+ FUNCTIONS = gen_functions(PATCHED_FUNCTIONS)
  
+ CPU_ESTIMATORS = {
+     k: v
+     for k, v in {**PATCHED_MODELS, **SPECIAL_INSTANCES, **FUNCTIONS}.items()
+     if not k in CPU_SKIP_LIST
+ }
  
- data_transforms = [ndarray_c, ndarray_f, dataframe_c, dataframe_f]
+ GPU_ESTIMATORS = {
+     k: v
+     for k, v in {**PATCHED_MODELS, **SPECIAL_INSTANCES}.items()
+     if not k in GPU_SKIP_LIST
+ }
  
- data_shapes = [(1000, 100), (2000, 50)]
+ data_shapes = [
+     pytest.param((1000, 100), id="(1000, 100)"),
+     pytest.param((2000, 50), id="(2000, 50)"),
+ ]
  
  EXTRA_MEMORY_THRESHOLD = 0.15
  N_SPLITS = 10
+ ORDER_DICT = {"F": np.asfortranarray, "C": np.ascontiguousarray}
  
  
  def gen_clsf_data(n_samples, n_features):
@@ -147,45 +134,82 @@ def gen_clsf_data(n_samples, n_features):
      )
  
  
- def split_train_inference(kf, x, y, estimator):
+ def get_traced_memory(queue=None):
+     if _is_dpc_backend and queue and queue.sycl_device.is_gpu:
+         return _backend.get_used_memory(queue)
+     else:
+         return tracemalloc.get_traced_memory()[0]
+
+
+ def take(x, index, axis=0, queue=None):
+     xp, array_api = get_namespace(x)
+     if array_api:
+         return xp.take(x, xp.asarray(index, device=queue), axis=axis)
+     else:
+         return x.take(index, axis=axis)
+
+
+ def split_train_inference(kf, x, y, estimator, queue=None):
      mem_tracks = []
      for train_index, test_index in kf.split(x):
-         if isinstance(x, np.ndarray):
-             x_train, x_test = x[train_index], x[test_index]
-             y_train, y_test = y[train_index], y[test_index]
-         elif isinstance(x, pd.core.frame.DataFrame):
-             x_train, x_test = x.iloc[train_index], x.iloc[test_index]
-             y_train, y_test = y.iloc[train_index], y.iloc[test_index]
-         # TODO: add parameters for all estimators to prevent
-         # fallback to stock scikit-learn with default parameters
-
-         alg = estimator()
-         alg.fit(x_train, y_train)
-         if hasattr(alg, "predict"):
-             alg.predict(x_test)
-         elif hasattr(alg, "transform"):
-             alg.transform(x_test)
-         elif hasattr(alg, "kneighbors"):
-             alg.kneighbors(x_test)
-         del alg, x_train, x_test, y_train, y_test
-         mem_tracks.append(tracemalloc.get_traced_memory()[0])
+         x_train = take(x, train_index, queue=queue)
+         y_train = take(y, train_index, queue=queue)
+         x_test = take(x, test_index, queue=queue)
+         y_test = take(y, test_index, queue=queue)
+
+         if isclass(estimator) and issubclass(estimator, BaseEstimator):
+             alg = estimator()
+             flag = True
+         elif isinstance(estimator, BaseEstimator):
+             alg = clone(estimator)
+             flag = True
+         else:
+             flag = False
+
+         if flag:
+             alg.fit(x_train, y_train)
+             if hasattr(alg, "predict"):
+                 alg.predict(x_test)
+             elif hasattr(alg, "transform"):
+                 alg.transform(x_test)
+             elif hasattr(alg, "kneighbors"):
+                 alg.kneighbors(x_test)
+             del alg
+         else:
+             estimator(x_train, y_train)
+
+         del x_train, x_test, y_train, y_test, flag
+         mem_tracks.append(get_traced_memory(queue))
      return mem_tracks
  
  
- def _kfold_function_template(estimator, data_transform_function, data_shape):
+ def _kfold_function_template(estimator, dataframe, data_shape, queue=None, func=None):
      tracemalloc.start()
  
      n_samples, n_features = data_shape
-     x, y, data_memory_size = gen_clsf_data(n_samples, n_features)
+     X, y, data_memory_size = gen_clsf_data(n_samples, n_features)
      kf = KFold(n_splits=N_SPLITS)
-     x, y = data_transform_function(x, y)
+     if func:
+         X = func(X)
+
+     X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+     y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
  
-     mem_before, _ = tracemalloc.get_traced_memory()
-     mem_tracks = split_train_inference(kf, x, y, estimator)
+     mem_before = get_traced_memory(queue)
+     mem_tracks = split_train_inference(kf, X, y, estimator, queue=queue)
      mem_iter_diffs = np.array(mem_tracks[1:]) - np.array(mem_tracks[:-1])
      mem_incr_mean, mem_incr_std = mem_iter_diffs.mean(), mem_iter_diffs.std()
      mem_incr_mean, mem_incr_std = round(mem_incr_mean), round(mem_incr_std)
-     mem_iter_corr, _ = pearsonr(mem_tracks, list(range(len(mem_tracks))))
+     with warnings.catch_warnings():
+         # In the case that the memory usage is constant, this will raise
+         # a ConstantInputWarning error in pearsonr from scipy, this can
+         # be ignored.
+         warnings.filterwarnings(
+             "ignore",
+             message="An input array is constant; the correlation coefficient is not defined",
+         )
+         mem_iter_corr, _ = pearsonr(mem_tracks, list(range(len(mem_tracks))))
+
      if mem_iter_corr > 0.95:
          logging.warning(
              "Memory usage is steadily increasing with iterations "
@@ -194,12 +218,17 @@ def _kfold_function_template(estimator, data_transform_function, data_shape):
              "Memory usage increase per iteration: "
             f"{mem_incr_mean}±{mem_incr_std} bytes"
          )
-     mem_before_gc, _ = tracemalloc.get_traced_memory()
+     mem_before_gc = get_traced_memory(queue)
      mem_diff = mem_before_gc - mem_before
+     if isinstance(estimator, BaseEstimator):
+         name = str(estimator)
+     else:
+         name = estimator.__name__
+
      message = (
          "Size of extra allocated memory {} using garbage collector "
          f"is greater than {EXTRA_MEMORY_THRESHOLD * 100}% of input data"
-         f"\n\tAlgorithm: {estimator.__name__}"
+         f"\n\tAlgorithm: {name}"
          f"\n\tInput data size: {data_memory_size} bytes"
          "\n\tExtra allocated memory size: {} bytes"
          " / {} %"
@@ -211,21 +240,51 @@ def _kfold_function_template(estimator, data_transform_function, data_shape):
          )
      )
      gc.collect()
-     mem_after, _ = tracemalloc.get_traced_memory()
+     mem_after = get_traced_memory(queue)
      tracemalloc.stop()
      mem_diff = mem_after - mem_before
  
-     assert mem_diff < EXTRA_MEMORY_THRESHOLD * data_memory_size, message.format(
-         "after", mem_diff, round((mem_diff) / data_memory_size * 100, 2)
-     )
+     # GPU offloading with SYCL contains a program/kernel cache which should
+     # be controllable via a KernelProgramCache object in the SYCL context.
+     # The programs and kernels are stored on the GPU, but cannot be cleared
+     # as this class is not available for access in all oneDAL DPC++ runtimes.
+     # Therefore, until this is implemented this test must be skipped for gpu
+     # as it looks like a memory leak (at least there is no way to discern a
+     # leak on the first run).
+     if queue is None or queue.sycl_device.is_cpu:
+         assert mem_diff < EXTRA_MEMORY_THRESHOLD * data_memory_size, message.format(
+             "after", mem_diff, round((mem_diff) / data_memory_size * 100, 2)
+         )
  
  
- # disable fallback check as logging impacts memory use
+ @pytest.mark.parametrize("order", ["F", "C"])
+ @pytest.mark.parametrize(
+     "dataframe,queue", get_dataframes_and_queues("numpy,pandas,dpctl", "cpu")
+ )
+ @pytest.mark.parametrize("estimator", CPU_ESTIMATORS.keys())
+ @pytest.mark.parametrize("data_shape", data_shapes)
+ def test_memory_leaks(estimator, dataframe, queue, order, data_shape):
+     func = ORDER_DICT[order]
+     if estimator == "_assert_all_finite" and queue is not None:
+         pytest.skip(f"{estimator} is not designed for device offloading")
+
+     _kfold_function_template(
+         CPU_ESTIMATORS[estimator], dataframe, data_shape, queue, func
+     )
  
  
- @pytest.mark.allow_sklearn_fallback
- @pytest.mark.parametrize("data_transform_function", data_transforms)
- @pytest.mark.parametrize("estimator", estimators)
+ @pytest.mark.skipif(
+     os.getenv("ZES_ENABLE_SYSMAN") is None or not is_dpctl_available("gpu"),
+     reason="SYCL device memory leak check requires the level zero sysman",
+ )
+ @pytest.mark.parametrize("queue", get_queues("gpu"))
+ @pytest.mark.parametrize("estimator", GPU_ESTIMATORS.keys())
+ @pytest.mark.parametrize("order", ["F", "C"])
  @pytest.mark.parametrize("data_shape", data_shapes)
- def test_memory_leaks(estimator, data_transform_function, data_shape):
-     _kfold_function_template(estimator, data_transform_function, data_shape)
+ def test_gpu_memory_leaks(estimator, queue, order, data_shape):
+     func = ORDER_DICT[order]
+     if "ExtraTrees" in estimator and data_shape == (2000, 50):
+         pytest.skip("Avoid a segmentation fault in Extra Trees algorithms")
+
+     with config_context(target_offload=queue):
+         _kfold_function_template(GPU_ESTIMATORS[estimator], None, data_shape, queue, func)
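
Stripped of the dataframe and SYCL plumbing, the reworked template above boils down to: record traced memory across repeated K-fold fit/predict cycles and fail if the growth that survives garbage collection exceeds a fraction of the input size. A simplified, CPU-only sketch of that check, reusing the suite's 0.15 threshold and 10 splits with an arbitrary stand-in estimator (not the parametrized estimator sets used by the suite):

    import gc
    import tracemalloc

    import numpy as np
    from sklearn.linear_model import LinearRegression  # stand-in estimator
    from sklearn.model_selection import KFold

    def check_memory_growth(estimator_cls, X, y, threshold=0.15, n_splits=10):
        tracemalloc.start()
        baseline = tracemalloc.get_traced_memory()[0]
        for train, test in KFold(n_splits=n_splits).split(X):
            model = estimator_cls().fit(X[train], y[train])
            model.predict(X[test])
            del model  # drop per-fold objects before the next measurement
        gc.collect()
        growth = tracemalloc.get_traced_memory()[0] - baseline
        tracemalloc.stop()
        assert growth < threshold * X.nbytes, f"leaked {growth} bytes"

    X = np.random.rand(1000, 100)
    y = np.random.rand(1000)
    check_memory_growth(LinearRegression, X, y)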
@@ -208,10 +208,11 @@ def test_preview_namespace():
          from sklearn.cluster import DBSCAN
          from sklearn.decomposition import PCA
          from sklearn.ensemble import RandomForestClassifier
-         from sklearn.linear_model import LinearRegression
+         from sklearn.linear_model import LinearRegression, Ridge
          from sklearn.svm import SVC
  
          return (
+             Ridge(),
              LinearRegression(),
              PCA(),
              DBSCAN(),
@@ -226,9 +227,12 @@ def test_preview_namespace():
  
      assert _is_preview_enabled()
  
-     lr, pca, dbscan, svc, rfc = get_estimators()
+     ridge, lr, pca, dbscan, svc, rfc = get_estimators()
      assert "sklearnex" in rfc.__module__
  
+     if daal_check_version((2024, "P", 600)):
+         assert "sklearnex.preview" in ridge.__module__
+
      if daal_check_version((2023, "P", 100)):
          assert "sklearnex" in lr.__module__
      else:
@@ -242,7 +246,8 @@ def test_preview_namespace():
      sklearnex.unpatch_sklearn()
  
      # no patching behavior
-     lr, pca, dbscan, svc, rfc = get_estimators()
+     ridge, lr, pca, dbscan, svc, rfc = get_estimators()
+     assert "sklearn." in ridge.__module__ and "daal4py" not in ridge.__module__
      assert "sklearn." in lr.__module__ and "daal4py" not in lr.__module__
      assert "sklearn." in pca.__module__ and "daal4py" not in pca.__module__
      assert "sklearn." in dbscan.__module__ and "daal4py" not in dbscan.__module__
@@ -254,7 +259,10 @@ def test_preview_namespace():
      sklearnex.patch_sklearn()
      assert not _is_preview_enabled()
  
-     lr, pca, dbscan, svc, rfc = get_estimators()
+     ridge, lr, pca, dbscan, svc, rfc = get_estimators()
+
+     assert "daal4py" in ridge.__module__
+
      if daal_check_version((2023, "P", 100)):
          assert "sklearnex" in lr.__module__
      else:
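
The preview-namespace test now also covers Ridge: with preview patching active, sklearn.linear_model.Ridge should resolve to a sklearnex.preview module, and to the daal4py-backed class under regular patching. A rough sketch of how that can be observed from user code, assuming preview is enabled through the preview flag of patch_sklearn and a recent enough oneDAL (assumptions on my part; the test itself drives preview through _is_preview_enabled and its own setup):

    from sklearnex import patch_sklearn, unpatch_sklearn

    patch_sklearn(preview=True)  # assumption: preview toggled via this keyword
    from sklearn.linear_model import Ridge

    # expected to point at a sklearnex.preview module when preview is active
    print(Ridge().__module__)

    unpatch_sklearn()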
@@ -43,6 +43,7 @@ from sklearnex.tests._utils import (
      SPECIAL_INSTANCES,
      UNPATCHED_FUNCTIONS,
      UNPATCHED_MODELS,
+     call_method,
      gen_dataset,
      gen_models_info,
  )
@@ -139,6 +140,9 @@ def test_standard_estimator_patching(caplog, dataframe, queue, dtype, estimator,
          ]:
              pytest.skip(f"{estimator} does not support GPU queues")
  
+         if "NearestNeighbors" in estimator and "radius" in method:
+             pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex")
+
          if estimator == "TSNE" and method == "fit_transform":
              pytest.skip("TSNE.fit_transform is too slow for common testing")
          elif (
@@ -148,30 +152,21 @@ def test_standard_estimator_patching(caplog, dataframe, queue, dtype, estimator,
              and dtype in [np.uint32, np.uint64]
          ):
              pytest.skip("Windows segmentation fault for Ridge.predict for unsigned ints")
-         elif estimator == "IncrementalLinearRegression" and dtype in [
-             np.int8,
-             np.int16,
-             np.int32,
-             np.int64,
-             np.uint8,
-             np.uint16,
-             np.uint32,
-             np.uint64,
-         ]:
+         elif estimator == "IncrementalLinearRegression" and np.issubdtype(
+             dtype, np.integer
+         ):
              pytest.skip(
                  "IncrementalLinearRegression fails on oneDAL side with int types because dataset is filled by zeroes"
              )
          elif method and not hasattr(est, method):
              pytest.skip(f"sklearn available_if prevents testing {estimator}.{method}")
  
-         X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)
+         X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)[0]
          est.fit(X, y)
  
          if method:
-             if method != "score":
-                 getattr(est, method)(X)
-             else:
-                 est.score(X, y)
+             call_method(est, method, X, y)
+
          assert all(
              [
                  "running accelerated version" in i.message
@@ -190,23 +185,24 @@ def test_special_estimator_patching(caplog, dataframe, queue, dtype, estimator,
      with caplog.at_level(logging.WARNING, logger="sklearnex"):
          est = SPECIAL_INSTANCES[estimator]
  
-         # Its not possible to get the dpnp/dpctl arrays to be in the proper dtype
-         if dtype == np.float16 and queue and not queue.sycl_device.has_aspect_fp16:
-             pytest.skip("Hardware does not support fp16 SYCL testing")
-         elif dtype == np.float64 and queue and not queue.sycl_device.has_aspect_fp64:
-             pytest.skip("Hardware does not support fp64 SYCL testing")
+         if queue:
+             # Its not possible to get the dpnp/dpctl arrays to be in the proper dtype
+             if dtype == np.float16 and not queue.sycl_device.has_aspect_fp16:
+                 pytest.skip("Hardware does not support fp16 SYCL testing")
+             elif dtype == np.float64 and not queue.sycl_device.has_aspect_fp64:
+                 pytest.skip("Hardware does not support fp64 SYCL testing")
+
+         if "NearestNeighbors" in estimator and "radius" in method:
+             pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex")
  
-         X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)
+         X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)[0]
          est.fit(X, y)
  
          if method and not hasattr(est, method):
             pytest.skip(f"sklearn available_if prevents testing {estimator}.{method}")
  
          if method:
-             if method != "score":
-                 getattr(est, method)(X)
-             else:
-                 est.score(X, y)
+             call_method(est, method, X, y)
  
          assert all(
              [
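
Both patching tests keep asserting on the "running accelerated version" records emitted on the "sklearnex" logger; outside of pytest the same signal can be surfaced by raising that logger's verbosity. A small sketch (the DEBUG level and handler setup are generic logging choices, not something specified by the diff):

    import logging

    import numpy as np
    from sklearnex import patch_sklearn

    patch_sklearn()
    from sklearn.cluster import DBSCAN

    # surface the per-call dispatch messages that the tests capture with caplog
    logging.basicConfig(format="%(name)s: %(message)s")
    logging.getLogger("sklearnex").setLevel(logging.DEBUG)

    X = np.random.rand(200, 2)
    DBSCAN(eps=0.5).fit(X)  # logs whether the accelerated or stock path ran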