scikit-learn-intelex 2024.2.0__py312-none-manylinux1_x86_64.whl → 2024.3.0__py312-none-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (35)
  1. {scikit_learn_intelex-2024.2.0.dist-info → scikit_learn_intelex-2024.3.0.dist-info}/METADATA +2 -2
  2. {scikit_learn_intelex-2024.2.0.dist-info → scikit_learn_intelex-2024.3.0.dist-info}/RECORD +30 -33
  3. sklearnex/__init__.py +9 -7
  4. sklearnex/cluster/dbscan.py +3 -1
  5. sklearnex/conftest.py +63 -0
  6. sklearnex/decomposition/pca.py +322 -1
  7. sklearnex/decomposition/tests/test_pca.py +34 -5
  8. sklearnex/dispatcher.py +72 -41
  9. sklearnex/ensemble/_forest.py +10 -14
  10. sklearnex/ensemble/tests/test_forest.py +15 -19
  11. sklearnex/linear_model/logistic_regression.py +13 -2
  12. sklearnex/linear_model/tests/test_logreg.py +0 -2
  13. sklearnex/neighbors/_lof.py +39 -2
  14. sklearnex/neighbors/knn_classification.py +7 -9
  15. sklearnex/neighbors/knn_regression.py +6 -9
  16. sklearnex/neighbors/knn_unsupervised.py +5 -8
  17. sklearnex/neighbors/tests/test_neighbors.py +0 -5
  18. sklearnex/preview/__init__.py +1 -1
  19. sklearnex/spmd/ensemble/forest.py +4 -12
  20. sklearnex/svm/nusvc.py +4 -0
  21. sklearnex/svm/nusvr.py +3 -3
  22. sklearnex/svm/svc.py +4 -0
  23. sklearnex/tests/_utils.py +155 -0
  24. sklearnex/tests/test_memory_usage.py +8 -3
  25. sklearnex/tests/test_monkeypatch.py +177 -149
  26. sklearnex/tests/test_parallel.py +6 -8
  27. sklearnex/tests/test_patching.py +305 -80
  28. sklearnex/preview/decomposition/__init__.py +0 -19
  29. sklearnex/preview/decomposition/pca.py +0 -374
  30. sklearnex/preview/decomposition/tests/test_preview_pca.py +0 -42
  31. sklearnex/tests/_models_info.py +0 -170
  32. sklearnex/tests/utils/_launch_algorithms.py +0 -118
  33. {scikit_learn_intelex-2024.2.0.dist-info → scikit_learn_intelex-2024.3.0.dist-info}/LICENSE.txt +0 -0
  34. {scikit_learn_intelex-2024.2.0.dist-info → scikit_learn_intelex-2024.3.0.dist-info}/WHEEL +0 -0
  35. {scikit_learn_intelex-2024.2.0.dist-info → scikit_learn_intelex-2024.3.0.dist-info}/top_level.txt +0 -0
sklearnex/tests/test_patching.py +305 -80
@@ -14,107 +14,284 @@
14
14
  # limitations under the License.
15
15
  # ==============================================================================
16
16
 
17
+
18
+ import importlib
17
19
  import inspect
20
+ import logging
18
21
  import os
19
- import pathlib
20
22
  import re
21
- import subprocess
22
23
  import sys
23
- from inspect import isclass
24
+ from inspect import signature
24
25
 
26
+ import numpy as np
27
+ import numpy.random as nprnd
25
28
  import pytest
26
- from _models_info import TO_SKIP
27
- from sklearn.base import BaseEstimator
28
-
29
- from sklearnex import get_patch_map, is_patched_instance, patch_sklearn, unpatch_sklearn
30
-
31
-
32
- def get_branch(s):
33
- if len(s) == 0:
34
- return "NO INFO"
35
- for i in s:
36
- if "failed to run accelerated version, fallback to original Scikit-learn" in i:
37
- return "was in OPT, but go in Scikit"
38
- for i in s:
39
- if "running accelerated version" in i:
40
- return "OPT"
41
- return "Scikit"
42
-
43
-
44
- def run_parse(mas, result):
45
- name, dtype = mas[0].split()
46
- temp = []
47
- INFO_POS = 16
48
- for i in range(1, len(mas)):
49
- mas[i] = mas[i][INFO_POS:] # remove 'SKLEARNEX INFO: '
50
- if not mas[i].startswith("sklearn"):
51
- ind = name + " " + dtype + " " + mas[i]
52
- result[ind] = get_branch(temp)
53
- temp.clear()
54
- else:
55
- temp.append(mas[i])
29
+ from _utils import (
30
+ DTYPES,
31
+ PATCHED_FUNCTIONS,
32
+ PATCHED_MODELS,
33
+ SPECIAL_INSTANCES,
34
+ UNPATCHED_FUNCTIONS,
35
+ UNPATCHED_MODELS,
36
+ gen_dataset,
37
+ gen_models_info,
38
+ )
39
+ from sklearn.base import (
40
+ BaseEstimator,
41
+ ClassifierMixin,
42
+ ClusterMixin,
43
+ OutlierMixin,
44
+ RegressorMixin,
45
+ TransformerMixin,
46
+ )
47
+
48
+ from daal4py.sklearn._utils import sklearn_check_version
49
+ from onedal.tests.utils._dataframes_support import (
50
+ _convert_to_dataframe,
51
+ get_dataframes_and_queues,
52
+ )
53
+ from sklearnex import is_patched_instance
54
+ from sklearnex.dispatcher import _is_preview_enabled
55
+ from sklearnex.metrics import pairwise_distances, roc_auc_score
56
56
 
57
57
 
58
- def get_result_log():
59
- os.environ["SKLEARNEX_VERBOSE"] = "INFO"
60
- absolute_path = str(pathlib.Path(__file__).parent.absolute())
61
- try:
62
- process = subprocess.check_output(
63
- [sys.executable, absolute_path + "/utils/_launch_algorithms.py"]
58
+ @pytest.mark.parametrize("dtype", DTYPES)
59
+ @pytest.mark.parametrize(
60
+ "dataframe, queue", get_dataframes_and_queues(dataframe_filter_="numpy")
61
+ )
62
+ @pytest.mark.parametrize("metric", ["cosine", "correlation"])
63
+ def test_pairwise_distances_patching(caplog, dataframe, queue, dtype, metric):
64
+ with caplog.at_level(logging.WARNING, logger="sklearnex"):
65
+ rng = nprnd.default_rng()
66
+ X = _convert_to_dataframe(
67
+ rng.random(size=1000), sycl_queue=queue, target_df=dataframe, dtype=dtype
64
68
  )
65
- except subprocess.CalledProcessError as e:
66
- print(e)
67
- exit(1)
68
- mas = []
69
- result = {}
70
- for i in process.decode().split("\n"):
71
- if i.startswith("SKLEARNEX WARNING"):
72
- continue
73
- if not i.startswith("SKLEARNEX INFO") and len(mas) != 0:
74
- run_parse(mas, result)
75
- mas.clear()
76
- mas.append(i.strip())
77
- else:
78
- mas.append(i.strip())
79
- del os.environ["SKLEARNEX_VERBOSE"]
80
- return result
81
69
 
70
+ _ = pairwise_distances(X.reshape(1, -1), metric=metric)
71
+ assert all(
72
+ [
73
+ "running accelerated version" in i.message
74
+ or "fallback to original Scikit-learn" in i.message
75
+ for i in caplog.records
76
+ ]
77
+ ), f"sklearnex patching issue in pairwise_distances with log: \n{caplog.text}"
82
78
 
83
- result_log = get_result_log()
84
79
 
80
+ @pytest.mark.parametrize(
81
+ "dtype", [i for i in DTYPES if "32" in i.__name__ or "64" in i.__name__]
82
+ )
83
+ @pytest.mark.parametrize(
84
+ "dataframe, queue", get_dataframes_and_queues(dataframe_filter_="numpy")
85
+ )
86
+ def test_roc_auc_score_patching(caplog, dataframe, queue, dtype):
87
+ if dtype in [np.uint32, np.uint64] and sys.platform == "win32":
88
+ pytest.skip("Windows issue with unsigned ints")
89
+ with caplog.at_level(logging.WARNING, logger="sklearnex"):
90
+ rng = nprnd.default_rng()
91
+ X = _convert_to_dataframe(
92
+ rng.integers(2, size=1000),
93
+ sycl_queue=queue,
94
+ target_df=dataframe,
95
+ dtype=dtype,
96
+ )
97
+ y = _convert_to_dataframe(
98
+ rng.integers(2, size=1000),
99
+ sycl_queue=queue,
100
+ target_df=dataframe,
101
+ dtype=dtype,
102
+ )
85
103
 
86
- @pytest.mark.parametrize("configuration", result_log)
87
- def test_patching(configuration):
88
- if "OPT" in result_log[configuration]:
89
- return
90
- for skip in TO_SKIP:
91
- if re.search(skip, configuration) is not None:
92
- pytest.skip("SKIPPED", allow_module_level=False)
93
- raise ValueError("Test patching failed: " + configuration)
104
+ _ = roc_auc_score(X, y)
105
+ assert all(
106
+ [
107
+ "running accelerated version" in i.message
108
+ or "fallback to original Scikit-learn" in i.message
109
+ for i in caplog.records
110
+ ]
111
+ ), f"sklearnex patching issue in roc_auc_score with log: \n{caplog.text}"
94
112
 
95
113
 
96
- def _load_all_models(patched):
97
- if patched:
98
- patch_sklearn()
114
+ @pytest.mark.parametrize("dtype", DTYPES)
115
+ @pytest.mark.parametrize(
116
+ "dataframe, queue", get_dataframes_and_queues(dataframe_filter_="numpy")
117
+ )
118
+ @pytest.mark.parametrize("estimator, method", gen_models_info(PATCHED_MODELS))
119
+ def test_standard_estimator_patching(caplog, dataframe, queue, dtype, estimator, method):
120
+ with caplog.at_level(logging.WARNING, logger="sklearnex"):
121
+ est = PATCHED_MODELS[estimator]()
99
122
 
100
- models = {}
101
- for patch_infos in get_patch_map().values():
102
- maybe_class = getattr(patch_infos[0][0][0], patch_infos[0][0][1], None)
103
- if (
104
- maybe_class is not None
105
- and isclass(maybe_class)
106
- and issubclass(maybe_class, BaseEstimator)
123
+ if estimator == "TSNE" and method == "fit_transform":
124
+ pytest.skip("TSNE.fit_transform is too slow for common testing")
125
+ elif (
126
+ estimator == "Ridge"
127
+ and method in ["predict", "score"]
128
+ and sys.platform == "win32"
129
+ and dtype in [np.uint32, np.uint64]
107
130
  ):
108
- models[patch_infos[0][0][1]] = maybe_class
131
+ pytest.skip("Windows segmentation fault for Ridge.predict for unsigned ints")
132
+ elif not hasattr(est, method):
133
+ pytest.skip(f"sklearn available_if prevents testing {estimator}.{method}")
134
+ X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)
135
+ est.fit(X, y)
136
+
137
+ if method != "score":
138
+ getattr(est, method)(X)
139
+ else:
140
+ est.score(X, y)
141
+ assert all(
142
+ [
143
+ "running accelerated version" in i.message
144
+ or "fallback to original Scikit-learn" in i.message
145
+ for i in caplog.records
146
+ ]
147
+ ), f"sklearnex patching issue in {estimator}.{method} with log: \n{caplog.text}"
148
+
149
+
150
+ @pytest.mark.parametrize("dtype", DTYPES)
151
+ @pytest.mark.parametrize(
152
+ "dataframe, queue", get_dataframes_and_queues(dataframe_filter_="numpy")
153
+ )
154
+ @pytest.mark.parametrize("estimator, method", gen_models_info(SPECIAL_INSTANCES))
155
+ def test_special_estimator_patching(caplog, dataframe, queue, dtype, estimator, method):
156
+ # prepare logging
157
+
158
+ with caplog.at_level(logging.WARNING, logger="sklearnex"):
159
+ est = SPECIAL_INSTANCES[estimator]
160
+
161
+ X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)
162
+ est.fit(X, y)
163
+
164
+ if not hasattr(est, method):
165
+ pytest.skip(f"sklearn available_if prevents testing {estimator}.{method}")
166
+ if method != "score":
167
+ getattr(est, method)(X)
168
+ else:
169
+ est.score(X, y)
170
+ assert all(
171
+ [
172
+ "running accelerated version" in i.message
173
+ or "fallback to original Scikit-learn" in i.message
174
+ for i in caplog.records
175
+ ]
176
+ ), f"sklearnex patching issue in {estimator}.{method} with log: \n{caplog.text}"
177
+
178
+
179
+ @pytest.mark.parametrize("estimator", UNPATCHED_MODELS.keys())
180
+ def test_standard_estimator_signatures(estimator):
181
+ est = PATCHED_MODELS[estimator]()
182
+ unpatched_est = UNPATCHED_MODELS[estimator]()
109
183
 
110
- if patched:
111
- unpatch_sklearn()
184
+ # all public sklearn methods should have signature matches in sklearnex
112
185
 
113
- return models
186
+ unpatched_est_methods = [
187
+ i
188
+ for i in dir(unpatched_est)
189
+ if not i.startswith("_") and not i.endswith("_") and hasattr(unpatched_est, i)
190
+ ]
191
+ for method in unpatched_est_methods:
192
+ est_method = getattr(est, method)
193
+ unpatched_est_method = getattr(unpatched_est, method)
194
+ if callable(unpatched_est_method):
195
+ regex = rf"(?:sklearn|daal4py)\S*{estimator}" # needed due to differences in module structure
196
+ patched_sig = re.sub(regex, estimator, str(signature(est_method)))
197
+ unpatched_sig = re.sub(regex, estimator, str(signature(unpatched_est_method)))
198
+ assert (
199
+ patched_sig == unpatched_sig
200
+ ), f"Signature of {estimator}.{method} does not match sklearn"
114
201
 
115
202
 
116
- PATCHED_MODELS = _load_all_models(patched=True)
117
- UNPATCHED_MODELS = _load_all_models(patched=False)
203
+ @pytest.mark.parametrize("estimator", UNPATCHED_MODELS.keys())
204
+ def test_standard_estimator_init_signatures(estimator):
205
+ # Several estimators have additional parameters that are user-accessible
206
+ # which are sklearnex-specific. They will fail and are removed from tests.
207
+ # remove n_jobs due to estimator patching for sklearnex (known deviation)
208
+ patched_sig = str(signature(PATCHED_MODELS[estimator].__init__))
209
+ unpatched_sig = str(signature(UNPATCHED_MODELS[estimator].__init__))
210
+
211
+ # Sklearnex allows for positional kwargs and n_jobs, when sklearn doesn't
212
+ for kwarg in ["n_jobs=None", "*"]:
213
+ patched_sig = patched_sig.replace(", " + kwarg, "")
214
+ unpatched_sig = unpatched_sig.replace(", " + kwarg, "")
215
+
216
+ # Special sklearnex-specific kwargs are removed from signatures here
217
+ if estimator in [
218
+ "RandomForestRegressor",
219
+ "RandomForestClassifier",
220
+ "ExtraTreesRegressor",
221
+ "ExtraTreesClassifier",
222
+ ]:
223
+ for kwarg in ["min_bin_size=1", "max_bins=256"]:
224
+ patched_sig = patched_sig.replace(", " + kwarg, "")
225
+
226
+ assert (
227
+ patched_sig == unpatched_sig
228
+ ), f"Signature of {estimator}.__init__ does not match sklearn"
229
+
230
+
231
+ @pytest.mark.parametrize(
232
+ "function",
233
+ [
234
+ i
235
+ for i in UNPATCHED_FUNCTIONS.keys()
236
+ if i not in ["train_test_split", "set_config", "config_context"]
237
+ ],
238
+ )
239
+ def test_patched_function_signatures(function):
240
+ # certain functions are dropped from the test
241
+ # as they add functionality to the underlying sklearn function
242
+ if not sklearn_check_version("1.1") and function == "_assert_all_finite":
243
+ pytest.skip("Sklearn versioning not added to _assert_all_finite")
244
+ func = PATCHED_FUNCTIONS[function]
245
+ unpatched_func = UNPATCHED_FUNCTIONS[function]
246
+
247
+ if callable(unpatched_func):
248
+ assert str(signature(func)) == str(
249
+ signature(unpatched_func)
250
+ ), f"Signature of {func} does not match sklearn"
251
+
252
+
253
+ def test_patch_map_match():
254
+ # This rule applies to functions and classes which are out of preview.
255
+ # Items listed in a matching submodule's __all__ attribute should be
256
+ # in get_patch_map. There should not be any missing or additional elements.
257
+
258
+ def list_all_attr(string):
259
+ try:
260
+ modules = set(importlib.import_module(string).__all__)
261
+ except ModuleNotFoundError:
262
+ modules = set([None])
263
+ return modules
264
+
265
+ if _is_preview_enabled():
266
+ pytest.skip("preview sklearnex has been activated")
267
+ patched = {**PATCHED_MODELS, **PATCHED_FUNCTIONS}
268
+
269
+ sklearnex__all__ = list_all_attr("sklearnex")
270
+ sklearn__all__ = list_all_attr("sklearn")
271
+
272
+ module_map = {i: i for i in sklearnex__all__.intersection(sklearn__all__)}
273
+
274
+ # _assert_all_finite patches an internal sklearn function which isn't
275
+ # exposed via __all__ in sklearn. It is a special case where this rule
276
+ # is not applied (e.g. it is grandfathered in).
277
+ del patched["_assert_all_finite"]
278
+
279
+ # remove all scikit-learn-intelex-only estimators
280
+ for i in patched.copy():
281
+ if i not in UNPATCHED_MODELS and i not in UNPATCHED_FUNCTIONS:
282
+ del patched[i]
283
+
284
+ for module in module_map:
285
+ sklearn_module__all__ = list_all_attr("sklearn." + module_map[module])
286
+ sklearnex_module__all__ = list_all_attr("sklearnex." + module)
287
+ intersect = sklearnex_module__all__.intersection(sklearn_module__all__)
288
+
289
+ for i in intersect:
290
+ if i:
291
+ del patched[i]
292
+ else:
293
+ del patched[module]
294
+ assert patched == {}, f"{patched.keys()} were not properly patched"
118
295
 
119
296
 
120
297
  @pytest.mark.parametrize("estimator", UNPATCHED_MODELS.keys())
@@ -125,6 +302,54 @@ def test_is_patched_instance(estimator):
125
302
  assert not is_patched_instance(unpatched), f"{unpatched} is an unpatched instance"
126
303
 
127
304
 
305
+ @pytest.mark.parametrize("estimator", PATCHED_MODELS.keys())
306
+ def test_if_estimator_inherits_sklearn(estimator):
307
+ est = PATCHED_MODELS[estimator]
308
+ if estimator in UNPATCHED_MODELS:
309
+ assert issubclass(
310
+ est, UNPATCHED_MODELS[estimator]
311
+ ), f"{estimator} does not inherit from the patched sklearn estimator"
312
+ else:
313
+ assert issubclass(est, BaseEstimator)
314
+ assert any(
315
+ [
316
+ issubclass(est, i)
317
+ for i in [
318
+ ClassifierMixin,
319
+ ClusterMixin,
320
+ OutlierMixin,
321
+ RegressorMixin,
322
+ TransformerMixin,
323
+ ]
324
+ ]
325
+ ), f"{estimator} does not inherit a sklearn Mixin"
326
+
327
+
328
+ @pytest.mark.parametrize("estimator", UNPATCHED_MODELS.keys())
329
+ def test_docstring_patching_match(estimator):
330
+ patched = PATCHED_MODELS[estimator]
331
+ unpatched = UNPATCHED_MODELS[estimator]
332
+ patched_docstrings = {
333
+ i: getattr(patched, i).__doc__
334
+ for i in dir(patched)
335
+ if not i.startswith("_") and not i.endswith("_") and hasattr(patched, i)
336
+ }
337
+ unpatched_docstrings = {
338
+ i: getattr(unpatched, i).__doc__
339
+ for i in dir(unpatched)
340
+ if not i.startswith("_") and not i.endswith("_") and hasattr(unpatched, i)
341
+ }
342
+
343
+ # check class docstring match if a docstring is available
344
+
345
+ assert (patched.__doc__ is None) == (unpatched.__doc__ is None)
346
+
347
+ # check class attribute docstrings
348
+
349
+ for i in unpatched_docstrings:
350
+ assert (patched_docstrings[i] is None) == (unpatched_docstrings[i] is None)
351
+
352
+
128
353
  @pytest.mark.parametrize("member", ["_onedal_cpu_supported", "_onedal_gpu_supported"])
129
354
  @pytest.mark.parametrize(
130
355
  "name",
sklearnex/preview/decomposition/__init__.py +0 -19
@@ -1,19 +0,0 @@
1
- # ===============================================================================
2
- # Copyright 2023 Intel Corporation
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
- # ===============================================================================
16
-
17
- from .pca import PCA
18
-
19
- __all__ = ["PCA"]