scikit-learn-intelex 2024.3.0__py310-none-manylinux1_x86_64.whl → 2024.5.0__py310-none-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (43) hide show
  1. {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/METADATA +2 -2
  2. {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/RECORD +43 -37
  3. sklearnex/_device_offload.py +39 -5
  4. sklearnex/basic_statistics/__init__.py +2 -1
  5. sklearnex/basic_statistics/incremental_basic_statistics.py +288 -0
  6. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +384 -0
  7. sklearnex/covariance/incremental_covariance.py +217 -30
  8. sklearnex/covariance/tests/test_incremental_covariance.py +54 -17
  9. sklearnex/decomposition/pca.py +71 -19
  10. sklearnex/decomposition/tests/test_pca.py +2 -2
  11. sklearnex/dispatcher.py +33 -2
  12. sklearnex/ensemble/_forest.py +73 -79
  13. sklearnex/linear_model/__init__.py +5 -3
  14. sklearnex/linear_model/incremental_linear.py +387 -0
  15. sklearnex/linear_model/linear.py +275 -340
  16. sklearnex/linear_model/logistic_regression.py +50 -9
  17. sklearnex/linear_model/tests/test_incremental_linear.py +200 -0
  18. sklearnex/linear_model/tests/test_linear.py +40 -5
  19. sklearnex/neighbors/_lof.py +53 -36
  20. sklearnex/neighbors/common.py +4 -1
  21. sklearnex/neighbors/knn_classification.py +37 -122
  22. sklearnex/neighbors/knn_regression.py +10 -117
  23. sklearnex/neighbors/knn_unsupervised.py +6 -78
  24. sklearnex/neighbors/tests/test_neighbors.py +2 -2
  25. sklearnex/preview/cluster/k_means.py +5 -73
  26. sklearnex/preview/covariance/covariance.py +6 -5
  27. sklearnex/preview/covariance/tests/test_covariance.py +18 -5
  28. sklearnex/svm/_common.py +4 -7
  29. sklearnex/svm/nusvc.py +66 -50
  30. sklearnex/svm/nusvr.py +3 -49
  31. sklearnex/svm/svc.py +66 -51
  32. sklearnex/svm/svr.py +3 -49
  33. sklearnex/tests/_utils.py +34 -16
  34. sklearnex/tests/test_memory_usage.py +5 -1
  35. sklearnex/tests/test_n_jobs_support.py +12 -2
  36. sklearnex/tests/test_patching.py +87 -58
  37. sklearnex/tests/test_run_to_run_stability_tests.py +1 -1
  38. sklearnex/utils/__init__.py +2 -1
  39. sklearnex/utils/_namespace.py +97 -0
  40. sklearnex/utils/tests/test_finite.py +89 -0
  41. {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/LICENSE.txt +0 -0
  42. {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/WHEEL +0 -0
  43. {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/top_level.txt +0 -0
@@ -26,24 +26,7 @@ from inspect import signature
26
26
  import numpy as np
27
27
  import numpy.random as nprnd
28
28
  import pytest
29
- from _utils import (
30
- DTYPES,
31
- PATCHED_FUNCTIONS,
32
- PATCHED_MODELS,
33
- SPECIAL_INSTANCES,
34
- UNPATCHED_FUNCTIONS,
35
- UNPATCHED_MODELS,
36
- gen_dataset,
37
- gen_models_info,
38
- )
39
- from sklearn.base import (
40
- BaseEstimator,
41
- ClassifierMixin,
42
- ClusterMixin,
43
- OutlierMixin,
44
- RegressorMixin,
45
- TransformerMixin,
46
- )
29
+ from sklearn.base import BaseEstimator
47
30
 
48
31
  from daal4py.sklearn._utils import sklearn_check_version
49
32
  from onedal.tests.utils._dataframes_support import (
@@ -53,21 +36,42 @@ from onedal.tests.utils._dataframes_support import (
53
36
  from sklearnex import is_patched_instance
54
37
  from sklearnex.dispatcher import _is_preview_enabled
55
38
  from sklearnex.metrics import pairwise_distances, roc_auc_score
39
+ from sklearnex.tests._utils import (
40
+ DTYPES,
41
+ PATCHED_FUNCTIONS,
42
+ PATCHED_MODELS,
43
+ SPECIAL_INSTANCES,
44
+ UNPATCHED_FUNCTIONS,
45
+ UNPATCHED_MODELS,
46
+ gen_dataset,
47
+ gen_models_info,
48
+ )
56
49
 
57
50
 
58
51
  @pytest.mark.parametrize("dtype", DTYPES)
59
- @pytest.mark.parametrize(
60
- "dataframe, queue", get_dataframes_and_queues(dataframe_filter_="numpy")
61
- )
52
+ @pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues())
62
53
  @pytest.mark.parametrize("metric", ["cosine", "correlation"])
63
54
  def test_pairwise_distances_patching(caplog, dataframe, queue, dtype, metric):
64
55
  with caplog.at_level(logging.WARNING, logger="sklearnex"):
56
+ if dtype == np.float16 and queue and not queue.sycl_device.has_aspect_fp16:
57
+ pytest.skip("Hardware does not support fp16 SYCL testing")
58
+ elif dtype == np.float64 and queue and not queue.sycl_device.has_aspect_fp64:
59
+ pytest.skip("Hardware does not support fp64 SYCL testing")
60
+ elif queue and queue.sycl_device.is_gpu:
61
+ pytest.skip("pairwise_distances does not support GPU queues")
62
+
65
63
  rng = nprnd.default_rng()
66
- X = _convert_to_dataframe(
67
- rng.random(size=1000), sycl_queue=queue, target_df=dataframe, dtype=dtype
68
- )
64
+ if dataframe == "pandas":
65
+ X = _convert_to_dataframe(
66
+ rng.random(size=1000).astype(dtype).reshape(1, -1),
67
+ target_df=dataframe,
68
+ )
69
+ else:
70
+ X = _convert_to_dataframe(
71
+ rng.random(size=1000), sycl_queue=queue, target_df=dataframe, dtype=dtype
72
+ )[None, :]
69
73
 
70
- _ = pairwise_distances(X.reshape(1, -1), metric=metric)
74
+ _ = pairwise_distances(X, metric=metric)
71
75
  assert all(
72
76
  [
73
77
  "running accelerated version" in i.message
@@ -80,22 +84,26 @@ def test_pairwise_distances_patching(caplog, dataframe, queue, dtype, metric):
80
84
  @pytest.mark.parametrize(
81
85
  "dtype", [i for i in DTYPES if "32" in i.__name__ or "64" in i.__name__]
82
86
  )
83
- @pytest.mark.parametrize(
84
- "dataframe, queue", get_dataframes_and_queues(dataframe_filter_="numpy")
85
- )
87
+ @pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues())
86
88
  def test_roc_auc_score_patching(caplog, dataframe, queue, dtype):
87
89
  if dtype in [np.uint32, np.uint64] and sys.platform == "win32":
88
90
  pytest.skip("Windows issue with unsigned ints")
91
+ elif dtype == np.float64 and queue and not queue.sycl_device.has_aspect_fp64:
92
+ pytest.skip("Hardware does not support fp64 SYCL testing")
93
+
89
94
  with caplog.at_level(logging.WARNING, logger="sklearnex"):
90
95
  rng = nprnd.default_rng()
96
+ X = rng.integers(2, size=1000)
97
+ y = rng.integers(2, size=1000)
98
+
91
99
  X = _convert_to_dataframe(
92
- rng.integers(2, size=1000),
100
+ X,
93
101
  sycl_queue=queue,
94
102
  target_df=dataframe,
95
103
  dtype=dtype,
96
104
  )
97
105
  y = _convert_to_dataframe(
98
- rng.integers(2, size=1000),
106
+ y,
99
107
  sycl_queue=queue,
100
108
  target_df=dataframe,
101
109
  dtype=dtype,
@@ -112,14 +120,25 @@ def test_roc_auc_score_patching(caplog, dataframe, queue, dtype):
112
120
 
113
121
 
114
122
  @pytest.mark.parametrize("dtype", DTYPES)
115
- @pytest.mark.parametrize(
116
- "dataframe, queue", get_dataframes_and_queues(dataframe_filter_="numpy")
117
- )
123
+ @pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues())
118
124
  @pytest.mark.parametrize("estimator, method", gen_models_info(PATCHED_MODELS))
119
125
  def test_standard_estimator_patching(caplog, dataframe, queue, dtype, estimator, method):
120
126
  with caplog.at_level(logging.WARNING, logger="sklearnex"):
121
127
  est = PATCHED_MODELS[estimator]()
122
128
 
129
+ if queue:
130
+ if dtype == np.float16 and not queue.sycl_device.has_aspect_fp16:
131
+ pytest.skip("Hardware does not support fp16 SYCL testing")
132
+ elif dtype == np.float64 and not queue.sycl_device.has_aspect_fp64:
133
+ pytest.skip("Hardware does not support fp64 SYCL testing")
134
+ elif queue.sycl_device.is_gpu and estimator in [
135
+ "KMeans",
136
+ "ElasticNet",
137
+ "Lasso",
138
+ "Ridge",
139
+ ]:
140
+ pytest.skip(f"{estimator} does not support GPU queues")
141
+
123
142
  if estimator == "TSNE" and method == "fit_transform":
124
143
  pytest.skip("TSNE.fit_transform is too slow for common testing")
125
144
  elif (
@@ -129,15 +148,30 @@ def test_standard_estimator_patching(caplog, dataframe, queue, dtype, estimator,
129
148
  and dtype in [np.uint32, np.uint64]
130
149
  ):
131
150
  pytest.skip("Windows segmentation fault for Ridge.predict for unsigned ints")
132
- elif not hasattr(est, method):
151
+ elif estimator == "IncrementalLinearRegression" and dtype in [
152
+ np.int8,
153
+ np.int16,
154
+ np.int32,
155
+ np.int64,
156
+ np.uint8,
157
+ np.uint16,
158
+ np.uint32,
159
+ np.uint64,
160
+ ]:
161
+ pytest.skip(
162
+ "IncrementalLinearRegression fails on oneDAL side with int types because dataset is filled by zeroes"
163
+ )
164
+ elif method and not hasattr(est, method):
133
165
  pytest.skip(f"sklearn available_if prevents testing {estimator}.{method}")
166
+
134
167
  X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)
135
168
  est.fit(X, y)
136
169
 
137
- if method != "score":
138
- getattr(est, method)(X)
139
- else:
140
- est.score(X, y)
170
+ if method:
171
+ if method != "score":
172
+ getattr(est, method)(X)
173
+ else:
174
+ est.score(X, y)
141
175
  assert all(
142
176
  [
143
177
  "running accelerated version" in i.message
@@ -148,9 +182,7 @@ def test_standard_estimator_patching(caplog, dataframe, queue, dtype, estimator,
148
182
 
149
183
 
150
184
  @pytest.mark.parametrize("dtype", DTYPES)
151
- @pytest.mark.parametrize(
152
- "dataframe, queue", get_dataframes_and_queues(dataframe_filter_="numpy")
153
- )
185
+ @pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues())
154
186
  @pytest.mark.parametrize("estimator, method", gen_models_info(SPECIAL_INSTANCES))
155
187
  def test_special_estimator_patching(caplog, dataframe, queue, dtype, estimator, method):
156
188
  # prepare logging
@@ -158,15 +190,24 @@ def test_special_estimator_patching(caplog, dataframe, queue, dtype, estimator,
158
190
  with caplog.at_level(logging.WARNING, logger="sklearnex"):
159
191
  est = SPECIAL_INSTANCES[estimator]
160
192
 
193
+ # It's not possible to get the dpnp/dpctl arrays to be in the proper dtype
194
+ if dtype == np.float16 and queue and not queue.sycl_device.has_aspect_fp16:
195
+ pytest.skip("Hardware does not support fp16 SYCL testing")
196
+ elif dtype == np.float64 and queue and not queue.sycl_device.has_aspect_fp64:
197
+ pytest.skip("Hardware does not support fp64 SYCL testing")
198
+
161
199
  X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)
162
200
  est.fit(X, y)
163
201
 
164
- if not hasattr(est, method):
202
+ if method and not hasattr(est, method):
165
203
  pytest.skip(f"sklearn available_if prevents testing {estimator}.{method}")
166
- if method != "score":
167
- getattr(est, method)(X)
168
- else:
169
- est.score(X, y)
204
+
205
+ if method:
206
+ if method != "score":
207
+ getattr(est, method)(X)
208
+ else:
209
+ est.score(X, y)
210
+
170
211
  assert all(
171
212
  [
172
213
  "running accelerated version" in i.message
@@ -311,18 +352,6 @@ def test_if_estimator_inherits_sklearn(estimator):
311
352
  ), f"{estimator} does not inherit from the patched sklearn estimator"
312
353
  else:
313
354
  assert issubclass(est, BaseEstimator)
314
- assert any(
315
- [
316
- issubclass(est, i)
317
- for i in [
318
- ClassifierMixin,
319
- ClusterMixin,
320
- OutlierMixin,
321
- RegressorMixin,
322
- TransformerMixin,
323
- ]
324
- ]
325
- ), f"{estimator} does not inherit a sklearn Mixin"
326
355
 
327
356
 
328
357
  @pytest.mark.parametrize("estimator", UNPATCHED_MODELS.keys())
@@ -294,7 +294,7 @@ MODELS_INFO = [
294
294
  "dataset": "regression",
295
295
  },
296
296
  {
297
- "model": PCA(n_components=0.5, svd_solver="full", random_state=0),
297
+ "model": PCA(n_components=0.5, svd_solver="covariance_eigh", random_state=0),
298
298
  "methods": ["transform", "get_covariance", "get_precision", "score_samples"],
299
299
  "dataset": "classifier",
300
300
  },
@@ -14,6 +14,7 @@
14
14
  # limitations under the License.
15
15
  # ===============================================================================
16
16
 
17
+ from ._namespace import get_namespace
17
18
  from .validation import _assert_all_finite
18
19
 
19
- __all__ = ["_assert_all_finite"]
20
+ __all__ = ["get_namespace", "_assert_all_finite"]
@@ -0,0 +1,97 @@
1
+ # ==============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import numpy as np
18
+
19
+ from daal4py.sklearn._utils import sklearn_check_version
20
+
21
+ from .._device_offload import dpnp_available
22
+
23
+ if sklearn_check_version("1.2"):
24
+ from sklearn.utils._array_api import get_namespace as sklearn_get_namespace
25
+
26
+ if dpnp_available:
27
+ import dpnp
28
+
29
+
30
def get_namespace(*arrays):
    """Get namespace of arrays.

    Introspect `arrays` arguments and return their common Array API
    compatible namespace object, if any. NumPy 1.22 and later can
    construct such containers using the `numpy.array_api` namespace
    for instance.

    This function will return the namespace of SYCL-related arrays
    which define the __sycl_usm_array_interface__ attribute
    regardless of array_api support, the configuration of
    array_api_dispatch, or scikit-learn version.

    See: https://numpy.org/neps/nep-0047-array-api-standard.html

    If `arrays` are regular numpy arrays, an instance of the
    `_NumPyApiWrapper` compatibility wrapper is returned instead.

    Namespace support is not enabled by default. To enable it
    call:

      sklearn.set_config(array_api_dispatch=True)

    or:

      with sklearn.config_context(array_api_dispatch=True):
          # your code here

    Otherwise an instance of the `_NumPyApiWrapper`
    compatibility wrapper is always returned irrespective of
    the fact that arrays implement the `__array_namespace__`
    protocol or not.

    For scikit-learn versions older than 1.2 (no array API helper
    available) and non-SYCL inputs, the plain `numpy` module is returned
    and the arrays are reported as not Array API compliant.

    Parameters
    ----------
    *arrays : array objects
        Array objects.

    Returns
    -------
    namespace : module
        Namespace shared by array objects.

    is_array_api : bool
        True if the arrays are containers that implement the Array API spec.

    Raises
    ------
    ValueError
        If the inputs contain more than one distinct SYCL array type, or a
        SYCL array type that neither exposes `__array_namespace__` nor is a
        `dpnp.ndarray`.
    """

    # sycl support designed to work regardless of array_api_dispatch sklearn global value
    # One representative array is kept per distinct SYCL array type.
    sycl_type = {type(x): x for x in arrays if hasattr(x, "__sycl_usm_array_interface__")}

    if len(sycl_type) > 1:
        raise ValueError(f"Multiple SYCL types for array inputs: {sycl_type}")

    if sycl_type:
        (X,) = sycl_type.values()

        if hasattr(X, "__array_namespace__"):
            return X.__array_namespace__(), True
        elif dpnp_available and isinstance(X, dpnp.ndarray):
            # `dpnp` is only imported when dpnp_available is truthy, hence the guard
            return dpnp, False
        else:
            raise ValueError(f"SYCL type not recognized: {sycl_type}")

    elif sklearn_check_version("1.2"):
        return sklearn_get_namespace(*arrays)
    else:
        # The bare numpy module is not an Array API container namespace, so
        # report is_array_api=False, consistent with what the scikit-learn
        # helper reports for NumPy inputs.
        return np, False
@@ -0,0 +1,89 @@
1
+ # ==============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import time
18
+
19
+ import numpy as np
20
+ import numpy.random as rand
21
+ import pytest
22
+ from numpy.testing import assert_raises
23
+
24
+ from sklearnex.utils import _assert_all_finite
25
+
26
+
27
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize(
    "shape",
    [
        [16, 2048],
        [
            2**16 + 3,
        ],
        [1000, 1000],
    ],
)
@pytest.mark.parametrize("allow_nan", [False, True])
def test_sum_infinite_actually_finite(dtype, shape, allow_nan):
    """Check that an array filled with the largest finite value is accepted.

    Every element equals ``np.finfo(dtype).max``, so the data contains no
    inf/NaN even though adding the elements together would overflow.
    ``_assert_all_finite`` is expected to return without raising.
    """
    # np.empty allocates an array *of* the requested shape; np.array(shape)
    # would only build a tiny array whose elements are the shape values.
    X = np.empty(shape, dtype=dtype)
    X.fill(np.finfo(dtype).max)
    _assert_all_finite(X, allow_nan=allow_nan)
43
+
44
+
45
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize(
    "shape",
    [
        [16, 2048],
        [
            2**16 + 3,
        ],
        [1000, 1000],
    ],
)
@pytest.mark.parametrize("allow_nan", [False, True])
@pytest.mark.parametrize("check", ["inf", "NaN", None])
# NOTE(review): the time-based seed is evaluated when this module is imported,
# so the parameter value differs between runs.
@pytest.mark.parametrize("seed", [0, int(time.time())])
def test_assert_finite_random_location(dtype, shape, allow_nan, check, seed):
    """Check `_assert_all_finite` with one non-finite value at a random position.

    A random array of the given shape is generated; when ``check`` is
    ``"inf"`` or ``"NaN"`` that value is written to one randomly chosen
    element. The call must succeed when nothing was injected or when a NaN
    was injected with ``allow_nan=True``, and raise ``ValueError`` otherwise.
    """
    rand.seed(seed)
    X = rand.uniform(high=np.finfo(dtype).max, size=shape).astype(dtype)

    if check:
        # randint's upper bound is exclusive: use X.size so that the last
        # element can be selected as well.
        loc = rand.randint(0, X.size)
        X.reshape((-1,))[loc] = float(check)

    if check is None or (allow_nan and check == "NaN"):
        _assert_all_finite(X, allow_nan=allow_nan)
    else:
        assert_raises(ValueError, _assert_all_finite, X, allow_nan=allow_nan)
71
+
72
+
73
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("allow_nan", [False, True])
@pytest.mark.parametrize("check", ["inf", "NaN", None])
# NOTE(review): the time-based seed is evaluated when this module is imported,
# so the parameter value differs between runs.
@pytest.mark.parametrize("seed", [0, int(time.time())])
def test_assert_finite_random_shape_and_location(dtype, allow_nan, check, seed):
    """Check `_assert_all_finite` on a 1-D array of random length.

    The length is drawn between ``lb`` and ``ub``; when ``check`` is
    ``"inf"`` or ``"NaN"`` that value is written to one randomly chosen
    element. The call must succeed when nothing was injected or when a NaN
    was injected with ``allow_nan=True``, and raise ``ValueError`` otherwise.
    """
    lb, ub = 32768, 1048576  # lb is a patching condition, ub 2^20
    rand.seed(seed)
    X = rand.uniform(high=np.finfo(dtype).max, size=rand.randint(lb, ub)).astype(dtype)

    if check:
        # randint's upper bound is exclusive: use X.size so that the last
        # element can be selected as well.
        loc = rand.randint(0, X.size)
        X[loc] = float(check)

    if check is None or (allow_nan and check == "NaN"):
        _assert_all_finite(X, allow_nan=allow_nan)
    else:
        assert_raises(ValueError, _assert_all_finite, X, allow_nan=allow_nan)