scikit-learn-intelex 2024.5.0__py39-none-manylinux1_x86_64.whl → 2024.7.0__py39-none-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (73) hide show
  1. {scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/METADATA +2 -2
  2. scikit_learn_intelex-2024.7.0.dist-info/RECORD +122 -0
  3. sklearnex/_config.py +3 -15
  4. sklearnex/_device_offload.py +9 -168
  5. sklearnex/basic_statistics/basic_statistics.py +127 -1
  6. sklearnex/basic_statistics/tests/test_basic_statistics.py +251 -0
  7. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +1 -1
  8. sklearnex/cluster/dbscan.py +3 -1
  9. sklearnex/cluster/k_means.py +8 -0
  10. sklearnex/cluster/tests/test_dbscan.py +8 -6
  11. sklearnex/cluster/tests/test_kmeans.py +15 -3
  12. sklearnex/conftest.py +11 -1
  13. sklearnex/covariance/incremental_covariance.py +64 -13
  14. sklearnex/covariance/tests/test_incremental_covariance.py +35 -0
  15. sklearnex/decomposition/pca.py +25 -1
  16. sklearnex/decomposition/tests/test_pca.py +4 -2
  17. sklearnex/dispatcher.py +109 -1
  18. sklearnex/ensemble/_forest.py +121 -57
  19. sklearnex/ensemble/tests/test_forest.py +7 -0
  20. sklearnex/glob/dispatcher.py +16 -2
  21. sklearnex/linear_model/coordinate_descent.py +13 -0
  22. sklearnex/linear_model/incremental_linear.py +102 -25
  23. sklearnex/linear_model/linear.py +25 -39
  24. sklearnex/linear_model/logistic_regression.py +92 -74
  25. sklearnex/linear_model/ridge.py +7 -0
  26. sklearnex/linear_model/tests/test_incremental_linear.py +10 -10
  27. sklearnex/linear_model/tests/test_linear.py +30 -5
  28. sklearnex/linear_model/tests/test_logreg.py +45 -3
  29. sklearnex/manifold/t_sne.py +4 -0
  30. sklearnex/metrics/pairwise.py +5 -0
  31. sklearnex/metrics/ranking.py +3 -0
  32. sklearnex/model_selection/split.py +3 -0
  33. sklearnex/neighbors/_lof.py +9 -0
  34. sklearnex/neighbors/common.py +45 -1
  35. sklearnex/neighbors/knn_classification.py +1 -20
  36. sklearnex/neighbors/knn_regression.py +25 -20
  37. sklearnex/neighbors/knn_unsupervised.py +31 -7
  38. sklearnex/preview/__init__.py +1 -1
  39. sklearnex/preview/decomposition/__init__.py +19 -0
  40. sklearnex/preview/decomposition/incremental_pca.py +228 -0
  41. sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
  42. sklearnex/preview/linear_model/__init__.py +19 -0
  43. sklearnex/preview/linear_model/ridge.py +419 -0
  44. sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
  45. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  46. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  47. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
  48. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  49. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  50. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  51. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  52. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +163 -0
  53. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  54. sklearnex/svm/_common.py +163 -20
  55. sklearnex/svm/nusvc.py +40 -4
  56. sklearnex/svm/nusvr.py +31 -2
  57. sklearnex/svm/svc.py +40 -4
  58. sklearnex/svm/svr.py +31 -2
  59. sklearnex/svm/tests/test_svm.py +12 -20
  60. sklearnex/tests/_utils.py +185 -30
  61. sklearnex/tests/_utils_spmd.py +185 -0
  62. sklearnex/tests/test_common.py +54 -0
  63. sklearnex/tests/test_config.py +4 -0
  64. sklearnex/tests/test_memory_usage.py +185 -126
  65. sklearnex/tests/test_monkeypatch.py +12 -4
  66. sklearnex/tests/test_patching.py +21 -25
  67. sklearnex/tests/test_run_to_run_stability.py +295 -0
  68. sklearnex/utils/_namespace.py +1 -1
  69. scikit_learn_intelex-2024.5.0.dist-info/RECORD +0 -104
  70. sklearnex/tests/test_run_to_run_stability_tests.py +0 -428
  71. {scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/LICENSE.txt +0 -0
  72. {scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/WHEEL +0 -0
  73. {scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,251 @@
1
+ # ==============================================================================
2
+ # Copyright 2023 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import numpy as np
18
+ import pytest
19
+ from numpy.testing import assert_allclose
20
+
21
+ from onedal.basic_statistics.tests.test_basic_statistics import (
22
+ expected_max,
23
+ expected_mean,
24
+ expected_sum,
25
+ options_and_tests,
26
+ )
27
+ from onedal.tests.utils._dataframes_support import (
28
+ _convert_to_dataframe,
29
+ get_dataframes_and_queues,
30
+ )
31
+ from sklearnex.basic_statistics import BasicStatistics
32
+
33
+
34
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
35
+ def test_sklearnex_import_basic_statistics(dataframe, queue):
36
+ X = np.array([[0, 0], [1, 1]])
37
+ X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
38
+
39
+ weights = np.array([1, 0.5])
40
+ weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
41
+
42
+ result = BasicStatistics().fit(X_df)
43
+
44
+ expected_mean = np.array([0.5, 0.5])
45
+ expected_min = np.array([0, 0])
46
+ expected_max = np.array([1, 1])
47
+
48
+ assert_allclose(expected_mean, result.mean)
49
+ assert_allclose(expected_max, result.max)
50
+ assert_allclose(expected_min, result.min)
51
+
52
+ result = BasicStatistics().fit(X_df, sample_weight=weights_df)
53
+
54
+ expected_weighted_mean = np.array([0.25, 0.25])
55
+ expected_weighted_min = np.array([0, 0])
56
+ expected_weighted_max = np.array([0.5, 0.5])
57
+
58
+ assert_allclose(expected_weighted_mean, result.mean)
59
+ assert_allclose(expected_weighted_min, result.min)
60
+ assert_allclose(expected_weighted_max, result.max)
61
+
62
+
63
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
64
+ @pytest.mark.parametrize("weighted", [True, False])
65
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
66
+ def test_multiple_options_on_gold_data(dataframe, queue, weighted, dtype):
67
+ X = np.array([[0, 0], [1, 1]])
68
+ X = X.astype(dtype=dtype)
69
+ X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
70
+ if weighted:
71
+ weights = np.array([1, 0.5])
72
+ weights = weights.astype(dtype=dtype)
73
+ weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
74
+ basicstat = BasicStatistics()
75
+
76
+ if weighted:
77
+ result = basicstat.fit(X_df, sample_weight=weights_df)
78
+ else:
79
+ result = basicstat.fit(X_df)
80
+
81
+ if weighted:
82
+ expected_weighted_mean = np.array([0.25, 0.25])
83
+ expected_weighted_min = np.array([0, 0])
84
+ expected_weighted_max = np.array([0.5, 0.5])
85
+ assert_allclose(expected_weighted_mean, result.mean)
86
+ assert_allclose(expected_weighted_max, result.max)
87
+ assert_allclose(expected_weighted_min, result.min)
88
+ else:
89
+ expected_mean = np.array([0.5, 0.5])
90
+ expected_min = np.array([0, 0])
91
+ expected_max = np.array([1, 1])
92
+ assert_allclose(expected_mean, result.mean)
93
+ assert_allclose(expected_max, result.max)
94
+ assert_allclose(expected_min, result.min)
95
+
96
+
97
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
98
+ @pytest.mark.parametrize("option", options_and_tests)
99
+ @pytest.mark.parametrize("row_count", [100, 1000])
100
+ @pytest.mark.parametrize("column_count", [10, 100])
101
+ @pytest.mark.parametrize("weighted", [True, False])
102
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
103
+ def test_single_option_on_random_data(
104
+ dataframe, queue, option, row_count, column_count, weighted, dtype
105
+ ):
106
+ result_option, function, tols = option
107
+ fp32tol, fp64tol = tols
108
+ seed = 77
109
+ gen = np.random.default_rng(seed)
110
+ X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
111
+ X = X.astype(dtype=dtype)
112
+ X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
113
+ if weighted:
114
+ weights = gen.uniform(low=-0.5, high=1.0, size=row_count)
115
+ weights = weights.astype(dtype=dtype)
116
+ weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
117
+ basicstat = BasicStatistics(result_options=result_option)
118
+
119
+ if weighted:
120
+ result = basicstat.fit(X_df, sample_weight=weights_df)
121
+ else:
122
+ result = basicstat.fit(X_df)
123
+
124
+ res = getattr(result, result_option)
125
+ if weighted:
126
+ weighted_data = np.diag(weights) @ X
127
+ gtr = function(weighted_data)
128
+ else:
129
+ gtr = function(X)
130
+
131
+ tol = fp32tol if res.dtype == np.float32 else fp64tol
132
+ assert_allclose(gtr, res, atol=tol)
133
+
134
+
135
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
136
+ @pytest.mark.parametrize("row_count", [100, 1000])
137
+ @pytest.mark.parametrize("column_count", [10, 100])
138
+ @pytest.mark.parametrize("weighted", [True, False])
139
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
140
+ def test_multiple_options_on_random_data(
141
+ dataframe, queue, row_count, column_count, weighted, dtype
142
+ ):
143
+ seed = 77
144
+ gen = np.random.default_rng(seed)
145
+ X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
146
+ X = X.astype(dtype=dtype)
147
+ X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
148
+ if weighted:
149
+ weights = gen.uniform(low=-0.5, high=1.0, size=row_count)
150
+ weights = weights.astype(dtype=dtype)
151
+ weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
152
+ basicstat = BasicStatistics(result_options=["mean", "max", "sum"])
153
+
154
+ if weighted:
155
+ result = basicstat.fit(X_df, sample_weight=weights_df)
156
+ else:
157
+ result = basicstat.fit(X_df)
158
+
159
+ res_mean, res_max, res_sum = result.mean, result.max, result.sum
160
+ if weighted:
161
+ weighted_data = np.diag(weights) @ X
162
+ gtr_mean, gtr_max, gtr_sum = (
163
+ expected_mean(weighted_data),
164
+ expected_max(weighted_data),
165
+ expected_sum(weighted_data),
166
+ )
167
+ else:
168
+ gtr_mean, gtr_max, gtr_sum = (
169
+ expected_mean(X),
170
+ expected_max(X),
171
+ expected_sum(X),
172
+ )
173
+
174
+ tol = 5e-4 if res_mean.dtype == np.float32 else 1e-7
175
+ assert_allclose(gtr_mean, res_mean, atol=tol)
176
+ assert_allclose(gtr_max, res_max, atol=tol)
177
+ assert_allclose(gtr_sum, res_sum, atol=tol)
178
+
179
+
180
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
181
+ @pytest.mark.parametrize("row_count", [100, 1000])
182
+ @pytest.mark.parametrize("column_count", [10, 100])
183
+ @pytest.mark.parametrize("weighted", [True, False])
184
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
185
+ def test_all_option_on_random_data(
186
+ dataframe, queue, row_count, column_count, weighted, dtype
187
+ ):
188
+ seed = 77
189
+ gen = np.random.default_rng(seed)
190
+ X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
191
+ X = X.astype(dtype=dtype)
192
+ X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
193
+ if weighted:
194
+ weights = gen.uniform(low=-0.5, high=+1.0, size=row_count)
195
+ weights = weights.astype(dtype=dtype)
196
+ weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
197
+ basicstat = BasicStatistics(result_options="all")
198
+
199
+ if weighted:
200
+ result = basicstat.fit(X_df, sample_weight=weights_df)
201
+ else:
202
+ result = basicstat.fit(X_df)
203
+
204
+ if weighted:
205
+ weighted_data = np.diag(weights) @ X
206
+
207
+ for option in options_and_tests:
208
+ result_option, function, tols = option
209
+ fp32tol, fp64tol = tols
210
+ res = getattr(result, result_option)
211
+ if weighted:
212
+ gtr = function(weighted_data)
213
+ else:
214
+ gtr = function(X)
215
+ tol = fp32tol if res.dtype == np.float32 else fp64tol
216
+ assert_allclose(gtr, res, atol=tol)
217
+
218
+
219
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
220
+ @pytest.mark.parametrize("option", options_and_tests)
221
+ @pytest.mark.parametrize("data_size", [100, 1000])
222
+ @pytest.mark.parametrize("weighted", [True, False])
223
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
224
+ def test_1d_input_on_random_data(dataframe, queue, option, data_size, weighted, dtype):
225
+ result_option, function, tols = option
226
+ fp32tol, fp64tol = tols
227
+ seed = 77
228
+ gen = np.random.default_rng(seed)
229
+ X = gen.uniform(low=-0.3, high=+0.7, size=data_size)
230
+ X = X.astype(dtype=dtype)
231
+ X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
232
+ if weighted:
233
+ weights = gen.uniform(low=-0.5, high=1.0, size=data_size)
234
+ weights = weights.astype(dtype=dtype)
235
+ weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
236
+ basicstat = BasicStatistics(result_options=result_option)
237
+
238
+ if weighted:
239
+ result = basicstat.fit(X_df, sample_weight=weights_df)
240
+ else:
241
+ result = basicstat.fit(X_df)
242
+
243
+ res = getattr(result, result_option)
244
+ if weighted:
245
+ weighted_data = weights * X
246
+ gtr = function(weighted_data)
247
+ else:
248
+ gtr = function(X)
249
+
250
+ tol = fp32tol if res.dtype == np.float32 else fp64tol
251
+ assert_allclose(gtr, res, atol=tol)
@@ -18,7 +18,7 @@ import numpy as np
18
18
  import pytest
19
19
  from numpy.testing import assert_allclose
20
20
 
21
- from onedal.basic_statistics.tests.test_incremental_basic_statistics import (
21
+ from onedal.basic_statistics.tests.test_basic_statistics import (
22
22
  expected_max,
23
23
  expected_mean,
24
24
  expected_sum,
@@ -17,7 +17,6 @@
17
17
  import numbers
18
18
  from abc import ABC
19
19
 
20
- import numpy as np
21
20
  from scipy import sparse as sp
22
21
  from sklearn.cluster import DBSCAN as sklearn_DBSCAN
23
22
  from sklearn.utils.validation import _check_sample_weight
@@ -85,6 +84,9 @@ class DBSCAN(sklearn_DBSCAN, BaseDBSCAN):
85
84
  self.n_jobs = n_jobs
86
85
 
87
86
  def _onedal_fit(self, X, y, sample_weight=None, queue=None):
87
+ if sklearn_check_version("1.0"):
88
+ X = self._validate_data(X, force_all_finite=False)
89
+
88
90
  onedal_params = {
89
91
  "eps": self.eps,
90
92
  "min_samples": self.min_samples,
@@ -15,3 +15,11 @@
15
15
  # ===============================================================================
16
16
 
17
17
  from daal4py.sklearn.cluster import KMeans
18
+ from onedal._device_offload import support_usm_ndarray
19
+
20
+ # Note: `sklearnex.cluster.KMeans` only has functional
21
+ # sycl GPU support. No computation will be offloaded to a GPU device.
22
+ KMeans.fit = support_usm_ndarray(queue_param=False)(KMeans.fit)
23
+ KMeans.fit_predict = support_usm_ndarray(queue_param=False)(KMeans.fit_predict)
24
+ KMeans.predict = support_usm_ndarray(queue_param=False)(KMeans.predict)
25
+ KMeans.score = support_usm_ndarray(queue_param=False)(KMeans.score)
@@ -18,16 +18,18 @@ import numpy as np
18
18
  import pytest
19
19
  from numpy.testing import assert_allclose
20
20
 
21
+ from onedal.tests.utils._dataframes_support import (
22
+ _convert_to_dataframe,
23
+ get_dataframes_and_queues,
24
+ )
21
25
 
22
- # TODO:
23
- # adding this parameterized testing
24
- # somehow breaks other test with preview module patch:
25
- # sklearnex/tests/test_monkeypatch.py::test_preview_namespace.
26
- # @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
27
- def test_sklearnex_import_dbscan():
26
+
27
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
28
+ def test_sklearnex_import_dbscan(dataframe, queue):
28
29
  from sklearnex.cluster import DBSCAN
29
30
 
30
31
  X = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]])
32
+ X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
31
33
  dbscan = DBSCAN(eps=3, min_samples=2).fit(X)
32
34
  assert "sklearnex" in dbscan.__module__
33
35
 
@@ -15,16 +15,28 @@
15
15
  # ===============================================================================
16
16
 
17
17
  import numpy as np
18
+ import pytest
18
19
  from numpy.testing import assert_allclose
19
20
 
21
+ from onedal.tests.utils._dataframes_support import (
22
+ _as_numpy,
23
+ _convert_to_dataframe,
24
+ get_dataframes_and_queues,
25
+ )
26
+
27
+
28
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
29
+ def test_sklearnex_import(dataframe, queue):
20
30
 
21
- def test_sklearnex_import():
22
31
  from sklearnex.cluster import KMeans
23
32
 
24
33
  X = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])
34
+ X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
25
35
  kmeans = KMeans(n_clusters=2, random_state=0).fit(X)
26
36
  assert "daal4py" in kmeans.__module__
27
37
 
28
- result = kmeans.predict([[0, 0], [12, 3]])
38
+ X_test = [[0, 0], [12, 3]]
39
+ X_test = _convert_to_dataframe(X_test, sycl_queue=queue, target_df=dataframe)
40
+ result = kmeans.predict(X_test)
29
41
  expected = np.array([1, 0], dtype=np.int32)
30
- assert_allclose(expected, result)
42
+ assert_allclose(expected, _as_numpy(result))
sklearnex/conftest.py CHANGED
@@ -19,7 +19,8 @@ import logging
19
19
 
20
20
  import pytest
21
21
 
22
- from sklearnex import patch_sklearn, unpatch_sklearn
22
+ from daal4py.sklearn._utils import sklearn_check_version
23
+ from sklearnex import config_context, patch_sklearn, unpatch_sklearn
23
24
 
24
25
 
25
26
  def pytest_configure(config):
@@ -61,3 +62,12 @@ def with_sklearnex():
61
62
  patch_sklearn()
62
63
  yield
63
64
  unpatch_sklearn()
65
+
66
+
67
+ @pytest.fixture
68
+ def with_array_api():
69
+ if sklearn_check_version("1.2"):
70
+ with config_context(array_api_dispatch=True):
71
+ yield
72
+ else:
73
+ yield
@@ -19,13 +19,14 @@ import warnings
19
19
 
20
20
  import numpy as np
21
21
  from scipy import linalg
22
- from sklearn.base import BaseEstimator
22
+ from sklearn.base import BaseEstimator, clone
23
23
  from sklearn.covariance import EmpiricalCovariance as sklearn_EmpiricalCovariance
24
+ from sklearn.covariance import log_likelihood
24
25
  from sklearn.utils import check_array, gen_batches
26
+ from sklearn.utils.validation import _num_features
25
27
 
26
28
  from daal4py.sklearn._n_jobs_support import control_n_jobs
27
29
  from daal4py.sklearn._utils import daal_check_version, sklearn_check_version
28
- from onedal._device_offload import support_usm_ndarray
29
30
  from onedal.covariance import (
30
31
  IncrementalEmpiricalCovariance as onedal_IncrementalEmpiricalCovariance,
31
32
  )
@@ -34,6 +35,7 @@ from sklearnex import config_context
34
35
  from .._device_offload import dispatch, wrap_output_data
35
36
  from .._utils import PatchingConditionsChain, register_hyperparameters
36
37
  from ..metrics import pairwise_distances
38
+ from ..utils import get_namespace
37
39
 
38
40
  if sklearn_check_version("1.2"):
39
41
  from sklearn.utils._param_validation import Interval
@@ -98,7 +100,6 @@ class IncrementalEmpiricalCovariance(BaseEstimator):
98
100
 
99
101
  get_precision = sklearn_EmpiricalCovariance.get_precision
100
102
  error_norm = wrap_output_data(sklearn_EmpiricalCovariance.error_norm)
101
- score = wrap_output_data(sklearn_EmpiricalCovariance.score)
102
103
 
103
104
  def __init__(
104
105
  self, *, store_precision=False, assume_centered=False, batch_size=None, copy=True
@@ -197,6 +198,43 @@ class IncrementalEmpiricalCovariance(BaseEstimator):
197
198
 
198
199
  return self
199
200
 
201
+ @wrap_output_data
202
+ def score(self, X_test, y=None):
203
+ xp, _ = get_namespace(X_test)
204
+
205
+ location = self.location_
206
+ if sklearn_check_version("1.0"):
207
+ X = self._validate_data(
208
+ X_test,
209
+ dtype=[np.float64, np.float32],
210
+ reset=False,
211
+ )
212
+ else:
213
+ X = check_array(
214
+ X_test,
215
+ dtype=[np.float64, np.float32],
216
+ )
217
+
218
+ if "numpy" not in xp.__name__:
219
+ location = xp.asarray(location, device=X_test.device)
220
+ # depending on the sklearn version, check_array
221
+ # and validate_data will return only numpy arrays
222
+ # which will break dpnp/dpctl support. If the
223
+ # array namespace isn't from numpy and the data
224
+ # is now a numpy array, it has been validated and
225
+ # the original can be used.
226
+ if isinstance(X, np.ndarray):
227
+ X = X_test
228
+
229
+ est = clone(self)
230
+ est.set_params(**{"assume_centered": True})
231
+
232
+ # test_cov is a numpy array, but calculated on device
233
+ test_cov = est.fit(X - location).covariance_
234
+ res = log_likelihood(test_cov, self.get_precision())
235
+
236
+ return res
237
+
200
238
  def partial_fit(self, X, y=None, check_input=True):
201
239
  """
202
240
  Incremental fit with X. All of X is processed as a single batch.
@@ -293,21 +331,34 @@ class IncrementalEmpiricalCovariance(BaseEstimator):
293
331
  return self
294
332
 
295
333
  # expose sklearnex pairwise_distances if mahalanobis distance eventually supported
296
- @wrap_output_data
297
334
  def mahalanobis(self, X):
298
335
  if sklearn_check_version("1.0"):
299
- self._validate_data(X, reset=False, copy=self.copy)
300
- else:
301
- check_array(X, copy=self.copy)
336
+ self._check_feature_names(X, reset=False)
302
337
 
338
+ xp, _ = get_namespace(X)
303
339
  precision = self.get_precision()
304
- with config_context(assume_finite=True):
305
- # compute mahalanobis distances
306
- dist = pairwise_distances(
307
- X, self.location_[np.newaxis, :], metric="mahalanobis", VI=precision
308
- )
340
+ # compute mahalanobis distances
341
+ # pairwise_distances will check n_features (via n_feature matching with
342
+ # self.location_), and will check for finiteness via check_array
343
+ # check_feature_names will match _validate_data functionally
344
+ location = self.location_[np.newaxis, :]
345
+ if "numpy" not in xp.__name__:
346
+ # Guarantee that inputs to pairwise_distances match in type and location
347
+ location = xp.asarray(location, device=X.device)
348
+
349
+ try:
350
+ dist = pairwise_distances(X, location, metric="mahalanobis", VI=precision)
351
+ except ValueError as e:
352
+ # Throw the expected sklearn error in an n_feature length violation
353
+ if "Incompatible dimension for X and Y matrices: X.shape[1] ==" in str(e):
354
+ raise ValueError(
355
+ f"X has {_num_features(X)} features, but {self.__class__.__name__} "
356
+ f"is expecting {self.n_features_in_} features as input."
357
+ )
358
+ else:
359
+ raise e
309
360
 
310
- return np.reshape(dist, (len(X),)) ** 2
361
+ return (xp.reshape(dist, (-1,))) ** 2
311
362
 
312
363
  _onedal_cpu_supported = _onedal_supported
313
364
  _onedal_gpu_supported = _onedal_supported
@@ -16,13 +16,18 @@
16
16
 
17
17
  import numpy as np
18
18
  import pytest
19
+ from numpy.linalg import slogdet
19
20
  from numpy.testing import assert_allclose
21
+ from scipy.linalg import pinvh
20
22
  from sklearn.covariance.tests.test_covariance import (
21
23
  test_covariance,
22
24
  test_EmpiricalCovariance_validates_mahalanobis,
23
25
  )
26
+ from sklearn.datasets import load_diabetes
27
+ from sklearn.decomposition import PCA
24
28
 
25
29
  from onedal.tests.utils._dataframes_support import (
30
+ _as_numpy,
26
31
  _convert_to_dataframe,
27
32
  get_dataframes_and_queues,
28
33
  )
@@ -163,6 +168,36 @@ def test_sklearnex_fit_on_random_data(
163
168
  assert_allclose(expected_means, result.location_, atol=1e-6)
164
169
 
165
170
 
171
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
172
+ def test_whitened_toy_score(dataframe, queue):
173
+ from sklearnex.covariance import IncrementalEmpiricalCovariance
174
+
175
+ # Load a sklearn toy dataset with sufficient data
176
+ X, _ = load_diabetes(return_X_y=True)
177
+ n = X.shape[1]
178
+
179
+ # Transform the data into uncorrelated, unity variance components
180
+ X = PCA(whiten=True).fit_transform(X)
181
+
182
+ # change dataframe
183
+ X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
184
+
185
+ # fit data
186
+ est = IncrementalEmpiricalCovariance()
187
+ est.fit(X_df)
188
+ # location_ attribute approximately zero (10,), covariance_ identity (10,10)
189
+
190
+ # The log-likelihood can be calculated simply due to covariance_
191
+ # use of scipy.linalg.pinvh, np.linalg.slogdet and np.cov for estimator
192
+ # independence
193
+ expected_result = (
194
+ -(n - slogdet(pinvh(np.cov(X.T, bias=1)))[1] + n * np.log(2 * np.pi)) / 2
195
+ )
196
+ # expected_result = -14.1780602988
197
+ result = _as_numpy(est.score(X_df))
198
+ assert_allclose(expected_result, result, atol=1e-6)
199
+
200
+
166
201
  # Monkeypatch IncrementalEmpiricalCovariance into relevant sklearn.covariance tests
167
202
  @pytest.mark.allow_sklearn_fallback
168
203
  @pytest.mark.parametrize(
@@ -32,6 +32,7 @@ if daal_check_version((2024, "P", 100)):
32
32
 
33
33
  from .._device_offload import dispatch, wrap_output_data
34
34
  from .._utils import PatchingConditionsChain
35
+ from ..utils import get_namespace
35
36
 
36
37
  if sklearn_check_version("1.1") and not sklearn_check_version("1.2"):
37
38
  from sklearn.utils import check_scalar
@@ -42,7 +43,6 @@ if daal_check_version((2024, "P", 100)):
42
43
  from sklearn.decomposition import PCA as sklearn_PCA
43
44
 
44
45
  from onedal.decomposition import PCA as onedal_PCA
45
- from sklearnex.utils import get_namespace
46
46
 
47
47
  @control_n_jobs(decorated_methods=["fit", "transform", "fit_transform"])
48
48
  class PCA(sklearn_PCA):
@@ -210,6 +210,29 @@ if daal_check_version((2024, "P", 100)):
210
210
  # Scikit-learn PCA["covariance_eigh"] was fit
211
211
  return self._transform(X_fit, xp, x_is_centered=x_is_centered)
212
212
 
213
+ @wrap_output_data
214
+ def inverse_transform(self, X):
215
+ xp, _ = get_namespace(X)
216
+
217
+ mean = self.mean_
218
+ if self.whiten:
219
+ components = (
220
+ xp.sqrt(self.explained_variance_[:, np.newaxis]) * self.components_
221
+ )
222
+ else:
223
+ components = self.components_
224
+
225
+ if "numpy" not in xp.__name__:
226
+ # DPCtl and dpnp require inputs to be on the same device for
227
+ # matrix multiplication and division. The type and location
228
+ # of the components and mean are dependent on the sklearn
229
+ # version, this makes sure it is of the same type and on the
230
+ # same device as the data (compute follows data).
231
+ components = xp.asarray(components, device=X.device)
232
+ mean = xp.asarray(mean, device=X.device)
233
+
234
+ return X @ components + mean
235
+
213
236
  def _onedal_supported(self, method_name, X):
214
237
  class_name = self.__class__.__name__
215
238
  patching_status = PatchingConditionsChain(
@@ -381,6 +404,7 @@ if daal_check_version((2024, "P", 100)):
381
404
  fit.__doc__ = sklearn_PCA.fit.__doc__
382
405
  transform.__doc__ = sklearn_PCA.transform.__doc__
383
406
  fit_transform.__doc__ = sklearn_PCA.fit_transform.__doc__
407
+ inverse_transform.__doc__ = sklearn_PCA.inverse_transform.__doc__
384
408
 
385
409
  else:
386
410
  from daal4py.sklearn.decomposition import PCA
@@ -51,6 +51,8 @@ def test_sklearnex_import(dataframe, queue):
51
51
  assert hasattr(pca, "_onedal_estimator")
52
52
  else:
53
53
  assert "daal4py" in pca.__module__
54
+
55
+ tol = 1e-5 if _as_numpy(X_transformed).dtype == np.float32 else 1e-7
54
56
  assert_allclose([6.30061232, 0.54980396], _as_numpy(pca.singular_values_))
55
- assert_allclose(X_transformed_expected, _as_numpy(X_transformed))
56
- assert_allclose(X_transformed_expected, _as_numpy(X_fit_transformed))
57
+ assert_allclose(X_transformed_expected, _as_numpy(X_transformed), rtol=tol)
58
+ assert_allclose(X_transformed_expected, _as_numpy(X_fit_transformed), rtol=tol)