scikit-learn-intelex 2024.5.0__py310-none-manylinux1_x86_64.whl → 2024.7.0__py310-none-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-learn-intelex might be problematic.
- {scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/METADATA +2 -2
- scikit_learn_intelex-2024.7.0.dist-info/RECORD +122 -0
- sklearnex/_config.py +3 -15
- sklearnex/_device_offload.py +9 -168
- sklearnex/basic_statistics/basic_statistics.py +127 -1
- sklearnex/basic_statistics/tests/test_basic_statistics.py +251 -0
- sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +1 -1
- sklearnex/cluster/dbscan.py +3 -1
- sklearnex/cluster/k_means.py +8 -0
- sklearnex/cluster/tests/test_dbscan.py +8 -6
- sklearnex/cluster/tests/test_kmeans.py +15 -3
- sklearnex/conftest.py +11 -1
- sklearnex/covariance/incremental_covariance.py +64 -13
- sklearnex/covariance/tests/test_incremental_covariance.py +35 -0
- sklearnex/decomposition/pca.py +25 -1
- sklearnex/decomposition/tests/test_pca.py +4 -2
- sklearnex/dispatcher.py +109 -1
- sklearnex/ensemble/_forest.py +121 -57
- sklearnex/ensemble/tests/test_forest.py +7 -0
- sklearnex/glob/dispatcher.py +16 -2
- sklearnex/linear_model/coordinate_descent.py +13 -0
- sklearnex/linear_model/incremental_linear.py +102 -25
- sklearnex/linear_model/linear.py +25 -39
- sklearnex/linear_model/logistic_regression.py +92 -74
- sklearnex/linear_model/ridge.py +7 -0
- sklearnex/linear_model/tests/test_incremental_linear.py +10 -10
- sklearnex/linear_model/tests/test_linear.py +30 -5
- sklearnex/linear_model/tests/test_logreg.py +45 -3
- sklearnex/manifold/t_sne.py +4 -0
- sklearnex/metrics/pairwise.py +5 -0
- sklearnex/metrics/ranking.py +3 -0
- sklearnex/model_selection/split.py +3 -0
- sklearnex/neighbors/_lof.py +9 -0
- sklearnex/neighbors/common.py +45 -1
- sklearnex/neighbors/knn_classification.py +1 -20
- sklearnex/neighbors/knn_regression.py +25 -20
- sklearnex/neighbors/knn_unsupervised.py +31 -7
- sklearnex/preview/__init__.py +1 -1
- sklearnex/preview/decomposition/__init__.py +19 -0
- sklearnex/preview/decomposition/incremental_pca.py +228 -0
- sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
- sklearnex/preview/linear_model/__init__.py +19 -0
- sklearnex/preview/linear_model/ridge.py +419 -0
- sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
- sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
- sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
- sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
- sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
- sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
- sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
- sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
- sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +163 -0
- sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
- sklearnex/svm/_common.py +163 -20
- sklearnex/svm/nusvc.py +40 -4
- sklearnex/svm/nusvr.py +31 -2
- sklearnex/svm/svc.py +40 -4
- sklearnex/svm/svr.py +31 -2
- sklearnex/svm/tests/test_svm.py +12 -20
- sklearnex/tests/_utils.py +185 -30
- sklearnex/tests/_utils_spmd.py +185 -0
- sklearnex/tests/test_common.py +54 -0
- sklearnex/tests/test_config.py +4 -0
- sklearnex/tests/test_memory_usage.py +185 -126
- sklearnex/tests/test_monkeypatch.py +12 -4
- sklearnex/tests/test_patching.py +21 -25
- sklearnex/tests/test_run_to_run_stability.py +295 -0
- sklearnex/utils/_namespace.py +1 -1
- scikit_learn_intelex-2024.5.0.dist-info/RECORD +0 -104
- sklearnex/tests/test_run_to_run_stability_tests.py +0 -428
- {scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/LICENSE.txt +0 -0
- {scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/WHEEL +0 -0
- {scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/top_level.txt +0 -0

sklearnex/basic_statistics/tests/test_basic_statistics.py ADDED

@@ -0,0 +1,251 @@
+# ==============================================================================
+# Copyright 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import numpy as np
+import pytest
+from numpy.testing import assert_allclose
+
+from onedal.basic_statistics.tests.test_basic_statistics import (
+    expected_max,
+    expected_mean,
+    expected_sum,
+    options_and_tests,
+)
+from onedal.tests.utils._dataframes_support import (
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
+from sklearnex.basic_statistics import BasicStatistics
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import_basic_statistics(dataframe, queue):
+    X = np.array([[0, 0], [1, 1]])
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+
+    weights = np.array([1, 0.5])
+    weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+
+    result = BasicStatistics().fit(X_df)
+
+    expected_mean = np.array([0.5, 0.5])
+    expected_min = np.array([0, 0])
+    expected_max = np.array([1, 1])
+
+    assert_allclose(expected_mean, result.mean)
+    assert_allclose(expected_max, result.max)
+    assert_allclose(expected_min, result.min)
+
+    result = BasicStatistics().fit(X_df, sample_weight=weights_df)
+
+    expected_weighted_mean = np.array([0.25, 0.25])
+    expected_weighted_min = np.array([0, 0])
+    expected_weighted_max = np.array([0.5, 0.5])
+
+    assert_allclose(expected_weighted_mean, result.mean)
+    assert_allclose(expected_weighted_min, result.min)
+    assert_allclose(expected_weighted_max, result.max)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_multiple_options_on_gold_data(dataframe, queue, weighted, dtype):
+    X = np.array([[0, 0], [1, 1]])
+    X = X.astype(dtype=dtype)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    if weighted:
+        weights = np.array([1, 0.5])
+        weights = weights.astype(dtype=dtype)
+        weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+    basicstat = BasicStatistics()
+
+    if weighted:
+        result = basicstat.fit(X_df, sample_weight=weights_df)
+    else:
+        result = basicstat.fit(X_df)
+
+    if weighted:
+        expected_weighted_mean = np.array([0.25, 0.25])
+        expected_weighted_min = np.array([0, 0])
+        expected_weighted_max = np.array([0.5, 0.5])
+        assert_allclose(expected_weighted_mean, result.mean)
+        assert_allclose(expected_weighted_max, result.max)
+        assert_allclose(expected_weighted_min, result.min)
+    else:
+        expected_mean = np.array([0.5, 0.5])
+        expected_min = np.array([0, 0])
+        expected_max = np.array([1, 1])
+        assert_allclose(expected_mean, result.mean)
+        assert_allclose(expected_max, result.max)
+        assert_allclose(expected_min, result.min)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("option", options_and_tests)
+@pytest.mark.parametrize("row_count", [100, 1000])
+@pytest.mark.parametrize("column_count", [10, 100])
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_single_option_on_random_data(
+    dataframe, queue, option, row_count, column_count, weighted, dtype
+):
+    result_option, function, tols = option
+    fp32tol, fp64tol = tols
+    seed = 77
+    gen = np.random.default_rng(seed)
+    X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+    X = X.astype(dtype=dtype)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    if weighted:
+        weights = gen.uniform(low=-0.5, high=1.0, size=row_count)
+        weights = weights.astype(dtype=dtype)
+        weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+    basicstat = BasicStatistics(result_options=result_option)
+
+    if weighted:
+        result = basicstat.fit(X_df, sample_weight=weights_df)
+    else:
+        result = basicstat.fit(X_df)
+
+    res = getattr(result, result_option)
+    if weighted:
+        weighted_data = np.diag(weights) @ X
+        gtr = function(weighted_data)
+    else:
+        gtr = function(X)
+
+    tol = fp32tol if res.dtype == np.float32 else fp64tol
+    assert_allclose(gtr, res, atol=tol)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("row_count", [100, 1000])
+@pytest.mark.parametrize("column_count", [10, 100])
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_multiple_options_on_random_data(
+    dataframe, queue, row_count, column_count, weighted, dtype
+):
+    seed = 77
+    gen = np.random.default_rng(seed)
+    X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+    X = X.astype(dtype=dtype)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    if weighted:
+        weights = gen.uniform(low=-0.5, high=1.0, size=row_count)
+        weights = weights.astype(dtype=dtype)
+        weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+    basicstat = BasicStatistics(result_options=["mean", "max", "sum"])
+
+    if weighted:
+        result = basicstat.fit(X_df, sample_weight=weights_df)
+    else:
+        result = basicstat.fit(X_df)
+
+    res_mean, res_max, res_sum = result.mean, result.max, result.sum
+    if weighted:
+        weighted_data = np.diag(weights) @ X
+        gtr_mean, gtr_max, gtr_sum = (
+            expected_mean(weighted_data),
+            expected_max(weighted_data),
+            expected_sum(weighted_data),
+        )
+    else:
+        gtr_mean, gtr_max, gtr_sum = (
+            expected_mean(X),
+            expected_max(X),
+            expected_sum(X),
+        )
+
+    tol = 5e-4 if res_mean.dtype == np.float32 else 1e-7
+    assert_allclose(gtr_mean, res_mean, atol=tol)
+    assert_allclose(gtr_max, res_max, atol=tol)
+    assert_allclose(gtr_sum, res_sum, atol=tol)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("row_count", [100, 1000])
+@pytest.mark.parametrize("column_count", [10, 100])
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_all_option_on_random_data(
+    dataframe, queue, row_count, column_count, weighted, dtype
+):
+    seed = 77
+    gen = np.random.default_rng(seed)
+    X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+    X = X.astype(dtype=dtype)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    if weighted:
+        weights = gen.uniform(low=-0.5, high=+1.0, size=row_count)
+        weights = weights.astype(dtype=dtype)
+        weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+    basicstat = BasicStatistics(result_options="all")
+
+    if weighted:
+        result = basicstat.fit(X_df, sample_weight=weights_df)
+    else:
+        result = basicstat.fit(X_df)
+
+    if weighted:
+        weighted_data = np.diag(weights) @ X
+
+    for option in options_and_tests:
+        result_option, function, tols = option
+        fp32tol, fp64tol = tols
+        res = getattr(result, result_option)
+        if weighted:
+            gtr = function(weighted_data)
+        else:
+            gtr = function(X)
+        tol = fp32tol if res.dtype == np.float32 else fp64tol
+        assert_allclose(gtr, res, atol=tol)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("option", options_and_tests)
+@pytest.mark.parametrize("data_size", [100, 1000])
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_1d_input_on_random_data(dataframe, queue, option, data_size, weighted, dtype):
+    result_option, function, tols = option
+    fp32tol, fp64tol = tols
+    seed = 77
+    gen = np.random.default_rng(seed)
+    X = gen.uniform(low=-0.3, high=+0.7, size=data_size)
+    X = X.astype(dtype=dtype)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    if weighted:
+        weights = gen.uniform(low=-0.5, high=1.0, size=data_size)
+        weights = weights.astype(dtype=dtype)
+        weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+    basicstat = BasicStatistics(result_options=result_option)
+
+    if weighted:
+        result = basicstat.fit(X_df, sample_weight=weights_df)
+    else:
+        result = basicstat.fit(X_df)
+
+    res = getattr(result, result_option)
+    if weighted:
+        weighted_data = weights * X
+        gtr = function(weighted_data)
+    else:
+        gtr = function(X)
+
+    tol = fp32tol if res.dtype == np.float32 else fp64tol
+    assert_allclose(gtr, res, atol=tol)
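
Aside (not part of the diff): the weighted ground truths above are built by rescaling rows, since np.diag(weights) @ X is simply a row-wise rescaling of X. A minimal NumPy sketch of that identity, assuming the expected_* helpers imported from onedal reduce column-wise:

import numpy as np

rng = np.random.default_rng(77)
X = rng.uniform(low=-0.3, high=0.7, size=(100, 10))
w = rng.uniform(low=-0.5, high=1.0, size=100)

# diag(w) @ X multiplies row i of X by w[i]; broadcasting yields the same
# matrix without materializing the (100, 100) diagonal.
weighted = np.diag(w) @ X
assert np.allclose(weighted, w[:, None] * X)

# The reference statistic is then the plain column-wise reduction of the
# rescaled matrix, e.g. for the "mean" result option:
gtr_mean = weighted.mean(axis=0)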

sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py CHANGED

@@ -18,7 +18,7 @@ import numpy as np
 import pytest
 from numpy.testing import assert_allclose
 
-from onedal.basic_statistics.tests.
+from onedal.basic_statistics.tests.test_basic_statistics import (
     expected_max,
     expected_mean,
     expected_sum,

sklearnex/cluster/dbscan.py CHANGED

@@ -17,7 +17,6 @@
 import numbers
 from abc import ABC
 
-import numpy as np
 from scipy import sparse as sp
 from sklearn.cluster import DBSCAN as sklearn_DBSCAN
 from sklearn.utils.validation import _check_sample_weight

@@ -85,6 +84,9 @@ class DBSCAN(sklearn_DBSCAN, BaseDBSCAN):
         self.n_jobs = n_jobs
 
     def _onedal_fit(self, X, y, sample_weight=None, queue=None):
+        if sklearn_check_version("1.0"):
+            X = self._validate_data(X, force_all_finite=False)
+
         onedal_params = {
             "eps": self.eps,
             "min_samples": self.min_samples,

sklearnex/cluster/k_means.py CHANGED

@@ -15,3 +15,11 @@
 # ===============================================================================
 
 from daal4py.sklearn.cluster import KMeans
+from onedal._device_offload import support_usm_ndarray
+
+# Note: `sklearnex.cluster.KMeans` only has functional
+# sycl GPU support. No GPU device will be offloaded.
+KMeans.fit = support_usm_ndarray(queue_param=False)(KMeans.fit)
+KMeans.fit_predict = support_usm_ndarray(queue_param=False)(KMeans.fit_predict)
+KMeans.predict = support_usm_ndarray(queue_param=False)(KMeans.predict)
+KMeans.score = support_usm_ndarray(queue_param=False)(KMeans.score)
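
Usage note (illustrative, not from the diff): after this change the daal4py-backed KMeans methods also accept USM-based inputs such as dpnp arrays in addition to NumPy arrays; per the comment above, computation is not offloaded to a GPU device. A hypothetical sketch, assuming dpnp and a SYCL device are available (plain NumPy input behaves as before):

import dpnp
import numpy as np

from sklearnex.cluster import KMeans

# Hypothetical example: USM-based input is passed directly to the wrapped methods.
X = dpnp.asarray(np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]]))
kmeans = KMeans(n_clusters=2, random_state=0).fit(X)
labels = kmeans.predict(dpnp.asarray(np.array([[0, 0], [12, 3]])))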

sklearnex/cluster/tests/test_dbscan.py CHANGED

@@ -18,16 +18,18 @@ import numpy as np
 import pytest
 from numpy.testing import assert_allclose
 
+from onedal.tests.utils._dataframes_support import (
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
 
-
-
-
-# sklearnex/tests/test_monkeypatch.py::test_preview_namespace.
-# @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
-def test_sklearnex_import_dbscan():
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import_dbscan(dataframe, queue):
     from sklearnex.cluster import DBSCAN
 
     X = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]])
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
     dbscan = DBSCAN(eps=3, min_samples=2).fit(X)
     assert "sklearnex" in dbscan.__module__
 

sklearnex/cluster/tests/test_kmeans.py CHANGED

@@ -15,16 +15,28 @@
 # ===============================================================================
 
 import numpy as np
+import pytest
 from numpy.testing import assert_allclose
 
+from onedal.tests.utils._dataframes_support import (
+    _as_numpy,
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import(dataframe, queue):
 
-def test_sklearnex_import():
     from sklearnex.cluster import KMeans
 
     X = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
     kmeans = KMeans(n_clusters=2, random_state=0).fit(X)
     assert "daal4py" in kmeans.__module__
 
-
+    X_test = [[0, 0], [12, 3]]
+    X_test = _convert_to_dataframe(X_test, sycl_queue=queue, target_df=dataframe)
+    result = kmeans.predict(X_test)
     expected = np.array([1, 0], dtype=np.int32)
-    assert_allclose(expected, result)
+    assert_allclose(expected, _as_numpy(result))

sklearnex/conftest.py CHANGED

@@ -19,7 +19,8 @@ import logging
 
 import pytest
 
-from
+from daal4py.sklearn._utils import sklearn_check_version
+from sklearnex import config_context, patch_sklearn, unpatch_sklearn
 
 
 def pytest_configure(config):

@@ -61,3 +62,12 @@ def with_sklearnex():
     patch_sklearn()
     yield
     unpatch_sklearn()
+
+
+@pytest.fixture
+def with_array_api():
+    if sklearn_check_version("1.2"):
+        with config_context(array_api_dispatch=True):
+            yield
+    else:
+        yield
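
Illustration (not part of the diff): pytest fixtures are requested by name, so a test in this package could opt into array API dispatch simply by taking the new fixture as an argument; on scikit-learn older than 1.2 the fixture is a no-op. A hypothetical test sketch:

import numpy as np


def test_pca_under_array_api(with_array_api):
    # Hypothetical test body; the fixture wraps it in
    # config_context(array_api_dispatch=True) when supported.
    from sklearnex.decomposition import PCA

    X = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]])
    PCA(n_components=1).fit(X)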

sklearnex/covariance/incremental_covariance.py CHANGED

@@ -19,13 +19,14 @@ import warnings
 
 import numpy as np
 from scipy import linalg
-from sklearn.base import BaseEstimator
+from sklearn.base import BaseEstimator, clone
 from sklearn.covariance import EmpiricalCovariance as sklearn_EmpiricalCovariance
+from sklearn.covariance import log_likelihood
 from sklearn.utils import check_array, gen_batches
+from sklearn.utils.validation import _num_features
 
 from daal4py.sklearn._n_jobs_support import control_n_jobs
 from daal4py.sklearn._utils import daal_check_version, sklearn_check_version
-from onedal._device_offload import support_usm_ndarray
 from onedal.covariance import (
     IncrementalEmpiricalCovariance as onedal_IncrementalEmpiricalCovariance,
 )

@@ -34,6 +35,7 @@ from sklearnex import config_context
 from .._device_offload import dispatch, wrap_output_data
 from .._utils import PatchingConditionsChain, register_hyperparameters
 from ..metrics import pairwise_distances
+from ..utils import get_namespace
 
 if sklearn_check_version("1.2"):
     from sklearn.utils._param_validation import Interval

@@ -98,7 +100,6 @@ class IncrementalEmpiricalCovariance(BaseEstimator):
 
     get_precision = sklearn_EmpiricalCovariance.get_precision
    error_norm = wrap_output_data(sklearn_EmpiricalCovariance.error_norm)
-    score = wrap_output_data(sklearn_EmpiricalCovariance.score)
 
    def __init__(
        self, *, store_precision=False, assume_centered=False, batch_size=None, copy=True

@@ -197,6 +198,43 @@ class IncrementalEmpiricalCovariance(BaseEstimator):
 
         return self
 
+    @wrap_output_data
+    def score(self, X_test, y=None):
+        xp, _ = get_namespace(X_test)
+
+        location = self.location_
+        if sklearn_check_version("1.0"):
+            X = self._validate_data(
+                X_test,
+                dtype=[np.float64, np.float32],
+                reset=False,
+            )
+        else:
+            X = check_array(
+                X_test,
+                dtype=[np.float64, np.float32],
+            )
+
+        if "numpy" not in xp.__name__:
+            location = xp.asarray(location, device=X_test.device)
+            # depending on the sklearn version, check_array
+            # and validate_data will return only numpy arrays
+            # which will break dpnp/dpctl support. If the
+            # array namespace isn't from numpy and the data
+            # is now a numpy array, it has been validated and
+            # the original can be used.
+            if isinstance(X, np.ndarray):
+                X = X_test
+
+        est = clone(self)
+        est.set_params(**{"assume_centered": True})
+
+        # test_cov is a numpy array, but calculated on device
+        test_cov = est.fit(X - location).covariance_
+        res = log_likelihood(test_cov, self.get_precision())
+
+        return res
+
     def partial_fit(self, X, y=None, check_input=True):
         """
         Incremental fit with X. All of X is processed as a single batch.

@@ -293,21 +331,34 @@ class IncrementalEmpiricalCovariance(BaseEstimator):
         return self
 
     # expose sklearnex pairwise_distances if mahalanobis distance eventually supported
-    @wrap_output_data
     def mahalanobis(self, X):
         if sklearn_check_version("1.0"):
-            self.
-        else:
-            check_array(X, copy=self.copy)
+            self._check_feature_names(X, reset=False)
 
+        xp, _ = get_namespace(X)
         precision = self.get_precision()
-
-
-
-
-
+        # compute mahalanobis distances
+        # pairwise_distances will check n_features (via n_feature matching with
+        # self.location_) , and will check for finiteness via check array
+        # check_feature_names will match _validate_data functionally
+        location = self.location_[np.newaxis, :]
+        if "numpy" not in xp.__name__:
+            # Guarantee that inputs to pairwise_distances match in type and location
+            location = xp.asarray(location, device=X.device)
+
+        try:
+            dist = pairwise_distances(X, location, metric="mahalanobis", VI=precision)
+        except ValueError as e:
+            # Throw the expected sklearn error in an n_feature length violation
+            if "Incompatible dimension for X and Y matrices: X.shape[1] ==" in str(e):
+                raise ValueError(
+                    f"X has {_num_features(X)} features, but {self.__class__.__name__} "
+                    f"is expecting {self.n_features_in_} features as input."
+                )
+            else:
+                raise e
 
-        return
+        return (xp.reshape(dist, (-1,))) ** 2
 
     _onedal_cpu_supported = _onedal_supported
     _onedal_gpu_supported = _onedal_supported
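
For orientation (not part of the diff): the new score method centers the held-out data with the fitted location_, refits a cloned estimator with assume_centered=True to obtain the test covariance, and passes that together with get_precision() to scikit-learn's log_likelihood. A minimal sketch of the quantity this evaluates to, assuming sklearn's helper returns the average Gaussian log-likelihood per sample:

import numpy as np


def gaussian_avg_log_likelihood(test_cov, precision):
    # Average log-likelihood of centered samples whose empirical covariance is
    # `test_cov`, under a zero-mean Gaussian with the given precision matrix.
    p = precision.shape[0]
    log_det_precision = np.linalg.slogdet(precision)[1]
    return (log_det_precision - np.sum(test_cov * precision) - p * np.log(2 * np.pi)) / 2

For whitened data, where both the covariance and the precision are close to the identity, this reduces to -(p + p * log(2 * pi)) / 2, which is the closed form checked by the new test below.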

sklearnex/covariance/tests/test_incremental_covariance.py CHANGED

@@ -16,13 +16,18 @@
 
 import numpy as np
 import pytest
+from numpy.linalg import slogdet
 from numpy.testing import assert_allclose
+from scipy.linalg import pinvh
 from sklearn.covariance.tests.test_covariance import (
     test_covariance,
     test_EmpiricalCovariance_validates_mahalanobis,
 )
+from sklearn.datasets import load_diabetes
+from sklearn.decomposition import PCA
 
 from onedal.tests.utils._dataframes_support import (
+    _as_numpy,
     _convert_to_dataframe,
     get_dataframes_and_queues,
 )

@@ -163,6 +168,36 @@ def test_sklearnex_fit_on_random_data(
     assert_allclose(expected_means, result.location_, atol=1e-6)
 
 
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_whitened_toy_score(dataframe, queue):
+    from sklearnex.covariance import IncrementalEmpiricalCovariance
+
+    # Load a sklearn toy dataset with sufficient data
+    X, _ = load_diabetes(return_X_y=True)
+    n = X.shape[1]
+
+    # Transform the data into uncorrelated, unity variance components
+    X = PCA(whiten=True).fit_transform(X)
+
+    # change dataframe
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+
+    # fit data
+    est = IncrementalEmpiricalCovariance()
+    est.fit(X_df)
+    # location_ attribute approximately zero (10,), covariance_ identity (10,10)
+
+    # The log-likelihood can be calculated simply due to covariance_
+    # use of scipy.linalg.pinvh, np.linalg.sloget and np.cov for estimator
+    # independence
+    expected_result = (
+        -(n - slogdet(pinvh(np.cov(X.T, bias=1)))[1] + n * np.log(2 * np.pi)) / 2
+    )
+    # expected_result = -14.1780602988
+    result = _as_numpy(est.score(X_df))
+    assert_allclose(expected_result, result, atol=1e-6)
+
+
 # Monkeypatch IncrementalEmpiricalCovariance into relevant sklearn.covariance tests
 @pytest.mark.allow_sklearn_fallback
 @pytest.mark.parametrize(

sklearnex/decomposition/pca.py CHANGED

@@ -32,6 +32,7 @@ if daal_check_version((2024, "P", 100)):
 
     from .._device_offload import dispatch, wrap_output_data
     from .._utils import PatchingConditionsChain
+    from ..utils import get_namespace
 
     if sklearn_check_version("1.1") and not sklearn_check_version("1.2"):
         from sklearn.utils import check_scalar

@@ -42,7 +43,6 @@ if daal_check_version((2024, "P", 100)):
     from sklearn.decomposition import PCA as sklearn_PCA
 
     from onedal.decomposition import PCA as onedal_PCA
-    from sklearnex.utils import get_namespace
 
     @control_n_jobs(decorated_methods=["fit", "transform", "fit_transform"])
     class PCA(sklearn_PCA):

@@ -210,6 +210,29 @@ if daal_check_version((2024, "P", 100)):
             # Scikit-learn PCA["covariance_eigh"] was fit
             return self._transform(X_fit, xp, x_is_centered=x_is_centered)
 
+        @wrap_output_data
+        def inverse_transform(self, X):
+            xp, _ = get_namespace(X)
+
+            mean = self.mean_
+            if self.whiten:
+                components = (
+                    xp.sqrt(self.explained_variance_[:, np.newaxis]) * self.components_
+                )
+            else:
+                components = self.components_
+
+            if "numpy" not in xp.__name__:
+                # DPCtl and dpnp require inputs to be on the same device for
+                # matrix multiplication and division. The type and location
+                # of the components and mean are dependent on the sklearn
+                # version, this makes sure it is of the same type and on the
+                # same device as the data (compute follows data).
+                components = xp.asarray(components, device=X.device)
+                mean = xp.asarray(mean, device=X.device)
+
+            return X @ components + mean
+
         def _onedal_supported(self, method_name, X):
             class_name = self.__class__.__name__
             patching_status = PatchingConditionsChain(

@@ -381,6 +404,7 @@ if daal_check_version((2024, "P", 100)):
         fit.__doc__ = sklearn_PCA.fit.__doc__
         transform.__doc__ = sklearn_PCA.transform.__doc__
         fit_transform.__doc__ = sklearn_PCA.fit_transform.__doc__
+        inverse_transform.__doc__ = sklearn_PCA.inverse_transform.__doc__
 
 else:
     from daal4py.sklearn.decomposition import PCA
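
A quick illustration (not part of the diff) of the reconstruction arithmetic the new inverse_transform performs, checked against stock scikit-learn PCA: with whiten=True the components are rescaled by the square root of the explained variance before projecting back and re-adding the mean.

import numpy as np
from sklearn.decomposition import PCA  # stock estimator, used only as a reference

rng = np.random.default_rng(0)
X = rng.standard_normal((50, 4))

pca = PCA(n_components=2, whiten=True).fit(X)
Xt = pca.transform(X)

# Same arithmetic as the sklearnex inverse_transform above.
components = np.sqrt(pca.explained_variance_[:, np.newaxis]) * pca.components_
X_back = Xt @ components + pca.mean_

assert np.allclose(X_back, pca.inverse_transform(Xt))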

sklearnex/decomposition/tests/test_pca.py CHANGED

@@ -51,6 +51,8 @@ def test_sklearnex_import(dataframe, queue):
         assert hasattr(pca, "_onedal_estimator")
     else:
         assert "daal4py" in pca.__module__
+
+    tol = 1e-5 if _as_numpy(X_transformed).dtype == np.float32 else 1e-7
     assert_allclose([6.30061232, 0.54980396], _as_numpy(pca.singular_values_))
-    assert_allclose(X_transformed_expected, _as_numpy(X_transformed))
-    assert_allclose(X_transformed_expected, _as_numpy(X_fit_transformed))
+    assert_allclose(X_transformed_expected, _as_numpy(X_transformed), rtol=tol)
+    assert_allclose(X_transformed_expected, _as_numpy(X_fit_transformed), rtol=tol)