scikit-learn-intelex 2024.6.0__py39-none-manylinux1_x86_64.whl → 2024.7.0__py39-none-manylinux1_x86_64.whl
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of scikit-learn-intelex might be problematic.
- {scikit_learn_intelex-2024.6.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/METADATA +2 -2
- {scikit_learn_intelex-2024.6.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/RECORD +55 -41
- sklearnex/_config.py +3 -15
- sklearnex/_device_offload.py +9 -168
- sklearnex/basic_statistics/basic_statistics.py +127 -1
- sklearnex/basic_statistics/tests/test_basic_statistics.py +251 -0
- sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +1 -1
- sklearnex/cluster/dbscan.py +0 -1
- sklearnex/cluster/k_means.py +8 -0
- sklearnex/cluster/tests/test_kmeans.py +15 -3
- sklearnex/covariance/incremental_covariance.py +64 -13
- sklearnex/covariance/tests/test_incremental_covariance.py +35 -0
- sklearnex/decomposition/pca.py +25 -1
- sklearnex/dispatcher.py +94 -0
- sklearnex/ensemble/_forest.py +8 -35
- sklearnex/ensemble/tests/test_forest.py +9 -12
- sklearnex/linear_model/coordinate_descent.py +13 -0
- sklearnex/linear_model/linear.py +2 -34
- sklearnex/linear_model/logistic_regression.py +79 -59
- sklearnex/linear_model/ridge.py +7 -0
- sklearnex/linear_model/tests/test_linear.py +28 -3
- sklearnex/linear_model/tests/test_logreg.py +45 -3
- sklearnex/manifold/t_sne.py +4 -0
- sklearnex/metrics/pairwise.py +5 -0
- sklearnex/metrics/ranking.py +3 -0
- sklearnex/model_selection/split.py +3 -0
- sklearnex/neighbors/_lof.py +9 -0
- sklearnex/neighbors/common.py +45 -1
- sklearnex/neighbors/knn_classification.py +1 -20
- sklearnex/neighbors/knn_regression.py +1 -20
- sklearnex/neighbors/knn_unsupervised.py +31 -7
- sklearnex/preview/__init__.py +1 -1
- sklearnex/preview/linear_model/__init__.py +19 -0
- sklearnex/preview/linear_model/ridge.py +419 -0
- sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
- sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
- sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
- sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
- sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
- sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
- sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
- sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
- sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +163 -0
- sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
- sklearnex/svm/_common.py +19 -21
- sklearnex/svm/tests/test_svm.py +12 -20
- sklearnex/tests/_utils.py +143 -20
- sklearnex/tests/_utils_spmd.py +185 -0
- sklearnex/tests/test_config.py +4 -0
- sklearnex/tests/test_monkeypatch.py +12 -4
- sklearnex/tests/test_patching.py +16 -13
- sklearnex/tests/test_run_to_run_stability.py +21 -9
- {scikit_learn_intelex-2024.6.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/LICENSE.txt +0 -0
- {scikit_learn_intelex-2024.6.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/WHEEL +0 -0
- {scikit_learn_intelex-2024.6.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/top_level.txt +0 -0
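The main theme of this release is broader patching coverage: ElasticNet, Lasso, Ridge, TSNE, pairwise_distances, roc_auc_score, and train_test_split now route through sklearnex wrappers (see the dispatcher.py hunks below). A minimal sketch of how that surfaces to users, assuming the standard patch_sklearn entry point:

    # After patching, the stock sklearn imports resolve to the sklearnex
    # implementations registered in dispatcher.py in this release.
    from sklearnex import patch_sklearn

    patch_sklearn()

    from sklearn.linear_model import ElasticNet, Lasso, Ridge
    from sklearn.manifold import TSNE
    from sklearn.metrics import pairwise_distances, roc_auc_score
    from sklearn.model_selection import train_test_split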
sklearnex/basic_statistics/tests/test_basic_statistics.py
ADDED

@@ -0,0 +1,251 @@
+# ==============================================================================
+# Copyright 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import numpy as np
+import pytest
+from numpy.testing import assert_allclose
+
+from onedal.basic_statistics.tests.test_basic_statistics import (
+    expected_max,
+    expected_mean,
+    expected_sum,
+    options_and_tests,
+)
+from onedal.tests.utils._dataframes_support import (
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
+from sklearnex.basic_statistics import BasicStatistics
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import_basic_statistics(dataframe, queue):
+    X = np.array([[0, 0], [1, 1]])
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+
+    weights = np.array([1, 0.5])
+    weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+
+    result = BasicStatistics().fit(X_df)
+
+    expected_mean = np.array([0.5, 0.5])
+    expected_min = np.array([0, 0])
+    expected_max = np.array([1, 1])
+
+    assert_allclose(expected_mean, result.mean)
+    assert_allclose(expected_max, result.max)
+    assert_allclose(expected_min, result.min)
+
+    result = BasicStatistics().fit(X_df, sample_weight=weights_df)
+
+    expected_weighted_mean = np.array([0.25, 0.25])
+    expected_weighted_min = np.array([0, 0])
+    expected_weighted_max = np.array([0.5, 0.5])
+
+    assert_allclose(expected_weighted_mean, result.mean)
+    assert_allclose(expected_weighted_min, result.min)
+    assert_allclose(expected_weighted_max, result.max)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_multiple_options_on_gold_data(dataframe, queue, weighted, dtype):
+    X = np.array([[0, 0], [1, 1]])
+    X = X.astype(dtype=dtype)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    if weighted:
+        weights = np.array([1, 0.5])
+        weights = weights.astype(dtype=dtype)
+        weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+    basicstat = BasicStatistics()
+
+    if weighted:
+        result = basicstat.fit(X_df, sample_weight=weights_df)
+    else:
+        result = basicstat.fit(X_df)
+
+    if weighted:
+        expected_weighted_mean = np.array([0.25, 0.25])
+        expected_weighted_min = np.array([0, 0])
+        expected_weighted_max = np.array([0.5, 0.5])
+        assert_allclose(expected_weighted_mean, result.mean)
+        assert_allclose(expected_weighted_max, result.max)
+        assert_allclose(expected_weighted_min, result.min)
+    else:
+        expected_mean = np.array([0.5, 0.5])
+        expected_min = np.array([0, 0])
+        expected_max = np.array([1, 1])
+        assert_allclose(expected_mean, result.mean)
+        assert_allclose(expected_max, result.max)
+        assert_allclose(expected_min, result.min)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("option", options_and_tests)
+@pytest.mark.parametrize("row_count", [100, 1000])
+@pytest.mark.parametrize("column_count", [10, 100])
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_single_option_on_random_data(
+    dataframe, queue, option, row_count, column_count, weighted, dtype
+):
+    result_option, function, tols = option
+    fp32tol, fp64tol = tols
+    seed = 77
+    gen = np.random.default_rng(seed)
+    X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+    X = X.astype(dtype=dtype)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    if weighted:
+        weights = gen.uniform(low=-0.5, high=1.0, size=row_count)
+        weights = weights.astype(dtype=dtype)
+        weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+    basicstat = BasicStatistics(result_options=result_option)
+
+    if weighted:
+        result = basicstat.fit(X_df, sample_weight=weights_df)
+    else:
+        result = basicstat.fit(X_df)
+
+    res = getattr(result, result_option)
+    if weighted:
+        weighted_data = np.diag(weights) @ X
+        gtr = function(weighted_data)
+    else:
+        gtr = function(X)
+
+    tol = fp32tol if res.dtype == np.float32 else fp64tol
+    assert_allclose(gtr, res, atol=tol)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("row_count", [100, 1000])
+@pytest.mark.parametrize("column_count", [10, 100])
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_multiple_options_on_random_data(
+    dataframe, queue, row_count, column_count, weighted, dtype
+):
+    seed = 77
+    gen = np.random.default_rng(seed)
+    X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+    X = X.astype(dtype=dtype)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    if weighted:
+        weights = gen.uniform(low=-0.5, high=1.0, size=row_count)
+        weights = weights.astype(dtype=dtype)
+        weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+    basicstat = BasicStatistics(result_options=["mean", "max", "sum"])
+
+    if weighted:
+        result = basicstat.fit(X_df, sample_weight=weights_df)
+    else:
+        result = basicstat.fit(X_df)
+
+    res_mean, res_max, res_sum = result.mean, result.max, result.sum
+    if weighted:
+        weighted_data = np.diag(weights) @ X
+        gtr_mean, gtr_max, gtr_sum = (
+            expected_mean(weighted_data),
+            expected_max(weighted_data),
+            expected_sum(weighted_data),
+        )
+    else:
+        gtr_mean, gtr_max, gtr_sum = (
+            expected_mean(X),
+            expected_max(X),
+            expected_sum(X),
+        )
+
+    tol = 5e-4 if res_mean.dtype == np.float32 else 1e-7
+    assert_allclose(gtr_mean, res_mean, atol=tol)
+    assert_allclose(gtr_max, res_max, atol=tol)
+    assert_allclose(gtr_sum, res_sum, atol=tol)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("row_count", [100, 1000])
+@pytest.mark.parametrize("column_count", [10, 100])
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_all_option_on_random_data(
+    dataframe, queue, row_count, column_count, weighted, dtype
+):
+    seed = 77
+    gen = np.random.default_rng(seed)
+    X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+    X = X.astype(dtype=dtype)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    if weighted:
+        weights = gen.uniform(low=-0.5, high=+1.0, size=row_count)
+        weights = weights.astype(dtype=dtype)
+        weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+    basicstat = BasicStatistics(result_options="all")
+
+    if weighted:
+        result = basicstat.fit(X_df, sample_weight=weights_df)
+    else:
+        result = basicstat.fit(X_df)
+
+    if weighted:
+        weighted_data = np.diag(weights) @ X
+
+    for option in options_and_tests:
+        result_option, function, tols = option
+        fp32tol, fp64tol = tols
+        res = getattr(result, result_option)
+        if weighted:
+            gtr = function(weighted_data)
+        else:
+            gtr = function(X)
+        tol = fp32tol if res.dtype == np.float32 else fp64tol
+        assert_allclose(gtr, res, atol=tol)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("option", options_and_tests)
+@pytest.mark.parametrize("data_size", [100, 1000])
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_1d_input_on_random_data(dataframe, queue, option, data_size, weighted, dtype):
+    result_option, function, tols = option
+    fp32tol, fp64tol = tols
+    seed = 77
+    gen = np.random.default_rng(seed)
+    X = gen.uniform(low=-0.3, high=+0.7, size=data_size)
+    X = X.astype(dtype=dtype)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    if weighted:
+        weights = gen.uniform(low=-0.5, high=1.0, size=data_size)
+        weights = weights.astype(dtype=dtype)
+        weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+    basicstat = BasicStatistics(result_options=result_option)
+
+    if weighted:
+        result = basicstat.fit(X_df, sample_weight=weights_df)
+    else:
+        result = basicstat.fit(X_df)
+
+    res = getattr(result, result_option)
+    if weighted:
+        weighted_data = weights * X
+        gtr = function(weighted_data)
+    else:
+        gtr = function(X)
+
+    tol = fp32tol if res.dtype == np.float32 else fp64tol
+    assert_allclose(gtr, res, atol=tol)
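The weighted ground truths asserted throughout this new test file come from scaling each row by its weight before applying the plain statistic (np.diag(weights) @ X in the 2D tests). A standalone numpy sketch, using the gold-data values from the first test above:

    import numpy as np

    X = np.array([[0.0, 0.0], [1.0, 1.0]])
    weights = np.array([1.0, 0.5])

    weighted = np.diag(weights) @ X  # row i scaled by weights[i]
    print(weighted.mean(axis=0))     # [0.25 0.25], the expected weighted mean
    print(weighted.max(axis=0))      # [0.5 0.5], the expected weighted max
    print(weighted.min(axis=0))      # [0. 0.], the expected weighted min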
sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py
CHANGED

@@ -18,7 +18,7 @@ import numpy as np
 import pytest
 from numpy.testing import assert_allclose
 
-from onedal.basic_statistics.tests.
+from onedal.basic_statistics.tests.test_basic_statistics import (
     expected_max,
     expected_mean,
     expected_sum,
sklearnex/cluster/dbscan.py
CHANGED
sklearnex/cluster/k_means.py
CHANGED

@@ -15,3 +15,11 @@
 # ===============================================================================
 
 from daal4py.sklearn.cluster import KMeans
+from onedal._device_offload import support_usm_ndarray
+
+# Note: `sklearnex.cluster.KMeans` only has functional
+# sycl GPU support. No GPU device will be offloaded.
+KMeans.fit = support_usm_ndarray(queue_param=False)(KMeans.fit)
+KMeans.fit_predict = support_usm_ndarray(queue_param=False)(KMeans.fit_predict)
+KMeans.predict = support_usm_ndarray(queue_param=False)(KMeans.predict)
+KMeans.score = support_usm_ndarray(queue_param=False)(KMeans.score)
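The note in this hunk is worth unpacking: support_usm_ndarray only lets the daal4py-based KMeans accept USM-backed (dpctl/dpnp) inputs; the computation itself is not offloaded to a GPU. A hedged sketch of what the wrappers enable, assuming dpctl is installed and a SYCL device is available:

    import numpy as np
    import dpctl.tensor as dpt

    from sklearnex.cluster import KMeans

    # USM-backed input is accepted after the support_usm_ndarray wrapping;
    # per the note above, the fit itself still executes on CPU.
    X = dpt.asarray(np.array([[1.0, 2.0], [1.0, 4.0], [10.0, 2.0], [10.0, 4.0]]))
    kmeans = KMeans(n_clusters=2, random_state=0).fit(X)
    print(kmeans.cluster_centers_)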
sklearnex/cluster/tests/test_kmeans.py
CHANGED

@@ -15,16 +15,28 @@
 # ===============================================================================
 
 import numpy as np
+import pytest
 from numpy.testing import assert_allclose
 
+from onedal.tests.utils._dataframes_support import (
+    _as_numpy,
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import(dataframe, queue):
 
-def test_sklearnex_import():
     from sklearnex.cluster import KMeans
 
     X = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
     kmeans = KMeans(n_clusters=2, random_state=0).fit(X)
     assert "daal4py" in kmeans.__module__
 
-
+    X_test = [[0, 0], [12, 3]]
+    X_test = _convert_to_dataframe(X_test, sycl_queue=queue, target_df=dataframe)
+    result = kmeans.predict(X_test)
     expected = np.array([1, 0], dtype=np.int32)
-    assert_allclose(expected, result)
+    assert_allclose(expected, _as_numpy(result))
sklearnex/covariance/incremental_covariance.py
CHANGED

@@ -19,13 +19,14 @@ import warnings
 
 import numpy as np
 from scipy import linalg
-from sklearn.base import BaseEstimator
+from sklearn.base import BaseEstimator, clone
 from sklearn.covariance import EmpiricalCovariance as sklearn_EmpiricalCovariance
+from sklearn.covariance import log_likelihood
 from sklearn.utils import check_array, gen_batches
+from sklearn.utils.validation import _num_features
 
 from daal4py.sklearn._n_jobs_support import control_n_jobs
 from daal4py.sklearn._utils import daal_check_version, sklearn_check_version
-from onedal._device_offload import support_usm_ndarray
 from onedal.covariance import (
     IncrementalEmpiricalCovariance as onedal_IncrementalEmpiricalCovariance,
 )

@@ -34,6 +35,7 @@ from sklearnex import config_context
 from .._device_offload import dispatch, wrap_output_data
 from .._utils import PatchingConditionsChain, register_hyperparameters
 from ..metrics import pairwise_distances
+from ..utils import get_namespace
 
 if sklearn_check_version("1.2"):
     from sklearn.utils._param_validation import Interval

@@ -98,7 +100,6 @@ class IncrementalEmpiricalCovariance(BaseEstimator):
 
     get_precision = sklearn_EmpiricalCovariance.get_precision
    error_norm = wrap_output_data(sklearn_EmpiricalCovariance.error_norm)
-    score = wrap_output_data(sklearn_EmpiricalCovariance.score)
 
     def __init__(
         self, *, store_precision=False, assume_centered=False, batch_size=None, copy=True
@@ -197,6 +198,43 @@ class IncrementalEmpiricalCovariance(BaseEstimator):
 
         return self
 
+    @wrap_output_data
+    def score(self, X_test, y=None):
+        xp, _ = get_namespace(X_test)
+
+        location = self.location_
+        if sklearn_check_version("1.0"):
+            X = self._validate_data(
+                X_test,
+                dtype=[np.float64, np.float32],
+                reset=False,
+            )
+        else:
+            X = check_array(
+                X_test,
+                dtype=[np.float64, np.float32],
+            )
+
+        if "numpy" not in xp.__name__:
+            location = xp.asarray(location, device=X_test.device)
+            # depending on the sklearn version, check_array
+            # and validate_data will return only numpy arrays
+            # which will break dpnp/dpctl support. If the
+            # array namespace isn't from numpy and the data
+            # is now a numpy array, it has been validated and
+            # the original can be used.
+            if isinstance(X, np.ndarray):
+                X = X_test
+
+        est = clone(self)
+        est.set_params(**{"assume_centered": True})
+
+        # test_cov is a numpy array, but calculated on device
+        test_cov = est.fit(X - location).covariance_
+        res = log_likelihood(test_cov, self.get_precision())
+
+        return res
+
     def partial_fit(self, X, y=None, check_input=True):
         """
         Incremental fit with X. All of X is processed as a single batch.
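The new score() reproduces sklearn's EmpiricalCovariance.score while keeping dpnp/dpctl inputs usable: a clone fit with assume_centered=True on the centered test data yields the test covariance, which is evaluated against the precision learned during fit. A plain numpy/sklearn restatement of the same computation, using stock sklearn estimators for illustration:

    import numpy as np
    from sklearn.covariance import EmpiricalCovariance, log_likelihood

    rng = np.random.default_rng(0)
    X_train = rng.standard_normal((200, 3))
    X_test = rng.standard_normal((50, 3))

    est = EmpiricalCovariance().fit(X_train)

    # Covariance of the centered test data, as in the method above
    test_cov = EmpiricalCovariance(assume_centered=True).fit(
        X_test - est.location_
    ).covariance_
    manual = log_likelihood(test_cov, est.get_precision())

    assert np.isclose(manual, est.score(X_test))  # matches sklearn's score()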
@@ -293,21 +331,34 @@ class IncrementalEmpiricalCovariance(BaseEstimator):
         return self
 
     # expose sklearnex pairwise_distances if mahalanobis distance eventually supported
-    @wrap_output_data
     def mahalanobis(self, X):
         if sklearn_check_version("1.0"):
-            self.
-        else:
-            check_array(X, copy=self.copy)
+            self._check_feature_names(X, reset=False)
 
+        xp, _ = get_namespace(X)
         precision = self.get_precision()
-
-
-
-
-
+        # compute mahalanobis distances
+        # pairwise_distances will check n_features (via n_feature matching with
+        # self.location_) , and will check for finiteness via check array
+        # check_feature_names will match _validate_data functionally
+        location = self.location_[np.newaxis, :]
+        if "numpy" not in xp.__name__:
+            # Guarantee that inputs to pairwise_distances match in type and location
+            location = xp.asarray(location, device=X.device)
+
+        try:
+            dist = pairwise_distances(X, location, metric="mahalanobis", VI=precision)
+        except ValueError as e:
+            # Throw the expected sklearn error in an n_feature length violation
+            if "Incompatible dimension for X and Y matrices: X.shape[1] ==" in str(e):
+                raise ValueError(
+                    f"X has {_num_features(X)} features, but {self.__class__.__name__} "
+                    f"is expecting {self.n_features_in_} features as input."
+                )
+            else:
+                raise e
 
-        return
+        return (xp.reshape(dist, (-1,))) ** 2
 
     _onedal_cpu_supported = _onedal_supported
     _onedal_gpu_supported = _onedal_supported
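For reference, the mahalanobis() path above is the classic quadratic form (x - location)^T P (x - location), computed through pairwise_distances with the fitted precision passed as VI. A self-contained numpy check against stock sklearn:

    import numpy as np
    from sklearn.metrics import pairwise_distances

    rng = np.random.default_rng(0)
    X = rng.standard_normal((20, 3))
    location = X.mean(axis=0, keepdims=True)
    precision = np.linalg.inv(np.cov(X.T, bias=True))

    # Squared Mahalanobis distance of each row of X to the location
    dist = pairwise_distances(X, location, metric="mahalanobis", VI=precision)
    maha_sq = dist.reshape(-1) ** 2

    centered = X - location
    manual = np.einsum("ij,jk,ik->i", centered, precision, centered)
    assert np.allclose(maha_sq, manual)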
sklearnex/covariance/tests/test_incremental_covariance.py
CHANGED

@@ -16,13 +16,18 @@
 
 import numpy as np
 import pytest
+from numpy.linalg import slogdet
 from numpy.testing import assert_allclose
+from scipy.linalg import pinvh
 from sklearn.covariance.tests.test_covariance import (
     test_covariance,
     test_EmpiricalCovariance_validates_mahalanobis,
 )
+from sklearn.datasets import load_diabetes
+from sklearn.decomposition import PCA
 
 from onedal.tests.utils._dataframes_support import (
+    _as_numpy,
     _convert_to_dataframe,
     get_dataframes_and_queues,
 )
@@ -163,6 +168,36 @@ def test_sklearnex_fit_on_random_data(
     assert_allclose(expected_means, result.location_, atol=1e-6)
 
 
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_whitened_toy_score(dataframe, queue):
+    from sklearnex.covariance import IncrementalEmpiricalCovariance
+
+    # Load a sklearn toy dataset with sufficient data
+    X, _ = load_diabetes(return_X_y=True)
+    n = X.shape[1]
+
+    # Transform the data into uncorrelated, unity variance components
+    X = PCA(whiten=True).fit_transform(X)
+
+    # change dataframe
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+
+    # fit data
+    est = IncrementalEmpiricalCovariance()
+    est.fit(X_df)
+    # location_ attribute approximately zero (10,), covariance_ identity (10,10)
+
+    # The log-likelihood can be calculated simply due to covariance_
+    # use of scipy.linalg.pinvh, np.linalg.slogdet and np.cov for estimator
+    # independence
+    expected_result = (
+        -(n - slogdet(pinvh(np.cov(X.T, bias=1)))[1] + n * np.log(2 * np.pi)) / 2
+    )
+    # expected_result = -14.1780602988
+    result = _as_numpy(est.score(X_df))
+    assert_allclose(expected_result, result, atol=1e-6)
+
+
 # Monkeypatch IncrementalEmpiricalCovariance into relevant sklearn.covariance tests
 @pytest.mark.allow_sklearn_fallback
 @pytest.mark.parametrize(
sklearnex/decomposition/pca.py
CHANGED

@@ -32,6 +32,7 @@ if daal_check_version((2024, "P", 100)):
 
     from .._device_offload import dispatch, wrap_output_data
     from .._utils import PatchingConditionsChain
+    from ..utils import get_namespace
 
     if sklearn_check_version("1.1") and not sklearn_check_version("1.2"):
         from sklearn.utils import check_scalar

@@ -42,7 +43,6 @@ if daal_check_version((2024, "P", 100)):
     from sklearn.decomposition import PCA as sklearn_PCA
 
     from onedal.decomposition import PCA as onedal_PCA
-    from sklearnex.utils import get_namespace
 
     @control_n_jobs(decorated_methods=["fit", "transform", "fit_transform"])
     class PCA(sklearn_PCA):

@@ -210,6 +210,29 @@ if daal_check_version((2024, "P", 100)):
                 # Scikit-learn PCA["covariance_eigh"] was fit
                 return self._transform(X_fit, xp, x_is_centered=x_is_centered)
 
+        @wrap_output_data
+        def inverse_transform(self, X):
+            xp, _ = get_namespace(X)
+
+            mean = self.mean_
+            if self.whiten:
+                components = (
+                    xp.sqrt(self.explained_variance_[:, np.newaxis]) * self.components_
+                )
+            else:
+                components = self.components_
+
+            if "numpy" not in xp.__name__:
+                # DPCtl and dpnp require inputs to be on the same device for
+                # matrix multiplication and division. The type and location
+                # of the components and mean are dependent on the sklearn
+                # version, this makes sure it is of the same type and on the
+                # same device as the data (compute follows data).
+                components = xp.asarray(components, device=X.device)
+                mean = xp.asarray(mean, device=X.device)
+
+            return X @ components + mean
+
         def _onedal_supported(self, method_name, X):
             class_name = self.__class__.__name__
             patching_status = PatchingConditionsChain(

@@ -381,6 +404,7 @@ if daal_check_version((2024, "P", 100)):
         fit.__doc__ = sklearn_PCA.fit.__doc__
         transform.__doc__ = sklearn_PCA.transform.__doc__
         fit_transform.__doc__ = sklearn_PCA.fit_transform.__doc__
+        inverse_transform.__doc__ = sklearn_PCA.inverse_transform.__doc__
 
 else:
     from daal4py.sklearn.decomposition import PCA
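The new inverse_transform follows sklearn's reconstruction formula, X_t @ components + mean, rescaling the components by sqrt(explained_variance_) in the whitened case; the sklearnex version only adds device placement for dpnp/dpctl inputs. A numpy check of the math against stock sklearn:

    import numpy as np
    from sklearn.decomposition import PCA

    rng = np.random.default_rng(0)
    X = rng.standard_normal((100, 5))

    pca = PCA(n_components=3, whiten=True).fit(X)
    X_t = pca.transform(X)

    # Whitened reconstruction: undo the unit-variance scaling, then project back
    components = np.sqrt(pca.explained_variance_[:, np.newaxis]) * pca.components_
    manual = X_t @ components + pca.mean_

    assert np.allclose(manual, pca.inverse_transform(X_t))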
sklearnex/dispatcher.py
CHANGED

@@ -53,6 +53,7 @@ def get_patch_map_core(preview=False):
             EmpiricalCovariance as EmpiricalCovariance_sklearnex,
         )
         from .preview.decomposition import IncrementalPCA as IncrementalPCA_sklearnex
+        from .preview.linear_model import Ridge as Ridge_sklearnex
 
         # Since the state of the lru_cache without preview cannot be
         # guaranteed to not have already enabled sklearnex algorithms

@@ -90,6 +91,15 @@ def get_patch_map_core(preview=False):
                 None,
             ]
         ]
+
+        # Ridge
+        linear_model_module, _, _ = mapping["ridge"][0][0]
+        sklearn_obj = mapping["ridge"][0][1]
+        mapping.pop("ridge")
+        mapping["ridge"] = [
+            [(linear_model_module, "Ridge", Ridge_sklearnex), sklearn_obj]
+        ]
+
         return mapping
 
     from daal4py.sklearn.monkeypatch.dispatcher import _get_map_of_algorithms

@@ -111,6 +121,9 @@ def get_patch_map_core(preview=False):
     import sklearn.decomposition as decomposition_module
     import sklearn.ensemble as ensemble_module
     import sklearn.linear_model as linear_model_module
+    import sklearn.manifold as manifold_module
+    import sklearn.metrics as metrics_module
+    import sklearn.model_selection as model_selection_module
     import sklearn.neighbors as neighbors_module
     import sklearn.svm as svm_module
 

@@ -138,11 +151,18 @@ def get_patch_map_core(preview=False):
     from .ensemble import ExtraTreesRegressor as ExtraTreesRegressor_sklearnex
     from .ensemble import RandomForestClassifier as RandomForestClassifier_sklearnex
     from .ensemble import RandomForestRegressor as RandomForestRegressor_sklearnex
+    from .linear_model import ElasticNet as ElasticNet_sklearnex
     from .linear_model import (
         IncrementalLinearRegression as IncrementalLinearRegression_sklearnex,
     )
+    from .linear_model import Lasso as Lasso_sklearnex
     from .linear_model import LinearRegression as LinearRegression_sklearnex
     from .linear_model import LogisticRegression as LogisticRegression_sklearnex
+    from .linear_model import Ridge as Ridge_sklearnex
+    from .manifold import TSNE as TSNE_sklearnex
+    from .metrics import pairwise_distances as pairwise_distances_sklearnex
+    from .metrics import roc_auc_score as roc_auc_score_sklearnex
+    from .model_selection import train_test_split as train_test_split_sklearnex
     from .neighbors import KNeighborsClassifier as KNeighborsClassifier_sklearnex
     from .neighbors import KNeighborsRegressor as KNeighborsRegressor_sklearnex
     from .neighbors import LocalOutlierFactor as LocalOutlierFactor_sklearnex

@@ -168,6 +188,32 @@ def get_patch_map_core(preview=False):
     mapping["nusvr"] = [[(svm_module, "NuSVR", NuSVR_sklearnex), None]]
     mapping["nusvc"] = [[(svm_module, "NuSVC", NuSVC_sklearnex), None]]
 
+    # ElasticNet
+    mapping.pop("elasticnet")
+    mapping["elasticnet"] = [
+        [
+            (
+                linear_model_module,
+                "ElasticNet",
+                ElasticNet_sklearnex,
+            ),
+            None,
+        ]
+    ]
+
+    # Lasso
+    mapping.pop("lasso")
+    mapping["lasso"] = [
+        [
+            (
+                linear_model_module,
+                "Lasso",
+                Lasso_sklearnex,
+            ),
+            None,
+        ]
+    ]
+
     # Linear Regression
     mapping.pop("linear")
     mapping.pop("linearregression")

@@ -201,6 +247,54 @@ def get_patch_map_core(preview=False):
     ]
     mapping["logisticregression"] = mapping["log_reg"]
 
+    # Ridge
+    mapping.pop("ridge")
+    mapping["ridge"] = [
+        [
+            (
+                linear_model_module,
+                "Ridge",
+                Ridge_sklearnex,
+            ),
+            None,
+        ]
+    ]
+
+    # manifold
+    mapping.pop("tsne")
+    mapping["tsne"] = [
+        [
+            (manifold_module, "TSNE", TSNE_sklearnex),
+            None,
+        ]
+    ]
+
+    # metrics
+    mapping.pop("distances")
+    mapping.pop("roc_auc_score")
+    mapping["distances"] = [
+        [
+            (metrics_module, "pairwise_distances", pairwise_distances_sklearnex),
+            None,
+        ]
+    ]
+    mapping["pairwise_distances"] = mapping["distances"]
+    mapping["roc_auc_score"] = [
+        [
+            (metrics_module, "roc_auc_score", roc_auc_score_sklearnex),
+            None,
+        ]
+    ]
+
+    # model_selection
+    mapping.pop("train_test_split")
+    mapping["train_test_split"] = [
+        [
+            (model_selection_module, "train_test_split", train_test_split_sklearnex),
+            None,
+        ]
+    ]
+
     # kNN
     mapping.pop("knn_classifier")
     mapping.pop("kneighborsclassifier")