scikit-learn-intelex 2024.3.0__py312-none-manylinux1_x86_64.whl → 2024.4.0__py312-none-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-learn-intelex might be problematic.
- {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.4.0.dist-info}/METADATA +2 -2
- {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.4.0.dist-info}/RECORD +33 -30
- sklearnex/_device_offload.py +31 -4
- sklearnex/basic_statistics/__init__.py +2 -1
- sklearnex/basic_statistics/incremental_basic_statistics.py +288 -0
- sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +386 -0
- sklearnex/decomposition/pca.py +3 -6
- sklearnex/dispatcher.py +2 -2
- sklearnex/ensemble/_forest.py +68 -75
- sklearnex/linear_model/linear.py +275 -340
- sklearnex/linear_model/logistic_regression.py +50 -9
- sklearnex/linear_model/tests/test_linear.py +40 -5
- sklearnex/neighbors/_lof.py +53 -36
- sklearnex/neighbors/common.py +4 -1
- sklearnex/neighbors/knn_classification.py +37 -122
- sklearnex/neighbors/knn_regression.py +10 -117
- sklearnex/neighbors/knn_unsupervised.py +6 -78
- sklearnex/preview/cluster/k_means.py +5 -73
- sklearnex/preview/covariance/covariance.py +6 -5
- sklearnex/preview/covariance/tests/test_covariance.py +18 -5
- sklearnex/svm/_common.py +4 -7
- sklearnex/svm/nusvc.py +66 -50
- sklearnex/svm/nusvr.py +3 -49
- sklearnex/svm/svc.py +66 -51
- sklearnex/svm/svr.py +3 -49
- sklearnex/tests/_utils.py +14 -5
- sklearnex/tests/test_n_jobs_support.py +8 -2
- sklearnex/tests/test_patching.py +64 -54
- sklearnex/utils/__init__.py +2 -1
- sklearnex/utils/_namespace.py +97 -0
- {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.4.0.dist-info}/LICENSE.txt +0 -0
- {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.4.0.dist-info}/WHEEL +0 -0
- {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.4.0.dist-info}/top_level.txt +0 -0
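The headline change in 2024.4.0 is the new incremental basic-statistics support: `sklearnex/basic_statistics/incremental_basic_statistics.py` (+288) plus the test suite reproduced below. As orientation before the diff, here is a minimal usage sketch of the batched workflow those tests exercise; it assumes only names visible in this diff (`IncrementalBasicStatistics`, `partial_fit`, `fit`, `result_options`, `batch_size`, and result attributes such as `mean`, `max`, `sum`):

```python
import numpy as np

from sklearnex.basic_statistics import IncrementalBasicStatistics

X = np.random.default_rng(0).uniform(size=(1000, 10))

# Stream the data through the estimator in chunks; each partial_fit call
# folds one batch into the running statistics.
incbs = IncrementalBasicStatistics(result_options=["mean", "max", "sum"])
for batch in np.array_split(X, 4):
    incbs.partial_fit(batch)
print(incbs.mean, incbs.max, incbs.sum)

# Equivalently, fit() with batch_size performs the same chunking internally.
incbs_all = IncrementalBasicStatistics(result_options="all", batch_size=250).fit(X)
```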
sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py
ADDED
@@ -0,0 +1,386 @@
+# ===============================================================================
+# Copyright 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===============================================================================
+
+import numpy as np
+import pytest
+from numpy.testing import assert_allclose
+
+from onedal.basic_statistics.tests.test_incremental_basic_statistics import (
+    expected_max,
+    expected_mean,
+    expected_sum,
+    options_and_tests,
+)
+from onedal.tests.utils._dataframes_support import (
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
+from sklearnex.basic_statistics import IncrementalBasicStatistics
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_partial_fit_multiple_options_on_gold_data(dataframe, queue, weighted, dtype):
+    X = np.array([[0, 0], [1, 1]])
+    X = X.astype(dtype=dtype)
+    X_split = np.array_split(X, 2)
+    if weighted:
+        weights = np.array([1, 0.5])
+        weights = weights.astype(dtype=dtype)
+        weights_split = np.array_split(weights, 2)
+
+    incbs = IncrementalBasicStatistics()
+    for i in range(2):
+        X_split_df = _convert_to_dataframe(
+            X_split[i], sycl_queue=queue, target_df=dataframe
+        )
+        if weighted:
+            weights_split_df = _convert_to_dataframe(
+                weights_split[i], sycl_queue=queue, target_df=dataframe
+            )
+            result = incbs.partial_fit(X_split_df, sample_weight=weights_split_df)
+        else:
+            result = incbs.partial_fit(X_split_df)
+
+    if weighted:
+        expected_weighted_mean = np.array([0.25, 0.25])
+        expected_weighted_min = np.array([0, 0])
+        expected_weighted_max = np.array([0.5, 0.5])
+        assert_allclose(expected_weighted_mean, result.mean)
+        assert_allclose(expected_weighted_max, result.max)
+        assert_allclose(expected_weighted_min, result.min)
+    else:
+        expected_mean = np.array([0.5, 0.5])
+        expected_min = np.array([0, 0])
+        expected_max = np.array([1, 1])
+        assert_allclose(expected_mean, result.mean)
+        assert_allclose(expected_max, result.max)
+        assert_allclose(expected_min, result.min)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("num_batches", [2, 10])
+@pytest.mark.parametrize("option", options_and_tests)
+@pytest.mark.parametrize("row_count", [100, 1000])
+@pytest.mark.parametrize("column_count", [10, 100])
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_partial_fit_single_option_on_random_data(
+    dataframe, queue, num_batches, option, row_count, column_count, weighted, dtype
+):
+    result_option, function, tols = option
+    fp32tol, fp64tol = tols
+    seed = 77
+    gen = np.random.default_rng(seed)
+    X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+    X = X.astype(dtype=dtype)
+    X_split = np.array_split(X, num_batches)
+    if weighted:
+        weights = gen.uniform(low=-0.5, high=+1.0, size=row_count)
+        weights = weights.astype(dtype=dtype)
+        weights_split = np.array_split(weights, num_batches)
+    incbs = IncrementalBasicStatistics(result_options=result_option)
+
+    for i in range(num_batches):
+        X_split_df = _convert_to_dataframe(
+            X_split[i], sycl_queue=queue, target_df=dataframe
+        )
+        if weighted:
+            weights_split_df = _convert_to_dataframe(
+                weights_split[i], sycl_queue=queue, target_df=dataframe
+            )
+            result = incbs.partial_fit(X_split_df, sample_weight=weights_split_df)
+        else:
+            result = incbs.partial_fit(X_split_df)
+
+    res = getattr(result, result_option)
+    if weighted:
+        weighted_data = np.diag(weights) @ X
+        gtr = function(weighted_data)
+    else:
+        gtr = function(X)
+
+    tol = fp32tol if res.dtype == np.float32 else fp64tol
+    assert_allclose(gtr, res, atol=tol)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("num_batches", [2, 10])
+@pytest.mark.parametrize("row_count", [100, 1000])
+@pytest.mark.parametrize("column_count", [10, 100])
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_partial_fit_multiple_options_on_random_data(
+    dataframe, queue, num_batches, row_count, column_count, weighted, dtype
+):
+    seed = 42
+    gen = np.random.default_rng(seed)
+    X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+    X = X.astype(dtype=dtype)
+    X_split = np.array_split(X, num_batches)
+    if weighted:
+        weights = gen.uniform(low=-0.5, high=+1.0, size=row_count)
+        weights = weights.astype(dtype=dtype)
+        weights_split = np.array_split(weights, num_batches)
+    incbs = IncrementalBasicStatistics(result_options=["mean", "max", "sum"])
+
+    for i in range(num_batches):
+        X_split_df = _convert_to_dataframe(
+            X_split[i], sycl_queue=queue, target_df=dataframe
+        )
+        if weighted:
+            weights_split_df = _convert_to_dataframe(
+                weights_split[i], sycl_queue=queue, target_df=dataframe
+            )
+            result = incbs.partial_fit(X_split_df, sample_weight=weights_split_df)
+        else:
+            result = incbs.partial_fit(X_split_df)
+
+    res_mean, res_max, res_sum = result.mean, result.max, result.sum
+    if weighted:
+        weighted_data = np.diag(weights) @ X
+        gtr_mean, gtr_max, gtr_sum = (
+            expected_mean(weighted_data),
+            expected_max(weighted_data),
+            expected_sum(weighted_data),
+        )
+    else:
+        gtr_mean, gtr_max, gtr_sum = (
+            expected_mean(X),
+            expected_max(X),
+            expected_sum(X),
+        )
+
+    tol = 1e-5 if res_mean.dtype == np.float32 else 1e-7
+    assert_allclose(gtr_mean, res_mean, atol=tol)
+    assert_allclose(gtr_max, res_max, atol=tol)
+    assert_allclose(gtr_sum, res_sum, atol=tol)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("num_batches", [2, 10])
+@pytest.mark.parametrize("row_count", [100, 1000])
+@pytest.mark.parametrize("column_count", [10, 100])
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_partial_fit_all_option_on_random_data(
+    dataframe, queue, num_batches, row_count, column_count, weighted, dtype
+):
+    seed = 77
+    gen = np.random.default_rng(seed)
+    X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+    X = X.astype(dtype=dtype)
+    X_split = np.array_split(X, num_batches)
+    if weighted:
+        weights = gen.uniform(low=-0.5, high=+1.0, size=row_count)
+        weights = weights.astype(dtype=dtype)
+        weights_split = np.array_split(weights, num_batches)
+    incbs = IncrementalBasicStatistics(result_options="all")
+
+    for i in range(num_batches):
+        X_split_df = _convert_to_dataframe(
+            X_split[i], sycl_queue=queue, target_df=dataframe
+        )
+        if weighted:
+            weights_split_df = _convert_to_dataframe(
+                weights_split[i], sycl_queue=queue, target_df=dataframe
+            )
+            result = incbs.partial_fit(X_split_df, sample_weight=weights_split_df)
+        else:
+            result = incbs.partial_fit(X_split_df)
+
+    if weighted:
+        weighted_data = np.diag(weights) @ X
+
+    for option in options_and_tests:
+        result_option, function, tols = option
+        print(result_option)
+        fp32tol, fp64tol = tols
+        res = getattr(result, result_option)
+        if weighted:
+            gtr = function(weighted_data)
+        else:
+            gtr = function(X)
+        tol = fp32tol if res.dtype == np.float32 else fp64tol
+        assert_allclose(gtr, res, atol=tol)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_fit_multiple_options_on_gold_data(dataframe, queue, weighted, dtype):
+    X = np.array([[0, 0], [1, 1]])
+    X = X.astype(dtype=dtype)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    if weighted:
+        weights = np.array([1, 0.5])
+        weights = weights.astype(dtype=dtype)
+        weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+    incbs = IncrementalBasicStatistics(batch_size=1)
+
+    if weighted:
+        result = incbs.fit(X_df, sample_weight=weights_df)
+    else:
+        result = incbs.fit(X_df)
+
+    if weighted:
+        expected_weighted_mean = np.array([0.25, 0.25])
+        expected_weighted_min = np.array([0, 0])
+        expected_weighted_max = np.array([0.5, 0.5])
+        assert_allclose(expected_weighted_mean, result.mean)
+        assert_allclose(expected_weighted_max, result.max)
+        assert_allclose(expected_weighted_min, result.min)
+    else:
+        expected_mean = np.array([0.5, 0.5])
+        expected_min = np.array([0, 0])
+        expected_max = np.array([1, 1])
+        assert_allclose(expected_mean, result.mean)
+        assert_allclose(expected_max, result.max)
+        assert_allclose(expected_min, result.min)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("num_batches", [2, 10])
+@pytest.mark.parametrize("option", options_and_tests)
+@pytest.mark.parametrize("row_count", [100, 1000])
+@pytest.mark.parametrize("column_count", [10, 100])
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_fit_single_option_on_random_data(
+    dataframe, queue, num_batches, option, row_count, column_count, weighted, dtype
+):
+    result_option, function, tols = option
+    fp32tol, fp64tol = tols
+    seed = 77
+    gen = np.random.default_rng(seed)
+    batch_size = row_count // num_batches
+    X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+    X = X.astype(dtype=dtype)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    if weighted:
+        weights = gen.uniform(low=-0.5, high=1.0, size=row_count)
+        weights = weights.astype(dtype=dtype)
+        weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+    incbs = IncrementalBasicStatistics(
+        result_options=result_option, batch_size=batch_size
+    )
+
+    if weighted:
+        result = incbs.fit(X_df, sample_weight=weights_df)
+    else:
+        result = incbs.fit(X_df)
+
+    res = getattr(result, result_option)
+    if weighted:
+        weighted_data = np.diag(weights) @ X
+        gtr = function(weighted_data)
+    else:
+        gtr = function(X)
+
+    tol = fp32tol if res.dtype == np.float32 else fp64tol
+    assert_allclose(gtr, res, atol=tol)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("num_batches", [2, 10])
+@pytest.mark.parametrize("row_count", [100, 1000])
+@pytest.mark.parametrize("column_count", [10, 100])
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_fit_multiple_options_on_random_data(
+    dataframe, queue, num_batches, row_count, column_count, weighted, dtype
+):
+    seed = 77
+    gen = np.random.default_rng(seed)
+    batch_size = row_count // num_batches
+    X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+    X = X.astype(dtype=dtype)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    if weighted:
+        weights = gen.uniform(low=-0.5, high=1.0, size=row_count)
+        weights = weights.astype(dtype=dtype)
+        weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+    incbs = IncrementalBasicStatistics(
+        result_options=["mean", "max", "sum"], batch_size=batch_size
+    )
+
+    if weighted:
+        result = incbs.fit(X_df, sample_weight=weights_df)
+    else:
+        result = incbs.fit(X_df)
+
+    res_mean, res_max, res_sum = result.mean, result.max, result.sum
+    if weighted:
+        weighted_data = np.diag(weights) @ X
+        gtr_mean, gtr_max, gtr_sum = (
+            expected_mean(weighted_data),
+            expected_max(weighted_data),
+            expected_sum(weighted_data),
+        )
+    else:
+        gtr_mean, gtr_max, gtr_sum = (
+            expected_mean(X),
+            expected_max(X),
+            expected_sum(X),
+        )
+
+    tol = 3e-4 if res_mean.dtype == np.float32 else 1e-7
+    assert_allclose(gtr_mean, res_mean, atol=tol)
+    assert_allclose(gtr_max, res_max, atol=tol)
+    assert_allclose(gtr_sum, res_sum, atol=tol)
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("num_batches", [2, 10])
+@pytest.mark.parametrize("row_count", [100, 1000])
+@pytest.mark.parametrize("column_count", [10, 100])
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_fit_all_option_on_random_data(
+    dataframe, queue, num_batches, row_count, column_count, weighted, dtype
+):
+    seed = 77
+    gen = np.random.default_rng(seed)
+    batch_size = row_count // num_batches
+    X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+    X = X.astype(dtype=dtype)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    if weighted:
+        weights = gen.uniform(low=-0.5, high=+1.0, size=row_count)
+        weights = weights.astype(dtype=dtype)
+        weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+    incbs = IncrementalBasicStatistics(result_options="all", batch_size=batch_size)
+
+    if weighted:
+        result = incbs.fit(X_df, sample_weight=weights_df)
+    else:
+        result = incbs.fit(X_df)
+
+    if weighted:
+        weighted_data = np.diag(weights) @ X
+
+    for option in options_and_tests:
+        result_option, function, tols = option
+        print(result_option)
+        fp32tol, fp64tol = tols
+        res = getattr(result, result_option)
+        if weighted:
+            gtr = function(weighted_data)
+        else:
+            gtr = function(X)
+        tol = fp32tol if res.dtype == np.float32 else fp64tol
+        assert_allclose(gtr, res, atol=tol)
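A note on the weighted ground truth used throughout these tests: scaling row `i` of `X` by `weights[i]` is written as `np.diag(weights) @ X`. A tiny self-contained check (plain NumPy, no sklearnex required) showing this equals simple broadcasting, which avoids materializing the n-by-n diagonal matrix:

```python
import numpy as np

rng = np.random.default_rng(77)
X = rng.uniform(low=-0.3, high=0.7, size=(1000, 10))
weights = rng.uniform(low=-0.5, high=1.0, size=1000)

# Row-wise scaling two ways: dense diagonal matmul vs. broadcasting.
assert np.allclose(np.diag(weights) @ X, weights[:, None] * X)
```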
sklearnex/decomposition/pca.py
CHANGED
@@ -95,6 +95,7 @@ if daal_check_version((2024, "P", 100)):
             self._fit(X)
             return self
 
+        @wrap_output_data
         def _fit(self, X):
             if sklearn_check_version("1.2"):
                 self._validate_params()
@@ -166,13 +167,11 @@ if daal_check_version((2024, "P", 100)):
 
             return self._onedal_estimator.predict(X, queue=queue)
 
-        @wrap_output_data
         def fit_transform(self, X, y=None):
             U, S, Vt = self._fit(X)
             if U is None:
                 # oneDAL PCA was fit
-
-                return X_transformed
+                return self.transform(X)
             else:
                 # Scikit-learn PCA was fit
                 U = U[:, : self.n_components_]
@@ -298,11 +297,9 @@ if daal_check_version((2024, "P", 100)):
             self.n_samples_ = self._onedal_estimator.n_samples_
             if sklearn_check_version("1.2"):
                 self.n_features_in_ = self._onedal_estimator.n_features_
-            elif sklearn_check_version("0.24"):
-                self.n_features_ = self._onedal_estimator.n_features_
-                self.n_features_in_ = self._onedal_estimator.n_features_
             else:
                 self.n_features_ = self._onedal_estimator.n_features_
+                self.n_features_in_ = self._onedal_estimator.n_features_
             self.n_components_ = self._onedal_estimator.n_components_
             self.components_ = self._onedal_estimator.components_
             self.mean_ = self._onedal_estimator.mean_
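The `fit_transform` hunk above fixes a real bug: the oneDAL branch previously returned `X_transformed`, a name never defined in that scope (a guaranteed NameError); it now delegates to `self.transform(X)`, with `@wrap_output_data` moved from `fit_transform` onto `_fit` so output conversion happens once. A minimal smoke test of the repaired path, assuming the patched estimator is importable as `sklearnex.decomposition.PCA` (consistent with this diff):

```python
import numpy as np

from sklearnex.decomposition import PCA

X = np.random.default_rng(0).standard_normal((100, 5))
X_t = PCA(n_components=2).fit_transform(X)  # previously hit the NameError path
assert X_t.shape == (100, 2)
```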
sklearnex/dispatcher.py
CHANGED
@@ -314,10 +314,10 @@ def get_patch_names():
 def patch_sklearn(name=None, verbose=True, global_patch=False, preview=False):
     if preview:
         os.environ["SKLEARNEX_PREVIEW"] = "enabled_via_patch_sklearn"
-    if not sklearn_check_version("0.22"):
+    if not sklearn_check_version("0.24"):
         raise NotImplementedError(
             "Intel(R) Extension for Scikit-learn* patches apply "
-            "for scikit-learn >= 0.22 only ..."
+            "for scikit-learn >= 0.24 only ..."
         )
 
     if global_patch:
sklearnex/ensemble/_forest.py
CHANGED
@@ -25,8 +25,11 @@ from sklearn.ensemble import ExtraTreesClassifier as sklearn_ExtraTreesClassifier
 from sklearn.ensemble import ExtraTreesRegressor as sklearn_ExtraTreesRegressor
 from sklearn.ensemble import RandomForestClassifier as sklearn_RandomForestClassifier
 from sklearn.ensemble import RandomForestRegressor as sklearn_RandomForestRegressor
+from sklearn.ensemble._forest import ForestClassifier as sklearn_ForestClassifier
+from sklearn.ensemble._forest import ForestRegressor as sklearn_ForestRegressor
 from sklearn.ensemble._forest import _get_n_samples_bootstrap
 from sklearn.exceptions import DataConversionWarning
+from sklearn.metrics import accuracy_score
 from sklearn.tree import (
     DecisionTreeClassifier,
     DecisionTreeRegressor,
@@ -35,12 +38,7 @@ from sklearn.tree import (
 )
 from sklearn.tree._tree import Tree
 from sklearn.utils import check_random_state, deprecated
-from sklearn.utils.validation import (
-    check_array,
-    check_consistent_length,
-    check_is_fitted,
-    check_X_y,
-)
+from sklearn.utils.validation import check_array, check_is_fitted
 
 from daal4py.sklearn._n_jobs_support import control_n_jobs
 from daal4py.sklearn._utils import (
@@ -52,19 +50,10 @@ from onedal.ensemble import ExtraTreesClassifier as onedal_ExtraTreesClassifier
 from onedal.ensemble import ExtraTreesRegressor as onedal_ExtraTreesRegressor
 from onedal.ensemble import RandomForestClassifier as onedal_RandomForestClassifier
 from onedal.ensemble import RandomForestRegressor as onedal_RandomForestRegressor
-
-# try catch needed for changes in structures observed in Scikit-learn around v0.22
-try:
-    from sklearn.ensemble._forest import ForestClassifier as sklearn_ForestClassifier
-    from sklearn.ensemble._forest import ForestRegressor as sklearn_ForestRegressor
-except ModuleNotFoundError:
-    from sklearn.ensemble.forest import ForestClassifier as sklearn_ForestClassifier
-    from sklearn.ensemble.forest import ForestRegressor as sklearn_ForestRegressor
-
 from onedal.primitives import get_tree_state_cls, get_tree_state_reg
 from onedal.utils import _num_features, _num_samples
+from sklearnex.utils import get_namespace
 
-from .._config import get_config
 from .._device_offload import dispatch, wrap_output_data
 from .._utils import PatchingConditionsChain
 
@@ -78,24 +67,14 @@ class BaseForest(ABC):
     _onedal_factory = None
 
     def _onedal_fit(self, X, y, sample_weight=None, queue=None):
-        if sklearn_check_version("0.23"):
-            X, y = self._validate_data(
-                X,
-                y,
-                multi_output=False,
-                accept_sparse=False,
-                dtype=[np.float64, np.float32],
-                force_all_finite=False,
-            )
-        else:
-            X, y = check_X_y(
-                X,
-                y,
-                accept_sparse=False,
-                dtype=[np.float64, np.float32],
-                multi_output=False,
-                force_all_finite=False,
-            )
+        X, y = self._validate_data(
+            X,
+            y,
+            multi_output=False,
+            accept_sparse=False,
+            dtype=[np.float64, np.float32],
+            force_all_finite=False,
+        )
 
         if sample_weight is not None:
             sample_weight = self.check_sample_weight(sample_weight, X)
@@ -173,15 +152,6 @@
 
         return self
 
-    def _fit_proba(self, X, y, sample_weight=None, queue=None):
-        params = self.get_params()
-        self.__class__(**params)
-
-        # We use stock metaestimators below, so the only way
-        # to pass a queue is using config_context.
-        cfg = get_config()
-        cfg["target_offload"] = queue
-
     def _save_attributes(self):
        if self.oob_score:
            self.oob_score_ = self._onedal_estimator.oob_score_
@@ -204,8 +174,6 @@
         self._validate_estimator()
         return self
 
-    # TODO:
-    # move to onedal modul.
     def _check_parameters(self):
         if isinstance(self.min_samples_leaf, numbers.Integral):
             if not 1 <= self.min_samples_leaf:
@@ -550,18 +518,14 @@ class ForestClassifier(sklearn_ForestClassifier, BaseForest):
         )
 
         if patching_status.get_status():
-            if sklearn_check_version("0.23"):
-                X, y = self._validate_data(
-                    X,
-                    y,
-                    multi_output=True,
-                    accept_sparse=True,
-                    dtype=[np.float64, np.float32],
-                    force_all_finite=False,
-                )
-            else:
-                X = check_array(X, dtype=[np.float64, np.float32], force_all_finite=False)
-                y = check_array(y, ensure_2d=False, dtype=X.dtype, force_all_finite=False)
+            X, y = self._validate_data(
+                X,
+                y,
+                multi_output=True,
+                accept_sparse=True,
+                dtype=[np.float64, np.float32],
+                force_all_finite=False,
+            )
 
         if y.ndim == 2 and y.shape[1] == 1:
             warnings.warn(
@@ -655,9 +619,38 @@
             X,
         )
 
+    def predict_log_proba(self, X):
+        xp, _ = get_namespace(X)
+        proba = self.predict_proba(X)
+
+        if self.n_outputs_ == 1:
+            return xp.log(proba)
+
+        else:
+            for k in range(self.n_outputs_):
+                proba[k] = xp.log(proba[k])
+
+            return proba
+
+    @wrap_output_data
+    def score(self, X, y, sample_weight=None):
+        return dispatch(
+            self,
+            "score",
+            {
+                "onedal": self.__class__._onedal_score,
+                "sklearn": sklearn_ForestClassifier.score,
+            },
+            X,
+            y,
+            sample_weight=sample_weight,
+        )
+
     fit.__doc__ = sklearn_ForestClassifier.fit.__doc__
     predict.__doc__ = sklearn_ForestClassifier.predict.__doc__
     predict_proba.__doc__ = sklearn_ForestClassifier.predict_proba.__doc__
+    predict_log_proba.__doc__ = sklearn_ForestClassifier.predict_log_proba.__doc__
+    score.__doc__ = sklearn_ForestClassifier.score.__doc__
 
     def _onedal_cpu_supported(self, method_name, *data):
         class_name = self.__class__.__name__
@@ -684,7 +677,7 @@ class ForestClassifier(sklearn_ForestClassifier, BaseForest):
                 ]
             )
 
-        elif method_name in ["predict", "predict_proba"]:
+        elif method_name in ["predict", "predict_proba", "score"]:
             X = data[0]
 
             patching_status.and_conditions(
@@ -749,7 +742,7 @@ class ForestClassifier(sklearn_ForestClassifier, BaseForest):
                 ]
             )
 
-        elif method_name in ["predict", "predict_proba"]:
+        elif method_name in ["predict", "predict_proba", "score"]:
             X = data[0]
 
             patching_status.and_conditions(
@@ -801,12 +794,16 @@ class ForestClassifier(sklearn_ForestClassifier, BaseForest):
         X = check_array(X, dtype=[np.float64, np.float32], force_all_finite=False)
         check_is_fitted(self, "_onedal_estimator")
 
-        if sklearn_check_version("0.23"):
-            self._check_n_features(X, reset=False)
+        self._check_n_features(X, reset=False)
         if sklearn_check_version("1.0"):
             self._check_feature_names(X, reset=False)
         return self._onedal_estimator.predict_proba(X, queue=queue)
 
+    def _onedal_score(self, X, y, sample_weight=None, queue=None):
+        return accuracy_score(
+            y, self._onedal_predict(X, queue=queue), sample_weight=sample_weight
+        )
+
 
 class ForestRegressor(sklearn_ForestRegressor, BaseForest):
     _err = "out_of_bag_error_r2|out_of_bag_error_prediction"
@@ -916,18 +913,14 @@ class ForestRegressor(sklearn_ForestRegressor, BaseForest):
         )
 
         if patching_status.get_status():
-            if sklearn_check_version("0.23"):
-                X, y = self._validate_data(
-                    X,
-                    y,
-                    multi_output=True,
-                    accept_sparse=True,
-                    dtype=[np.float64, np.float32],
-                    force_all_finite=False,
-                )
-            else:
-                X = check_array(X, dtype=[np.float64, np.float32], force_all_finite=False)
-                y = check_array(y, ensure_2d=False, dtype=X.dtype, force_all_finite=False)
+            X, y = self._validate_data(
+                X,
+                y,
+                multi_output=True,
+                accept_sparse=True,
+                dtype=[np.float64, np.float32],
+                force_all_finite=False,
+            )
 
         if y.ndim == 2 and y.shape[1] == 1:
             warnings.warn(
@@ -1129,7 +1122,7 @@ class ForestRegressor(sklearn_ForestRegressor, BaseForest):
     predict.__doc__ = sklearn_ForestRegressor.predict.__doc__
 
 
-@control_n_jobs(decorated_methods=["fit", "predict", "predict_proba"])
+@control_n_jobs(decorated_methods=["fit", "predict", "predict_proba", "score"])
 class RandomForestClassifier(ForestClassifier):
     __doc__ = sklearn_RandomForestClassifier.__doc__
     _onedal_factory = onedal_RandomForestClassifier
@@ -1540,7 +1533,7 @@ class RandomForestRegressor(ForestRegressor):
         self.min_bin_size = min_bin_size
 
 
-@control_n_jobs(decorated_methods=["fit", "predict", "predict_proba"])
+@control_n_jobs(decorated_methods=["fit", "predict", "predict_proba", "score"])
 class ExtraTreesClassifier(ForestClassifier):
     __doc__ = sklearn_ExtraTreesClassifier.__doc__
     _onedal_factory = onedal_ExtraTreesClassifier