scikit-learn-intelex 2024.4.0__py38-none-win_amd64.whl → 2024.5.0__py38-none-win_amd64.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Note: this version of scikit-learn-intelex has been flagged as a potentially problematic release.
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/_device_offload.py +8 -1
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +2 -4
- scikit_learn_intelex-2024.5.0.data/data/Lib/site-packages/sklearnex/covariance/incremental_covariance.py +317 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/covariance/tests/test_incremental_covariance.py +54 -17
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/decomposition/pca.py +68 -13
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/decomposition/tests/test_pca.py +2 -2
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/dispatcher.py +31 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/ensemble/_forest.py +5 -4
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/linear_model/__init__.py +5 -3
- scikit_learn_intelex-2024.5.0.data/data/Lib/site-packages/sklearnex/linear_model/incremental_linear.py +387 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/linear_model/linear.py +2 -2
- scikit_learn_intelex-2024.5.0.data/data/Lib/site-packages/sklearnex/linear_model/tests/test_incremental_linear.py +200 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/linear_model/tests/test_linear.py +2 -2
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/neighbors/tests/test_neighbors.py +2 -2
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/tests/_utils.py +21 -12
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/tests/test_memory_usage.py +5 -1
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/tests/test_n_jobs_support.py +4 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/tests/test_patching.py +27 -8
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/tests/test_run_to_run_stability_tests.py +1 -1
- scikit_learn_intelex-2024.5.0.data/data/Lib/site-packages/sklearnex/utils/tests/test_finite.py +89 -0
- {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/METADATA +227 -230
- scikit_learn_intelex-2024.5.0.dist-info/RECORD +104 -0
- {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/WHEEL +1 -1
- scikit_learn_intelex-2024.4.0.data/data/Lib/site-packages/sklearnex/covariance/incremental_covariance.py +0 -130
- scikit_learn_intelex-2024.4.0.dist-info/RECORD +0 -101
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/__main__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/_config.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/_utils.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/basic_statistics/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/basic_statistics/basic_statistics.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/basic_statistics/incremental_basic_statistics.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/cluster/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/cluster/dbscan.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/cluster/k_means.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/cluster/tests/test_dbscan.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/cluster/tests/test_kmeans.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/conftest.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/covariance/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/decomposition/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/doc/third-party-programs.txt +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/ensemble/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/ensemble/tests/test_forest.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/glob/__main__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/glob/dispatcher.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/linear_model/coordinate_descent.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/linear_model/logistic_path.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/linear_model/logistic_regression.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/linear_model/ridge.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/linear_model/tests/test_logreg.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/manifold/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/manifold/t_sne.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/manifold/tests/test_tsne.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/metrics/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/metrics/pairwise.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/metrics/ranking.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/metrics/tests/test_metrics.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/model_selection/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/model_selection/split.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/model_selection/tests/test_model_selection.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/neighbors/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/neighbors/_lof.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/neighbors/common.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/neighbors/knn_classification.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/neighbors/knn_regression.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/neighbors/knn_unsupervised.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/preview/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/preview/cluster/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/preview/cluster/_common.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/preview/cluster/k_means.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/preview/covariance/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/preview/covariance/covariance.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/preview/covariance/tests/test_covariance.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/spmd/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/spmd/basic_statistics/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/spmd/basic_statistics/basic_statistics.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/spmd/cluster/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/spmd/cluster/dbscan.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/spmd/cluster/kmeans.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/spmd/covariance/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/spmd/covariance/covariance.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/spmd/decomposition/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/spmd/decomposition/pca.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/spmd/ensemble/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/spmd/ensemble/forest.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/spmd/linear_model/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/spmd/linear_model/linear_model.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/spmd/linear_model/logistic_regression.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/spmd/neighbors/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/spmd/neighbors/neighbors.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/svm/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/svm/_common.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/svm/nusvc.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/svm/nusvr.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/svm/svc.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/svm/svr.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/svm/tests/test_svm.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/tests/test_config.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/tests/test_monkeypatch.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/tests/test_parallel.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/utils/__init__.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/utils/_namespace.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/utils/parallel.py +0 -0
- {scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/utils/validation.py +0 -0
- {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/LICENSE.txt +0 -0
- {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/top_level.txt +0 -0
sklearnex/_device_offload.py

@@ -127,8 +127,15 @@ def _transfer_to_host(queue, *data):
             queue = usm_iface["syclobj"]

             buffer = as_usm_memory(item).copy_to_host()
+            order = "C"
+            if usm_iface["strides"] is not None:
+                if usm_iface["strides"][0] < usm_iface["strides"][1]:
+                    order = "F"
             item = np.ndarray(
-                shape=usm_iface["shape"],
+                shape=usm_iface["shape"],
+                dtype=usm_iface["typestr"],
+                buffer=buffer,
+                order=order,
             )
             has_usm_data = True
         else:
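The added order logic keeps Fortran-ordered USM arrays column-major after they are copied to the host: a 2-D array is column-major exactly when its stride along axis 0 is smaller than its stride along axis 1. A standalone NumPy sketch of the same check (illustrative only; the helper name is made up and not part of the package):

import numpy as np

def infer_order(strides):
    # Column-major (Fortran) 2-D arrays step faster along axis 0 than axis 1.
    if strides is not None and strides[0] < strides[1]:
        return "F"
    return "C"

a_f = np.asfortranarray(np.arange(12, dtype=np.float64).reshape(3, 4))
a_c = np.ascontiguousarray(a_f)

print(infer_order(a_f.strides))                         # "F"
print(infer_order(a_c.__array_interface__["strides"]))  # "C" (strides is None for C-contiguous)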
sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py

@@ -165,7 +165,7 @@ test_partial_fit_multiple_options_on_random_data(
         expected_sum(X),
     )

-    tol =
+    tol = 3e-4 if res_mean.dtype == np.float32 else 1e-7
     assert_allclose(gtr_mean, res_mean, atol=tol)
     assert_allclose(gtr_max, res_max, atol=tol)
     assert_allclose(gtr_sum, res_sum, atol=tol)

@@ -208,7 +208,6 @@ test_partial_fit_all_option_on_random_data(

     for option in options_and_tests:
         result_option, function, tols = option
-        print(result_option)
         fp32tol, fp64tol = tols
         res = getattr(result, result_option)
         if weighted:

@@ -301,7 +300,7 @@ test_fit_single_option_on_random_data(
 @pytest.mark.parametrize("column_count", [10, 100])
 @pytest.mark.parametrize("weighted", [True, False])
 @pytest.mark.parametrize("dtype", [np.float32, np.float64])
-def
+def test_fit_multiple_options_on_random_data(
     dataframe, queue, num_batches, row_count, column_count, weighted, dtype
 ):
     seed = 77

@@ -375,7 +374,6 @@ test_fit_all_option_on_random_data(

     for option in options_and_tests:
         result_option, function, tols = option
-        print(result_option)
         fp32tol, fp64tol = tols
         res = getattr(result, result_option)
         if weighted:
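The tolerance change above follows the common pattern of choosing the comparison threshold from the result dtype, loosening float32 checks while keeping float64 checks tight. A minimal illustration (values are arbitrary, not taken from the test):

import numpy as np
from numpy.testing import assert_allclose

res = np.float32(1.00002)
gtr = np.float32(1.0)

# Loosen the absolute tolerance for float32 results, keep float64 tight.
tol = 3e-4 if res.dtype == np.float32 else 1e-7
assert_allclose(gtr, res, atol=tol)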
sklearnex/covariance/incremental_covariance.py (new file)

@@ -0,0 +1,317 @@
+# ===============================================================================
+# Copyright 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===============================================================================
+
+import numbers
+import warnings
+
+import numpy as np
+from scipy import linalg
+from sklearn.base import BaseEstimator
+from sklearn.covariance import EmpiricalCovariance as sklearn_EmpiricalCovariance
+from sklearn.utils import check_array, gen_batches
+
+from daal4py.sklearn._n_jobs_support import control_n_jobs
+from daal4py.sklearn._utils import daal_check_version, sklearn_check_version
+from onedal._device_offload import support_usm_ndarray
+from onedal.covariance import (
+    IncrementalEmpiricalCovariance as onedal_IncrementalEmpiricalCovariance,
+)
+from sklearnex import config_context
+
+from .._device_offload import dispatch, wrap_output_data
+from .._utils import PatchingConditionsChain, register_hyperparameters
+from ..metrics import pairwise_distances
+
+if sklearn_check_version("1.2"):
+    from sklearn.utils._param_validation import Interval
+
+
+@control_n_jobs(decorated_methods=["partial_fit", "fit", "_onedal_finalize_fit"])
+class IncrementalEmpiricalCovariance(BaseEstimator):
+    """
+    Incremental estimator for covariance.
+    Allows to compute empirical covariance estimated by maximum
+    likelihood method if data are splitted into batches.
+
+    Parameters
+    ----------
+    store_precision : bool, default=False
+        Specifies if the estimated precision is stored.
+
+    assume_centered : bool, default=False
+        If True, data are not centered before computation.
+        Useful when working with data whose mean is almost, but not exactly
+        zero.
+        If False (default), data are centered before computation.
+
+    batch_size : int, default=None
+        The number of samples to use for each batch. Only used when calling
+        ``fit``. If ``batch_size`` is ``None``, then ``batch_size``
+        is inferred from the data and set to ``5 * n_features``, to provide a
+        balance between approximation accuracy and memory consumption.
+
+    copy : bool, default=True
+        If False, X will be overwritten. ``copy=False`` can be used to
+        save memory but is unsafe for general use.
+
+    Attributes
+    ----------
+    location_ : ndarray of shape (n_features,)
+        Estimated location, i.e. the estimated mean.
+
+    covariance_ : ndarray of shape (n_features, n_features)
+        Estimated covariance matrix
+
+    n_samples_seen_ : int
+        The number of samples processed by the estimator. Will be reset on
+        new calls to fit, but increments across ``partial_fit`` calls.
+
+    batch_size_ : int
+        Inferred batch size from ``batch_size``.
+
+    n_features_in_ : int
+        Number of features seen during :term:`fit` `partial_fit`.
+    """
+
+    _onedal_incremental_covariance = staticmethod(onedal_IncrementalEmpiricalCovariance)
+
+    if sklearn_check_version("1.2"):
+        _parameter_constraints: dict = {
+            "store_precision": ["boolean"],
+            "assume_centered": ["boolean"],
+            "batch_size": [Interval(numbers.Integral, 1, None, closed="left"), None],
+            "copy": ["boolean"],
+        }
+
+    get_precision = sklearn_EmpiricalCovariance.get_precision
+    error_norm = wrap_output_data(sklearn_EmpiricalCovariance.error_norm)
+    score = wrap_output_data(sklearn_EmpiricalCovariance.score)
+
+    def __init__(
+        self, *, store_precision=False, assume_centered=False, batch_size=None, copy=True
+    ):
+        self.assume_centered = assume_centered
+        self.store_precision = store_precision
+        self.batch_size = batch_size
+        self.copy = copy
+
+    def _onedal_supported(self, method_name, *data):
+        patching_status = PatchingConditionsChain(
+            f"sklearn.covariance.{self.__class__.__name__}.{method_name}"
+        )
+        return patching_status
+
+    def _onedal_finalize_fit(self):
+        assert hasattr(self, "_onedal_estimator")
+        self._onedal_estimator.finalize_fit()
+        self._need_to_finalize = False
+
+        if not daal_check_version((2024, "P", 400)) and self.assume_centered:
+            location = self._onedal_estimator.location_[None, :]
+            self._onedal_estimator.covariance_ += np.dot(location.T, location)
+            self._onedal_estimator.location_ = np.zeros_like(np.squeeze(location))
+        if self.store_precision:
+            self.precision_ = linalg.pinvh(
+                self._onedal_estimator.covariance_, check_finite=False
+            )
+        else:
+            self.precision_ = None
+
+    @property
+    def covariance_(self):
+        if hasattr(self, "_onedal_estimator"):
+            if self._need_to_finalize:
+                self._onedal_finalize_fit()
+            return self._onedal_estimator.covariance_
+        else:
+            raise AttributeError(
+                f"'{self.__class__.__name__}' object has no attribute 'covariance_'"
+            )
+
+    @property
+    def location_(self):
+        if hasattr(self, "_onedal_estimator"):
+            if self._need_to_finalize:
+                self._onedal_finalize_fit()
+            return self._onedal_estimator.location_
+        else:
+            raise AttributeError(
+                f"'{self.__class__.__name__}' object has no attribute 'location_'"
+            )
+
+    def _onedal_partial_fit(self, X, queue=None, check_input=True):
+
+        first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0
+
+        # finite check occurs on onedal side
+        if check_input:
+            if sklearn_check_version("1.2"):
+                self._validate_params()
+
+            if sklearn_check_version("1.0"):
+                X = self._validate_data(
+                    X,
+                    dtype=[np.float64, np.float32],
+                    reset=first_pass,
+                    copy=self.copy,
+                    force_all_finite=False,
+                )
+            else:
+                X = check_array(
+                    X,
+                    dtype=[np.float64, np.float32],
+                    copy=self.copy,
+                    force_all_finite=False,
+                )
+
+        onedal_params = {
+            "method": "dense",
+            "bias": True,
+            "assume_centered": self.assume_centered,
+        }
+        if not hasattr(self, "_onedal_estimator"):
+            self._onedal_estimator = self._onedal_incremental_covariance(**onedal_params)
+        try:
+            if first_pass:
+                self.n_samples_seen_ = X.shape[0]
+                self.n_features_in_ = X.shape[1]
+            else:
+                self.n_samples_seen_ += X.shape[0]
+
+            self._onedal_estimator.partial_fit(X, queue)
+        finally:
+            self._need_to_finalize = True
+
+        return self
+
+    def partial_fit(self, X, y=None, check_input=True):
+        """
+        Incremental fit with X. All of X is processed as a single batch.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training data, where `n_samples` is the number of samples and
+            `n_features` is the number of features.
+
+        y : Ignored
+            Not used, present for API consistency by convention.
+
+        check_input : bool, default=True
+            Run check_array on X.
+
+        Returns
+        -------
+        self : object
+            Returns the instance itself.
+        """
+        return dispatch(
+            self,
+            "partial_fit",
+            {
+                "onedal": self.__class__._onedal_partial_fit,
+                "sklearn": None,
+            },
+            X,
+            check_input=check_input,
+        )
+
+    def fit(self, X, y=None):
+        """
+        Fit the model with X, using minibatches of size batch_size.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training data, where `n_samples` is the number of samples and
+            `n_features` is the number of features.
+
+        y : Ignored
+            Not used, present for API consistency by convention.
+
+        Returns
+        -------
+        self : object
+            Returns the instance itself.
+        """
+
+        return dispatch(
+            self,
+            "fit",
+            {
+                "onedal": self.__class__._onedal_fit,
+                "sklearn": None,
+            },
+            X,
+        )
+
+    def _onedal_fit(self, X, queue=None):
+        self.n_samples_seen_ = 0
+        if hasattr(self, "_onedal_estimator"):
+            self._onedal_estimator._reset()
+
+        if sklearn_check_version("1.2"):
+            self._validate_params()
+
+        # finite check occurs on onedal side
+        if sklearn_check_version("1.0"):
+            X = self._validate_data(
+                X, dtype=[np.float64, np.float32], copy=self.copy, force_all_finite=False
+            )
+        else:
+            X = check_array(
+                X, dtype=[np.float64, np.float32], copy=self.copy, force_all_finite=False
+            )
+        self.n_features_in_ = X.shape[1]
+
+        self.batch_size_ = self.batch_size if self.batch_size else 5 * self.n_features_in_
+
+        if X.shape[0] == 1:
+            warnings.warn(
+                "Only one sample available. You may want to reshape your data array"
+            )
+
+        for batch in gen_batches(X.shape[0], self.batch_size_):
+            X_batch = X[batch]
+            self._onedal_partial_fit(X_batch, queue=queue, check_input=False)
+
+        self._onedal_finalize_fit()
+
+        return self
+
+    # expose sklearnex pairwise_distances if mahalanobis distance eventually supported
+    @wrap_output_data
+    def mahalanobis(self, X):
+        if sklearn_check_version("1.0"):
+            self._validate_data(X, reset=False, copy=self.copy)
+        else:
+            check_array(X, copy=self.copy)
+
+        precision = self.get_precision()
+        with config_context(assume_finite=True):
+            # compute mahalanobis distances
+            dist = pairwise_distances(
+                X, self.location_[np.newaxis, :], metric="mahalanobis", VI=precision
+            )
+
+        return np.reshape(dist, (len(X),)) ** 2
+
+    _onedal_cpu_supported = _onedal_supported
+    _onedal_gpu_supported = _onedal_supported
+
+    mahalanobis.__doc__ = sklearn_EmpiricalCovariance.mahalanobis.__doc__
+    error_norm.__doc__ = sklearn_EmpiricalCovariance.error_norm.__doc__
+    score.__doc__ = sklearn_EmpiricalCovariance.score.__doc__
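The new estimator follows scikit-learn's incremental-estimator conventions: covariance_ and location_ are properties that finalize the pending oneDAL computation on first access after partial_fit. A rough usage sketch, assuming scikit-learn-intelex 2024.5.0 is installed (data values are arbitrary):

import numpy as np
from sklearnex.covariance import IncrementalEmpiricalCovariance

rng = np.random.default_rng(0)
X = rng.standard_normal((1000, 5))

# Stream the data in batches; the result matches fitting on the full array.
est = IncrementalEmpiricalCovariance()
for batch in np.array_split(X, 4):
    est.partial_fit(batch)

print(est.location_.shape)    # (5,)
print(est.covariance_.shape)  # (5, 5)

# fit() batches internally, using batch_size or 5 * n_features by default.
est_full = IncrementalEmpiricalCovariance(batch_size=250).fit(X)
np.testing.assert_allclose(est.covariance_, est_full.covariance_, atol=1e-8)

Note that the oneDAL backend is created with bias=True, so covariance_ is the maximum-likelihood (biased) estimate, matching sklearn.covariance.EmpiricalCovariance.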
sklearnex/covariance/tests/test_incremental_covariance.py

@@ -17,6 +17,10 @@
 import numpy as np
 import pytest
 from numpy.testing import assert_allclose
+from sklearn.covariance.tests.test_covariance import (
+    test_covariance,
+    test_EmpiricalCovariance_validates_mahalanobis,
+)

 from onedal.tests.utils._dataframes_support import (
     _convert_to_dataframe,

@@ -26,13 +30,14 @@ from onedal.tests.utils._dataframes_support import (

 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
 @pytest.mark.parametrize("dtype", [np.float32, np.float64])
-def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype):
+@pytest.mark.parametrize("assume_centered", [True, False])
+def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype, assume_centered):
     from sklearnex.covariance import IncrementalEmpiricalCovariance

     X = np.array([[0, 1], [0, 1]])
     X = X.astype(dtype)
     X_split = np.array_split(X, 2)
-    inccov = IncrementalEmpiricalCovariance()
+    inccov = IncrementalEmpiricalCovariance(assume_centered=assume_centered)

     for i in range(2):
         X_split_df = _convert_to_dataframe(

@@ -40,8 +45,12 @@ def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype):
         )
         result = inccov.partial_fit(X_split_df)

-
-
+    if assume_centered:
+        expected_covariance = np.array([[0, 0], [0, 1]])
+        expected_means = np.array([0, 0])
+    else:
+        expected_covariance = np.array([[0, 0], [0, 0]])
+        expected_means = np.array([0, 1])

     assert_allclose(expected_covariance, result.covariance_)
     assert_allclose(expected_means, result.location_)

@@ -49,7 +58,7 @@ def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype):
     X = np.array([[1, 2], [3, 6]])
     X = X.astype(dtype)
     X_split = np.array_split(X, 2)
-    inccov = IncrementalEmpiricalCovariance()
+    inccov = IncrementalEmpiricalCovariance(assume_centered=assume_centered)

     for i in range(2):
         X_split_df = _convert_to_dataframe(

@@ -57,8 +66,12 @@ def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype):
         )
         result = inccov.partial_fit(X_split_df)

-
-
+    if assume_centered:
+        expected_covariance = np.array([[5, 10], [10, 20]])
+        expected_means = np.array([0, 0])
+    else:
+        expected_covariance = np.array([[1, 2], [2, 4]])
+        expected_means = np.array([2, 4])

     assert_allclose(expected_covariance, result.covariance_)
     assert_allclose(expected_means, result.location_)

@@ -87,9 +100,9 @@ def test_sklearnex_fit_on_gold_data(dataframe, queue, batch_size, dtype):


 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
-@pytest.mark.parametrize("num_batches", [2,
-@pytest.mark.parametrize("row_count", [100, 1000
-@pytest.mark.parametrize("column_count", [10, 100
+@pytest.mark.parametrize("num_batches", [2, 10])
+@pytest.mark.parametrize("row_count", [100, 1000])
+@pytest.mark.parametrize("column_count", [10, 100])
 @pytest.mark.parametrize("dtype", [np.float32, np.float64])
 def test_sklearnex_partial_fit_on_random_data(
     dataframe, queue, num_batches, row_count, column_count, dtype

@@ -117,12 +130,13 @@ test_sklearnex_partial_fit_on_random_data(


 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
-@pytest.mark.parametrize("num_batches", [2,
-@pytest.mark.parametrize("row_count", [100, 1000
-@pytest.mark.parametrize("column_count", [10, 100
+@pytest.mark.parametrize("num_batches", [2, 10])
+@pytest.mark.parametrize("row_count", [100, 1000])
+@pytest.mark.parametrize("column_count", [10, 100])
 @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+@pytest.mark.parametrize("assume_centered", [True, False])
 def test_sklearnex_fit_on_random_data(
-    dataframe, queue, num_batches, row_count, column_count, dtype
+    dataframe, queue, num_batches, row_count, column_count, dtype, assume_centered
 ):
     from sklearnex.covariance import IncrementalEmpiricalCovariance

@@ -132,12 +146,35 @@ test_sklearnex_fit_on_random_data(
     X = X.astype(dtype)
     X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
     batch_size = row_count // num_batches
-    inccov = IncrementalEmpiricalCovariance(
+    inccov = IncrementalEmpiricalCovariance(
+        batch_size=batch_size, assume_centered=assume_centered
+    )

     result = inccov.fit(X_df)

-
-
+    if assume_centered:
+        expected_covariance = np.dot(X.T, X) / X.shape[0]
+        expected_means = np.zeros_like(X[0])
+    else:
+        expected_covariance = np.cov(X.T, bias=1)
+        expected_means = np.mean(X, axis=0)

     assert_allclose(expected_covariance, result.covariance_, atol=1e-6)
     assert_allclose(expected_means, result.location_, atol=1e-6)
+
+
+# Monkeypatch IncrementalEmpiricalCovariance into relevant sklearn.covariance tests
+@pytest.mark.allow_sklearn_fallback
+@pytest.mark.parametrize(
+    "sklearn_test",
+    [
+        test_covariance,
+        test_EmpiricalCovariance_validates_mahalanobis,
+    ],
+)
+def test_IncrementalEmpiricalCovariance_against_sklearn(monkeypatch, sklearn_test):
+    from sklearnex.covariance import IncrementalEmpiricalCovariance
+
+    class_name = ".".join([sklearn_test.__module__, "EmpiricalCovariance"])
+    monkeypatch.setattr(class_name, IncrementalEmpiricalCovariance)
+    sklearn_test()
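The gold-data expectations in these tests follow directly from the biased (maximum-likelihood) covariance definition. A quick standalone check for X = [[1, 2], [3, 6]]:

import numpy as np

X = np.array([[1.0, 2.0], [3.0, 6.0]])

# Default case: center by the column means, then divide by n_samples.
means = X.mean(axis=0)                      # [2. 4.]
centered = X - means
cov_biased = centered.T @ centered / X.shape[0]
print(cov_biased)                           # [[1. 2.], [2. 4.]]

# assume_centered=True skips the centering step entirely.
cov_uncentered = X.T @ X / X.shape[0]
print(cov_uncentered)                       # [[ 5. 10.], [10. 20.]]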
sklearnex/decomposition/pca.py

@@ -21,6 +21,7 @@ from daal4py.sklearn._utils import daal_check_version
 if daal_check_version((2024, "P", 100)):
     import numbers
     from math import sqrt
+    from warnings import warn

     import numpy as np
     from scipy.sparse import issparse

@@ -35,9 +36,13 @@ if daal_check_version((2024, "P", 100)):
     if sklearn_check_version("1.1") and not sklearn_check_version("1.2"):
         from sklearn.utils import check_scalar

+    if sklearn_check_version("1.2"):
+        from sklearn.utils._param_validation import StrOptions
+
     from sklearn.decomposition import PCA as sklearn_PCA

     from onedal.decomposition import PCA as onedal_PCA
+    from sklearnex.utils import get_namespace

     @control_n_jobs(decorated_methods=["fit", "transform", "fit_transform"])
     class PCA(sklearn_PCA):

@@ -45,6 +50,16 @@ if daal_check_version((2024, "P", 100)):

         if sklearn_check_version("1.2"):
             _parameter_constraints: dict = {**sklearn_PCA._parameter_constraints}
+            # "onedal_svd" solver uses oneDAL's PCA-SVD algorithm
+            # and required for testing purposes to fully enable it in future.
+            # "covariance_eigh" solver is added for ability to explicitly request
+            # oneDAL's PCA-Covariance algorithm using any sklearn version < 1.5.
+            _parameter_constraints["svd_solver"] = [
+                StrOptions(
+                    _parameter_constraints["svd_solver"][0].options
+                    | {"onedal_svd", "covariance_eigh"}
+                )
+            ]

         if sklearn_check_version("1.1"):

@@ -107,7 +122,7 @@ if daal_check_version((2024, "P", 100)):
                 target_type=numbers.Integral,
             )

-
+            return dispatch(
                 self,
                 "fit",
                 {

@@ -116,7 +131,6 @@ if daal_check_version((2024, "P", 100)):
                 },
                 X,
             )
-            return U, S, Vt

         def _onedal_fit(self, X, queue=None):
             X = self._validate_data(

@@ -129,7 +143,7 @@ if daal_check_version((2024, "P", 100)):
             onedal_params = {
                 "n_components": self.n_components,
                 "is_deterministic": True,
-                "method": "cov",
+                "method": "svd" if self._fit_svd_solver == "onedal_svd" else "cov",
                 "whiten": self.whiten,
             }
             self._onedal_estimator = onedal_PCA(**onedal_params)

@@ -140,7 +154,13 @@ if daal_check_version((2024, "P", 100)):
             S = self.singular_values_
             Vt = self.components_

-
+            if sklearn_check_version("1.5"):
+                xp, _ = get_namespace(X)
+                x_is_centered = not self.copy
+
+                return U, S, Vt, X, x_is_centered, xp
+            else:
+                return U, S, Vt

         @wrap_output_data
         def transform(self, X):

@@ -156,32 +176,39 @@ if daal_check_version((2024, "P", 100)):

         def _onedal_transform(self, X, queue=None):
             check_is_fitted(self)
+            if sklearn_check_version("1.0"):
+                self._check_feature_names(X, reset=False)
             X = self._validate_data(
                 X,
                 dtype=[np.float64, np.float32],
                 reset=False,
             )
             self._validate_n_features_in_after_fitting(X)
-            if sklearn_check_version("1.0"):
-                self._check_feature_names(X, reset=False)

             return self._onedal_estimator.predict(X, queue=queue)

         def fit_transform(self, X, y=None):
-
-
+            if sklearn_check_version("1.5"):
+                U, S, Vt, X_fit, x_is_centered, xp = self._fit(X)
+            else:
+                U, S, Vt = self._fit(X)
+                X_fit = X
+            if hasattr(self, "_onedal_estimator"):
                 # oneDAL PCA was fit
                 return self.transform(X)
-
+            elif U is not None:
                 # Scikit-learn PCA was fit
                 U = U[:, : self.n_components_]

                 if self.whiten:
-                    U *= sqrt(
+                    U *= sqrt(X_fit.shape[0] - 1)
                 else:
                     U *= S[: self.n_components_]

                 return U
+            else:
+                # Scikit-learn PCA["covariance_eigh"] was fit
+                return self._transform(X_fit, xp, x_is_centered=x_is_centered)

         def _onedal_supported(self, method_name, X):
             class_name = self.__class__.__name__

@@ -199,7 +226,13 @@ if daal_check_version((2024, "P", 100)):
                 ),
                 (
                     self._is_solver_compatible_with_onedal(shape_tuple),
-
+                    (
+                        "Only 'covariance_eigh' and 'onedal_svd' "
+                        "solvers are supported."
+                        if sklearn_check_version("1.5")
+                        else "Only 'full', 'covariance_eigh' and 'onedal_svd' "
+                        "solvers are supported."
+                    ),
                 ),
                 (not issparse(X), "oneDAL PCA does not support sparse data"),
             ]

@@ -254,7 +287,13 @@ if daal_check_version((2024, "P", 100)):

             if self._fit_svd_solver == "auto":
                 if sklearn_check_version("1.1"):
-                    if
+                    if (
+                        sklearn_check_version("1.5")
+                        and shape_tuple[1] <= 1_000
+                        and shape_tuple[0] >= 10 * shape_tuple[1]
+                    ):
+                        self._fit_svd_solver = "covariance_eigh"
+                    elif max(shape_tuple) <= 500 or n_components == "mle":
                         self._fit_svd_solver = "full"
                     elif 1 <= n_components < 0.8 * n_sf_min:
                         self._fit_svd_solver = "randomized"

@@ -288,7 +327,23 @@ if daal_check_version((2024, "P", 100)):
             else:
                 self._fit_svd_solver = "full"

-
+            # Use oneDAL in next cases:
+            # 1. oneDAL SVD solver is explicitly set
+            # 2. solver is set or dispatched to "covariance_eigh"
+            # 3. solver is set or dispatched to "full" and sklearn version < 1.5
+            # 4. solver is set to "auto" and dispatched to "full"
+            if self._fit_svd_solver in ["onedal_svd", "covariance_eigh"]:
+                return True
+            elif not sklearn_check_version("1.5") and self._fit_svd_solver == "full":
+                self._fit_svd_solver = "covariance_eigh"
+                return True
+            elif self.svd_solver == "auto" and self._fit_svd_solver == "full":
+                warn(
+                    "Sklearnex always uses `covariance_eigh` solver instead of `full` "
+                    "when `svd_solver` parameter is set to `auto` "
+                    "for performance purposes."
+                )
+                self._fit_svd_solver = "covariance_eigh"
                 return True
             else:
                 return False