scikit-learn-intelex 2025.1.0__py312-none-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-learn-intelex might be problematic.
- daal4py/__init__.py +73 -0
- daal4py/__main__.py +58 -0
- daal4py/_daal4py.cpython-312-x86_64-linux-gnu.so +0 -0
- daal4py/doc/third-party-programs.txt +424 -0
- daal4py/mb/__init__.py +19 -0
- daal4py/mb/model_builders.py +377 -0
- daal4py/mpi_transceiver.cpython-312-x86_64-linux-gnu.so +0 -0
- daal4py/sklearn/__init__.py +40 -0
- daal4py/sklearn/_n_jobs_support.py +248 -0
- daal4py/sklearn/_utils.py +245 -0
- daal4py/sklearn/cluster/__init__.py +20 -0
- daal4py/sklearn/cluster/dbscan.py +165 -0
- daal4py/sklearn/cluster/k_means.py +597 -0
- daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
- daal4py/sklearn/decomposition/__init__.py +19 -0
- daal4py/sklearn/decomposition/_pca.py +524 -0
- daal4py/sklearn/ensemble/AdaBoostClassifier.py +196 -0
- daal4py/sklearn/ensemble/GBTDAAL.py +337 -0
- daal4py/sklearn/ensemble/__init__.py +27 -0
- daal4py/sklearn/ensemble/_forest.py +1397 -0
- daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
- daal4py/sklearn/linear_model/__init__.py +29 -0
- daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
- daal4py/sklearn/linear_model/_linear.py +272 -0
- daal4py/sklearn/linear_model/_ridge.py +325 -0
- daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
- daal4py/sklearn/linear_model/linear.py +17 -0
- daal4py/sklearn/linear_model/logistic_loss.py +195 -0
- daal4py/sklearn/linear_model/logistic_path.py +1026 -0
- daal4py/sklearn/linear_model/ridge.py +17 -0
- daal4py/sklearn/linear_model/tests/test_linear.py +208 -0
- daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
- daal4py/sklearn/manifold/__init__.py +19 -0
- daal4py/sklearn/manifold/_t_sne.py +405 -0
- daal4py/sklearn/metrics/__init__.py +20 -0
- daal4py/sklearn/metrics/_pairwise.py +236 -0
- daal4py/sklearn/metrics/_ranking.py +210 -0
- daal4py/sklearn/model_selection/__init__.py +19 -0
- daal4py/sklearn/model_selection/_split.py +309 -0
- daal4py/sklearn/model_selection/tests/test_split.py +56 -0
- daal4py/sklearn/monkeypatch/__init__.py +0 -0
- daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
- daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
- daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
- daal4py/sklearn/monkeypatch/tests/test_patching.py +90 -0
- daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +117 -0
- daal4py/sklearn/neighbors/__init__.py +21 -0
- daal4py/sklearn/neighbors/_base.py +503 -0
- daal4py/sklearn/neighbors/_classification.py +139 -0
- daal4py/sklearn/neighbors/_regression.py +74 -0
- daal4py/sklearn/neighbors/_unsupervised.py +55 -0
- daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
- daal4py/sklearn/svm/__init__.py +19 -0
- daal4py/sklearn/svm/svm.py +734 -0
- daal4py/sklearn/utils/__init__.py +21 -0
- daal4py/sklearn/utils/base.py +75 -0
- daal4py/sklearn/utils/tests/test_utils.py +51 -0
- daal4py/sklearn/utils/validation.py +693 -0
- onedal/__init__.py +83 -0
- onedal/_config.py +54 -0
- onedal/_device_offload.py +222 -0
- onedal/_onedal_py_dpc.cpython-312-x86_64-linux-gnu.so +0 -0
- onedal/_onedal_py_host.cpython-312-x86_64-linux-gnu.so +0 -0
- onedal/_onedal_py_spmd_dpc.cpython-312-x86_64-linux-gnu.so +0 -0
- onedal/basic_statistics/__init__.py +20 -0
- onedal/basic_statistics/basic_statistics.py +107 -0
- onedal/basic_statistics/incremental_basic_statistics.py +160 -0
- onedal/basic_statistics/tests/test_basic_statistics.py +298 -0
- onedal/basic_statistics/tests/test_incremental_basic_statistics.py +196 -0
- onedal/cluster/__init__.py +27 -0
- onedal/cluster/dbscan.py +110 -0
- onedal/cluster/kmeans.py +564 -0
- onedal/cluster/kmeans_init.py +115 -0
- onedal/cluster/tests/test_dbscan.py +125 -0
- onedal/cluster/tests/test_kmeans.py +88 -0
- onedal/cluster/tests/test_kmeans_init.py +93 -0
- onedal/common/_base.py +38 -0
- onedal/common/_estimator_checks.py +47 -0
- onedal/common/_mixin.py +62 -0
- onedal/common/_policy.py +59 -0
- onedal/common/_spmd_policy.py +30 -0
- onedal/common/hyperparameters.py +125 -0
- onedal/common/tests/test_policy.py +76 -0
- onedal/covariance/__init__.py +20 -0
- onedal/covariance/covariance.py +125 -0
- onedal/covariance/incremental_covariance.py +146 -0
- onedal/covariance/tests/test_covariance.py +50 -0
- onedal/covariance/tests/test_incremental_covariance.py +122 -0
- onedal/datatypes/__init__.py +19 -0
- onedal/datatypes/_data_conversion.py +154 -0
- onedal/datatypes/tests/common.py +126 -0
- onedal/datatypes/tests/test_data.py +414 -0
- onedal/decomposition/__init__.py +20 -0
- onedal/decomposition/incremental_pca.py +204 -0
- onedal/decomposition/pca.py +186 -0
- onedal/decomposition/tests/test_incremental_pca.py +198 -0
- onedal/ensemble/__init__.py +29 -0
- onedal/ensemble/forest.py +727 -0
- onedal/ensemble/tests/test_random_forest.py +97 -0
- onedal/linear_model/__init__.py +27 -0
- onedal/linear_model/incremental_linear_model.py +258 -0
- onedal/linear_model/linear_model.py +329 -0
- onedal/linear_model/logistic_regression.py +249 -0
- onedal/linear_model/tests/test_incremental_linear_regression.py +168 -0
- onedal/linear_model/tests/test_incremental_ridge_regression.py +107 -0
- onedal/linear_model/tests/test_linear_regression.py +250 -0
- onedal/linear_model/tests/test_logistic_regression.py +95 -0
- onedal/linear_model/tests/test_ridge.py +95 -0
- onedal/neighbors/__init__.py +19 -0
- onedal/neighbors/neighbors.py +767 -0
- onedal/neighbors/tests/test_knn_classification.py +49 -0
- onedal/primitives/__init__.py +27 -0
- onedal/primitives/get_tree.py +25 -0
- onedal/primitives/kernel_functions.py +153 -0
- onedal/primitives/tests/test_kernel_functions.py +159 -0
- onedal/spmd/__init__.py +25 -0
- onedal/spmd/_base.py +30 -0
- onedal/spmd/basic_statistics/__init__.py +20 -0
- onedal/spmd/basic_statistics/basic_statistics.py +30 -0
- onedal/spmd/basic_statistics/incremental_basic_statistics.py +69 -0
- onedal/spmd/cluster/__init__.py +28 -0
- onedal/spmd/cluster/dbscan.py +23 -0
- onedal/spmd/cluster/kmeans.py +56 -0
- onedal/spmd/covariance/__init__.py +20 -0
- onedal/spmd/covariance/covariance.py +26 -0
- onedal/spmd/covariance/incremental_covariance.py +82 -0
- onedal/spmd/decomposition/__init__.py +20 -0
- onedal/spmd/decomposition/incremental_pca.py +117 -0
- onedal/spmd/decomposition/pca.py +26 -0
- onedal/spmd/ensemble/__init__.py +19 -0
- onedal/spmd/ensemble/forest.py +28 -0
- onedal/spmd/linear_model/__init__.py +21 -0
- onedal/spmd/linear_model/incremental_linear_model.py +97 -0
- onedal/spmd/linear_model/linear_model.py +30 -0
- onedal/spmd/linear_model/logistic_regression.py +38 -0
- onedal/spmd/neighbors/__init__.py +19 -0
- onedal/spmd/neighbors/neighbors.py +75 -0
- onedal/svm/__init__.py +19 -0
- onedal/svm/svm.py +556 -0
- onedal/svm/tests/test_csr_svm.py +351 -0
- onedal/svm/tests/test_nusvc.py +204 -0
- onedal/svm/tests/test_nusvr.py +210 -0
- onedal/svm/tests/test_svc.py +176 -0
- onedal/svm/tests/test_svr.py +243 -0
- onedal/tests/test_common.py +57 -0
- onedal/tests/utils/_dataframes_support.py +162 -0
- onedal/tests/utils/_device_selection.py +102 -0
- onedal/utils/__init__.py +49 -0
- onedal/utils/_array_api.py +81 -0
- onedal/utils/_dpep_helpers.py +56 -0
- onedal/utils/validation.py +440 -0
- scikit_learn_intelex-2025.1.0.dist-info/LICENSE.txt +202 -0
- scikit_learn_intelex-2025.1.0.dist-info/METADATA +231 -0
- scikit_learn_intelex-2025.1.0.dist-info/RECORD +280 -0
- scikit_learn_intelex-2025.1.0.dist-info/WHEEL +5 -0
- scikit_learn_intelex-2025.1.0.dist-info/top_level.txt +3 -0
- sklearnex/__init__.py +66 -0
- sklearnex/__main__.py +58 -0
- sklearnex/_config.py +116 -0
- sklearnex/_device_offload.py +126 -0
- sklearnex/_utils.py +132 -0
- sklearnex/basic_statistics/__init__.py +20 -0
- sklearnex/basic_statistics/basic_statistics.py +230 -0
- sklearnex/basic_statistics/incremental_basic_statistics.py +345 -0
- sklearnex/basic_statistics/tests/test_basic_statistics.py +270 -0
- sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +404 -0
- sklearnex/cluster/__init__.py +20 -0
- sklearnex/cluster/dbscan.py +197 -0
- sklearnex/cluster/k_means.py +395 -0
- sklearnex/cluster/tests/test_dbscan.py +38 -0
- sklearnex/cluster/tests/test_kmeans.py +159 -0
- sklearnex/conftest.py +82 -0
- sklearnex/covariance/__init__.py +19 -0
- sklearnex/covariance/incremental_covariance.py +398 -0
- sklearnex/covariance/tests/test_incremental_covariance.py +237 -0
- sklearnex/decomposition/__init__.py +19 -0
- sklearnex/decomposition/pca.py +425 -0
- sklearnex/decomposition/tests/test_pca.py +58 -0
- sklearnex/dispatcher.py +543 -0
- sklearnex/doc/third-party-programs.txt +424 -0
- sklearnex/ensemble/__init__.py +29 -0
- sklearnex/ensemble/_forest.py +2029 -0
- sklearnex/ensemble/tests/test_forest.py +135 -0
- sklearnex/glob/__main__.py +72 -0
- sklearnex/glob/dispatcher.py +101 -0
- sklearnex/linear_model/__init__.py +32 -0
- sklearnex/linear_model/coordinate_descent.py +30 -0
- sklearnex/linear_model/incremental_linear.py +482 -0
- sklearnex/linear_model/incremental_ridge.py +425 -0
- sklearnex/linear_model/linear.py +341 -0
- sklearnex/linear_model/logistic_regression.py +413 -0
- sklearnex/linear_model/ridge.py +24 -0
- sklearnex/linear_model/tests/test_incremental_linear.py +207 -0
- sklearnex/linear_model/tests/test_incremental_ridge.py +153 -0
- sklearnex/linear_model/tests/test_linear.py +167 -0
- sklearnex/linear_model/tests/test_logreg.py +134 -0
- sklearnex/manifold/__init__.py +19 -0
- sklearnex/manifold/t_sne.py +21 -0
- sklearnex/manifold/tests/test_tsne.py +26 -0
- sklearnex/metrics/__init__.py +23 -0
- sklearnex/metrics/pairwise.py +22 -0
- sklearnex/metrics/ranking.py +20 -0
- sklearnex/metrics/tests/test_metrics.py +39 -0
- sklearnex/model_selection/__init__.py +21 -0
- sklearnex/model_selection/split.py +22 -0
- sklearnex/model_selection/tests/test_model_selection.py +34 -0
- sklearnex/neighbors/__init__.py +27 -0
- sklearnex/neighbors/_lof.py +236 -0
- sklearnex/neighbors/common.py +310 -0
- sklearnex/neighbors/knn_classification.py +231 -0
- sklearnex/neighbors/knn_regression.py +207 -0
- sklearnex/neighbors/knn_unsupervised.py +178 -0
- sklearnex/neighbors/tests/test_neighbors.py +82 -0
- sklearnex/preview/__init__.py +17 -0
- sklearnex/preview/covariance/__init__.py +19 -0
- sklearnex/preview/covariance/covariance.py +138 -0
- sklearnex/preview/covariance/tests/test_covariance.py +66 -0
- sklearnex/preview/decomposition/__init__.py +19 -0
- sklearnex/preview/decomposition/incremental_pca.py +233 -0
- sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
- sklearnex/preview/linear_model/__init__.py +19 -0
- sklearnex/preview/linear_model/ridge.py +424 -0
- sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
- sklearnex/spmd/__init__.py +25 -0
- sklearnex/spmd/basic_statistics/__init__.py +20 -0
- sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
- sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
- sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
- sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +307 -0
- sklearnex/spmd/cluster/__init__.py +30 -0
- sklearnex/spmd/cluster/dbscan.py +50 -0
- sklearnex/spmd/cluster/kmeans.py +21 -0
- sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
- sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
- sklearnex/spmd/covariance/__init__.py +20 -0
- sklearnex/spmd/covariance/covariance.py +21 -0
- sklearnex/spmd/covariance/incremental_covariance.py +37 -0
- sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
- sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
- sklearnex/spmd/decomposition/__init__.py +20 -0
- sklearnex/spmd/decomposition/incremental_pca.py +30 -0
- sklearnex/spmd/decomposition/pca.py +21 -0
- sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
- sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
- sklearnex/spmd/ensemble/__init__.py +19 -0
- sklearnex/spmd/ensemble/forest.py +71 -0
- sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
- sklearnex/spmd/linear_model/__init__.py +21 -0
- sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
- sklearnex/spmd/linear_model/linear_model.py +21 -0
- sklearnex/spmd/linear_model/logistic_regression.py +21 -0
- sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +329 -0
- sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
- sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +162 -0
- sklearnex/spmd/neighbors/__init__.py +19 -0
- sklearnex/spmd/neighbors/neighbors.py +25 -0
- sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
- sklearnex/svm/__init__.py +29 -0
- sklearnex/svm/_common.py +339 -0
- sklearnex/svm/nusvc.py +371 -0
- sklearnex/svm/nusvr.py +170 -0
- sklearnex/svm/svc.py +399 -0
- sklearnex/svm/svr.py +167 -0
- sklearnex/svm/tests/test_svm.py +93 -0
- sklearnex/tests/test_common.py +390 -0
- sklearnex/tests/test_config.py +123 -0
- sklearnex/tests/test_memory_usage.py +379 -0
- sklearnex/tests/test_monkeypatch.py +276 -0
- sklearnex/tests/test_n_jobs_support.py +108 -0
- sklearnex/tests/test_parallel.py +48 -0
- sklearnex/tests/test_patching.py +385 -0
- sklearnex/tests/test_run_to_run_stability.py +321 -0
- sklearnex/tests/utils/__init__.py +44 -0
- sklearnex/tests/utils/base.py +371 -0
- sklearnex/tests/utils/spmd.py +198 -0
- sklearnex/utils/__init__.py +19 -0
- sklearnex/utils/_array_api.py +82 -0
- sklearnex/utils/parallel.py +59 -0
- sklearnex/utils/tests/test_finite.py +89 -0
- sklearnex/utils/validation.py +17 -0
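The listing above includes the patching machinery (sklearnex/dispatcher.py, daal4py/sklearn/monkeypatch/dispatcher.py) that swaps stock scikit-learn estimators for the accelerated ones shipped in this wheel. A minimal usage sketch follows; it assumes the patch_sklearn entry point documented for scikit-learn-intelex, which is not itself shown in this diff.

# Hedged sketch: assumes the documented sklearnex patching entry point.
from sklearnex import patch_sklearn

patch_sklearn()  # register oneDAL-backed replacements for supported estimators

# After patching, the usual scikit-learn import resolves to the accelerated
# implementation selected by the dispatcher in this wheel.
from sklearn.decomposition import PCA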
daal4py/sklearn/decomposition/__init__.py
@@ -0,0 +1,19 @@
+# ==============================================================================
+# Copyright 2014 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from ._pca import PCA
+
+__all__ = ["PCA"]
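Per this __init__.py, the accelerated estimator can also be imported directly from the daal4py namespace, without global patching. A short sketch:

# Direct import of the class re-exported by the __init__.py above.
from daal4py.sklearn.decomposition import PCA

pca = PCA(n_components=2, svd_solver="full")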
daal4py/sklearn/decomposition/_pca.py
@@ -0,0 +1,524 @@
+# ==============================================================================
+# Copyright 2014 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import numbers
+from math import sqrt
+
+import numpy as np
+from scipy.sparse import issparse
+from sklearn.utils import check_array
+from sklearn.utils.extmath import stable_cumsum
+from sklearn.utils.validation import check_is_fitted
+
+import daal4py
+
+from .._n_jobs_support import control_n_jobs
+from .._utils import PatchingConditionsChain, getFPType, sklearn_check_version
+
+if sklearn_check_version("1.4"):
+    from sklearn.utils._array_api import get_namespace
+
+if sklearn_check_version("1.3"):
+    from sklearn.base import _fit_context
+
+if sklearn_check_version("1.1"):
+    from sklearn.utils import check_scalar
+
+from sklearn.decomposition._pca import PCA as PCA_original
+from sklearn.decomposition._pca import _infer_dimension
+
+
+@control_n_jobs(decorated_methods=["fit", "transform"])
+class PCA(PCA_original):
+    __doc__ = PCA_original.__doc__
+
+    if sklearn_check_version("1.2"):
+        _parameter_constraints: dict = {**PCA_original._parameter_constraints}
+
+    if sklearn_check_version("1.1"):
+
+        def __init__(
+            self,
+            n_components=None,
+            copy=True,
+            whiten=False,
+            svd_solver="auto",
+            tol=0.0,
+            iterated_power="auto",
+            n_oversamples=10,
+            power_iteration_normalizer="auto",
+            random_state=None,
+        ):
+            self.n_components = n_components
+            self.copy = copy
+            self.whiten = whiten
+            self.svd_solver = svd_solver
+            self.tol = tol
+            self.iterated_power = iterated_power
+            self.n_oversamples = n_oversamples
+            self.power_iteration_normalizer = power_iteration_normalizer
+            self.random_state = random_state
+
+    else:
+
+        def __init__(
+            self,
+            n_components=None,
+            copy=True,
+            whiten=False,
+            svd_solver="auto",
+            tol=0.0,
+            iterated_power="auto",
+            random_state=None,
+        ):
+            self.n_components = n_components
+            self.copy = copy
+            self.whiten = whiten
+            self.svd_solver = svd_solver
+            self.tol = tol
+            self.iterated_power = iterated_power
+            self.random_state = random_state
+
+    def _validate_n_components(self, n_components, n_samples, n_features):
+        if n_components == "mle":
+            if n_samples < n_features:
+                raise ValueError(
+                    "n_components='mle' is only supported " "if n_samples >= n_features"
+                )
+        elif not 0 <= n_components <= min(n_samples, n_features):
+            raise ValueError(
+                "n_components=%r must be between 0 and "
+                "min(n_samples, n_features)=%r with "
+                "svd_solver='full'" % (n_components, min(n_samples, n_features))
+            )
+        elif n_components >= 1:
+            if not isinstance(n_components, numbers.Integral):
+                raise ValueError(
+                    "n_components=%r must be of type int "
+                    "when greater than or equal to 1, "
+                    "was of type=%r" % (n_components, type(n_components))
+                )
+
+    def _fit_full_daal4py(self, X, n_components):
+        n_samples, n_features = X.shape
+        n_sf_min = min(n_samples, n_features)
+
+        if n_components == "mle":
+            daal_n_components = n_features
+        elif n_components < 1:
+            daal_n_components = n_sf_min
+        else:
+            daal_n_components = n_components
+
+        fpType = getFPType(X)
+
+        covariance_algo = daal4py.covariance(
+            fptype=fpType, outputMatrixType="covarianceMatrix"
+        )
+        covariance_res = covariance_algo.compute(X)
+
+        self.mean_ = covariance_res.mean.ravel()
+        covariance = covariance_res.covariance
+        variances_ = np.array([covariance[i, i] for i in range(n_features)])
+
+        pca_alg = daal4py.pca(
+            fptype=fpType,
+            method="correlationDense",
+            resultsToCompute="eigenvalue",
+            isDeterministic=True,
+            nComponents=daal_n_components,
+        )
+        pca_res = pca_alg.compute(X, covariance)
+
+        components_ = pca_res.eigenvectors
+        explained_variance_ = np.maximum(pca_res.eigenvalues.ravel(), 0)
+        tot_var = explained_variance_.sum()
+        explained_variance_ratio_ = explained_variance_ / tot_var
+
+        if n_components == "mle":
+            n_components = _infer_dimension(explained_variance_, n_samples)
+        elif 0 < n_components < 1.0:
+            ratio_cumsum = stable_cumsum(explained_variance_ratio_)
+            n_components = np.searchsorted(ratio_cumsum, n_components, side="right") + 1
+
+        if n_components < n_sf_min:
+            if explained_variance_.shape[0] == n_sf_min:
+                self.noise_variance_ = explained_variance_[n_components:].mean()
+            else:
+                resid_var_ = variances_.sum()
+                resid_var_ -= explained_variance_[:n_components].sum()
+                self.noise_variance_ = resid_var_ / (n_sf_min - n_components)
+        else:
+            self.noise_variance_ = 0.0
+
+        if sklearn_check_version("1.2"):
+            self.n_samples_, self.n_features_in_ = n_samples, n_features
+        else:
+            self.n_samples_, self.n_features_ = n_samples, n_features
+        self.components_ = components_[:n_components]
+        self.n_components_ = n_components
+        self.explained_variance_ = explained_variance_[:n_components]
+        self.explained_variance_ratio_ = explained_variance_ratio_[:n_components]
+        self.singular_values_ = np.sqrt((n_samples - 1) * self.explained_variance_)
+
+    def _fit_full(self, X, n_components):
+        n_samples, n_features = X.shape
+        self._validate_n_components(n_components, n_samples, n_features)
+
+        self._fit_full_daal4py(X, min(X.shape))
+
+        U = None
+        V = self.components_
+        S = self.singular_values_
+
+        if n_components == "mle":
+            n_components = _infer_dimension(self.explained_variance_, n_samples)
+        elif 0 < n_components < 1.0:
+            ratio_cumsum = stable_cumsum(self.explained_variance_ratio_)
+            n_components = np.searchsorted(ratio_cumsum, n_components, side="right") + 1
+
+        if n_components < min(n_features, n_samples):
+            self.noise_variance_ = self.explained_variance_[n_components:].mean()
+        else:
+            self.noise_variance_ = 0.0
+
+        if sklearn_check_version("1.2"):
+            self.n_samples_, self.n_features_in_ = n_samples, n_features
+        else:
+            self.n_samples_, self.n_features_ = n_samples, n_features
+        self.components_ = self.components_[:n_components]
+        self.n_components_ = n_components
+        self.explained_variance_ = self.explained_variance_[:n_components]
+        self.explained_variance_ratio_ = self.explained_variance_ratio_[:n_components]
+        self.singular_values_ = self.singular_values_[:n_components]
+
+        return U, S, V
+
+    def _fit(self, X):
+        if sklearn_check_version("1.4"):
+            xp, is_array_api_compliant = get_namespace(X)
+
+            if issparse(X) and self.svd_solver != "arpack":
+                raise TypeError(
+                    'PCA only support sparse inputs with the "arpack" solver, while '
+                    f'"{self.svd_solver}" was passed. See TruncatedSVD for a possible'
+                    " alternative."
+                )
+            # Raise an error for non-Numpy input and arpack solver.
+            if self.svd_solver == "arpack" and is_array_api_compliant:
+                raise ValueError(
+                    "PCA with svd_solver='arpack' is not supported for Array API inputs."
+                )
+
+            X = self._validate_data(
+                X,
+                dtype=[xp.float64, xp.float32],
+                accept_sparse=("csr", "csc"),
+                ensure_2d=True,
+                copy=self.copy,
+            )
+
+        else:
+            if issparse(X):
+                raise TypeError(
+                    "PCA does not support sparse input. See "
+                    "TruncatedSVD for a possible alternative."
+                )
+            X = self._validate_data(
+                X, dtype=[np.float64, np.float32], ensure_2d=True, copy=False
+            )
+
+        if self.n_components is None:
+            if self.svd_solver != "arpack":
+                n_components = min(X.shape)
+            else:
+                n_components = min(X.shape) - 1
+        else:
+            n_components = self.n_components
+
+        self._fit_svd_solver = self.svd_solver
+        shape_good_for_daal = X.shape[1] / X.shape[0] < 2
+
+        if self._fit_svd_solver == "auto":
+            if sklearn_check_version("1.1"):
+                # Small problem or n_components == 'mle', just call full PCA
+                if max(X.shape) <= 500 or n_components == "mle":
+                    self._fit_svd_solver = "full"
+                elif 1 <= n_components < 0.8 * min(X.shape):
+                    self._fit_svd_solver = "randomized"
+                # This is also the case of n_components in (0,1)
+                else:
+                    self._fit_svd_solver = "full"
+            else:
+                if n_components == "mle":
+                    self._fit_svd_solver = "full"
+                else:
+                    n, p, k = X.shape[0], X.shape[1], n_components
+                    # These coefficients are result of training of Logistic Regression
+                    # (max_iter=10000, solver="liblinear", fit_intercept=False)
+                    # on different datasets and number of components.
+                    # X is a dataset with npk, np^2, and n^2 columns.
+                    # And y is speedup of patched scikit-learn's
+                    # full PCA against stock scikit-learn's randomized PCA.
+                    regression_coefs = np.array(
+                        [
+                            [9.779873e-11, n * p * k],
+                            [-1.122062e-11, n * p * p],
+                            [1.127905e-09, n**2],
+                        ]
+                    )
+
+                    if (
+                        n_components >= 1
+                        and np.dot(regression_coefs[:, 0], regression_coefs[:, 1]) <= 0
+                    ):
+                        self._fit_svd_solver = "randomized"
+                    else:
+                        self._fit_svd_solver = "full"
+
+        if not shape_good_for_daal or self._fit_svd_solver != "full":
+            if sklearn_check_version("1.4"):
+                X = self._validate_data(X, copy=self.copy, accept_sparse=("csr", "csc"))
+            else:
+                X = self._validate_data(X, copy=self.copy)
+
+        _patching_status = PatchingConditionsChain("sklearn.decomposition.PCA.fit")
+        _dal_ready = _patching_status.and_conditions(
+            [
+                (
+                    self._fit_svd_solver == "full",
+                    f"'{self._fit_svd_solver}' SVD solver is not supported. "
+                    "Only 'full' solver is supported.",
+                )
+            ]
+        )
+
+        if _dal_ready:
+            _dal_ready = _patching_status.and_conditions(
+                [
+                    (
+                        shape_good_for_daal,
+                        "The shape of X does not satisfy oneDAL requirements: "
+                        "number of features / number of samples >= 2",
+                    ),
+                ]
+            )
+            if _dal_ready:
+                result = self._fit_full(X, n_components)
+            else:
+                result = PCA_original._fit_full(self, X, n_components)
+        elif self._fit_svd_solver in ["arpack", "randomized"]:
+            result = self._fit_truncated(X, n_components, self._fit_svd_solver)
+        else:
+            raise ValueError(
+                "Unrecognized svd_solver='{0}'" "".format(self._fit_svd_solver)
+            )
+
+        _patching_status.write_log()
+        return result
+
+    def _transform_daal4py(self, X, whiten=False, scale_eigenvalues=True, check_X=True):
+        check_is_fitted(self)
+
+        if sklearn_check_version("1.0"):
+            self._check_feature_names(X, reset=False)
+        X = check_array(X, dtype=[np.float64, np.float32], force_all_finite=check_X)
+        fpType = getFPType(X)
+
+        tr_data = dict()
+        if self.mean_ is not None:
+            tr_data["mean"] = self.mean_.reshape((1, -1))
+        if whiten:
+            if scale_eigenvalues:
+                tr_data["eigenvalue"] = (
+                    self.n_samples_ - 1
+                ) * self.explained_variance_.reshape((1, -1))
+            else:
+                tr_data["eigenvalue"] = self.explained_variance_.reshape((1, -1))
+        elif scale_eigenvalues:
+            tr_data["eigenvalue"] = np.full(
+                (1, self.explained_variance_.shape[0]),
+                self.n_samples_ - 1.0,
+                dtype=X.dtype,
+            )
+
+        if sklearn_check_version("1.2"):
+            expected_n_features = self.n_features_in_
+        else:
+            expected_n_features = self.n_features_
+        if X.shape[1] != expected_n_features:
+            raise ValueError(
+                (
+                    f"X has {X.shape[1]} features, "
+                    f"but PCA is expecting {expected_n_features} features as input"
+                )
+            )
+
+        tr_res = daal4py.pca_transform(fptype=fpType).compute(
+            X, self.components_, tr_data
+        )
+
+        return tr_res.transformedData
+
+    if sklearn_check_version("1.3"):
+
+        @_fit_context(prefer_skip_nested_validation=True)
+        def fit(self, X, y=None):
+            """Fit the model with X.
+
+            Parameters
+            ----------
+            X : array-like of shape (n_samples, n_features)
+                Training data, where `n_samples` is the number of samples
+                and `n_features` is the number of features.
+
+            y : Ignored
+                Ignored.
+
+            Returns
+            -------
+            self : object
+                Returns the instance itself.
+            """
+            self._fit(X)
+            return self
+
+    else:
+
+        def fit(self, X, y=None):
+            """Fit the model with X.
+
+            Parameters
+            ----------
+            X : array-like of shape (n_samples, n_features)
+                Training data, where `n_samples` is the number of samples
+                and `n_features` is the number of features.
+
+            y : Ignored
+                Ignored.
+
+            Returns
+            -------
+            self : object
+                Returns the instance itself.
+            """
+            if sklearn_check_version("1.2"):
+                self._validate_params()
+            elif sklearn_check_version("1.1"):
+                check_scalar(
+                    self.n_oversamples,
+                    "n_oversamples",
+                    min_val=1,
+                    target_type=numbers.Integral,
+                )
+
+            self._fit(X)
+            return self
+
+    def transform(self, X):
+        """
+        Apply dimensionality reduction to X.
+
+        X is projected on the first principal components previously extracted
+        from a training set.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            New data, where `n_samples` is the number of samples
+            and `n_features` is the number of features.
+
+        Returns
+        -------
+        X_new : array-like of shape (n_samples, n_components)
+            Projection of X in the first principal components, where `n_samples`
+            is the number of samples and `n_components` is the number of the components.
+        """
+        _patching_status = PatchingConditionsChain("sklearn.decomposition.PCA.transform")
+        _dal_ready = _patching_status.and_conditions(
+            [
+                (self.n_components_ > 0, "Number of components <= 0."),
+                (not issparse(X), "oneDAL PCA does not support sparse input"),
+            ]
+        )
+
+        _patching_status.write_log()
+        if _dal_ready:
+            return self._transform_daal4py(
+                X, whiten=self.whiten, check_X=True, scale_eigenvalues=False
+            )
+        return PCA_original.transform(self, X)
+
+    def fit_transform(self, X, y=None):
+        """
+        Fit the model with X and apply the dimensionality reduction on X.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training data, where `n_samples` is the number of samples
+            and `n_features` is the number of features.
+
+        y : Ignored
+            Ignored.
+
+        Returns
+        -------
+        X_new : ndarray of shape (n_samples, n_components)
+            Transformed values.
+
+        Notes
+        -----
+        This method returns a Fortran-ordered array. To convert it to a
+        C-ordered array, use 'np.ascontiguousarray'.
+        """
+
+        if sklearn_check_version("1.2"):
+            self._validate_params()
+
+        U, S, Vt = self._fit(X)
+
+        _patching_status = PatchingConditionsChain(
+            "sklearn.decomposition.PCA.fit_transform"
+        )
+        _dal_ready = _patching_status.and_conditions(
+            [(U is None, "Stock fitting was used.")]
+        )
+        if _dal_ready:
+            _dal_ready = _patching_status.and_conditions(
+                [
+                    (self.n_components_ > 0, "Number of components <= 0."),
+                    (not issparse(X), "oneDAL PCA does not support sparse input"),
+                ]
+            )
+            if _dal_ready:
+                result = self._transform_daal4py(
+                    X, whiten=self.whiten, check_X=False, scale_eigenvalues=False
+                )
+            else:
+                result = np.empty((self.n_samples_, 0), dtype=X.dtype)
+        else:
+            U = U[:, : self.n_components_]
+
+            if self.whiten:
+                U *= sqrt(X.shape[0] - 1)
+            else:
+                U *= S[: self.n_components_]
+
+            result = U
+
+        _patching_status.write_log()
+        return result
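Per the _fit method above, the daal4py backend is used only when the resolved solver is "full" and the input satisfies n_features / n_samples < 2; otherwise the call falls back to stock scikit-learn. A hedged usage sketch of the class shown in this hunk, on data that meets those conditions:

import numpy as np

# Sketch only: exercises the PCA class from the _pca.py diff above.
from daal4py.sklearn.decomposition import PCA

rng = np.random.default_rng(0)
X = rng.standard_normal((1000, 20))  # tall matrix, so n_features / n_samples < 2

pca = PCA(n_components=5, svd_solver="full").fit(X)  # "full" solver keeps the oneDAL path eligible
X_reduced = pca.transform(X)  # dense input with n_components_ > 0 uses _transform_daal4py
print(pca.explained_variance_ratio_, X_reduced.shape)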