scikit-learn-intelex 2025.0.0__py312-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (278)
  1. daal4py/__init__.py +73 -0
  2. daal4py/__main__.py +58 -0
  3. daal4py/_daal4py.cpython-312-x86_64-linux-gnu.so +0 -0
  4. daal4py/doc/third-party-programs.txt +424 -0
  5. daal4py/mb/__init__.py +19 -0
  6. daal4py/mb/model_builders.py +377 -0
  7. daal4py/mpi_transceiver.cpython-312-x86_64-linux-gnu.so +0 -0
  8. daal4py/sklearn/__init__.py +40 -0
  9. daal4py/sklearn/_n_jobs_support.py +242 -0
  10. daal4py/sklearn/_utils.py +241 -0
  11. daal4py/sklearn/cluster/__init__.py +20 -0
  12. daal4py/sklearn/cluster/dbscan.py +165 -0
  13. daal4py/sklearn/cluster/k_means.py +597 -0
  14. daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
  15. daal4py/sklearn/decomposition/__init__.py +19 -0
  16. daal4py/sklearn/decomposition/_pca.py +524 -0
  17. daal4py/sklearn/ensemble/AdaBoostClassifier.py +192 -0
  18. daal4py/sklearn/ensemble/GBTDAAL.py +318 -0
  19. daal4py/sklearn/ensemble/__init__.py +27 -0
  20. daal4py/sklearn/ensemble/_forest.py +1397 -0
  21. daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
  22. daal4py/sklearn/linear_model/__init__.py +29 -0
  23. daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
  24. daal4py/sklearn/linear_model/_linear.py +272 -0
  25. daal4py/sklearn/linear_model/_ridge.py +325 -0
  26. daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
  27. daal4py/sklearn/linear_model/linear.py +17 -0
  28. daal4py/sklearn/linear_model/logistic_loss.py +195 -0
  29. daal4py/sklearn/linear_model/logistic_path.py +1026 -0
  30. daal4py/sklearn/linear_model/ridge.py +17 -0
  31. daal4py/sklearn/linear_model/tests/test_linear.py +196 -0
  32. daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
  33. daal4py/sklearn/manifold/__init__.py +19 -0
  34. daal4py/sklearn/manifold/_t_sne.py +405 -0
  35. daal4py/sklearn/metrics/__init__.py +20 -0
  36. daal4py/sklearn/metrics/_pairwise.py +155 -0
  37. daal4py/sklearn/metrics/_ranking.py +210 -0
  38. daal4py/sklearn/model_selection/__init__.py +19 -0
  39. daal4py/sklearn/model_selection/_split.py +309 -0
  40. daal4py/sklearn/model_selection/tests/test_split.py +56 -0
  41. daal4py/sklearn/monkeypatch/__init__.py +0 -0
  42. daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
  43. daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
  44. daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
  45. daal4py/sklearn/monkeypatch/tests/test_patching.py +87 -0
  46. daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +118 -0
  47. daal4py/sklearn/neighbors/__init__.py +21 -0
  48. daal4py/sklearn/neighbors/_base.py +503 -0
  49. daal4py/sklearn/neighbors/_classification.py +139 -0
  50. daal4py/sklearn/neighbors/_regression.py +74 -0
  51. daal4py/sklearn/neighbors/_unsupervised.py +55 -0
  52. daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
  53. daal4py/sklearn/svm/__init__.py +19 -0
  54. daal4py/sklearn/svm/svm.py +734 -0
  55. daal4py/sklearn/utils/__init__.py +21 -0
  56. daal4py/sklearn/utils/base.py +75 -0
  57. daal4py/sklearn/utils/tests/test_utils.py +51 -0
  58. daal4py/sklearn/utils/validation.py +693 -0
  59. onedal/__init__.py +83 -0
  60. onedal/_config.py +53 -0
  61. onedal/_device_offload.py +229 -0
  62. onedal/_onedal_py_dpc.cpython-312-x86_64-linux-gnu.so +0 -0
  63. onedal/_onedal_py_host.cpython-312-x86_64-linux-gnu.so +0 -0
  64. onedal/_onedal_py_spmd_dpc.cpython-312-x86_64-linux-gnu.so +0 -0
  65. onedal/basic_statistics/__init__.py +20 -0
  66. onedal/basic_statistics/basic_statistics.py +107 -0
  67. onedal/basic_statistics/incremental_basic_statistics.py +160 -0
  68. onedal/basic_statistics/tests/test_basic_statistics.py +298 -0
  69. onedal/basic_statistics/tests/test_incremental_basic_statistics.py +196 -0
  70. onedal/cluster/__init__.py +27 -0
  71. onedal/cluster/dbscan.py +110 -0
  72. onedal/cluster/kmeans.py +560 -0
  73. onedal/cluster/kmeans_init.py +115 -0
  74. onedal/cluster/tests/test_dbscan.py +125 -0
  75. onedal/cluster/tests/test_kmeans.py +88 -0
  76. onedal/cluster/tests/test_kmeans_init.py +93 -0
  77. onedal/common/_base.py +38 -0
  78. onedal/common/_estimator_checks.py +47 -0
  79. onedal/common/_mixin.py +62 -0
  80. onedal/common/_policy.py +59 -0
  81. onedal/common/_spmd_policy.py +30 -0
  82. onedal/common/hyperparameters.py +116 -0
  83. onedal/common/tests/test_policy.py +75 -0
  84. onedal/covariance/__init__.py +20 -0
  85. onedal/covariance/covariance.py +125 -0
  86. onedal/covariance/incremental_covariance.py +146 -0
  87. onedal/covariance/tests/test_covariance.py +50 -0
  88. onedal/covariance/tests/test_incremental_covariance.py +122 -0
  89. onedal/datatypes/__init__.py +19 -0
  90. onedal/datatypes/_data_conversion.py +95 -0
  91. onedal/datatypes/tests/test_data.py +235 -0
  92. onedal/decomposition/__init__.py +20 -0
  93. onedal/decomposition/incremental_pca.py +204 -0
  94. onedal/decomposition/pca.py +186 -0
  95. onedal/decomposition/tests/test_incremental_pca.py +198 -0
  96. onedal/ensemble/__init__.py +29 -0
  97. onedal/ensemble/forest.py +720 -0
  98. onedal/ensemble/tests/test_random_forest.py +97 -0
  99. onedal/linear_model/__init__.py +27 -0
  100. onedal/linear_model/incremental_linear_model.py +258 -0
  101. onedal/linear_model/linear_model.py +329 -0
  102. onedal/linear_model/logistic_regression.py +249 -0
  103. onedal/linear_model/tests/test_incremental_linear_regression.py +168 -0
  104. onedal/linear_model/tests/test_incremental_ridge_regression.py +107 -0
  105. onedal/linear_model/tests/test_linear_regression.py +149 -0
  106. onedal/linear_model/tests/test_logistic_regression.py +95 -0
  107. onedal/linear_model/tests/test_ridge.py +95 -0
  108. onedal/neighbors/__init__.py +19 -0
  109. onedal/neighbors/neighbors.py +778 -0
  110. onedal/neighbors/tests/test_knn_classification.py +49 -0
  111. onedal/primitives/__init__.py +27 -0
  112. onedal/primitives/get_tree.py +25 -0
  113. onedal/primitives/kernel_functions.py +153 -0
  114. onedal/primitives/tests/test_kernel_functions.py +159 -0
  115. onedal/spmd/__init__.py +25 -0
  116. onedal/spmd/_base.py +30 -0
  117. onedal/spmd/basic_statistics/__init__.py +20 -0
  118. onedal/spmd/basic_statistics/basic_statistics.py +30 -0
  119. onedal/spmd/basic_statistics/incremental_basic_statistics.py +69 -0
  120. onedal/spmd/cluster/__init__.py +28 -0
  121. onedal/spmd/cluster/dbscan.py +23 -0
  122. onedal/spmd/cluster/kmeans.py +56 -0
  123. onedal/spmd/covariance/__init__.py +20 -0
  124. onedal/spmd/covariance/covariance.py +26 -0
  125. onedal/spmd/covariance/incremental_covariance.py +82 -0
  126. onedal/spmd/decomposition/__init__.py +20 -0
  127. onedal/spmd/decomposition/incremental_pca.py +117 -0
  128. onedal/spmd/decomposition/pca.py +26 -0
  129. onedal/spmd/ensemble/__init__.py +19 -0
  130. onedal/spmd/ensemble/forest.py +28 -0
  131. onedal/spmd/linear_model/__init__.py +21 -0
  132. onedal/spmd/linear_model/incremental_linear_model.py +97 -0
  133. onedal/spmd/linear_model/linear_model.py +30 -0
  134. onedal/spmd/linear_model/logistic_regression.py +38 -0
  135. onedal/spmd/neighbors/__init__.py +19 -0
  136. onedal/spmd/neighbors/neighbors.py +75 -0
  137. onedal/svm/__init__.py +19 -0
  138. onedal/svm/svm.py +556 -0
  139. onedal/svm/tests/test_csr_svm.py +351 -0
  140. onedal/svm/tests/test_nusvc.py +204 -0
  141. onedal/svm/tests/test_nusvr.py +210 -0
  142. onedal/svm/tests/test_svc.py +168 -0
  143. onedal/svm/tests/test_svr.py +243 -0
  144. onedal/tests/test_common.py +41 -0
  145. onedal/tests/utils/_dataframes_support.py +168 -0
  146. onedal/tests/utils/_device_selection.py +107 -0
  147. onedal/utils/__init__.py +49 -0
  148. onedal/utils/_array_api.py +91 -0
  149. onedal/utils/validation.py +432 -0
  150. scikit_learn_intelex-2025.0.0.dist-info/LICENSE.txt +202 -0
  151. scikit_learn_intelex-2025.0.0.dist-info/METADATA +231 -0
  152. scikit_learn_intelex-2025.0.0.dist-info/RECORD +278 -0
  153. scikit_learn_intelex-2025.0.0.dist-info/WHEEL +5 -0
  154. scikit_learn_intelex-2025.0.0.dist-info/top_level.txt +3 -0
  155. sklearnex/__init__.py +65 -0
  156. sklearnex/__main__.py +58 -0
  157. sklearnex/_config.py +98 -0
  158. sklearnex/_device_offload.py +121 -0
  159. sklearnex/_utils.py +109 -0
  160. sklearnex/basic_statistics/__init__.py +20 -0
  161. sklearnex/basic_statistics/basic_statistics.py +140 -0
  162. sklearnex/basic_statistics/incremental_basic_statistics.py +288 -0
  163. sklearnex/basic_statistics/tests/test_basic_statistics.py +251 -0
  164. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +384 -0
  165. sklearnex/cluster/__init__.py +20 -0
  166. sklearnex/cluster/dbscan.py +192 -0
  167. sklearnex/cluster/k_means.py +383 -0
  168. sklearnex/cluster/tests/test_dbscan.py +38 -0
  169. sklearnex/cluster/tests/test_kmeans.py +153 -0
  170. sklearnex/conftest.py +73 -0
  171. sklearnex/covariance/__init__.py +19 -0
  172. sklearnex/covariance/incremental_covariance.py +368 -0
  173. sklearnex/covariance/tests/test_incremental_covariance.py +226 -0
  174. sklearnex/decomposition/__init__.py +19 -0
  175. sklearnex/decomposition/pca.py +414 -0
  176. sklearnex/decomposition/tests/test_pca.py +58 -0
  177. sklearnex/dispatcher.py +543 -0
  178. sklearnex/doc/third-party-programs.txt +424 -0
  179. sklearnex/ensemble/__init__.py +29 -0
  180. sklearnex/ensemble/_forest.py +2016 -0
  181. sklearnex/ensemble/tests/test_forest.py +120 -0
  182. sklearnex/glob/__main__.py +72 -0
  183. sklearnex/glob/dispatcher.py +101 -0
  184. sklearnex/linear_model/__init__.py +32 -0
  185. sklearnex/linear_model/coordinate_descent.py +30 -0
  186. sklearnex/linear_model/incremental_linear.py +463 -0
  187. sklearnex/linear_model/incremental_ridge.py +418 -0
  188. sklearnex/linear_model/linear.py +302 -0
  189. sklearnex/linear_model/logistic_path.py +17 -0
  190. sklearnex/linear_model/logistic_regression.py +403 -0
  191. sklearnex/linear_model/ridge.py +24 -0
  192. sklearnex/linear_model/tests/test_incremental_linear.py +203 -0
  193. sklearnex/linear_model/tests/test_incremental_ridge.py +153 -0
  194. sklearnex/linear_model/tests/test_linear.py +142 -0
  195. sklearnex/linear_model/tests/test_logreg.py +134 -0
  196. sklearnex/manifold/__init__.py +19 -0
  197. sklearnex/manifold/t_sne.py +21 -0
  198. sklearnex/manifold/tests/test_tsne.py +26 -0
  199. sklearnex/metrics/__init__.py +23 -0
  200. sklearnex/metrics/pairwise.py +22 -0
  201. sklearnex/metrics/ranking.py +20 -0
  202. sklearnex/metrics/tests/test_metrics.py +39 -0
  203. sklearnex/model_selection/__init__.py +21 -0
  204. sklearnex/model_selection/split.py +22 -0
  205. sklearnex/model_selection/tests/test_model_selection.py +34 -0
  206. sklearnex/neighbors/__init__.py +27 -0
  207. sklearnex/neighbors/_lof.py +231 -0
  208. sklearnex/neighbors/common.py +310 -0
  209. sklearnex/neighbors/knn_classification.py +226 -0
  210. sklearnex/neighbors/knn_regression.py +203 -0
  211. sklearnex/neighbors/knn_unsupervised.py +170 -0
  212. sklearnex/neighbors/tests/test_neighbors.py +80 -0
  213. sklearnex/preview/__init__.py +17 -0
  214. sklearnex/preview/covariance/__init__.py +19 -0
  215. sklearnex/preview/covariance/covariance.py +133 -0
  216. sklearnex/preview/covariance/tests/test_covariance.py +66 -0
  217. sklearnex/preview/decomposition/__init__.py +19 -0
  218. sklearnex/preview/decomposition/incremental_pca.py +228 -0
  219. sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
  220. sklearnex/preview/linear_model/__init__.py +19 -0
  221. sklearnex/preview/linear_model/ridge.py +419 -0
  222. sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
  223. sklearnex/spmd/__init__.py +25 -0
  224. sklearnex/spmd/basic_statistics/__init__.py +20 -0
  225. sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
  226. sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
  227. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  228. sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +307 -0
  229. sklearnex/spmd/cluster/__init__.py +30 -0
  230. sklearnex/spmd/cluster/dbscan.py +50 -0
  231. sklearnex/spmd/cluster/kmeans.py +21 -0
  232. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  233. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
  234. sklearnex/spmd/covariance/__init__.py +20 -0
  235. sklearnex/spmd/covariance/covariance.py +21 -0
  236. sklearnex/spmd/covariance/incremental_covariance.py +37 -0
  237. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  238. sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
  239. sklearnex/spmd/decomposition/__init__.py +20 -0
  240. sklearnex/spmd/decomposition/incremental_pca.py +30 -0
  241. sklearnex/spmd/decomposition/pca.py +21 -0
  242. sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
  243. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  244. sklearnex/spmd/ensemble/__init__.py +19 -0
  245. sklearnex/spmd/ensemble/forest.py +71 -0
  246. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  247. sklearnex/spmd/linear_model/__init__.py +21 -0
  248. sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
  249. sklearnex/spmd/linear_model/linear_model.py +21 -0
  250. sklearnex/spmd/linear_model/logistic_regression.py +21 -0
  251. sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +329 -0
  252. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  253. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +166 -0
  254. sklearnex/spmd/neighbors/__init__.py +19 -0
  255. sklearnex/spmd/neighbors/neighbors.py +25 -0
  256. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  257. sklearnex/svm/__init__.py +29 -0
  258. sklearnex/svm/_common.py +328 -0
  259. sklearnex/svm/nusvc.py +332 -0
  260. sklearnex/svm/nusvr.py +148 -0
  261. sklearnex/svm/svc.py +360 -0
  262. sklearnex/svm/svr.py +149 -0
  263. sklearnex/svm/tests/test_svm.py +93 -0
  264. sklearnex/tests/_utils.py +328 -0
  265. sklearnex/tests/_utils_spmd.py +198 -0
  266. sklearnex/tests/test_common.py +54 -0
  267. sklearnex/tests/test_config.py +43 -0
  268. sklearnex/tests/test_memory_usage.py +291 -0
  269. sklearnex/tests/test_monkeypatch.py +276 -0
  270. sklearnex/tests/test_n_jobs_support.py +103 -0
  271. sklearnex/tests/test_parallel.py +48 -0
  272. sklearnex/tests/test_patching.py +385 -0
  273. sklearnex/tests/test_run_to_run_stability.py +296 -0
  274. sklearnex/utils/__init__.py +19 -0
  275. sklearnex/utils/_array_api.py +82 -0
  276. sklearnex/utils/parallel.py +59 -0
  277. sklearnex/utils/tests/test_finite.py +89 -0
  278. sklearnex/utils/validation.py +17 -0
@@ -0,0 +1,19 @@
1
+ # ==============================================================================
2
+ # Copyright 2014 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ from ._pca import PCA
18
+
19
+ __all__ = ["PCA"]
@@ -0,0 +1,524 @@
1
+ # ==============================================================================
2
+ # Copyright 2014 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import numbers
18
+ from math import sqrt
19
+
20
+ import numpy as np
21
+ from scipy.sparse import issparse
22
+ from sklearn.utils import check_array
23
+ from sklearn.utils.extmath import stable_cumsum
24
+ from sklearn.utils.validation import check_is_fitted
25
+
26
+ import daal4py
27
+
28
+ from .._n_jobs_support import control_n_jobs
29
+ from .._utils import PatchingConditionsChain, getFPType, sklearn_check_version
30
+
31
+ if sklearn_check_version("1.4"):
32
+ from sklearn.utils._array_api import get_namespace
33
+
34
+ if sklearn_check_version("1.3"):
35
+ from sklearn.base import _fit_context
36
+
37
+ if sklearn_check_version("1.1"):
38
+ from sklearn.utils import check_scalar
39
+
40
+ from sklearn.decomposition._pca import PCA as PCA_original
41
+ from sklearn.decomposition._pca import _infer_dimension
42
+
43
+
44
+ @control_n_jobs(decorated_methods=["fit", "transform"])
45
+ class PCA(PCA_original):
46
+ __doc__ = PCA_original.__doc__
47
+
48
+ if sklearn_check_version("1.2"):
49
+ _parameter_constraints: dict = {**PCA_original._parameter_constraints}
50
+
51
if sklearn_check_version("1.1"):

    # Constructor variant defined when sklearn_check_version("1.1") is true.
    # Compared with the fallback below it additionally accepts
    # ``n_oversamples`` and ``power_iteration_normalizer``.
    def __init__(
        self,
        n_components=None,
        copy=True,
        whiten=False,
        svd_solver="auto",
        tol=0.0,
        iterated_power="auto",
        n_oversamples=10,
        power_iteration_normalizer="auto",
        random_state=None,
    ):
        # Every argument is stored unmodified under an attribute of the same
        # name; no validation happens here.
        self.n_components = n_components
        self.copy = copy
        self.whiten = whiten
        self.svd_solver = svd_solver
        self.tol = tol
        self.iterated_power = iterated_power
        self.n_oversamples = n_oversamples
        self.power_iteration_normalizer = power_iteration_normalizer
        self.random_state = random_state

else:

    # Fallback constructor used when sklearn_check_version("1.1") is false:
    # same parameters minus ``n_oversamples`` and
    # ``power_iteration_normalizer``.
    def __init__(
        self,
        n_components=None,
        copy=True,
        whiten=False,
        svd_solver="auto",
        tol=0.0,
        iterated_power="auto",
        random_state=None,
    ):
        # Every argument is stored unmodified under an attribute of the same
        # name; no validation happens here.
        self.n_components = n_components
        self.copy = copy
        self.whiten = whiten
        self.svd_solver = svd_solver
        self.tol = tol
        self.iterated_power = iterated_power
        self.random_state = random_state
94
+
95
+ def _validate_n_components(self, n_components, n_samples, n_features):
96
+ if n_components == "mle":
97
+ if n_samples < n_features:
98
+ raise ValueError(
99
+ "n_components='mle' is only supported " "if n_samples >= n_features"
100
+ )
101
+ elif not 0 <= n_components <= min(n_samples, n_features):
102
+ raise ValueError(
103
+ "n_components=%r must be between 0 and "
104
+ "min(n_samples, n_features)=%r with "
105
+ "svd_solver='full'" % (n_components, min(n_samples, n_features))
106
+ )
107
+ elif n_components >= 1:
108
+ if not isinstance(n_components, numbers.Integral):
109
+ raise ValueError(
110
+ "n_components=%r must be of type int "
111
+ "when greater than or equal to 1, "
112
+ "was of type=%r" % (n_components, type(n_components))
113
+ )
114
+
115
def _fit_full_daal4py(self, X, n_components):
    """Fit PCA on ``X`` with daal4py and populate the fitted attributes.

    A covariance matrix is computed with ``daal4py.covariance`` and handed to
    ``daal4py.pca`` (``correlationDense`` method) to obtain eigenvectors and
    eigenvalues.

    Parameters
    ----------
    X : 2-D array
        Training data; ``X.shape`` is unpacked as ``(n_samples, n_features)``.
    n_components : int, float or "mle"
        Requested number of components. ``"mle"`` is resolved with
        ``_infer_dimension``; a value strictly between 0 and 1 is treated as
        an explained-variance ratio threshold.

    Notes
    -----
    Sets ``mean_``, ``noise_variance_``, ``n_samples_``, ``n_features_in_``
    (or ``n_features_`` when ``sklearn_check_version("1.2")`` is false),
    ``components_``, ``n_components_``, ``explained_variance_``,
    ``explained_variance_ratio_`` and ``singular_values_``. Returns ``None``.
    """
    n_samples, n_features = X.shape
    n_sf_min = min(n_samples, n_features)

    # Number of components requested from daal4py; "mle" and fractional
    # requests are resolved only after the eigenvalues are known.
    if n_components == "mle":
        daal_n_components = n_features
    elif n_components < 1:
        daal_n_components = n_sf_min
    else:
        daal_n_components = n_components

    fpType = getFPType(X)

    covariance_algo = daal4py.covariance(
        fptype=fpType, outputMatrixType="covarianceMatrix"
    )
    covariance_res = covariance_algo.compute(X)

    self.mean_ = covariance_res.mean.ravel()
    covariance = covariance_res.covariance
    # Per-feature variances are the diagonal of the covariance matrix; take
    # them in one vectorized call instead of a Python-level loop.
    variances_ = np.diagonal(covariance)

    pca_alg = daal4py.pca(
        fptype=fpType,
        method="correlationDense",
        resultsToCompute="eigenvalue",
        isDeterministic=True,
        nComponents=daal_n_components,
    )
    pca_res = pca_alg.compute(X, covariance)

    components_ = pca_res.eigenvectors
    # Clip negative eigenvalues to zero.
    explained_variance_ = np.maximum(pca_res.eigenvalues.ravel(), 0)
    tot_var = explained_variance_.sum()
    explained_variance_ratio_ = explained_variance_ / tot_var

    if n_components == "mle":
        n_components = _infer_dimension(explained_variance_, n_samples)
    elif 0 < n_components < 1.0:
        ratio_cumsum = stable_cumsum(explained_variance_ratio_)
        n_components = np.searchsorted(ratio_cumsum, n_components, side="right") + 1

    if n_components < n_sf_min:
        if explained_variance_.shape[0] == n_sf_min:
            # All eigenvalues are available: average the discarded ones.
            self.noise_variance_ = explained_variance_[n_components:].mean()
        else:
            # Only some eigenvalues were computed: derive the residual from
            # the total per-feature variance instead.
            resid_var_ = variances_.sum()
            resid_var_ -= explained_variance_[:n_components].sum()
            self.noise_variance_ = resid_var_ / (n_sf_min - n_components)
    else:
        self.noise_variance_ = 0.0

    if sklearn_check_version("1.2"):
        self.n_samples_, self.n_features_in_ = n_samples, n_features
    else:
        self.n_samples_, self.n_features_ = n_samples, n_features
    self.components_ = components_[:n_components]
    self.n_components_ = n_components
    self.explained_variance_ = explained_variance_[:n_components]
    self.explained_variance_ratio_ = explained_variance_ratio_[:n_components]
    self.singular_values_ = np.sqrt((n_samples - 1) * self.explained_variance_)
176
+
177
def _fit_full(self, X, n_components):
    """Fit with the daal4py full solver and trim results to ``n_components``.

    The decomposition is always run with ``min(X.shape)`` components; the
    fitted attributes are then cut down to the requested (or inferred) number.

    Parameters
    ----------
    X : 2-D array
        Training data; ``X.shape`` is unpacked as ``(n_samples, n_features)``.
    n_components : int, float or "mle"
        Requested number of components.

    Returns
    -------
    tuple
        ``(None, S, V)`` where ``S`` and ``V`` are the singular values and
        components as they were before trimming. The first element is always
        ``None``.
    """
    n_samples, n_features = X.shape
    self._validate_n_components(n_components, n_samples, n_features)

    self._fit_full_daal4py(X, min(X.shape))

    # Keep references to the untrimmed results; they are what gets returned.
    untrimmed_components = self.components_
    untrimmed_singular_values = self.singular_values_

    if n_components == "mle":
        n_components = _infer_dimension(self.explained_variance_, n_samples)
    elif 0 < n_components < 1.0:
        cumulative_ratio = stable_cumsum(self.explained_variance_ratio_)
        n_components = (
            np.searchsorted(cumulative_ratio, n_components, side="right") + 1
        )

    # Noise variance is the mean of the discarded eigenvalues, if there are any.
    self.noise_variance_ = (
        self.explained_variance_[n_components:].mean()
        if n_components < min(n_features, n_samples)
        else 0.0
    )

    if sklearn_check_version("1.2"):
        self.n_samples_, self.n_features_in_ = n_samples, n_features
    else:
        self.n_samples_, self.n_features_ = n_samples, n_features

    self.components_ = untrimmed_components[:n_components]
    self.n_components_ = n_components
    self.explained_variance_ = self.explained_variance_[:n_components]
    self.explained_variance_ratio_ = self.explained_variance_ratio_[:n_components]
    self.singular_values_ = untrimmed_singular_values[:n_components]

    return None, untrimmed_singular_values, untrimmed_components
209
+
210
def _fit(self, X):
    """Validate ``X``, pick the SVD solver and dispatch the actual fitting.

    The daal4py-backed ``_fit_full`` is used only when the resolved solver is
    "full" and ``n_features / n_samples < 2``; otherwise the stock
    scikit-learn routines are called.

    Returns whatever the selected fitting routine returns.
    """
    if sklearn_check_version("1.4"):
        xp, is_array_api_compliant = get_namespace(X)

        # On this path sparse input is accepted only with the "arpack" solver.
        if issparse(X) and self.svd_solver != "arpack":
            raise TypeError(
                'PCA only support sparse inputs with the "arpack" solver, while '
                f'"{self.svd_solver}" was passed. See TruncatedSVD for a possible'
                " alternative."
            )
        # Raise an error for non-Numpy input and arpack solver.
        if self.svd_solver == "arpack" and is_array_api_compliant:
            raise ValueError(
                "PCA with svd_solver='arpack' is not supported for Array API inputs."
            )

        X = self._validate_data(
            X,
            dtype=[xp.float64, xp.float32],
            accept_sparse=("csr", "csc"),
            ensure_2d=True,
            copy=self.copy,
        )

    else:
        # On this path sparse input is rejected outright.
        if issparse(X):
            raise TypeError(
                "PCA does not support sparse input. See "
                "TruncatedSVD for a possible alternative."
            )
        X = self._validate_data(
            X, dtype=[np.float64, np.float32], ensure_2d=True, copy=False
        )

    # Default number of components: min(X.shape), or one less for "arpack".
    if self.n_components is None:
        if self.svd_solver != "arpack":
            n_components = min(X.shape)
        else:
            n_components = min(X.shape) - 1
    else:
        n_components = self.n_components

    self._fit_svd_solver = self.svd_solver
    # The daal4py path is only taken when n_features / n_samples < 2.
    shape_good_for_daal = X.shape[1] / X.shape[0] < 2

    if self._fit_svd_solver == "auto":
        if sklearn_check_version("1.1"):
            # Small problem or n_components == 'mle', just call full PCA
            if max(X.shape) <= 500 or n_components == "mle":
                self._fit_svd_solver = "full"
            elif 1 <= n_components < 0.8 * min(X.shape):
                self._fit_svd_solver = "randomized"
            # This is also the case of n_components in (0,1)
            else:
                self._fit_svd_solver = "full"
        else:
            if n_components == "mle":
                self._fit_svd_solver = "full"
            else:
                n, p, k = X.shape[0], X.shape[1], n_components
                # These coefficients are result of training of Logistic Regression
                # (max_iter=10000, solver="liblinear", fit_intercept=False)
                # on different datasets and number of components.
                # X is a dataset with npk, np^2, and n^2 columns.
                # And y is speedup of patched scikit-learn's
                # full PCA against stock scikit-learn's randomized PCA.
                regression_coefs = np.array(
                    [
                        [9.779873e-11, n * p * k],
                        [-1.122062e-11, n * p * p],
                        [1.127905e-09, n**2],
                    ]
                )

                # Column 0 holds the coefficients, column 1 the features; a
                # non-positive dot product selects the randomized solver.
                if (
                    n_components >= 1
                    and np.dot(regression_coefs[:, 0], regression_coefs[:, 1]) <= 0
                ):
                    self._fit_svd_solver = "randomized"
                else:
                    self._fit_svd_solver = "full"

    # Re-validate with copy=self.copy exactly in the cases where the daal4py
    # ``_fit_full`` will not be used.
    # NOTE(review): when sklearn_check_version("1.4") is true, X was already
    # validated with copy=self.copy above, so this may copy twice - confirm.
    if not shape_good_for_daal or self._fit_svd_solver != "full":
        if sklearn_check_version("1.4"):
            X = self._validate_data(X, copy=self.copy, accept_sparse=("csr", "csc"))
        else:
            X = self._validate_data(X, copy=self.copy)

    _patching_status = PatchingConditionsChain("sklearn.decomposition.PCA.fit")
    _dal_ready = _patching_status.and_conditions(
        [
            (
                self._fit_svd_solver == "full",
                f"'{self._fit_svd_solver}' SVD solver is not supported. "
                "Only 'full' solver is supported.",
            )
        ]
    )

    if _dal_ready:
        # Solver is "full": the shape condition decides between the daal4py
        # and the stock implementation of the full solver.
        _dal_ready = _patching_status.and_conditions(
            [
                (
                    shape_good_for_daal,
                    "The shape of X does not satisfy oneDAL requirements: "
                    "number of features / number of samples >= 2",
                ),
            ]
        )
        if _dal_ready:
            result = self._fit_full(X, n_components)
        else:
            result = PCA_original._fit_full(self, X, n_components)
    elif self._fit_svd_solver in ["arpack", "randomized"]:
        result = self._fit_truncated(X, n_components, self._fit_svd_solver)
    else:
        raise ValueError(
            "Unrecognized svd_solver='{0}'" "".format(self._fit_svd_solver)
        )

    _patching_status.write_log()
    return result
332
+
333
def _transform_daal4py(self, X, whiten=False, scale_eigenvalues=True, check_X=True):
    """Project ``X`` onto the fitted components with ``daal4py.pca_transform``.

    Parameters
    ----------
    X : array-like
        Data to transform; converted with ``check_array`` to float64/float32.
    whiten : bool, default=False
        When true, the explained variances are passed as eigenvalues.
    scale_eigenvalues : bool, default=True
        When true, the eigenvalues passed to daal4py carry a factor of
        ``n_samples_ - 1``.
    check_X : bool, default=True
        Forwarded to ``check_array`` as ``force_all_finite``.

    Returns
    -------
    The ``transformedData`` attribute of the daal4py result.

    Raises
    ------
    ValueError
        If the number of columns of ``X`` differs from the fitted feature count.
    """
    check_is_fitted(self)

    if sklearn_check_version("1.0"):
        self._check_feature_names(X, reset=False)
    X = check_array(X, dtype=[np.float64, np.float32], force_all_finite=check_X)
    fp_type = getFPType(X)

    # Optional inputs for pca_transform, keyed by the names it expects.
    transform_inputs = {}
    if self.mean_ is not None:
        transform_inputs["mean"] = self.mean_.reshape((1, -1))

    if whiten and scale_eigenvalues:
        transform_inputs["eigenvalue"] = (
            self.n_samples_ - 1
        ) * self.explained_variance_.reshape((1, -1))
    elif whiten:
        transform_inputs["eigenvalue"] = self.explained_variance_.reshape((1, -1))
    elif scale_eigenvalues:
        transform_inputs["eigenvalue"] = np.full(
            (1, self.explained_variance_.shape[0]),
            self.n_samples_ - 1.0,
            dtype=X.dtype,
        )

    # The fitted feature count lives under a version-dependent attribute name.
    expected_n_features = (
        self.n_features_in_ if sklearn_check_version("1.2") else self.n_features_
    )
    if X.shape[1] != expected_n_features:
        raise ValueError(
            (
                f"X has {X.shape[1]} features, "
                f"but PCA is expecting {expected_n_features} features as input"
            )
        )

    transformer = daal4py.pca_transform(fptype=fp_type)
    return transformer.compute(X, self.components_, transform_inputs).transformedData
375
+
376
if sklearn_check_version("1.3"):

    # Variant defined when sklearn_check_version("1.3") is true. It performs
    # no explicit parameter validation of its own.
    # NOTE(review): presumably the _fit_context decorator takes care of
    # validation here - confirm against sklearn.base._fit_context.
    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None):
        """Fit the model with X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : Ignored
            Ignored.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        self._fit(X)
        return self

else:

    # Variant defined when sklearn_check_version("1.3") is false; parameters
    # are validated explicitly before fitting.
    def fit(self, X, y=None):
        """Fit the model with X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : Ignored
            Ignored.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        if sklearn_check_version("1.2"):
            self._validate_params()
        elif sklearn_check_version("1.1"):
            # Only n_oversamples is checked on this path: it must be an
            # integer no smaller than 1.
            check_scalar(
                self.n_oversamples,
                "n_oversamples",
                min_val=1,
                target_type=numbers.Integral,
            )

        self._fit(X)
        return self
430
+
431
def transform(self, X):
    """
    Apply dimensionality reduction to X.

    X is projected on the first principal components previously extracted
    from a training set.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        New data, where `n_samples` is the number of samples
        and `n_features` is the number of features.

    Returns
    -------
    X_new : array-like of shape (n_samples, n_components)
        Projection of X in the first principal components, where `n_samples`
        is the number of samples and `n_components` is the number of the components.

    Raises
    ------
    NotFittedError
        If the estimator has not been fitted yet.
    """
    # Check the fitted state before touching ``n_components_`` so that an
    # unfitted estimator reports NotFittedError rather than a bare
    # AttributeError from the attribute lookup below.
    check_is_fitted(self)

    _patching_status = PatchingConditionsChain("sklearn.decomposition.PCA.transform")
    _dal_ready = _patching_status.and_conditions(
        [
            (self.n_components_ > 0, "Number of components <= 0."),
            (not issparse(X), "oneDAL PCA does not support sparse input"),
        ]
    )

    _patching_status.write_log()
    if _dal_ready:
        return self._transform_daal4py(
            X, whiten=self.whiten, check_X=True, scale_eigenvalues=False
        )
    # Fall back to the stock implementation when a condition above failed.
    return PCA_original.transform(self, X)
464
+
465
def fit_transform(self, X, y=None):
    """
    Fit the model with X and apply the dimensionality reduction on X.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training data, where `n_samples` is the number of samples
        and `n_features` is the number of features.

    y : Ignored
        Ignored.

    Returns
    -------
    X_new : ndarray of shape (n_samples, n_components)
        Transformed values.

    Notes
    -----
    This method returns a Fortran-ordered array. To convert it to a
    C-ordered array, use 'np.ascontiguousarray'.
    """

    if sklearn_check_version("1.2"):
        self._validate_params()
    elif sklearn_check_version("1.1"):
        # Same n_oversamples check that ``fit`` performs on this version
        # range, so both entry points validate identically.
        check_scalar(
            self.n_oversamples,
            "n_oversamples",
            min_val=1,
            target_type=numbers.Integral,
        )

    U, S, Vt = self._fit(X)

    _patching_status = PatchingConditionsChain(
        "sklearn.decomposition.PCA.fit_transform"
    )
    # ``U is None`` signals that the daal4py-backed fit was used.
    _dal_ready = _patching_status.and_conditions(
        [(U is None, "Stock fitting was used.")]
    )
    if _dal_ready:
        _dal_ready = _patching_status.and_conditions(
            [
                (self.n_components_ > 0, "Number of components <= 0."),
                (not issparse(X), "oneDAL PCA does not support sparse input"),
            ]
        )
        if _dal_ready:
            result = self._transform_daal4py(
                X, whiten=self.whiten, check_X=False, scale_eigenvalues=False
            )
        else:
            # ``X`` is still the caller's raw input here and may be a list or
            # DataFrame without ``.dtype``; take the dtype from the fitted
            # components instead.
            result = np.empty((self.n_samples_, 0), dtype=self.components_.dtype)
    else:
        U = U[:, : self.n_components_]

        if self.whiten:
            # Use the fitted sample count rather than ``X.shape`` because the
            # raw input is not guaranteed to expose ``.shape``.
            U *= sqrt(self.n_samples_ - 1)
        else:
            U *= S[: self.n_components_]

        result = U

    _patching_status.write_log()
    return result