scikit-learn-intelex 2025.1.0__py39-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (280) hide show
  1. daal4py/__init__.py +73 -0
  2. daal4py/__main__.py +58 -0
  3. daal4py/_daal4py.cpython-39-x86_64-linux-gnu.so +0 -0
  4. daal4py/doc/third-party-programs.txt +424 -0
  5. daal4py/mb/__init__.py +19 -0
  6. daal4py/mb/model_builders.py +377 -0
  7. daal4py/mpi_transceiver.cpython-39-x86_64-linux-gnu.so +0 -0
  8. daal4py/sklearn/__init__.py +40 -0
  9. daal4py/sklearn/_n_jobs_support.py +248 -0
  10. daal4py/sklearn/_utils.py +245 -0
  11. daal4py/sklearn/cluster/__init__.py +20 -0
  12. daal4py/sklearn/cluster/dbscan.py +165 -0
  13. daal4py/sklearn/cluster/k_means.py +597 -0
  14. daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
  15. daal4py/sklearn/decomposition/__init__.py +19 -0
  16. daal4py/sklearn/decomposition/_pca.py +524 -0
  17. daal4py/sklearn/ensemble/AdaBoostClassifier.py +196 -0
  18. daal4py/sklearn/ensemble/GBTDAAL.py +337 -0
  19. daal4py/sklearn/ensemble/__init__.py +27 -0
  20. daal4py/sklearn/ensemble/_forest.py +1397 -0
  21. daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
  22. daal4py/sklearn/linear_model/__init__.py +29 -0
  23. daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
  24. daal4py/sklearn/linear_model/_linear.py +272 -0
  25. daal4py/sklearn/linear_model/_ridge.py +325 -0
  26. daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
  27. daal4py/sklearn/linear_model/linear.py +17 -0
  28. daal4py/sklearn/linear_model/logistic_loss.py +195 -0
  29. daal4py/sklearn/linear_model/logistic_path.py +1026 -0
  30. daal4py/sklearn/linear_model/ridge.py +17 -0
  31. daal4py/sklearn/linear_model/tests/test_linear.py +208 -0
  32. daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
  33. daal4py/sklearn/manifold/__init__.py +19 -0
  34. daal4py/sklearn/manifold/_t_sne.py +405 -0
  35. daal4py/sklearn/metrics/__init__.py +20 -0
  36. daal4py/sklearn/metrics/_pairwise.py +236 -0
  37. daal4py/sklearn/metrics/_ranking.py +210 -0
  38. daal4py/sklearn/model_selection/__init__.py +19 -0
  39. daal4py/sklearn/model_selection/_split.py +309 -0
  40. daal4py/sklearn/model_selection/tests/test_split.py +56 -0
  41. daal4py/sklearn/monkeypatch/__init__.py +0 -0
  42. daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
  43. daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
  44. daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
  45. daal4py/sklearn/monkeypatch/tests/test_patching.py +90 -0
  46. daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +117 -0
  47. daal4py/sklearn/neighbors/__init__.py +21 -0
  48. daal4py/sklearn/neighbors/_base.py +503 -0
  49. daal4py/sklearn/neighbors/_classification.py +139 -0
  50. daal4py/sklearn/neighbors/_regression.py +74 -0
  51. daal4py/sklearn/neighbors/_unsupervised.py +55 -0
  52. daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
  53. daal4py/sklearn/svm/__init__.py +19 -0
  54. daal4py/sklearn/svm/svm.py +734 -0
  55. daal4py/sklearn/utils/__init__.py +21 -0
  56. daal4py/sklearn/utils/base.py +75 -0
  57. daal4py/sklearn/utils/tests/test_utils.py +51 -0
  58. daal4py/sklearn/utils/validation.py +693 -0
  59. onedal/__init__.py +83 -0
  60. onedal/_config.py +54 -0
  61. onedal/_device_offload.py +222 -0
  62. onedal/_onedal_py_dpc.cpython-39-x86_64-linux-gnu.so +0 -0
  63. onedal/_onedal_py_host.cpython-39-x86_64-linux-gnu.so +0 -0
  64. onedal/_onedal_py_spmd_dpc.cpython-39-x86_64-linux-gnu.so +0 -0
  65. onedal/basic_statistics/__init__.py +20 -0
  66. onedal/basic_statistics/basic_statistics.py +107 -0
  67. onedal/basic_statistics/incremental_basic_statistics.py +160 -0
  68. onedal/basic_statistics/tests/test_basic_statistics.py +298 -0
  69. onedal/basic_statistics/tests/test_incremental_basic_statistics.py +196 -0
  70. onedal/cluster/__init__.py +27 -0
  71. onedal/cluster/dbscan.py +110 -0
  72. onedal/cluster/kmeans.py +564 -0
  73. onedal/cluster/kmeans_init.py +115 -0
  74. onedal/cluster/tests/test_dbscan.py +125 -0
  75. onedal/cluster/tests/test_kmeans.py +88 -0
  76. onedal/cluster/tests/test_kmeans_init.py +93 -0
  77. onedal/common/_base.py +38 -0
  78. onedal/common/_estimator_checks.py +47 -0
  79. onedal/common/_mixin.py +62 -0
  80. onedal/common/_policy.py +59 -0
  81. onedal/common/_spmd_policy.py +30 -0
  82. onedal/common/hyperparameters.py +125 -0
  83. onedal/common/tests/test_policy.py +76 -0
  84. onedal/covariance/__init__.py +20 -0
  85. onedal/covariance/covariance.py +125 -0
  86. onedal/covariance/incremental_covariance.py +146 -0
  87. onedal/covariance/tests/test_covariance.py +50 -0
  88. onedal/covariance/tests/test_incremental_covariance.py +122 -0
  89. onedal/datatypes/__init__.py +19 -0
  90. onedal/datatypes/_data_conversion.py +154 -0
  91. onedal/datatypes/tests/common.py +126 -0
  92. onedal/datatypes/tests/test_data.py +414 -0
  93. onedal/decomposition/__init__.py +20 -0
  94. onedal/decomposition/incremental_pca.py +204 -0
  95. onedal/decomposition/pca.py +186 -0
  96. onedal/decomposition/tests/test_incremental_pca.py +198 -0
  97. onedal/ensemble/__init__.py +29 -0
  98. onedal/ensemble/forest.py +727 -0
  99. onedal/ensemble/tests/test_random_forest.py +97 -0
  100. onedal/linear_model/__init__.py +27 -0
  101. onedal/linear_model/incremental_linear_model.py +258 -0
  102. onedal/linear_model/linear_model.py +329 -0
  103. onedal/linear_model/logistic_regression.py +249 -0
  104. onedal/linear_model/tests/test_incremental_linear_regression.py +168 -0
  105. onedal/linear_model/tests/test_incremental_ridge_regression.py +107 -0
  106. onedal/linear_model/tests/test_linear_regression.py +250 -0
  107. onedal/linear_model/tests/test_logistic_regression.py +95 -0
  108. onedal/linear_model/tests/test_ridge.py +95 -0
  109. onedal/neighbors/__init__.py +19 -0
  110. onedal/neighbors/neighbors.py +767 -0
  111. onedal/neighbors/tests/test_knn_classification.py +49 -0
  112. onedal/primitives/__init__.py +27 -0
  113. onedal/primitives/get_tree.py +25 -0
  114. onedal/primitives/kernel_functions.py +153 -0
  115. onedal/primitives/tests/test_kernel_functions.py +159 -0
  116. onedal/spmd/__init__.py +25 -0
  117. onedal/spmd/_base.py +30 -0
  118. onedal/spmd/basic_statistics/__init__.py +20 -0
  119. onedal/spmd/basic_statistics/basic_statistics.py +30 -0
  120. onedal/spmd/basic_statistics/incremental_basic_statistics.py +69 -0
  121. onedal/spmd/cluster/__init__.py +28 -0
  122. onedal/spmd/cluster/dbscan.py +23 -0
  123. onedal/spmd/cluster/kmeans.py +56 -0
  124. onedal/spmd/covariance/__init__.py +20 -0
  125. onedal/spmd/covariance/covariance.py +26 -0
  126. onedal/spmd/covariance/incremental_covariance.py +82 -0
  127. onedal/spmd/decomposition/__init__.py +20 -0
  128. onedal/spmd/decomposition/incremental_pca.py +117 -0
  129. onedal/spmd/decomposition/pca.py +26 -0
  130. onedal/spmd/ensemble/__init__.py +19 -0
  131. onedal/spmd/ensemble/forest.py +28 -0
  132. onedal/spmd/linear_model/__init__.py +21 -0
  133. onedal/spmd/linear_model/incremental_linear_model.py +97 -0
  134. onedal/spmd/linear_model/linear_model.py +30 -0
  135. onedal/spmd/linear_model/logistic_regression.py +38 -0
  136. onedal/spmd/neighbors/__init__.py +19 -0
  137. onedal/spmd/neighbors/neighbors.py +75 -0
  138. onedal/svm/__init__.py +19 -0
  139. onedal/svm/svm.py +556 -0
  140. onedal/svm/tests/test_csr_svm.py +351 -0
  141. onedal/svm/tests/test_nusvc.py +204 -0
  142. onedal/svm/tests/test_nusvr.py +210 -0
  143. onedal/svm/tests/test_svc.py +176 -0
  144. onedal/svm/tests/test_svr.py +243 -0
  145. onedal/tests/test_common.py +57 -0
  146. onedal/tests/utils/_dataframes_support.py +162 -0
  147. onedal/tests/utils/_device_selection.py +102 -0
  148. onedal/utils/__init__.py +49 -0
  149. onedal/utils/_array_api.py +81 -0
  150. onedal/utils/_dpep_helpers.py +56 -0
  151. onedal/utils/validation.py +440 -0
  152. scikit_learn_intelex-2025.1.0.dist-info/LICENSE.txt +202 -0
  153. scikit_learn_intelex-2025.1.0.dist-info/METADATA +231 -0
  154. scikit_learn_intelex-2025.1.0.dist-info/RECORD +280 -0
  155. scikit_learn_intelex-2025.1.0.dist-info/WHEEL +5 -0
  156. scikit_learn_intelex-2025.1.0.dist-info/top_level.txt +3 -0
  157. sklearnex/__init__.py +66 -0
  158. sklearnex/__main__.py +58 -0
  159. sklearnex/_config.py +116 -0
  160. sklearnex/_device_offload.py +126 -0
  161. sklearnex/_utils.py +132 -0
  162. sklearnex/basic_statistics/__init__.py +20 -0
  163. sklearnex/basic_statistics/basic_statistics.py +230 -0
  164. sklearnex/basic_statistics/incremental_basic_statistics.py +345 -0
  165. sklearnex/basic_statistics/tests/test_basic_statistics.py +270 -0
  166. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +404 -0
  167. sklearnex/cluster/__init__.py +20 -0
  168. sklearnex/cluster/dbscan.py +197 -0
  169. sklearnex/cluster/k_means.py +395 -0
  170. sklearnex/cluster/tests/test_dbscan.py +38 -0
  171. sklearnex/cluster/tests/test_kmeans.py +159 -0
  172. sklearnex/conftest.py +82 -0
  173. sklearnex/covariance/__init__.py +19 -0
  174. sklearnex/covariance/incremental_covariance.py +398 -0
  175. sklearnex/covariance/tests/test_incremental_covariance.py +237 -0
  176. sklearnex/decomposition/__init__.py +19 -0
  177. sklearnex/decomposition/pca.py +425 -0
  178. sklearnex/decomposition/tests/test_pca.py +58 -0
  179. sklearnex/dispatcher.py +543 -0
  180. sklearnex/doc/third-party-programs.txt +424 -0
  181. sklearnex/ensemble/__init__.py +29 -0
  182. sklearnex/ensemble/_forest.py +2029 -0
  183. sklearnex/ensemble/tests/test_forest.py +135 -0
  184. sklearnex/glob/__main__.py +72 -0
  185. sklearnex/glob/dispatcher.py +101 -0
  186. sklearnex/linear_model/__init__.py +32 -0
  187. sklearnex/linear_model/coordinate_descent.py +30 -0
  188. sklearnex/linear_model/incremental_linear.py +482 -0
  189. sklearnex/linear_model/incremental_ridge.py +425 -0
  190. sklearnex/linear_model/linear.py +341 -0
  191. sklearnex/linear_model/logistic_regression.py +413 -0
  192. sklearnex/linear_model/ridge.py +24 -0
  193. sklearnex/linear_model/tests/test_incremental_linear.py +207 -0
  194. sklearnex/linear_model/tests/test_incremental_ridge.py +153 -0
  195. sklearnex/linear_model/tests/test_linear.py +167 -0
  196. sklearnex/linear_model/tests/test_logreg.py +134 -0
  197. sklearnex/manifold/__init__.py +19 -0
  198. sklearnex/manifold/t_sne.py +21 -0
  199. sklearnex/manifold/tests/test_tsne.py +26 -0
  200. sklearnex/metrics/__init__.py +23 -0
  201. sklearnex/metrics/pairwise.py +22 -0
  202. sklearnex/metrics/ranking.py +20 -0
  203. sklearnex/metrics/tests/test_metrics.py +39 -0
  204. sklearnex/model_selection/__init__.py +21 -0
  205. sklearnex/model_selection/split.py +22 -0
  206. sklearnex/model_selection/tests/test_model_selection.py +34 -0
  207. sklearnex/neighbors/__init__.py +27 -0
  208. sklearnex/neighbors/_lof.py +236 -0
  209. sklearnex/neighbors/common.py +310 -0
  210. sklearnex/neighbors/knn_classification.py +231 -0
  211. sklearnex/neighbors/knn_regression.py +207 -0
  212. sklearnex/neighbors/knn_unsupervised.py +178 -0
  213. sklearnex/neighbors/tests/test_neighbors.py +82 -0
  214. sklearnex/preview/__init__.py +17 -0
  215. sklearnex/preview/covariance/__init__.py +19 -0
  216. sklearnex/preview/covariance/covariance.py +138 -0
  217. sklearnex/preview/covariance/tests/test_covariance.py +66 -0
  218. sklearnex/preview/decomposition/__init__.py +19 -0
  219. sklearnex/preview/decomposition/incremental_pca.py +233 -0
  220. sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
  221. sklearnex/preview/linear_model/__init__.py +19 -0
  222. sklearnex/preview/linear_model/ridge.py +424 -0
  223. sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
  224. sklearnex/spmd/__init__.py +25 -0
  225. sklearnex/spmd/basic_statistics/__init__.py +20 -0
  226. sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
  227. sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
  228. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  229. sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +307 -0
  230. sklearnex/spmd/cluster/__init__.py +30 -0
  231. sklearnex/spmd/cluster/dbscan.py +50 -0
  232. sklearnex/spmd/cluster/kmeans.py +21 -0
  233. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  234. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
  235. sklearnex/spmd/covariance/__init__.py +20 -0
  236. sklearnex/spmd/covariance/covariance.py +21 -0
  237. sklearnex/spmd/covariance/incremental_covariance.py +37 -0
  238. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  239. sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
  240. sklearnex/spmd/decomposition/__init__.py +20 -0
  241. sklearnex/spmd/decomposition/incremental_pca.py +30 -0
  242. sklearnex/spmd/decomposition/pca.py +21 -0
  243. sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
  244. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  245. sklearnex/spmd/ensemble/__init__.py +19 -0
  246. sklearnex/spmd/ensemble/forest.py +71 -0
  247. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  248. sklearnex/spmd/linear_model/__init__.py +21 -0
  249. sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
  250. sklearnex/spmd/linear_model/linear_model.py +21 -0
  251. sklearnex/spmd/linear_model/logistic_regression.py +21 -0
  252. sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +329 -0
  253. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  254. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +162 -0
  255. sklearnex/spmd/neighbors/__init__.py +19 -0
  256. sklearnex/spmd/neighbors/neighbors.py +25 -0
  257. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  258. sklearnex/svm/__init__.py +29 -0
  259. sklearnex/svm/_common.py +339 -0
  260. sklearnex/svm/nusvc.py +371 -0
  261. sklearnex/svm/nusvr.py +170 -0
  262. sklearnex/svm/svc.py +399 -0
  263. sklearnex/svm/svr.py +167 -0
  264. sklearnex/svm/tests/test_svm.py +93 -0
  265. sklearnex/tests/test_common.py +390 -0
  266. sklearnex/tests/test_config.py +123 -0
  267. sklearnex/tests/test_memory_usage.py +379 -0
  268. sklearnex/tests/test_monkeypatch.py +276 -0
  269. sklearnex/tests/test_n_jobs_support.py +108 -0
  270. sklearnex/tests/test_parallel.py +48 -0
  271. sklearnex/tests/test_patching.py +385 -0
  272. sklearnex/tests/test_run_to_run_stability.py +321 -0
  273. sklearnex/tests/utils/__init__.py +44 -0
  274. sklearnex/tests/utils/base.py +371 -0
  275. sklearnex/tests/utils/spmd.py +198 -0
  276. sklearnex/utils/__init__.py +19 -0
  277. sklearnex/utils/_array_api.py +82 -0
  278. sklearnex/utils/parallel.py +59 -0
  279. sklearnex/utils/tests/test_finite.py +89 -0
  280. sklearnex/utils/validation.py +17 -0
@@ -0,0 +1,266 @@
1
+ # ===============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ===============================================================================
16
+
17
+ import numpy as np
18
+ import pytest
19
+ from numpy.testing import assert_allclose
20
+
21
+ from daal4py.sklearn._utils import daal_check_version
22
+ from onedal.tests.utils._dataframes_support import (
23
+ _as_numpy,
24
+ _convert_to_dataframe,
25
+ get_dataframes_and_queues,
26
+ )
27
+ from sklearnex.preview.decomposition import IncrementalPCA
28
+
29
+
30
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
def test_sklearnex_import(dataframe, queue):
    """Smoke-test that the preview IncrementalPCA is the sklearnex class.

    Fits a two-component model on six 2-D samples and checks the module
    name, the presence of the ``_onedal_estimator`` attribute and the
    resulting singular values.
    """
    samples = _convert_to_dataframe(
        [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]],
        sycl_queue=queue,
        target_df=dataframe,
    )
    incpca = IncrementalPCA(n_components=2)
    fitted = incpca.fit(samples)

    assert "sklearnex" in incpca.__module__
    assert hasattr(incpca, "_onedal_estimator")

    singular_values = _as_numpy(fitted.singular_values_)
    assert_allclose(singular_values, [6.30061232, 0.54980396])
39
+
40
+
41
def check_pca_on_gold_data(incpca, dtype, whiten, transformed_data):
    """Validate a fitted IncrementalPCA against hard-coded reference values.

    The references below describe a fit on six samples with two features
    and two components (the data set built by the ``*_on_gold_data`` tests
    in this file).

    Parameters
    ----------
    incpca : fitted estimator
        Must expose ``n_samples_seen_``, ``n_features_in_``,
        ``n_components_``, ``components_``, ``singular_values_``, ``mean_``,
        ``var_``, ``explained_variance_``, ``explained_variance_ratio_`` and
        ``noise_variance_``.
    dtype : numpy dtype
        Data type the input was cast to; ``np.float32`` loosens the tolerance.
    whiten : bool
        Whether the estimator was built with ``whiten=True``; selects the
        reference for the transformed data.
    transformed_data : array-like
        Projection of the gold data produced by the estimator; converted
        with ``_as_numpy`` before comparison.

    Raises
    ------
    AssertionError
        If any fitted attribute or the transformed data deviates from the
        reference by more than the tolerance.
    """
    expected_n_samples_seen_ = 6
    expected_n_features_in_ = 2
    expected_n_components_ = 2
    expected_components_ = np.array([[0.83849224, 0.54491354], [-0.54491354, 0.83849224]])
    expected_singular_values_ = np.array([6.30061232, 0.54980396])
    expected_mean_ = np.array([0, 0])
    expected_var_ = np.array([5.6, 2.4])
    expected_explained_variance_ = np.array([7.93954312, 0.06045688])
    expected_explained_variance_ratio_ = np.array([0.99244289, 0.00755711])
    expected_noise_variance_ = 0.0
    expected_transformed_data = (
        np.array(
            [
                [-0.49096647, -1.19399271],
                [-0.78854479, 1.02218579],
                [-1.27951125, -0.17180692],
                [0.49096647, 1.19399271],
                [0.78854479, -1.02218579],
                [1.27951125, 0.17180692],
            ]
        )
        if whiten
        else np.array(
            [
                [-1.38340578, -0.2935787],
                [-2.22189802, 0.25133484],
                [-3.6053038, -0.04224385],
                [1.38340578, 0.2935787],
                [2.22189802, -0.25133484],
                [3.6053038, 0.04224385],
            ]
        )
    )

    tol = 1e-7
    if dtype == np.float32:
        tol = 7e-6 if whiten else 1e-6

    assert incpca.n_samples_seen_ == expected_n_samples_seen_
    assert incpca.n_features_in_ == expected_n_features_in_
    assert incpca.n_components_ == expected_n_components_

    assert_allclose(incpca.singular_values_, expected_singular_values_, atol=tol)
    assert_allclose(incpca.mean_, expected_mean_, atol=tol)
    assert_allclose(incpca.var_, expected_var_, atol=tol)
    assert_allclose(incpca.explained_variance_, expected_explained_variance_, atol=tol)
    assert_allclose(
        incpca.explained_variance_ratio_, expected_explained_variance_ratio_, atol=tol
    )
    assert np.abs(incpca.noise_variance_ - expected_noise_variance_) < tol

    transformed = _as_numpy(transformed_data)
    # NOTE(review): presumably true for oneDAL >= 2024.5, where component
    # signs match the reference exactly - confirm against daal_check_version.
    if daal_check_version((2024, "P", 500)):
        assert_allclose(incpca.components_, expected_components_, atol=tol)
        assert_allclose(transformed, expected_transformed_data, atol=tol)
    else:
        # Component signs are not pinned here, so each component is compared
        # up to sign.
        for i in range(incpca.n_components_):
            dot_product = np.dot(incpca.components_[i], expected_components_[i])
            assert np.abs(np.abs(dot_product) - 1.0) < tol

            # Flipping component i negates COLUMN i of the projection (the
            # coordinate of every sample along that component), so the sign
            # correction must be applied column-wise, not to sample row i.
            sign = -1.0 if dot_product < 0 else 1.0
            assert_allclose(
                sign * transformed[:, i],
                expected_transformed_data[:, i],
                atol=tol,
            )
112
+
113
+
114
def check_pca(incpca, dtype, whiten, data, transformed_data):
    """Check a fitted IncrementalPCA against a NumPy eigendecomposition.

    The reference is built from the eigenvalues and eigenvectors of the
    sample covariance of ``data`` (computed with ``ddof = 1``). Components
    are compared up to sign; singular values, mean, variance, explained
    variance (and its ratio) and the transformed data are compared with an
    absolute tolerance of ``3e-3`` for float32 and ``2e-6`` otherwise.

    Parameters
    ----------
    incpca : fitted estimator under test.
    dtype : numpy dtype used for the input; selects the tolerance.
    whiten : bool, whether the reference projection is divided by the
        square root of the explained variance.
    data : numpy array the estimator was fitted on.
    transformed_data : estimator output for ``data``; converted with
        ``_as_numpy`` before comparison.
    """
    tol = 2e-6
    if dtype == np.float32:
        tol = 3e-3

    k = incpca.n_components_

    rows_expected = data.shape[0]
    cols_expected = data.shape[1]
    seen = incpca.n_samples_seen_
    features_in = incpca.n_features_in_
    assert seen == rows_expected
    assert features_in == cols_expected

    fitted_components = incpca.components_
    fitted_singular_values = incpca.singular_values_

    # Reference decomposition of the sample covariance matrix.
    data_mean = np.mean(data, axis=0)
    centered = data - data_mean
    dof = seen - 1
    eigenvalues, eigenvectors = np.linalg.eig(centered.T @ centered / dof)
    eigenvalues = np.nan_to_num(eigenvalues)
    eigenvalues[eigenvalues < 0] = 0
    descending = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[descending]
    eigenvectors = eigenvectors[:, descending]
    ref_singular_values = np.sqrt(eigenvalues * dof)[:k]
    ref_components = eigenvectors.T[:k]

    assert_allclose(fitted_singular_values, ref_singular_values, atol=tol)
    for idx in range(k):
        fitted_vec = fitted_components[idx]
        # Each component must have unit length ...
        assert np.abs(np.dot(fitted_vec, fitted_vec) - 1.0) < tol
        # ... and be parallel to the reference eigenvector (sign ignored).
        alignment = np.abs(np.dot(fitted_vec, ref_components[idx]))
        assert np.abs(alignment - 1.0) < tol

    assert_allclose(incpca.mean_, data_mean, atol=tol)

    ref_var = np.var(_as_numpy(data), ddof=1, axis=0)
    assert_allclose(incpca.var_, ref_var, atol=tol)

    ref_explained_variance = eigenvalues[:k]
    assert_allclose(incpca.explained_variance_, ref_explained_variance, atol=tol)

    ref_explained_variance_ratio = ref_explained_variance / np.sum(eigenvalues)
    assert_allclose(
        incpca.explained_variance_ratio_, ref_explained_variance_ratio, atol=tol
    )

    if len(eigenvalues) > k:
        ref_noise_variance = np.mean(eigenvalues[k:])
    else:
        ref_noise_variance = 0.0
    # TODO Fix noise variance computation (It is necessary to update C++ side)
    # assert np.abs(incpca.noise_variance_ - ref_noise_variance) < tol

    ref_transformed = centered @ fitted_components.T
    if whiten:
        scale = np.sqrt(incpca.explained_variance_)
        # Near-zero scales are replaced by inf so the division yields zeros.
        scale[scale < np.finfo(scale.dtype).eps] = np.inf
        ref_transformed /= scale

    # The projection is not compared for a whitened fit whose component
    # count equals the sample count.
    if not whiten or k != seen:
        assert_allclose(_as_numpy(transformed_data), ref_transformed, atol=tol)
182
+
183
+
184
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("whiten", [True, False])
@pytest.mark.parametrize("num_blocks", [1, 2, 3])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, whiten, num_blocks, dtype):
    """Feed the gold data block by block via ``partial_fit`` and check results."""
    gold = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]).astype(
        dtype=dtype
    )
    blocks = np.array_split(gold, num_blocks)
    incpca = IncrementalPCA(whiten=whiten)

    # array_split yields exactly ``num_blocks`` pieces, one partial_fit each.
    for block in blocks:
        block_df = _convert_to_dataframe(block, sycl_queue=queue, target_df=dataframe)
        incpca.partial_fit(block_df)

    gold_df = _convert_to_dataframe(gold, sycl_queue=queue, target_df=dataframe)
    projected = incpca.transform(gold_df)
    check_pca_on_gold_data(incpca, dtype, whiten, projected)
204
+
205
+
206
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("whiten", [True, False])
@pytest.mark.parametrize("num_blocks", [1, 2, 3])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_sklearnex_fit_on_gold_data(dataframe, queue, whiten, num_blocks, dtype):
    """Fit the gold data in one ``fit`` call with a derived batch size."""
    gold = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]).astype(
        dtype=dtype
    )
    # The batch size is the sample count integer-divided by ``num_blocks``.
    rows_per_batch = gold.shape[0] // num_blocks
    incpca = IncrementalPCA(whiten=whiten, batch_size=rows_per_batch)

    gold_df = _convert_to_dataframe(gold, sycl_queue=queue, target_df=dataframe)
    incpca.fit(gold_df)
    projected = incpca.transform(gold_df)

    check_pca_on_gold_data(incpca, dtype, whiten, projected)
221
+
222
+
223
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("whiten", [True, False])
@pytest.mark.parametrize("num_blocks", [1, 2, 3])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_sklearnex_fit_transform_on_gold_data(
    dataframe, queue, whiten, num_blocks, dtype
):
    """Run ``fit_transform`` on the gold data and validate model and output."""
    gold = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]).astype(
        dtype=dtype
    )
    # The batch size is the sample count integer-divided by ``num_blocks``.
    rows_per_batch = gold.shape[0] // num_blocks
    incpca = IncrementalPCA(whiten=whiten, batch_size=rows_per_batch)

    gold_df = _convert_to_dataframe(gold, sycl_queue=queue, target_df=dataframe)
    projected = incpca.fit_transform(gold_df)

    check_pca_on_gold_data(incpca, dtype, whiten, projected)
239
+
240
+
241
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("n_components", [None, 1, 5])
@pytest.mark.parametrize("whiten", [True, False])
@pytest.mark.parametrize("num_blocks", [1, 10])
@pytest.mark.parametrize("row_count", [100, 1000])
@pytest.mark.parametrize("column_count", [10, 100])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_sklearnex_partial_fit_on_random_data(
    dataframe, queue, n_components, whiten, num_blocks, row_count, column_count, dtype
):
    """Fit seeded uniform random data block-wise and compare with NumPy.

    The data is drawn from ``[-0.3, 0.7)`` with a fixed seed, split into
    ``num_blocks`` pieces that are passed to ``partial_fit`` one by one, and
    the fitted model is verified by ``check_pca``.
    """
    seed = 81
    rng = np.random.default_rng(seed)
    samples = rng.uniform(low=-0.3, high=+0.7, size=(row_count, column_count)).astype(
        dtype=dtype
    )
    blocks = np.array_split(samples, num_blocks)
    incpca = IncrementalPCA(n_components=n_components, whiten=whiten)

    # array_split yields exactly ``num_blocks`` pieces, one partial_fit each.
    for block in blocks:
        block_df = _convert_to_dataframe(block, sycl_queue=queue, target_df=dataframe)
        incpca.partial_fit(block_df)

    samples_df = _convert_to_dataframe(samples, sycl_queue=queue, target_df=dataframe)
    projected = incpca.transform(samples_df)
    check_pca(incpca, dtype, whiten, samples, projected)
@@ -0,0 +1,19 @@
1
+ # ===============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ===============================================================================
16
+
17
+ from .ridge import Ridge
18
+
19
+ __all__ = ["Ridge"]