scikit-learn-intelex 2025.0.0__py39-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (278) hide show
  1. daal4py/__init__.py +73 -0
  2. daal4py/__main__.py +58 -0
  3. daal4py/_daal4py.cpython-39-x86_64-linux-gnu.so +0 -0
  4. daal4py/doc/third-party-programs.txt +424 -0
  5. daal4py/mb/__init__.py +19 -0
  6. daal4py/mb/model_builders.py +377 -0
  7. daal4py/mpi_transceiver.cpython-39-x86_64-linux-gnu.so +0 -0
  8. daal4py/sklearn/__init__.py +40 -0
  9. daal4py/sklearn/_n_jobs_support.py +242 -0
  10. daal4py/sklearn/_utils.py +241 -0
  11. daal4py/sklearn/cluster/__init__.py +20 -0
  12. daal4py/sklearn/cluster/dbscan.py +165 -0
  13. daal4py/sklearn/cluster/k_means.py +597 -0
  14. daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
  15. daal4py/sklearn/decomposition/__init__.py +19 -0
  16. daal4py/sklearn/decomposition/_pca.py +524 -0
  17. daal4py/sklearn/ensemble/AdaBoostClassifier.py +192 -0
  18. daal4py/sklearn/ensemble/GBTDAAL.py +318 -0
  19. daal4py/sklearn/ensemble/__init__.py +27 -0
  20. daal4py/sklearn/ensemble/_forest.py +1397 -0
  21. daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
  22. daal4py/sklearn/linear_model/__init__.py +29 -0
  23. daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
  24. daal4py/sklearn/linear_model/_linear.py +272 -0
  25. daal4py/sklearn/linear_model/_ridge.py +325 -0
  26. daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
  27. daal4py/sklearn/linear_model/linear.py +17 -0
  28. daal4py/sklearn/linear_model/logistic_loss.py +195 -0
  29. daal4py/sklearn/linear_model/logistic_path.py +1026 -0
  30. daal4py/sklearn/linear_model/ridge.py +17 -0
  31. daal4py/sklearn/linear_model/tests/test_linear.py +196 -0
  32. daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
  33. daal4py/sklearn/manifold/__init__.py +19 -0
  34. daal4py/sklearn/manifold/_t_sne.py +405 -0
  35. daal4py/sklearn/metrics/__init__.py +20 -0
  36. daal4py/sklearn/metrics/_pairwise.py +155 -0
  37. daal4py/sklearn/metrics/_ranking.py +210 -0
  38. daal4py/sklearn/model_selection/__init__.py +19 -0
  39. daal4py/sklearn/model_selection/_split.py +309 -0
  40. daal4py/sklearn/model_selection/tests/test_split.py +56 -0
  41. daal4py/sklearn/monkeypatch/__init__.py +0 -0
  42. daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
  43. daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
  44. daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
  45. daal4py/sklearn/monkeypatch/tests/test_patching.py +87 -0
  46. daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +118 -0
  47. daal4py/sklearn/neighbors/__init__.py +21 -0
  48. daal4py/sklearn/neighbors/_base.py +503 -0
  49. daal4py/sklearn/neighbors/_classification.py +139 -0
  50. daal4py/sklearn/neighbors/_regression.py +74 -0
  51. daal4py/sklearn/neighbors/_unsupervised.py +55 -0
  52. daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
  53. daal4py/sklearn/svm/__init__.py +19 -0
  54. daal4py/sklearn/svm/svm.py +734 -0
  55. daal4py/sklearn/utils/__init__.py +21 -0
  56. daal4py/sklearn/utils/base.py +75 -0
  57. daal4py/sklearn/utils/tests/test_utils.py +51 -0
  58. daal4py/sklearn/utils/validation.py +693 -0
  59. onedal/__init__.py +83 -0
  60. onedal/_config.py +53 -0
  61. onedal/_device_offload.py +229 -0
  62. onedal/_onedal_py_dpc.cpython-39-x86_64-linux-gnu.so +0 -0
  63. onedal/_onedal_py_host.cpython-39-x86_64-linux-gnu.so +0 -0
  64. onedal/_onedal_py_spmd_dpc.cpython-39-x86_64-linux-gnu.so +0 -0
  65. onedal/basic_statistics/__init__.py +20 -0
  66. onedal/basic_statistics/basic_statistics.py +107 -0
  67. onedal/basic_statistics/incremental_basic_statistics.py +160 -0
  68. onedal/basic_statistics/tests/test_basic_statistics.py +298 -0
  69. onedal/basic_statistics/tests/test_incremental_basic_statistics.py +196 -0
  70. onedal/cluster/__init__.py +27 -0
  71. onedal/cluster/dbscan.py +110 -0
  72. onedal/cluster/kmeans.py +560 -0
  73. onedal/cluster/kmeans_init.py +115 -0
  74. onedal/cluster/tests/test_dbscan.py +125 -0
  75. onedal/cluster/tests/test_kmeans.py +88 -0
  76. onedal/cluster/tests/test_kmeans_init.py +93 -0
  77. onedal/common/_base.py +38 -0
  78. onedal/common/_estimator_checks.py +47 -0
  79. onedal/common/_mixin.py +62 -0
  80. onedal/common/_policy.py +59 -0
  81. onedal/common/_spmd_policy.py +30 -0
  82. onedal/common/hyperparameters.py +116 -0
  83. onedal/common/tests/test_policy.py +75 -0
  84. onedal/covariance/__init__.py +20 -0
  85. onedal/covariance/covariance.py +125 -0
  86. onedal/covariance/incremental_covariance.py +146 -0
  87. onedal/covariance/tests/test_covariance.py +50 -0
  88. onedal/covariance/tests/test_incremental_covariance.py +122 -0
  89. onedal/datatypes/__init__.py +19 -0
  90. onedal/datatypes/_data_conversion.py +95 -0
  91. onedal/datatypes/tests/test_data.py +235 -0
  92. onedal/decomposition/__init__.py +20 -0
  93. onedal/decomposition/incremental_pca.py +204 -0
  94. onedal/decomposition/pca.py +186 -0
  95. onedal/decomposition/tests/test_incremental_pca.py +198 -0
  96. onedal/ensemble/__init__.py +29 -0
  97. onedal/ensemble/forest.py +720 -0
  98. onedal/ensemble/tests/test_random_forest.py +97 -0
  99. onedal/linear_model/__init__.py +27 -0
  100. onedal/linear_model/incremental_linear_model.py +258 -0
  101. onedal/linear_model/linear_model.py +329 -0
  102. onedal/linear_model/logistic_regression.py +249 -0
  103. onedal/linear_model/tests/test_incremental_linear_regression.py +168 -0
  104. onedal/linear_model/tests/test_incremental_ridge_regression.py +107 -0
  105. onedal/linear_model/tests/test_linear_regression.py +149 -0
  106. onedal/linear_model/tests/test_logistic_regression.py +95 -0
  107. onedal/linear_model/tests/test_ridge.py +95 -0
  108. onedal/neighbors/__init__.py +19 -0
  109. onedal/neighbors/neighbors.py +778 -0
  110. onedal/neighbors/tests/test_knn_classification.py +49 -0
  111. onedal/primitives/__init__.py +27 -0
  112. onedal/primitives/get_tree.py +25 -0
  113. onedal/primitives/kernel_functions.py +153 -0
  114. onedal/primitives/tests/test_kernel_functions.py +159 -0
  115. onedal/spmd/__init__.py +25 -0
  116. onedal/spmd/_base.py +30 -0
  117. onedal/spmd/basic_statistics/__init__.py +20 -0
  118. onedal/spmd/basic_statistics/basic_statistics.py +30 -0
  119. onedal/spmd/basic_statistics/incremental_basic_statistics.py +69 -0
  120. onedal/spmd/cluster/__init__.py +28 -0
  121. onedal/spmd/cluster/dbscan.py +23 -0
  122. onedal/spmd/cluster/kmeans.py +56 -0
  123. onedal/spmd/covariance/__init__.py +20 -0
  124. onedal/spmd/covariance/covariance.py +26 -0
  125. onedal/spmd/covariance/incremental_covariance.py +82 -0
  126. onedal/spmd/decomposition/__init__.py +20 -0
  127. onedal/spmd/decomposition/incremental_pca.py +117 -0
  128. onedal/spmd/decomposition/pca.py +26 -0
  129. onedal/spmd/ensemble/__init__.py +19 -0
  130. onedal/spmd/ensemble/forest.py +28 -0
  131. onedal/spmd/linear_model/__init__.py +21 -0
  132. onedal/spmd/linear_model/incremental_linear_model.py +97 -0
  133. onedal/spmd/linear_model/linear_model.py +30 -0
  134. onedal/spmd/linear_model/logistic_regression.py +38 -0
  135. onedal/spmd/neighbors/__init__.py +19 -0
  136. onedal/spmd/neighbors/neighbors.py +75 -0
  137. onedal/svm/__init__.py +19 -0
  138. onedal/svm/svm.py +556 -0
  139. onedal/svm/tests/test_csr_svm.py +351 -0
  140. onedal/svm/tests/test_nusvc.py +204 -0
  141. onedal/svm/tests/test_nusvr.py +210 -0
  142. onedal/svm/tests/test_svc.py +168 -0
  143. onedal/svm/tests/test_svr.py +243 -0
  144. onedal/tests/test_common.py +41 -0
  145. onedal/tests/utils/_dataframes_support.py +168 -0
  146. onedal/tests/utils/_device_selection.py +107 -0
  147. onedal/utils/__init__.py +49 -0
  148. onedal/utils/_array_api.py +91 -0
  149. onedal/utils/validation.py +432 -0
  150. scikit_learn_intelex-2025.0.0.dist-info/LICENSE.txt +202 -0
  151. scikit_learn_intelex-2025.0.0.dist-info/METADATA +231 -0
  152. scikit_learn_intelex-2025.0.0.dist-info/RECORD +278 -0
  153. scikit_learn_intelex-2025.0.0.dist-info/WHEEL +5 -0
  154. scikit_learn_intelex-2025.0.0.dist-info/top_level.txt +3 -0
  155. sklearnex/__init__.py +65 -0
  156. sklearnex/__main__.py +58 -0
  157. sklearnex/_config.py +98 -0
  158. sklearnex/_device_offload.py +121 -0
  159. sklearnex/_utils.py +109 -0
  160. sklearnex/basic_statistics/__init__.py +20 -0
  161. sklearnex/basic_statistics/basic_statistics.py +140 -0
  162. sklearnex/basic_statistics/incremental_basic_statistics.py +288 -0
  163. sklearnex/basic_statistics/tests/test_basic_statistics.py +251 -0
  164. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +384 -0
  165. sklearnex/cluster/__init__.py +20 -0
  166. sklearnex/cluster/dbscan.py +192 -0
  167. sklearnex/cluster/k_means.py +383 -0
  168. sklearnex/cluster/tests/test_dbscan.py +38 -0
  169. sklearnex/cluster/tests/test_kmeans.py +153 -0
  170. sklearnex/conftest.py +73 -0
  171. sklearnex/covariance/__init__.py +19 -0
  172. sklearnex/covariance/incremental_covariance.py +368 -0
  173. sklearnex/covariance/tests/test_incremental_covariance.py +226 -0
  174. sklearnex/decomposition/__init__.py +19 -0
  175. sklearnex/decomposition/pca.py +414 -0
  176. sklearnex/decomposition/tests/test_pca.py +58 -0
  177. sklearnex/dispatcher.py +543 -0
  178. sklearnex/doc/third-party-programs.txt +424 -0
  179. sklearnex/ensemble/__init__.py +29 -0
  180. sklearnex/ensemble/_forest.py +2016 -0
  181. sklearnex/ensemble/tests/test_forest.py +120 -0
  182. sklearnex/glob/__main__.py +72 -0
  183. sklearnex/glob/dispatcher.py +101 -0
  184. sklearnex/linear_model/__init__.py +32 -0
  185. sklearnex/linear_model/coordinate_descent.py +30 -0
  186. sklearnex/linear_model/incremental_linear.py +463 -0
  187. sklearnex/linear_model/incremental_ridge.py +418 -0
  188. sklearnex/linear_model/linear.py +302 -0
  189. sklearnex/linear_model/logistic_path.py +17 -0
  190. sklearnex/linear_model/logistic_regression.py +403 -0
  191. sklearnex/linear_model/ridge.py +24 -0
  192. sklearnex/linear_model/tests/test_incremental_linear.py +203 -0
  193. sklearnex/linear_model/tests/test_incremental_ridge.py +153 -0
  194. sklearnex/linear_model/tests/test_linear.py +142 -0
  195. sklearnex/linear_model/tests/test_logreg.py +134 -0
  196. sklearnex/manifold/__init__.py +19 -0
  197. sklearnex/manifold/t_sne.py +21 -0
  198. sklearnex/manifold/tests/test_tsne.py +26 -0
  199. sklearnex/metrics/__init__.py +23 -0
  200. sklearnex/metrics/pairwise.py +22 -0
  201. sklearnex/metrics/ranking.py +20 -0
  202. sklearnex/metrics/tests/test_metrics.py +39 -0
  203. sklearnex/model_selection/__init__.py +21 -0
  204. sklearnex/model_selection/split.py +22 -0
  205. sklearnex/model_selection/tests/test_model_selection.py +34 -0
  206. sklearnex/neighbors/__init__.py +27 -0
  207. sklearnex/neighbors/_lof.py +231 -0
  208. sklearnex/neighbors/common.py +310 -0
  209. sklearnex/neighbors/knn_classification.py +226 -0
  210. sklearnex/neighbors/knn_regression.py +203 -0
  211. sklearnex/neighbors/knn_unsupervised.py +170 -0
  212. sklearnex/neighbors/tests/test_neighbors.py +80 -0
  213. sklearnex/preview/__init__.py +17 -0
  214. sklearnex/preview/covariance/__init__.py +19 -0
  215. sklearnex/preview/covariance/covariance.py +133 -0
  216. sklearnex/preview/covariance/tests/test_covariance.py +66 -0
  217. sklearnex/preview/decomposition/__init__.py +19 -0
  218. sklearnex/preview/decomposition/incremental_pca.py +228 -0
  219. sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
  220. sklearnex/preview/linear_model/__init__.py +19 -0
  221. sklearnex/preview/linear_model/ridge.py +419 -0
  222. sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
  223. sklearnex/spmd/__init__.py +25 -0
  224. sklearnex/spmd/basic_statistics/__init__.py +20 -0
  225. sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
  226. sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
  227. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  228. sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +307 -0
  229. sklearnex/spmd/cluster/__init__.py +30 -0
  230. sklearnex/spmd/cluster/dbscan.py +50 -0
  231. sklearnex/spmd/cluster/kmeans.py +21 -0
  232. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  233. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
  234. sklearnex/spmd/covariance/__init__.py +20 -0
  235. sklearnex/spmd/covariance/covariance.py +21 -0
  236. sklearnex/spmd/covariance/incremental_covariance.py +37 -0
  237. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  238. sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
  239. sklearnex/spmd/decomposition/__init__.py +20 -0
  240. sklearnex/spmd/decomposition/incremental_pca.py +30 -0
  241. sklearnex/spmd/decomposition/pca.py +21 -0
  242. sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
  243. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  244. sklearnex/spmd/ensemble/__init__.py +19 -0
  245. sklearnex/spmd/ensemble/forest.py +71 -0
  246. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  247. sklearnex/spmd/linear_model/__init__.py +21 -0
  248. sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
  249. sklearnex/spmd/linear_model/linear_model.py +21 -0
  250. sklearnex/spmd/linear_model/logistic_regression.py +21 -0
  251. sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +329 -0
  252. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  253. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +166 -0
  254. sklearnex/spmd/neighbors/__init__.py +19 -0
  255. sklearnex/spmd/neighbors/neighbors.py +25 -0
  256. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  257. sklearnex/svm/__init__.py +29 -0
  258. sklearnex/svm/_common.py +328 -0
  259. sklearnex/svm/nusvc.py +332 -0
  260. sklearnex/svm/nusvr.py +148 -0
  261. sklearnex/svm/svc.py +360 -0
  262. sklearnex/svm/svr.py +149 -0
  263. sklearnex/svm/tests/test_svm.py +93 -0
  264. sklearnex/tests/_utils.py +328 -0
  265. sklearnex/tests/_utils_spmd.py +198 -0
  266. sklearnex/tests/test_common.py +54 -0
  267. sklearnex/tests/test_config.py +43 -0
  268. sklearnex/tests/test_memory_usage.py +291 -0
  269. sklearnex/tests/test_monkeypatch.py +276 -0
  270. sklearnex/tests/test_n_jobs_support.py +103 -0
  271. sklearnex/tests/test_parallel.py +48 -0
  272. sklearnex/tests/test_patching.py +385 -0
  273. sklearnex/tests/test_run_to_run_stability.py +296 -0
  274. sklearnex/utils/__init__.py +19 -0
  275. sklearnex/utils/_array_api.py +82 -0
  276. sklearnex/utils/parallel.py +59 -0
  277. sklearnex/utils/tests/test_finite.py +89 -0
  278. sklearnex/utils/validation.py +17 -0
@@ -0,0 +1,50 @@
1
+ # ==============================================================================
2
+ # Copyright 2023 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ from abc import ABC
18
+
19
+ from onedal.spmd.cluster import DBSCAN as onedal_DBSCAN
20
+
21
+ from ...cluster import DBSCAN as DBSCAN_Batch
22
+
23
+
24
class BaseDBSCANspmd(ABC):
    """Mixin whose factory method returns the SPMD oneDAL DBSCAN object."""

    def _onedal_dbscan(self, **onedal_params):
        """Construct ``onedal.spmd.cluster.DBSCAN`` from the given keyword parameters."""
        spmd_backend = onedal_DBSCAN(**onedal_params)
        return spmd_backend
27
+
28
+
29
class DBSCAN(BaseDBSCANspmd, DBSCAN_Batch):
    # The batch estimator's documentation is reused verbatim.
    __doc__ = DBSCAN_Batch.__doc__

    def _onedal_cpu_supported(self, method_name, *data):
        """Return the parent's CPU support result, raising if it is falsy.

        Raises
        ------
        RuntimeError
            If the parent class's ``_onedal_cpu_supported`` check returns a
            falsy value for ``method_name`` and ``data``.
        """
        # TODO:
        # check which methods support the SPMD interface on CPU.
        supported = super()._onedal_cpu_supported(method_name, *data)
        if supported:
            return supported
        raise RuntimeError(
            f"Method {method_name} in {self.__class__.__name__} "
            "is not supported with given inputs."
        )

    def _onedal_gpu_supported(self, method_name, *data):
        """Return the parent's GPU support result, raising if it is falsy.

        Raises
        ------
        RuntimeError
            If the parent class's ``_onedal_gpu_supported`` check returns a
            falsy value for ``method_name`` and ``data``.
        """
        supported = super()._onedal_gpu_supported(method_name, *data)
        if supported:
            return supported
        raise RuntimeError(
            f"Method {method_name} in {self.__class__.__name__} "
            "is not supported with given inputs."
        )
@@ -0,0 +1,21 @@
1
+ # ==============================================================================
2
+ # Copyright 2023 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ from onedal.spmd.cluster import KMeans
18
+
19
+ # TODO:
20
+ # Currently it uses `onedal` module interface.
21
+ # Add sklearnex dispatching.
@@ -0,0 +1,97 @@
1
+ # ==============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import numpy as np
18
+ import pytest
19
+
20
+ from onedal.tests.utils._dataframes_support import (
21
+ _convert_to_dataframe,
22
+ get_dataframes_and_queues,
23
+ )
24
+ from sklearnex.tests._utils_spmd import (
25
+ _generate_clustering_data,
26
+ _get_local_tensor,
27
+ _mpi_libs_and_gpu_available,
28
+ _spmd_assert_allclose,
29
+ )
30
+
31
+
32
@pytest.mark.skipif(
    not _mpi_libs_and_gpu_available,
    reason="GPU device and MPI libs required for test",
)
@pytest.mark.parametrize(
    "dataframe,queue",
    get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
)
@pytest.mark.mpi
def test_dbscan_spmd_gold(dataframe, queue):
    """Compare SPMD DBSCAN labels with batch DBSCAN labels on a fixed six-point set."""
    # The estimators are imported inside the test body rather than at module level.
    from sklearnex.cluster import DBSCAN as DBSCAN_Batch
    from sklearnex.spmd.cluster import DBSCAN as DBSCAN_SPMD

    points = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]])
    dbscan_kwargs = {"eps": 3, "min_samples": 2}

    # NOTE(review): _get_local_tensor presumably yields this rank's share of
    # the data; confirm in sklearnex.tests._utils_spmd.
    local_part = _get_local_tensor(points)
    local_points = _convert_to_dataframe(
        local_part, sycl_queue=queue, target_df=dataframe
    )

    # The SPMD model is fitted on the local dataframe, the batch model on the
    # full NumPy array; their labels are then compared.
    spmd_model = DBSCAN_SPMD(**dbscan_kwargs).fit(local_points)
    batch_model = DBSCAN_Batch(**dbscan_kwargs).fit(points)

    _spmd_assert_allclose(spmd_model.labels_, batch_model.labels_)
57
+
58
+
59
@pytest.mark.skipif(
    not _mpi_libs_and_gpu_available,
    reason="GPU device and MPI libs required for test",
)
@pytest.mark.parametrize("n_samples", [200, 10000])
@pytest.mark.parametrize("n_features_and_eps", [(5, 3), (5, 10), (25, 10)])
@pytest.mark.parametrize("centers", [10, None])
@pytest.mark.parametrize("min_samples", [2, 5, 15])
@pytest.mark.parametrize(
    "dataframe,queue",
    get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
)
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.mpi
def test_dbscan_spmd_synthetic(
    n_samples, n_features_and_eps, centers, min_samples, dataframe, queue, dtype
):
    """Compare SPMD and batch DBSCAN labels on generated clustering data.

    ``n_features_and_eps`` pairs a feature count with the ``eps`` value used
    for that feature count.
    """
    # The estimators are imported inside the test body rather than at module level.
    from sklearnex.cluster import DBSCAN as DBSCAN_Batch
    from sklearnex.spmd.cluster import DBSCAN as DBSCAN_SPMD

    n_features, eps = n_features_and_eps

    # Only the first element of the generated pair is used here.
    generated = _generate_clustering_data(
        n_samples, n_features, centers=centers, dtype=dtype
    )
    data = generated[0]

    local_part = _get_local_tensor(data)
    local_data = _convert_to_dataframe(
        local_part, sycl_queue=queue, target_df=dataframe
    )

    # Labels from the SPMD fit must match the batch fit on the full data.
    spmd_model = DBSCAN_SPMD(eps=eps, min_samples=min_samples).fit(local_data)
    batch_model = DBSCAN_Batch(eps=eps, min_samples=min_samples).fit(data)

    _spmd_assert_allclose(spmd_model.labels_, batch_model.labels_)

    # Guard against a vacuous comparison: if every batch label is -1 the
    # parametrization needs a larger epsilon.
    all_unlabeled = np.all(batch_model.labels_ == -1)
    if all_unlabeled:
        raise ValueError("No labels given - try raising epsilon")
@@ -0,0 +1,172 @@
1
+ # ==============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import numpy as np
18
+ import pytest
19
+ from numpy.testing import assert_allclose
20
+
21
+ from onedal.tests.utils._dataframes_support import (
22
+ _convert_to_dataframe,
23
+ get_dataframes_and_queues,
24
+ )
25
+ from sklearnex.tests._utils_spmd import (
26
+ _assert_kmeans_labels_allclose,
27
+ _assert_unordered_allclose,
28
+ _generate_clustering_data,
29
+ _get_local_tensor,
30
+ _mpi_libs_and_gpu_available,
31
+ )
32
+
33
+
34
@pytest.mark.skipif(
    not _mpi_libs_and_gpu_available,
    reason="GPU device and MPI libs required for test",
)
@pytest.mark.parametrize(
    "dataframe,queue",
    get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
)
@pytest.mark.mpi
def test_kmeans_spmd_gold(dataframe, queue):
    """Compare SPMD and batch KMeans (centers, labels, iterations, predictions)
    on a small fixed dataset."""
    # The estimators are imported inside the test body rather than at module level.
    from sklearnex.cluster import KMeans as KMeans_Batch
    from sklearnex.spmd.cluster import KMeans as KMeans_SPMD

    def to_local_dataframe(array):
        # NOTE(review): _get_local_tensor presumably yields this rank's share
        # of the data; confirm in sklearnex.tests._utils_spmd.
        return _convert_to_dataframe(
            _get_local_tensor(array), sycl_queue=queue, target_df=dataframe
        )

    train_rows = [
        [1, 2],
        [2, 2],
        [2, 3],
        [8, 7],
        [8, 8],
        [25, 80],
        [5, 65],
        [2, 8],
        [1, 3],
        [2, 2],
        [1, 3],
        [2, 2],
    ]
    X_train = np.array(train_rows)
    X_test = np.array([[0, 0], [12, 3], [2, 2], [7, 8]])

    local_train = to_local_dataframe(X_train)
    local_test = to_local_dataframe(X_test)

    # Fit both flavours with identical hyperparameters.
    kmeans_kwargs = {"n_clusters": 2, "random_state": 0}
    spmd_model = KMeans_SPMD(**kmeans_kwargs).fit(local_train)
    batch_model = KMeans_Batch(**kmeans_kwargs).fit(X_train)

    # Fitted state: centers (order-insensitive helper), labels, iteration count.
    _assert_unordered_allclose(spmd_model.cluster_centers_, batch_model.cluster_centers_)
    _assert_kmeans_labels_allclose(
        spmd_model.labels_,
        batch_model.labels_,
        spmd_model.cluster_centers_,
        batch_model.cluster_centers_,
    )
    assert_allclose(spmd_model.n_iter_, batch_model.n_iter_, atol=1)

    # Predictions on held-out points must agree as well.
    spmd_pred = spmd_model.predict(local_test)
    batch_pred = batch_model.predict(X_test)

    _assert_kmeans_labels_allclose(
        spmd_pred,
        batch_pred,
        spmd_model.cluster_centers_,
        batch_model.cluster_centers_,
    )
96
+
97
+
98
@pytest.mark.skipif(
    not _mpi_libs_and_gpu_available,
    reason="GPU device and MPI libs required for test",
)
@pytest.mark.parametrize("n_samples", [200, 10000])
@pytest.mark.parametrize("n_features", [5, 25])
@pytest.mark.parametrize("n_clusters", [2, 5, 15])
@pytest.mark.parametrize(
    "dataframe,queue",
    get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
)
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.mpi
def test_kmeans_spmd_synthetic(
    n_samples, n_features, n_clusters, dataframe, queue, dtype
):
    """Compare SPMD and batch KMeans on generated data, starting both from the
    centers produced by a one-iteration SPMD fit."""
    # The estimators are imported inside the test body rather than at module level.
    from sklearnex.cluster import KMeans as KMeans_Batch
    from sklearnex.spmd.cluster import KMeans as KMeans_SPMD

    def to_local_dataframe(array):
        # NOTE(review): _get_local_tensor presumably yields this rank's share
        # of the data; confirm in sklearnex.tests._utils_spmd.
        return _convert_to_dataframe(
            _get_local_tensor(array), sycl_queue=queue, target_df=dataframe
        )

    # TODO: investigate issues when centers != n_clusters (spmd and batch results don't match for all values of K)
    X_train, X_test = _generate_clustering_data(
        n_samples, n_features, centers=n_clusters, dtype=dtype
    )

    local_train = to_local_dataframe(X_train)
    local_test = to_local_dataframe(X_test)

    # Validate KMeans init: one iteration with each implementation.
    init_kwargs = {"n_clusters": n_clusters, "max_iter": 1, "random_state": 0}
    spmd_model_init = KMeans_SPMD(**init_kwargs).fit(local_train)
    batch_model_init = KMeans_Batch(**init_kwargs).fit(X_train)
    # TODO: centers do not match up after init
    # _assert_unordered_allclose(spmd_model_init.cluster_centers_, batch_model_init.cluster_centers_)

    # Full fits, both seeded with the centers from the SPMD init run.
    spmd_estimator = KMeans_SPMD(
        n_clusters=n_clusters, init=spmd_model_init.cluster_centers_, random_state=0
    )
    spmd_model = spmd_estimator.fit(local_train)
    batch_estimator = KMeans_Batch(
        n_clusters=n_clusters, init=spmd_model_init.cluster_centers_, random_state=0
    )
    batch_model = batch_estimator.fit(X_train)

    # Looser absolute tolerance for single precision.
    if dtype == np.float32:
        atol = 1e-5
    else:
        atol = 1e-7

    _assert_unordered_allclose(
        spmd_model.cluster_centers_, batch_model.cluster_centers_, atol=atol
    )
    _assert_kmeans_labels_allclose(
        spmd_model.labels_,
        batch_model.labels_,
        spmd_model.cluster_centers_,
        batch_model.cluster_centers_,
        atol=atol,
    )
    # TODO: KMeans iterations are not aligned
    # assert_allclose(spmd_model.n_iter_, batch_model.n_iter_, atol=1)

    # Predictions on the test split must agree as well.
    spmd_pred = spmd_model.predict(local_test)
    batch_pred = batch_model.predict(X_test)

    _assert_kmeans_labels_allclose(
        spmd_pred,
        batch_pred,
        spmd_model.cluster_centers_,
        batch_model.cluster_centers_,
        atol=atol,
    )
@@ -0,0 +1,20 @@
1
+ # ==============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ from .covariance import EmpiricalCovariance
18
+ from .incremental_covariance import IncrementalEmpiricalCovariance
19
+
20
+ __all__ = ["EmpiricalCovariance", "IncrementalEmpiricalCovariance"]
@@ -0,0 +1,21 @@
1
+ # ==============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ from onedal.spmd.covariance import EmpiricalCovariance
18
+
19
+ # TODO:
20
+ # Currently it uses `onedal` module interface.
21
+ # Add sklearnex dispatching.
@@ -0,0 +1,37 @@
1
+ # ==============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ from onedal.spmd.covariance import (
18
+ IncrementalEmpiricalCovariance as onedalSPMD_IncrementalEmpiricalCovariance,
19
+ )
20
+
21
+ from ...covariance import (
22
+ IncrementalEmpiricalCovariance as base_IncrementalEmpiricalCovariance,
23
+ )
24
+
25
+
26
class IncrementalEmpiricalCovariance(base_IncrementalEmpiricalCovariance):
    """
    Incremental distributed estimator for covariance.

    Allows distributed computation of the empirical covariance, estimated by
    the maximum likelihood method, when the data are split into batches.

    API is the same as for `sklearnex.covariance.IncrementalEmpiricalCovariance`
    """

    # Points the estimator at the SPMD oneDAL implementation. staticmethod()
    # keeps the class from being bound as a method when it is looked up on an
    # instance.
    # NOTE(review): presumably this overrides the backend attribute of the same
    # name on the base estimator; confirm in sklearnex.covariance.
    _onedal_incremental_covariance = staticmethod(
        onedalSPMD_IncrementalEmpiricalCovariance
    )
@@ -0,0 +1,107 @@
1
+ # ==============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import numpy as np
18
+ import pytest
19
+ from numpy.testing import assert_allclose
20
+
21
+ from onedal.tests.utils._dataframes_support import (
22
+ _convert_to_dataframe,
23
+ get_dataframes_and_queues,
24
+ )
25
+ from sklearnex.tests._utils_spmd import (
26
+ _generate_statistic_data,
27
+ _get_local_tensor,
28
+ _mpi_libs_and_gpu_available,
29
+ )
30
+
31
+
32
+ @pytest.mark.skipif(
33
+ not _mpi_libs_and_gpu_available,
34
+ reason="GPU device and MPI libs required for test",
35
+ )
36
+ @pytest.mark.parametrize(
37
+ "dataframe,queue",
38
+ get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
39
+ )
40
+ @pytest.mark.mpi
41
+ def test_covariance_spmd_gold(dataframe, queue):
42
+ # Import spmd and batch algo
43
+ from onedal.covariance import EmpiricalCovariance as EmpiricalCovariance_Batch
44
+ from sklearnex.spmd.covariance import EmpiricalCovariance as EmpiricalCovariance_SPMD
45
+
46
+ # Create gold data and convert to dataframe
47
+ data = np.array(
48
+ [
49
+ [0.0, 0.0, 0.0],
50
+ [0.0, 1.0, 2.0],
51
+ [0.0, 2.0, 4.0],
52
+ [0.0, 3.0, 8.0],
53
+ [0.0, 4.0, 16.0],
54
+ [0.0, 5.0, 32.0],
55
+ [0.0, 6.0, 64.0],
56
+ ]
57
+ )
58
+
59
+ local_dpt_data = _convert_to_dataframe(
60
+ _get_local_tensor(data), sycl_queue=queue, target_df=dataframe
61
+ )
62
+
63
+ # Ensure results of batch algo match spmd
64
+ spmd_result = EmpiricalCovariance_SPMD().fit(local_dpt_data)
65
+ batch_result = EmpiricalCovariance_Batch().fit(data)
66
+
67
+ assert_allclose(spmd_result.covariance_, batch_result.covariance_)
68
+ assert_allclose(spmd_result.location_, batch_result.location_)
69
+
70
+
71
+ @pytest.mark.skipif(
72
+ not _mpi_libs_and_gpu_available,
73
+ reason="GPU device and MPI libs required for test",
74
+ )
75
+ @pytest.mark.parametrize("n_samples", [100, 10000])
76
+ @pytest.mark.parametrize("n_features", [10, 100])
77
+ @pytest.mark.parametrize("assume_centered", [True, False])
78
+ @pytest.mark.parametrize(
79
+ "dataframe,queue",
80
+ get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
81
+ )
82
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
83
+ @pytest.mark.mpi
84
+ def test_covariance_spmd_synthetic(
85
+ n_samples, n_features, assume_centered, dataframe, queue, dtype
86
+ ):
87
+ # Import spmd and batch algo
88
+ # TODO: Align sklearnex spmd to sklearnex estimator with bias and swap onedal with sklearnex
89
+ from onedal.covariance import EmpiricalCovariance as EmpiricalCovariance_Batch
90
+ from sklearnex.spmd.covariance import EmpiricalCovariance as EmpiricalCovariance_SPMD
91
+
92
+ # Generate data and convert to dataframe
93
+ data = _generate_statistic_data(n_samples, n_features, dtype=dtype)
94
+
95
+ local_dpt_data = _convert_to_dataframe(
96
+ _get_local_tensor(data), sycl_queue=queue, target_df=dataframe
97
+ )
98
+
99
+ # Ensure results of batch algo match spmd
100
+ spmd_result = EmpiricalCovariance_SPMD(assume_centered=assume_centered).fit(
101
+ local_dpt_data
102
+ )
103
+ batch_result = EmpiricalCovariance_Batch(assume_centered=assume_centered).fit(data)
104
+
105
+ atol = 1e-5 if dtype == np.float32 else 1e-7
106
+ assert_allclose(spmd_result.covariance_, batch_result.covariance_, atol=atol)
107
+ assert_allclose(spmd_result.location_, batch_result.location_, atol=atol)