scikit-learn-intelex 2025.1.0__py310-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (280)
  1. daal4py/__init__.py +73 -0
  2. daal4py/__main__.py +58 -0
  3. daal4py/_daal4py.cpython-310-x86_64-linux-gnu.so +0 -0
  4. daal4py/doc/third-party-programs.txt +424 -0
  5. daal4py/mb/__init__.py +19 -0
  6. daal4py/mb/model_builders.py +377 -0
  7. daal4py/mpi_transceiver.cpython-310-x86_64-linux-gnu.so +0 -0
  8. daal4py/sklearn/__init__.py +40 -0
  9. daal4py/sklearn/_n_jobs_support.py +248 -0
  10. daal4py/sklearn/_utils.py +245 -0
  11. daal4py/sklearn/cluster/__init__.py +20 -0
  12. daal4py/sklearn/cluster/dbscan.py +165 -0
  13. daal4py/sklearn/cluster/k_means.py +597 -0
  14. daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
  15. daal4py/sklearn/decomposition/__init__.py +19 -0
  16. daal4py/sklearn/decomposition/_pca.py +524 -0
  17. daal4py/sklearn/ensemble/AdaBoostClassifier.py +196 -0
  18. daal4py/sklearn/ensemble/GBTDAAL.py +337 -0
  19. daal4py/sklearn/ensemble/__init__.py +27 -0
  20. daal4py/sklearn/ensemble/_forest.py +1397 -0
  21. daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
  22. daal4py/sklearn/linear_model/__init__.py +29 -0
  23. daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
  24. daal4py/sklearn/linear_model/_linear.py +272 -0
  25. daal4py/sklearn/linear_model/_ridge.py +325 -0
  26. daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
  27. daal4py/sklearn/linear_model/linear.py +17 -0
  28. daal4py/sklearn/linear_model/logistic_loss.py +195 -0
  29. daal4py/sklearn/linear_model/logistic_path.py +1026 -0
  30. daal4py/sklearn/linear_model/ridge.py +17 -0
  31. daal4py/sklearn/linear_model/tests/test_linear.py +208 -0
  32. daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
  33. daal4py/sklearn/manifold/__init__.py +19 -0
  34. daal4py/sklearn/manifold/_t_sne.py +405 -0
  35. daal4py/sklearn/metrics/__init__.py +20 -0
  36. daal4py/sklearn/metrics/_pairwise.py +236 -0
  37. daal4py/sklearn/metrics/_ranking.py +210 -0
  38. daal4py/sklearn/model_selection/__init__.py +19 -0
  39. daal4py/sklearn/model_selection/_split.py +309 -0
  40. daal4py/sklearn/model_selection/tests/test_split.py +56 -0
  41. daal4py/sklearn/monkeypatch/__init__.py +0 -0
  42. daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
  43. daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
  44. daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
  45. daal4py/sklearn/monkeypatch/tests/test_patching.py +90 -0
  46. daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +117 -0
  47. daal4py/sklearn/neighbors/__init__.py +21 -0
  48. daal4py/sklearn/neighbors/_base.py +503 -0
  49. daal4py/sklearn/neighbors/_classification.py +139 -0
  50. daal4py/sklearn/neighbors/_regression.py +74 -0
  51. daal4py/sklearn/neighbors/_unsupervised.py +55 -0
  52. daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
  53. daal4py/sklearn/svm/__init__.py +19 -0
  54. daal4py/sklearn/svm/svm.py +734 -0
  55. daal4py/sklearn/utils/__init__.py +21 -0
  56. daal4py/sklearn/utils/base.py +75 -0
  57. daal4py/sklearn/utils/tests/test_utils.py +51 -0
  58. daal4py/sklearn/utils/validation.py +693 -0
  59. onedal/__init__.py +83 -0
  60. onedal/_config.py +54 -0
  61. onedal/_device_offload.py +222 -0
  62. onedal/_onedal_py_dpc.cpython-310-x86_64-linux-gnu.so +0 -0
  63. onedal/_onedal_py_host.cpython-310-x86_64-linux-gnu.so +0 -0
  64. onedal/_onedal_py_spmd_dpc.cpython-310-x86_64-linux-gnu.so +0 -0
  65. onedal/basic_statistics/__init__.py +20 -0
  66. onedal/basic_statistics/basic_statistics.py +107 -0
  67. onedal/basic_statistics/incremental_basic_statistics.py +160 -0
  68. onedal/basic_statistics/tests/test_basic_statistics.py +298 -0
  69. onedal/basic_statistics/tests/test_incremental_basic_statistics.py +196 -0
  70. onedal/cluster/__init__.py +27 -0
  71. onedal/cluster/dbscan.py +110 -0
  72. onedal/cluster/kmeans.py +564 -0
  73. onedal/cluster/kmeans_init.py +115 -0
  74. onedal/cluster/tests/test_dbscan.py +125 -0
  75. onedal/cluster/tests/test_kmeans.py +88 -0
  76. onedal/cluster/tests/test_kmeans_init.py +93 -0
  77. onedal/common/_base.py +38 -0
  78. onedal/common/_estimator_checks.py +47 -0
  79. onedal/common/_mixin.py +62 -0
  80. onedal/common/_policy.py +59 -0
  81. onedal/common/_spmd_policy.py +30 -0
  82. onedal/common/hyperparameters.py +125 -0
  83. onedal/common/tests/test_policy.py +76 -0
  84. onedal/covariance/__init__.py +20 -0
  85. onedal/covariance/covariance.py +125 -0
  86. onedal/covariance/incremental_covariance.py +146 -0
  87. onedal/covariance/tests/test_covariance.py +50 -0
  88. onedal/covariance/tests/test_incremental_covariance.py +122 -0
  89. onedal/datatypes/__init__.py +19 -0
  90. onedal/datatypes/_data_conversion.py +154 -0
  91. onedal/datatypes/tests/common.py +126 -0
  92. onedal/datatypes/tests/test_data.py +414 -0
  93. onedal/decomposition/__init__.py +20 -0
  94. onedal/decomposition/incremental_pca.py +204 -0
  95. onedal/decomposition/pca.py +186 -0
  96. onedal/decomposition/tests/test_incremental_pca.py +198 -0
  97. onedal/ensemble/__init__.py +29 -0
  98. onedal/ensemble/forest.py +727 -0
  99. onedal/ensemble/tests/test_random_forest.py +97 -0
  100. onedal/linear_model/__init__.py +27 -0
  101. onedal/linear_model/incremental_linear_model.py +258 -0
  102. onedal/linear_model/linear_model.py +329 -0
  103. onedal/linear_model/logistic_regression.py +249 -0
  104. onedal/linear_model/tests/test_incremental_linear_regression.py +168 -0
  105. onedal/linear_model/tests/test_incremental_ridge_regression.py +107 -0
  106. onedal/linear_model/tests/test_linear_regression.py +250 -0
  107. onedal/linear_model/tests/test_logistic_regression.py +95 -0
  108. onedal/linear_model/tests/test_ridge.py +95 -0
  109. onedal/neighbors/__init__.py +19 -0
  110. onedal/neighbors/neighbors.py +767 -0
  111. onedal/neighbors/tests/test_knn_classification.py +49 -0
  112. onedal/primitives/__init__.py +27 -0
  113. onedal/primitives/get_tree.py +25 -0
  114. onedal/primitives/kernel_functions.py +153 -0
  115. onedal/primitives/tests/test_kernel_functions.py +159 -0
  116. onedal/spmd/__init__.py +25 -0
  117. onedal/spmd/_base.py +30 -0
  118. onedal/spmd/basic_statistics/__init__.py +20 -0
  119. onedal/spmd/basic_statistics/basic_statistics.py +30 -0
  120. onedal/spmd/basic_statistics/incremental_basic_statistics.py +69 -0
  121. onedal/spmd/cluster/__init__.py +28 -0
  122. onedal/spmd/cluster/dbscan.py +23 -0
  123. onedal/spmd/cluster/kmeans.py +56 -0
  124. onedal/spmd/covariance/__init__.py +20 -0
  125. onedal/spmd/covariance/covariance.py +26 -0
  126. onedal/spmd/covariance/incremental_covariance.py +82 -0
  127. onedal/spmd/decomposition/__init__.py +20 -0
  128. onedal/spmd/decomposition/incremental_pca.py +117 -0
  129. onedal/spmd/decomposition/pca.py +26 -0
  130. onedal/spmd/ensemble/__init__.py +19 -0
  131. onedal/spmd/ensemble/forest.py +28 -0
  132. onedal/spmd/linear_model/__init__.py +21 -0
  133. onedal/spmd/linear_model/incremental_linear_model.py +97 -0
  134. onedal/spmd/linear_model/linear_model.py +30 -0
  135. onedal/spmd/linear_model/logistic_regression.py +38 -0
  136. onedal/spmd/neighbors/__init__.py +19 -0
  137. onedal/spmd/neighbors/neighbors.py +75 -0
  138. onedal/svm/__init__.py +19 -0
  139. onedal/svm/svm.py +556 -0
  140. onedal/svm/tests/test_csr_svm.py +351 -0
  141. onedal/svm/tests/test_nusvc.py +204 -0
  142. onedal/svm/tests/test_nusvr.py +210 -0
  143. onedal/svm/tests/test_svc.py +176 -0
  144. onedal/svm/tests/test_svr.py +243 -0
  145. onedal/tests/test_common.py +57 -0
  146. onedal/tests/utils/_dataframes_support.py +162 -0
  147. onedal/tests/utils/_device_selection.py +102 -0
  148. onedal/utils/__init__.py +49 -0
  149. onedal/utils/_array_api.py +81 -0
  150. onedal/utils/_dpep_helpers.py +56 -0
  151. onedal/utils/validation.py +440 -0
  152. scikit_learn_intelex-2025.1.0.dist-info/LICENSE.txt +202 -0
  153. scikit_learn_intelex-2025.1.0.dist-info/METADATA +231 -0
  154. scikit_learn_intelex-2025.1.0.dist-info/RECORD +280 -0
  155. scikit_learn_intelex-2025.1.0.dist-info/WHEEL +5 -0
  156. scikit_learn_intelex-2025.1.0.dist-info/top_level.txt +3 -0
  157. sklearnex/__init__.py +66 -0
  158. sklearnex/__main__.py +58 -0
  159. sklearnex/_config.py +116 -0
  160. sklearnex/_device_offload.py +126 -0
  161. sklearnex/_utils.py +132 -0
  162. sklearnex/basic_statistics/__init__.py +20 -0
  163. sklearnex/basic_statistics/basic_statistics.py +230 -0
  164. sklearnex/basic_statistics/incremental_basic_statistics.py +345 -0
  165. sklearnex/basic_statistics/tests/test_basic_statistics.py +270 -0
  166. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +404 -0
  167. sklearnex/cluster/__init__.py +20 -0
  168. sklearnex/cluster/dbscan.py +197 -0
  169. sklearnex/cluster/k_means.py +395 -0
  170. sklearnex/cluster/tests/test_dbscan.py +38 -0
  171. sklearnex/cluster/tests/test_kmeans.py +159 -0
  172. sklearnex/conftest.py +82 -0
  173. sklearnex/covariance/__init__.py +19 -0
  174. sklearnex/covariance/incremental_covariance.py +398 -0
  175. sklearnex/covariance/tests/test_incremental_covariance.py +237 -0
  176. sklearnex/decomposition/__init__.py +19 -0
  177. sklearnex/decomposition/pca.py +425 -0
  178. sklearnex/decomposition/tests/test_pca.py +58 -0
  179. sklearnex/dispatcher.py +543 -0
  180. sklearnex/doc/third-party-programs.txt +424 -0
  181. sklearnex/ensemble/__init__.py +29 -0
  182. sklearnex/ensemble/_forest.py +2029 -0
  183. sklearnex/ensemble/tests/test_forest.py +135 -0
  184. sklearnex/glob/__main__.py +72 -0
  185. sklearnex/glob/dispatcher.py +101 -0
  186. sklearnex/linear_model/__init__.py +32 -0
  187. sklearnex/linear_model/coordinate_descent.py +30 -0
  188. sklearnex/linear_model/incremental_linear.py +482 -0
  189. sklearnex/linear_model/incremental_ridge.py +425 -0
  190. sklearnex/linear_model/linear.py +341 -0
  191. sklearnex/linear_model/logistic_regression.py +413 -0
  192. sklearnex/linear_model/ridge.py +24 -0
  193. sklearnex/linear_model/tests/test_incremental_linear.py +207 -0
  194. sklearnex/linear_model/tests/test_incremental_ridge.py +153 -0
  195. sklearnex/linear_model/tests/test_linear.py +167 -0
  196. sklearnex/linear_model/tests/test_logreg.py +134 -0
  197. sklearnex/manifold/__init__.py +19 -0
  198. sklearnex/manifold/t_sne.py +21 -0
  199. sklearnex/manifold/tests/test_tsne.py +26 -0
  200. sklearnex/metrics/__init__.py +23 -0
  201. sklearnex/metrics/pairwise.py +22 -0
  202. sklearnex/metrics/ranking.py +20 -0
  203. sklearnex/metrics/tests/test_metrics.py +39 -0
  204. sklearnex/model_selection/__init__.py +21 -0
  205. sklearnex/model_selection/split.py +22 -0
  206. sklearnex/model_selection/tests/test_model_selection.py +34 -0
  207. sklearnex/neighbors/__init__.py +27 -0
  208. sklearnex/neighbors/_lof.py +236 -0
  209. sklearnex/neighbors/common.py +310 -0
  210. sklearnex/neighbors/knn_classification.py +231 -0
  211. sklearnex/neighbors/knn_regression.py +207 -0
  212. sklearnex/neighbors/knn_unsupervised.py +178 -0
  213. sklearnex/neighbors/tests/test_neighbors.py +82 -0
  214. sklearnex/preview/__init__.py +17 -0
  215. sklearnex/preview/covariance/__init__.py +19 -0
  216. sklearnex/preview/covariance/covariance.py +138 -0
  217. sklearnex/preview/covariance/tests/test_covariance.py +66 -0
  218. sklearnex/preview/decomposition/__init__.py +19 -0
  219. sklearnex/preview/decomposition/incremental_pca.py +233 -0
  220. sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
  221. sklearnex/preview/linear_model/__init__.py +19 -0
  222. sklearnex/preview/linear_model/ridge.py +424 -0
  223. sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
  224. sklearnex/spmd/__init__.py +25 -0
  225. sklearnex/spmd/basic_statistics/__init__.py +20 -0
  226. sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
  227. sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
  228. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  229. sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +307 -0
  230. sklearnex/spmd/cluster/__init__.py +30 -0
  231. sklearnex/spmd/cluster/dbscan.py +50 -0
  232. sklearnex/spmd/cluster/kmeans.py +21 -0
  233. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  234. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
  235. sklearnex/spmd/covariance/__init__.py +20 -0
  236. sklearnex/spmd/covariance/covariance.py +21 -0
  237. sklearnex/spmd/covariance/incremental_covariance.py +37 -0
  238. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  239. sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
  240. sklearnex/spmd/decomposition/__init__.py +20 -0
  241. sklearnex/spmd/decomposition/incremental_pca.py +30 -0
  242. sklearnex/spmd/decomposition/pca.py +21 -0
  243. sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
  244. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  245. sklearnex/spmd/ensemble/__init__.py +19 -0
  246. sklearnex/spmd/ensemble/forest.py +71 -0
  247. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  248. sklearnex/spmd/linear_model/__init__.py +21 -0
  249. sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
  250. sklearnex/spmd/linear_model/linear_model.py +21 -0
  251. sklearnex/spmd/linear_model/logistic_regression.py +21 -0
  252. sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +329 -0
  253. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  254. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +162 -0
  255. sklearnex/spmd/neighbors/__init__.py +19 -0
  256. sklearnex/spmd/neighbors/neighbors.py +25 -0
  257. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  258. sklearnex/svm/__init__.py +29 -0
  259. sklearnex/svm/_common.py +339 -0
  260. sklearnex/svm/nusvc.py +371 -0
  261. sklearnex/svm/nusvr.py +170 -0
  262. sklearnex/svm/svc.py +399 -0
  263. sklearnex/svm/svr.py +167 -0
  264. sklearnex/svm/tests/test_svm.py +93 -0
  265. sklearnex/tests/test_common.py +390 -0
  266. sklearnex/tests/test_config.py +123 -0
  267. sklearnex/tests/test_memory_usage.py +379 -0
  268. sklearnex/tests/test_monkeypatch.py +276 -0
  269. sklearnex/tests/test_n_jobs_support.py +108 -0
  270. sklearnex/tests/test_parallel.py +48 -0
  271. sklearnex/tests/test_patching.py +385 -0
  272. sklearnex/tests/test_run_to_run_stability.py +321 -0
  273. sklearnex/tests/utils/__init__.py +44 -0
  274. sklearnex/tests/utils/base.py +371 -0
  275. sklearnex/tests/utils/spmd.py +198 -0
  276. sklearnex/utils/__init__.py +19 -0
  277. sklearnex/utils/_array_api.py +82 -0
  278. sklearnex/utils/parallel.py +59 -0
  279. sklearnex/utils/tests/test_finite.py +89 -0
  280. sklearnex/utils/validation.py +17 -0
@@ -0,0 +1,107 @@
1
+ # ==============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import numpy as np
18
+ import pytest
19
+ from numpy.testing import assert_allclose
20
+
21
+ from onedal.basic_statistics.tests.test_basic_statistics import options_and_tests
22
+ from onedal.tests.utils._dataframes_support import (
23
+ _convert_to_dataframe,
24
+ get_dataframes_and_queues,
25
+ )
26
+ from sklearnex.tests.utils.spmd import (
27
+ _generate_statistic_data,
28
+ _get_local_tensor,
29
+ _mpi_libs_and_gpu_available,
30
+ )
31
+
32
+
33
@pytest.mark.skipif(
    not _mpi_libs_and_gpu_available,
    reason="GPU device and MPI libs required for test",
)
@pytest.mark.parametrize(
    "dataframe,queue",
    get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
)
@pytest.mark.mpi
def test_basic_stats_spmd_gold(dataframe, queue):
    """Compare SPMD and batch BasicStatistics on a small fixed dataset.

    The SPMD estimator is fitted on the output of ``_get_local_tensor``
    converted to the ``dataframe`` format on ``queue``; the batch estimator
    is fitted on the full NumPy array. Every result option named in
    ``options_and_tests`` must agree between the two fitted estimators.
    """
    # Estimators are imported inside the test body rather than at module level.
    from onedal.basic_statistics import BasicStatistics as BasicStatistics_Batch
    from sklearnex.spmd.basic_statistics import BasicStatistics as BasicStatistics_SPMD

    # Fixed ("gold") input: 7 samples x 3 features.
    gold = np.array(
        [
            [0.0, 0.0, 0.0],
            [0.0, 1.0, 2.0],
            [0.0, 2.0, 4.0],
            [0.0, 3.0, 8.0],
            [0.0, 4.0, 16.0],
            [0.0, 5.0, 32.0],
            [0.0, 6.0, 64.0],
        ]
    )

    shard = _get_local_tensor(gold)
    shard_df = _convert_to_dataframe(shard, sycl_queue=queue, target_df=dataframe)

    # SPMD fit runs first, then the batch reference fit on the full array.
    distributed = BasicStatistics_SPMD().fit(shard_df)
    reference = BasicStatistics_Batch().fit(gold)

    for entry in options_and_tests:
        name = entry[0]  # first element of each entry is the result-option name
        assert_allclose(getattr(distributed, name), getattr(reference, name))
70
+
71
+
72
@pytest.mark.skipif(
    not _mpi_libs_and_gpu_available,
    reason="GPU device and MPI libs required for test",
)
@pytest.mark.parametrize("n_samples", [100, 10000])
@pytest.mark.parametrize("n_features", [10, 100])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize(
    "dataframe,queue",
    get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
)
@pytest.mark.mpi
def test_basic_stats_spmd_synthetic(n_samples, n_features, dataframe, queue, dtype):
    """Compare SPMD and batch BasicStatistics on generated data.

    Data comes from ``_generate_statistic_data``. The SPMD estimator is
    fitted on the ``_get_local_tensor`` output converted to ``dataframe``;
    the batch estimator is fitted on the full array. Results are compared
    with a dtype-dependent absolute and relative tolerance.
    """
    # Estimators are imported inside the test body rather than at module level.
    from onedal.basic_statistics import BasicStatistics as BasicStatistics_Batch
    from sklearnex.spmd.basic_statistics import BasicStatistics as BasicStatistics_SPMD

    # Looser tolerance for single precision.
    if dtype == np.float32:
        tolerance = 1e-5
    else:
        tolerance = 1e-7

    samples = _generate_statistic_data(n_samples, n_features, dtype=dtype)
    shard = _get_local_tensor(samples)
    shard_df = _convert_to_dataframe(shard, sycl_queue=queue, target_df=dataframe)

    # SPMD fit runs first, then the batch reference fit on the full array.
    distributed = BasicStatistics_SPMD().fit(shard_df)
    reference = BasicStatistics_Batch().fit(samples)

    for entry in options_and_tests:
        name = entry[0]  # first element of each entry is the result-option name
        actual = getattr(distributed, name)
        expected = getattr(reference, name)
        assert_allclose(actual, expected, atol=tolerance, rtol=tolerance)
@@ -0,0 +1,307 @@
1
+ # ==============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import numpy as np
18
+ import pytest
19
+ from numpy.testing import assert_allclose
20
+
21
+ from onedal.basic_statistics.tests.test_basic_statistics import options_and_tests
22
+ from onedal.tests.utils._dataframes_support import (
23
+ _convert_to_dataframe,
24
+ get_dataframes_and_queues,
25
+ )
26
+ from sklearnex.tests.utils.spmd import (
27
+ _generate_statistic_data,
28
+ _get_local_tensor,
29
+ _mpi_libs_and_gpu_available,
30
+ )
31
+
32
+
33
@pytest.mark.skipif(
    not _mpi_libs_and_gpu_available,
    reason="GPU device and MPI libs required for test",
)
@pytest.mark.parametrize(
    "dataframe,queue",
    get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
)
@pytest.mark.parametrize("weighted", [True, False])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.mpi
def test_incremental_basic_statistics_fit_spmd_gold(dataframe, queue, weighted, dtype):
    """Check ``fit`` of SPMD IncrementalBasicStatistics against the non-SPMD one.

    Both estimators are fitted on a fixed 8x3 dataset (optionally with
    per-sample weights): the SPMD one on the ``_get_local_tensor`` output,
    the non-SPMD one on the full data. Every result option listed in
    ``options_and_tests`` must match.
    """
    # Estimators are imported inside the test body rather than at module level.
    from sklearnex.basic_statistics import IncrementalBasicStatistics
    from sklearnex.spmd.basic_statistics import (
        IncrementalBasicStatistics as IncrementalBasicStatistics_SPMD,
    )

    # Fixed ("gold") input: 8 samples x 3 features.
    gold = np.array(
        [
            [0.0, 0.0, 0.0],
            [0.0, 1.0, 2.0],
            [0.0, 2.0, 4.0],
            [0.0, 3.0, 8.0],
            [0.0, 4.0, 16.0],
            [0.0, 5.0, 32.0],
            [0.0, 6.0, 64.0],
            [0.0, 7.0, 128.0],
        ],
        dtype=dtype,
    )
    full_df = _convert_to_dataframe(gold, sycl_queue=queue, target_df=dataframe)
    shard_df = _convert_to_dataframe(
        _get_local_tensor(gold), sycl_queue=queue, target_df=dataframe
    )

    # Weights stay None for the unweighted parametrization.
    full_weights_df = None
    shard_weights_df = None
    if weighted:
        # One weight per sample of the gold data.
        sample_weights = np.array(
            [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0], dtype=dtype
        )
        full_weights_df = _convert_to_dataframe(
            sample_weights, sycl_queue=queue, target_df=dataframe
        )
        shard_weights_df = _convert_to_dataframe(
            _get_local_tensor(sample_weights), sycl_queue=queue, target_df=dataframe
        )

    # SPMD fit runs first, then the reference fit on the full data.
    spmd_est = IncrementalBasicStatistics_SPMD().fit(
        shard_df, sample_weight=shard_weights_df
    )
    reference_est = IncrementalBasicStatistics().fit(
        full_df, sample_weight=full_weights_df
    )

    for option, _, _ in options_and_tests:
        actual = getattr(spmd_est, option)
        expected = getattr(reference_est, option)
        assert_allclose(
            actual, expected, err_msg=f"Result for {option} is incorrect"
        )
96
+
97
+
98
@pytest.mark.skipif(
    not _mpi_libs_and_gpu_available,
    reason="GPU device and MPI libs required for test",
)
@pytest.mark.parametrize(
    "dataframe,queue",
    get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
)
@pytest.mark.parametrize("num_blocks", [1, 2])
@pytest.mark.parametrize("weighted", [True, False])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.mpi
def test_incremental_basic_statistics_partial_fit_spmd_gold(
    dataframe, queue, num_blocks, weighted, dtype
):
    """Check SPMD ``partial_fit`` over blocks against a single non-SPMD ``fit``.

    The ``_get_local_tensor`` output of a fixed 8x3 dataset is split into
    ``num_blocks`` pieces that are fed one by one to the SPMD estimator's
    ``partial_fit``. The non-SPMD estimator is fitted once on the full data.
    Every result option listed in ``options_and_tests`` must match.
    """
    # Estimators are imported inside the test body rather than at module level.
    from sklearnex.basic_statistics import IncrementalBasicStatistics
    from sklearnex.spmd.basic_statistics import (
        IncrementalBasicStatistics as IncrementalBasicStatistics_SPMD,
    )

    # Fixed ("gold") input: 8 samples x 3 features.
    gold = np.array(
        [
            [0.0, 0.0, 0.0],
            [0.0, 1.0, 2.0],
            [0.0, 2.0, 4.0],
            [0.0, 3.0, 8.0],
            [0.0, 4.0, 16.0],
            [0.0, 5.0, 32.0],
            [0.0, 6.0, 64.0],
            [0.0, 7.0, 128.0],
        ],
        dtype=dtype,
    )
    full_df = _convert_to_dataframe(gold, sycl_queue=queue, target_df=dataframe)
    shard_blocks = np.array_split(_get_local_tensor(gold), num_blocks)

    # Unweighted runs pass None as the weight of every block.
    full_weights_df = None
    shard_weight_blocks = [None] * num_blocks
    if weighted:
        # One weight per sample of the gold data.
        sample_weights = np.array(
            [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0], dtype=dtype
        )
        full_weights_df = _convert_to_dataframe(
            sample_weights, sycl_queue=queue, target_df=dataframe
        )
        shard_weight_blocks = np.array_split(
            _get_local_tensor(sample_weights), num_blocks
        )

    spmd_est = IncrementalBasicStatistics_SPMD()
    reference_est = IncrementalBasicStatistics()

    # Feed the SPMD estimator block by block.
    for block, weight_block in zip(shard_blocks, shard_weight_blocks):
        block_df = _convert_to_dataframe(
            block, sycl_queue=queue, target_df=dataframe
        )
        weight_df = None
        if weight_block is not None:
            weight_df = _convert_to_dataframe(
                weight_block, sycl_queue=queue, target_df=dataframe
            )
        spmd_est.partial_fit(block_df, sample_weight=weight_df)

    # The reference sees all data in one call.
    reference_est.fit(full_df, sample_weight=full_weights_df)

    for option, _, _ in options_and_tests:
        actual = getattr(spmd_est, option)
        expected = getattr(reference_est, option)
        assert_allclose(
            actual, expected, err_msg=f"Result for {option} is incorrect"
        )
169
+
170
+
171
@pytest.mark.skipif(
    not _mpi_libs_and_gpu_available,
    reason="GPU device and MPI libs required for test",
)
@pytest.mark.parametrize(
    "dataframe,queue",
    get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
)
@pytest.mark.parametrize("num_blocks", [1, 2])
@pytest.mark.parametrize("weighted", [True, False])
@pytest.mark.parametrize("result_option", options_and_tests)
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.mpi
def test_incremental_basic_statistics_single_option_partial_fit_spmd_gold(
    dataframe, queue, num_blocks, weighted, result_option, dtype
):
    """Check SPMD ``partial_fit`` when only one result option is requested.

    Both estimators are constructed with ``result_options`` set to the first
    element of ``result_option``. The SPMD estimator consumes the
    ``_get_local_tensor`` output of a fixed 8x3 dataset in ``num_blocks``
    pieces; the non-SPMD estimator is fitted once on the full data. The
    requested attribute must match between the two.
    """
    # Estimators are imported inside the test body rather than at module level.
    from sklearnex.basic_statistics import IncrementalBasicStatistics
    from sklearnex.spmd.basic_statistics import (
        IncrementalBasicStatistics as IncrementalBasicStatistics_SPMD,
    )

    # Fixed ("gold") input: 8 samples x 3 features.
    gold = np.array(
        [
            [0.0, 0.0, 0.0],
            [0.0, 1.0, 2.0],
            [0.0, 2.0, 4.0],
            [0.0, 3.0, 8.0],
            [0.0, 4.0, 16.0],
            [0.0, 5.0, 32.0],
            [0.0, 6.0, 64.0],
            [0.0, 7.0, 128.0],
        ],
        dtype=dtype,
    )
    full_df = _convert_to_dataframe(gold, sycl_queue=queue, target_df=dataframe)
    shard_blocks = np.array_split(_get_local_tensor(gold), num_blocks)

    # Unweighted runs pass None as the weight of every block.
    full_weights_df = None
    shard_weight_blocks = [None] * num_blocks
    if weighted:
        # One weight per sample of the gold data.
        sample_weights = np.array(
            [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0], dtype=dtype
        )
        full_weights_df = _convert_to_dataframe(
            sample_weights, sycl_queue=queue, target_df=dataframe
        )
        shard_weight_blocks = np.array_split(
            _get_local_tensor(sample_weights), num_blocks
        )

    # Each parametrized entry is a 3-item sequence; only the name is used here.
    option, _, _ = result_option
    spmd_est = IncrementalBasicStatistics_SPMD(result_options=option)
    reference_est = IncrementalBasicStatistics(result_options=option)

    # Feed the SPMD estimator block by block.
    for block, weight_block in zip(shard_blocks, shard_weight_blocks):
        block_df = _convert_to_dataframe(
            block, sycl_queue=queue, target_df=dataframe
        )
        weight_df = None
        if weight_block is not None:
            weight_df = _convert_to_dataframe(
                weight_block, sycl_queue=queue, target_df=dataframe
            )
        spmd_est.partial_fit(block_df, sample_weight=weight_df)

    # The reference sees all data in one call.
    reference_est.fit(full_df, sample_weight=full_weights_df)

    actual = getattr(spmd_est, option)
    expected = getattr(reference_est, option)
    assert_allclose(actual, expected)
239
+
240
+
241
@pytest.mark.skipif(
    not _mpi_libs_and_gpu_available,
    reason="GPU device and MPI libs required for test",
)
@pytest.mark.parametrize(
    "dataframe,queue",
    get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
)
@pytest.mark.parametrize("num_blocks", [1, 2])
@pytest.mark.parametrize("weighted", [True, False])
@pytest.mark.parametrize("n_samples", [100, 10000])
@pytest.mark.parametrize("n_features", [10, 100])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.mpi
def test_incremental_basic_statistics_partial_fit_spmd_synthetic(
    dataframe, queue, num_blocks, weighted, n_samples, n_features, dtype
):
    """Check SPMD ``partial_fit`` against non-SPMD ``partial_fit`` on generated data.

    Data (and optional weights) come from ``_generate_statistic_data``. In
    each of ``num_blocks`` rounds the SPMD estimator receives a block of the
    ``_get_local_tensor`` output and the non-SPMD estimator receives the
    corresponding block of the full data. Every result option listed in
    ``options_and_tests`` must match within a dtype-dependent ``atol``.
    """
    # Estimators are imported inside the test body rather than at module level.
    from sklearnex.basic_statistics import IncrementalBasicStatistics
    from sklearnex.spmd.basic_statistics import (
        IncrementalBasicStatistics as IncrementalBasicStatistics_SPMD,
    )

    # Looser absolute tolerance for single precision.
    if dtype == np.float32:
        tolerance = 2e-3
    else:
        tolerance = 1e-7

    # Synthetic input, split both per-rank and globally into num_blocks pieces.
    samples = _generate_statistic_data(n_samples, n_features, dtype=dtype)
    shard_blocks = np.array_split(_get_local_tensor(samples), num_blocks)
    full_blocks = np.array_split(samples, num_blocks)

    # Unweighted runs pass None as the weight of every block.
    shard_weight_blocks = [None] * num_blocks
    full_weight_blocks = [None] * num_blocks
    if weighted:
        # One generated weight per sample.
        sample_weights = _generate_statistic_data(n_samples, dtype=dtype)
        shard_weight_blocks = np.array_split(
            _get_local_tensor(sample_weights), num_blocks
        )
        full_weight_blocks = np.array_split(sample_weights, num_blocks)

    spmd_est = IncrementalBasicStatistics_SPMD()
    reference_est = IncrementalBasicStatistics()

    rounds = zip(shard_blocks, full_blocks, shard_weight_blocks, full_weight_blocks)
    for shard_block, full_block, shard_w, full_w in rounds:
        shard_df = _convert_to_dataframe(
            shard_block, sycl_queue=queue, target_df=dataframe
        )
        full_df = _convert_to_dataframe(
            full_block, sycl_queue=queue, target_df=dataframe
        )
        shard_w_df = None
        full_w_df = None
        if weighted:
            shard_w_df = _convert_to_dataframe(
                shard_w, sycl_queue=queue, target_df=dataframe
            )
            full_w_df = _convert_to_dataframe(
                full_w, sycl_queue=queue, target_df=dataframe
            )
        # SPMD update first, then the reference update.
        spmd_est.partial_fit(shard_df, sample_weight=shard_w_df)
        reference_est.partial_fit(full_df, sample_weight=full_w_df)

    for option, _, _ in options_and_tests:
        actual = getattr(spmd_est, option)
        expected = getattr(reference_est, option)
        assert_allclose(
            actual,
            expected,
            atol=tolerance,
            err_msg=f"Result for {option} is incorrect",
        )
@@ -0,0 +1,30 @@
1
+ # ==============================================================================
2
+ # Copyright 2023 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ from daal4py.sklearn._utils import daal_check_version
18
+
19
+ from .dbscan import DBSCAN
20
+
21
# DBSCAN is always exported; KMeans is imported and exported only when
# daal_check_version((2023, "P", 200)) passes.
# TODO:
# update versioning for DBSCAN.
__all__ = ["DBSCAN"]

if daal_check_version((2023, "P", 200)):
    from .kmeans import KMeans

    __all__.append("KMeans")
@@ -0,0 +1,50 @@
1
+ # ==============================================================================
2
+ # Copyright 2023 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ from abc import ABC
18
+
19
+ from onedal.spmd.cluster import DBSCAN as onedal_DBSCAN
20
+
21
+ from ...cluster import DBSCAN as DBSCAN_Batch
22
+
23
+
24
+ class BaseDBSCANspmd(ABC):  # Base class that supplies the SPMD oneDAL DBSCAN object to the estimator below.
25
+ def _onedal_dbscan(self, **onedal_params):  # Builds the backend object; presumably invoked by the batch estimator - confirm.
26
+ return onedal_DBSCAN(**onedal_params)  # All keyword arguments are forwarded unchanged to onedal.spmd.cluster.DBSCAN.
27
+
28
+
29
+ class DBSCAN(BaseDBSCANspmd, DBSCAN_Batch):  # SPMD DBSCAN: the batch estimator combined with the SPMD backend base class.
30
+ __doc__ = DBSCAN_Batch.__doc__  # Reuse the batch estimator's docstring as-is.
31
+
32
+ def _onedal_cpu_supported(self, method_name, *data):  # Parent CPU support check, but a falsy result becomes an exception.
33
+ # TODO:
34
+ # check which methods support the SPMD interface on CPU.
35
+ ready = super()._onedal_cpu_supported(method_name, *data)
36
+ if not ready:  # Unsupported inputs raise here instead of being reported through the return value.
37
+ raise RuntimeError(
38
+ f"Method {method_name} in {self.__class__.__name__} "
39
+ "is not supported with given inputs."
40
+ )
41
+ return ready  # Only reached when the parent check returned a truthy value.
42
+
43
+ def _onedal_gpu_supported(self, method_name, *data):  # Same contract as the CPU variant, for the parent GPU support check.
44
+ ready = super()._onedal_gpu_supported(method_name, *data)
45
+ if not ready:  # Unsupported inputs raise here instead of being reported through the return value.
46
+ raise RuntimeError(
47
+ f"Method {method_name} in {self.__class__.__name__} "
48
+ "is not supported with given inputs."
49
+ )
50
+ return ready  # Only reached when the parent check returned a truthy value.
@@ -0,0 +1,21 @@
1
+ # ==============================================================================
2
+ # Copyright 2023 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ from onedal.spmd.cluster import KMeans
18
+
19
+ # TODO:
20
+ # Currently this module re-exports KMeans directly from the `onedal` module interface.
21
+ # Add sklearnex dispatching.
@@ -0,0 +1,97 @@
1
+ # ==============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import numpy as np
18
+ import pytest
19
+
20
+ from onedal.tests.utils._dataframes_support import (
21
+ _convert_to_dataframe,
22
+ get_dataframes_and_queues,
23
+ )
24
+ from sklearnex.tests.utils.spmd import (
25
+ _generate_clustering_data,
26
+ _get_local_tensor,
27
+ _mpi_libs_and_gpu_available,
28
+ _spmd_assert_allclose,
29
+ )
30
+
31
+
32
+ @pytest.mark.skipif(
33
+ not _mpi_libs_and_gpu_available,
34
+ reason="GPU device and MPI libs required for test",
35
+ )
36
+ @pytest.mark.parametrize(
37
+ "dataframe,queue",
38
+ get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
39
+ )
40
+ @pytest.mark.mpi
41
+ def test_dbscan_spmd_gold(dataframe, queue):  # Checks SPMD DBSCAN labels against batch DBSCAN on a fixed six-point dataset.
42
+ # Import the SPMD and batch estimators inside the test body
43
+ from sklearnex.cluster import DBSCAN as DBSCAN_Batch
44
+ from sklearnex.spmd.cluster import DBSCAN as DBSCAN_SPMD
45
+
46
+ data = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]])
47
+
48
+ local_dpt_data = _convert_to_dataframe(  # Presumably this rank's portion of `data` - confirm in sklearnex.tests.utils.spmd.
49
+ _get_local_tensor(data), sycl_queue=queue, target_df=dataframe
50
+ )
51
+
52
+ # Ensure the labels from fitting the batch algorithm match those from the SPMD algorithm
53
+ spmd_model = DBSCAN_SPMD(eps=3, min_samples=2).fit(local_dpt_data)  # SPMD model is fitted on the converted local data.
54
+ batch_model = DBSCAN_Batch(eps=3, min_samples=2).fit(data)  # Batch model is fitted on the full NumPy array.
55
+
56
+ _spmd_assert_allclose(spmd_model.labels_, batch_model.labels_)
57
+
58
+
59
+ @pytest.mark.skipif(
60
+ not _mpi_libs_and_gpu_available,
61
+ reason="GPU device and MPI libs required for test",
62
+ )
63
+ @pytest.mark.parametrize("n_samples", [200, 10000])
64
+ @pytest.mark.parametrize("n_features_and_eps", [(5, 3), (5, 10), (25, 10)])  # Each tuple is (n_features, eps); unpacked in the body.
65
+ @pytest.mark.parametrize("centers", [10, None])
66
+ @pytest.mark.parametrize("min_samples", [2, 5, 15])
67
+ @pytest.mark.parametrize(
68
+ "dataframe,queue",
69
+ get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
70
+ )
71
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
72
+ @pytest.mark.mpi
73
+ def test_dbscan_spmd_synthetic(  # Checks SPMD DBSCAN labels against batch DBSCAN on generated clustering data.
74
+ n_samples, n_features_and_eps, centers, min_samples, dataframe, queue, dtype
75
+ ):
76
+ n_features, eps = n_features_and_eps
77
+ # Import the SPMD and batch estimators inside the test body
78
+ from sklearnex.cluster import DBSCAN as DBSCAN_Batch
79
+ from sklearnex.spmd.cluster import DBSCAN as DBSCAN_SPMD
80
+
81
+ data, _ = _generate_clustering_data(  # The second returned value is discarded; only the feature data is used.
82
+ n_samples, n_features, centers=centers, dtype=dtype
83
+ )
84
+
85
+ local_dpt_data = _convert_to_dataframe(  # Presumably this rank's portion of `data` - confirm in sklearnex.tests.utils.spmd.
86
+ _get_local_tensor(data), sycl_queue=queue, target_df=dataframe
87
+ )
88
+
89
+ # Ensure the labels from fitting the batch algorithm match those from the SPMD algorithm
90
+ spmd_model = DBSCAN_SPMD(eps=eps, min_samples=min_samples).fit(local_dpt_data)
91
+ batch_model = DBSCAN_Batch(eps=eps, min_samples=min_samples).fit(data)
92
+
93
+ _spmd_assert_allclose(spmd_model.labels_, batch_model.labels_)
94
+
95
+ # Ensure meaningful test setup: fail if every batch label is -1, so the comparison above is not trivial
96
+ if np.all(batch_model.labels_ == -1):
97
+ raise ValueError("No labels given - try raising epsilon")