scikit-learn-intelex 2025.1.0__py311-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (280)
  1. daal4py/__init__.py +73 -0
  2. daal4py/__main__.py +58 -0
  3. daal4py/_daal4py.cpython-311-x86_64-linux-gnu.so +0 -0
  4. daal4py/doc/third-party-programs.txt +424 -0
  5. daal4py/mb/__init__.py +19 -0
  6. daal4py/mb/model_builders.py +377 -0
  7. daal4py/mpi_transceiver.cpython-311-x86_64-linux-gnu.so +0 -0
  8. daal4py/sklearn/__init__.py +40 -0
  9. daal4py/sklearn/_n_jobs_support.py +248 -0
  10. daal4py/sklearn/_utils.py +245 -0
  11. daal4py/sklearn/cluster/__init__.py +20 -0
  12. daal4py/sklearn/cluster/dbscan.py +165 -0
  13. daal4py/sklearn/cluster/k_means.py +597 -0
  14. daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
  15. daal4py/sklearn/decomposition/__init__.py +19 -0
  16. daal4py/sklearn/decomposition/_pca.py +524 -0
  17. daal4py/sklearn/ensemble/AdaBoostClassifier.py +196 -0
  18. daal4py/sklearn/ensemble/GBTDAAL.py +337 -0
  19. daal4py/sklearn/ensemble/__init__.py +27 -0
  20. daal4py/sklearn/ensemble/_forest.py +1397 -0
  21. daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
  22. daal4py/sklearn/linear_model/__init__.py +29 -0
  23. daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
  24. daal4py/sklearn/linear_model/_linear.py +272 -0
  25. daal4py/sklearn/linear_model/_ridge.py +325 -0
  26. daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
  27. daal4py/sklearn/linear_model/linear.py +17 -0
  28. daal4py/sklearn/linear_model/logistic_loss.py +195 -0
  29. daal4py/sklearn/linear_model/logistic_path.py +1026 -0
  30. daal4py/sklearn/linear_model/ridge.py +17 -0
  31. daal4py/sklearn/linear_model/tests/test_linear.py +208 -0
  32. daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
  33. daal4py/sklearn/manifold/__init__.py +19 -0
  34. daal4py/sklearn/manifold/_t_sne.py +405 -0
  35. daal4py/sklearn/metrics/__init__.py +20 -0
  36. daal4py/sklearn/metrics/_pairwise.py +236 -0
  37. daal4py/sklearn/metrics/_ranking.py +210 -0
  38. daal4py/sklearn/model_selection/__init__.py +19 -0
  39. daal4py/sklearn/model_selection/_split.py +309 -0
  40. daal4py/sklearn/model_selection/tests/test_split.py +56 -0
  41. daal4py/sklearn/monkeypatch/__init__.py +0 -0
  42. daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
  43. daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
  44. daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
  45. daal4py/sklearn/monkeypatch/tests/test_patching.py +90 -0
  46. daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +117 -0
  47. daal4py/sklearn/neighbors/__init__.py +21 -0
  48. daal4py/sklearn/neighbors/_base.py +503 -0
  49. daal4py/sklearn/neighbors/_classification.py +139 -0
  50. daal4py/sklearn/neighbors/_regression.py +74 -0
  51. daal4py/sklearn/neighbors/_unsupervised.py +55 -0
  52. daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
  53. daal4py/sklearn/svm/__init__.py +19 -0
  54. daal4py/sklearn/svm/svm.py +734 -0
  55. daal4py/sklearn/utils/__init__.py +21 -0
  56. daal4py/sklearn/utils/base.py +75 -0
  57. daal4py/sklearn/utils/tests/test_utils.py +51 -0
  58. daal4py/sklearn/utils/validation.py +693 -0
  59. onedal/__init__.py +83 -0
  60. onedal/_config.py +54 -0
  61. onedal/_device_offload.py +222 -0
  62. onedal/_onedal_py_dpc.cpython-311-x86_64-linux-gnu.so +0 -0
  63. onedal/_onedal_py_host.cpython-311-x86_64-linux-gnu.so +0 -0
  64. onedal/_onedal_py_spmd_dpc.cpython-311-x86_64-linux-gnu.so +0 -0
  65. onedal/basic_statistics/__init__.py +20 -0
  66. onedal/basic_statistics/basic_statistics.py +107 -0
  67. onedal/basic_statistics/incremental_basic_statistics.py +160 -0
  68. onedal/basic_statistics/tests/test_basic_statistics.py +298 -0
  69. onedal/basic_statistics/tests/test_incremental_basic_statistics.py +196 -0
  70. onedal/cluster/__init__.py +27 -0
  71. onedal/cluster/dbscan.py +110 -0
  72. onedal/cluster/kmeans.py +564 -0
  73. onedal/cluster/kmeans_init.py +115 -0
  74. onedal/cluster/tests/test_dbscan.py +125 -0
  75. onedal/cluster/tests/test_kmeans.py +88 -0
  76. onedal/cluster/tests/test_kmeans_init.py +93 -0
  77. onedal/common/_base.py +38 -0
  78. onedal/common/_estimator_checks.py +47 -0
  79. onedal/common/_mixin.py +62 -0
  80. onedal/common/_policy.py +59 -0
  81. onedal/common/_spmd_policy.py +30 -0
  82. onedal/common/hyperparameters.py +125 -0
  83. onedal/common/tests/test_policy.py +76 -0
  84. onedal/covariance/__init__.py +20 -0
  85. onedal/covariance/covariance.py +125 -0
  86. onedal/covariance/incremental_covariance.py +146 -0
  87. onedal/covariance/tests/test_covariance.py +50 -0
  88. onedal/covariance/tests/test_incremental_covariance.py +122 -0
  89. onedal/datatypes/__init__.py +19 -0
  90. onedal/datatypes/_data_conversion.py +154 -0
  91. onedal/datatypes/tests/common.py +126 -0
  92. onedal/datatypes/tests/test_data.py +414 -0
  93. onedal/decomposition/__init__.py +20 -0
  94. onedal/decomposition/incremental_pca.py +204 -0
  95. onedal/decomposition/pca.py +186 -0
  96. onedal/decomposition/tests/test_incremental_pca.py +198 -0
  97. onedal/ensemble/__init__.py +29 -0
  98. onedal/ensemble/forest.py +727 -0
  99. onedal/ensemble/tests/test_random_forest.py +97 -0
  100. onedal/linear_model/__init__.py +27 -0
  101. onedal/linear_model/incremental_linear_model.py +258 -0
  102. onedal/linear_model/linear_model.py +329 -0
  103. onedal/linear_model/logistic_regression.py +249 -0
  104. onedal/linear_model/tests/test_incremental_linear_regression.py +168 -0
  105. onedal/linear_model/tests/test_incremental_ridge_regression.py +107 -0
  106. onedal/linear_model/tests/test_linear_regression.py +250 -0
  107. onedal/linear_model/tests/test_logistic_regression.py +95 -0
  108. onedal/linear_model/tests/test_ridge.py +95 -0
  109. onedal/neighbors/__init__.py +19 -0
  110. onedal/neighbors/neighbors.py +767 -0
  111. onedal/neighbors/tests/test_knn_classification.py +49 -0
  112. onedal/primitives/__init__.py +27 -0
  113. onedal/primitives/get_tree.py +25 -0
  114. onedal/primitives/kernel_functions.py +153 -0
  115. onedal/primitives/tests/test_kernel_functions.py +159 -0
  116. onedal/spmd/__init__.py +25 -0
  117. onedal/spmd/_base.py +30 -0
  118. onedal/spmd/basic_statistics/__init__.py +20 -0
  119. onedal/spmd/basic_statistics/basic_statistics.py +30 -0
  120. onedal/spmd/basic_statistics/incremental_basic_statistics.py +69 -0
  121. onedal/spmd/cluster/__init__.py +28 -0
  122. onedal/spmd/cluster/dbscan.py +23 -0
  123. onedal/spmd/cluster/kmeans.py +56 -0
  124. onedal/spmd/covariance/__init__.py +20 -0
  125. onedal/spmd/covariance/covariance.py +26 -0
  126. onedal/spmd/covariance/incremental_covariance.py +82 -0
  127. onedal/spmd/decomposition/__init__.py +20 -0
  128. onedal/spmd/decomposition/incremental_pca.py +117 -0
  129. onedal/spmd/decomposition/pca.py +26 -0
  130. onedal/spmd/ensemble/__init__.py +19 -0
  131. onedal/spmd/ensemble/forest.py +28 -0
  132. onedal/spmd/linear_model/__init__.py +21 -0
  133. onedal/spmd/linear_model/incremental_linear_model.py +97 -0
  134. onedal/spmd/linear_model/linear_model.py +30 -0
  135. onedal/spmd/linear_model/logistic_regression.py +38 -0
  136. onedal/spmd/neighbors/__init__.py +19 -0
  137. onedal/spmd/neighbors/neighbors.py +75 -0
  138. onedal/svm/__init__.py +19 -0
  139. onedal/svm/svm.py +556 -0
  140. onedal/svm/tests/test_csr_svm.py +351 -0
  141. onedal/svm/tests/test_nusvc.py +204 -0
  142. onedal/svm/tests/test_nusvr.py +210 -0
  143. onedal/svm/tests/test_svc.py +176 -0
  144. onedal/svm/tests/test_svr.py +243 -0
  145. onedal/tests/test_common.py +57 -0
  146. onedal/tests/utils/_dataframes_support.py +162 -0
  147. onedal/tests/utils/_device_selection.py +102 -0
  148. onedal/utils/__init__.py +49 -0
  149. onedal/utils/_array_api.py +81 -0
  150. onedal/utils/_dpep_helpers.py +56 -0
  151. onedal/utils/validation.py +440 -0
  152. scikit_learn_intelex-2025.1.0.dist-info/LICENSE.txt +202 -0
  153. scikit_learn_intelex-2025.1.0.dist-info/METADATA +231 -0
  154. scikit_learn_intelex-2025.1.0.dist-info/RECORD +280 -0
  155. scikit_learn_intelex-2025.1.0.dist-info/WHEEL +5 -0
  156. scikit_learn_intelex-2025.1.0.dist-info/top_level.txt +3 -0
  157. sklearnex/__init__.py +66 -0
  158. sklearnex/__main__.py +58 -0
  159. sklearnex/_config.py +116 -0
  160. sklearnex/_device_offload.py +126 -0
  161. sklearnex/_utils.py +132 -0
  162. sklearnex/basic_statistics/__init__.py +20 -0
  163. sklearnex/basic_statistics/basic_statistics.py +230 -0
  164. sklearnex/basic_statistics/incremental_basic_statistics.py +345 -0
  165. sklearnex/basic_statistics/tests/test_basic_statistics.py +270 -0
  166. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +404 -0
  167. sklearnex/cluster/__init__.py +20 -0
  168. sklearnex/cluster/dbscan.py +197 -0
  169. sklearnex/cluster/k_means.py +395 -0
  170. sklearnex/cluster/tests/test_dbscan.py +38 -0
  171. sklearnex/cluster/tests/test_kmeans.py +159 -0
  172. sklearnex/conftest.py +82 -0
  173. sklearnex/covariance/__init__.py +19 -0
  174. sklearnex/covariance/incremental_covariance.py +398 -0
  175. sklearnex/covariance/tests/test_incremental_covariance.py +237 -0
  176. sklearnex/decomposition/__init__.py +19 -0
  177. sklearnex/decomposition/pca.py +425 -0
  178. sklearnex/decomposition/tests/test_pca.py +58 -0
  179. sklearnex/dispatcher.py +543 -0
  180. sklearnex/doc/third-party-programs.txt +424 -0
  181. sklearnex/ensemble/__init__.py +29 -0
  182. sklearnex/ensemble/_forest.py +2029 -0
  183. sklearnex/ensemble/tests/test_forest.py +135 -0
  184. sklearnex/glob/__main__.py +72 -0
  185. sklearnex/glob/dispatcher.py +101 -0
  186. sklearnex/linear_model/__init__.py +32 -0
  187. sklearnex/linear_model/coordinate_descent.py +30 -0
  188. sklearnex/linear_model/incremental_linear.py +482 -0
  189. sklearnex/linear_model/incremental_ridge.py +425 -0
  190. sklearnex/linear_model/linear.py +341 -0
  191. sklearnex/linear_model/logistic_regression.py +413 -0
  192. sklearnex/linear_model/ridge.py +24 -0
  193. sklearnex/linear_model/tests/test_incremental_linear.py +207 -0
  194. sklearnex/linear_model/tests/test_incremental_ridge.py +153 -0
  195. sklearnex/linear_model/tests/test_linear.py +167 -0
  196. sklearnex/linear_model/tests/test_logreg.py +134 -0
  197. sklearnex/manifold/__init__.py +19 -0
  198. sklearnex/manifold/t_sne.py +21 -0
  199. sklearnex/manifold/tests/test_tsne.py +26 -0
  200. sklearnex/metrics/__init__.py +23 -0
  201. sklearnex/metrics/pairwise.py +22 -0
  202. sklearnex/metrics/ranking.py +20 -0
  203. sklearnex/metrics/tests/test_metrics.py +39 -0
  204. sklearnex/model_selection/__init__.py +21 -0
  205. sklearnex/model_selection/split.py +22 -0
  206. sklearnex/model_selection/tests/test_model_selection.py +34 -0
  207. sklearnex/neighbors/__init__.py +27 -0
  208. sklearnex/neighbors/_lof.py +236 -0
  209. sklearnex/neighbors/common.py +310 -0
  210. sklearnex/neighbors/knn_classification.py +231 -0
  211. sklearnex/neighbors/knn_regression.py +207 -0
  212. sklearnex/neighbors/knn_unsupervised.py +178 -0
  213. sklearnex/neighbors/tests/test_neighbors.py +82 -0
  214. sklearnex/preview/__init__.py +17 -0
  215. sklearnex/preview/covariance/__init__.py +19 -0
  216. sklearnex/preview/covariance/covariance.py +138 -0
  217. sklearnex/preview/covariance/tests/test_covariance.py +66 -0
  218. sklearnex/preview/decomposition/__init__.py +19 -0
  219. sklearnex/preview/decomposition/incremental_pca.py +233 -0
  220. sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
  221. sklearnex/preview/linear_model/__init__.py +19 -0
  222. sklearnex/preview/linear_model/ridge.py +424 -0
  223. sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
  224. sklearnex/spmd/__init__.py +25 -0
  225. sklearnex/spmd/basic_statistics/__init__.py +20 -0
  226. sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
  227. sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
  228. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  229. sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +307 -0
  230. sklearnex/spmd/cluster/__init__.py +30 -0
  231. sklearnex/spmd/cluster/dbscan.py +50 -0
  232. sklearnex/spmd/cluster/kmeans.py +21 -0
  233. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  234. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
  235. sklearnex/spmd/covariance/__init__.py +20 -0
  236. sklearnex/spmd/covariance/covariance.py +21 -0
  237. sklearnex/spmd/covariance/incremental_covariance.py +37 -0
  238. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  239. sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
  240. sklearnex/spmd/decomposition/__init__.py +20 -0
  241. sklearnex/spmd/decomposition/incremental_pca.py +30 -0
  242. sklearnex/spmd/decomposition/pca.py +21 -0
  243. sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
  244. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  245. sklearnex/spmd/ensemble/__init__.py +19 -0
  246. sklearnex/spmd/ensemble/forest.py +71 -0
  247. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  248. sklearnex/spmd/linear_model/__init__.py +21 -0
  249. sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
  250. sklearnex/spmd/linear_model/linear_model.py +21 -0
  251. sklearnex/spmd/linear_model/logistic_regression.py +21 -0
  252. sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +329 -0
  253. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  254. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +162 -0
  255. sklearnex/spmd/neighbors/__init__.py +19 -0
  256. sklearnex/spmd/neighbors/neighbors.py +25 -0
  257. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  258. sklearnex/svm/__init__.py +29 -0
  259. sklearnex/svm/_common.py +339 -0
  260. sklearnex/svm/nusvc.py +371 -0
  261. sklearnex/svm/nusvr.py +170 -0
  262. sklearnex/svm/svc.py +399 -0
  263. sklearnex/svm/svr.py +167 -0
  264. sklearnex/svm/tests/test_svm.py +93 -0
  265. sklearnex/tests/test_common.py +390 -0
  266. sklearnex/tests/test_config.py +123 -0
  267. sklearnex/tests/test_memory_usage.py +379 -0
  268. sklearnex/tests/test_monkeypatch.py +276 -0
  269. sklearnex/tests/test_n_jobs_support.py +108 -0
  270. sklearnex/tests/test_parallel.py +48 -0
  271. sklearnex/tests/test_patching.py +385 -0
  272. sklearnex/tests/test_run_to_run_stability.py +321 -0
  273. sklearnex/tests/utils/__init__.py +44 -0
  274. sklearnex/tests/utils/base.py +371 -0
  275. sklearnex/tests/utils/spmd.py +198 -0
  276. sklearnex/utils/__init__.py +19 -0
  277. sklearnex/utils/_array_api.py +82 -0
  278. sklearnex/utils/parallel.py +59 -0
  279. sklearnex/utils/tests/test_finite.py +89 -0
  280. sklearnex/utils/validation.py +17 -0
onedal/cluster/tests/test_dbscan.py ADDED
@@ -0,0 +1,125 @@
1
+ # ===============================================================================
2
+ # Copyright 2023 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ===============================================================================
16
+
17
+ import numpy as np
18
+ import pytest
19
+ from sklearn.cluster import DBSCAN as DBSCAN_SKLEARN
20
+ from sklearn.cluster.tests.common import generate_clustered_data
21
+
22
+ from onedal.cluster import DBSCAN as ONEDAL_DBSCAN
23
+ from onedal.tests.utils._device_selection import get_queues
24
+
25
+
26
def generate_data(
    low: float, high: float, samples_number: int, sample_dimension: int, seed=None
) -> tuple:
    """Generate a uniform random data table and a vector of sample weights.

    Parameters
    ----------
    low, high : float
        Bounds passed to ``RandomState.uniform`` for the data table.
    samples_number : int
        Number of rows in the table and length of the weights vector.
    sample_dimension : int
        Number of columns in the table (a plain integer, not a tuple: it is
        used as the second entry of the ``size`` tuple).
    seed : optional
        Forwarded to ``np.random.RandomState``. The default ``None`` keeps the
        original non-reproducible behaviour; pass a value to get repeatable
        data.

    Returns
    -------
    tuple
        ``(data, weights)`` where ``data`` has shape
        ``(samples_number, sample_dimension)`` and ``weights`` has shape
        ``(samples_number,)``.
    """
    generator = np.random.RandomState(seed)
    table_size = (samples_number, sample_dimension)
    # Draw the table first and the weights second so that a fixed seed always
    # yields the same pair.
    data = generator.uniform(low=low, high=high, size=table_size)
    weights = generator.uniform(size=samples_number)
    return data, weights
34
+
35
+
36
def check_labels_equals(left_labels: np.ndarray, right_labels: np.ndarray) -> bool:
    """Check that two label vectors describe the same clustering.

    The label values themselves may differ; what must hold is that every
    distinct label in ``left_labels`` always co-occurs with one and the same
    label in ``right_labels`` and that both vectors use the same number of
    distinct labels.

    Parameters
    ----------
    left_labels, right_labels : np.ndarray
        One-dimensional label arrays of identical shape.

    Returns
    -------
    bool
        ``True`` when the clusterings match.

    Raises
    ------
    Exception
        If the shapes differ, the arrays are not one-dimensional, the number
        of distinct labels differs, or a left label maps to more than one
        right label.
    """
    if left_labels.shape != right_labels.shape:
        raise Exception("Shapes not equal")
    if left_labels.ndim != 1:
        raise Exception("Shapes size not equals 1")
    if len(set(left_labels)) != len(set(right_labels)):
        raise Exception("Cluster counts not equal")

    # Remember the first right label seen for each left label; any later
    # disagreement means the two clusterings split the samples differently.
    label_map = {}
    for left, right in zip(left_labels, right_labels):
        if label_map.setdefault(left, right) != right:
            raise Exception("Wrong clustering")
    return True
50
+
51
+
52
def _test_dbscan_big_data_numpy_gen(
    queue,
    eps: float,
    min_samples: int,
    metric: str,
    use_weights: bool,
    low=-100.0,
    high=100.0,
    samples_number=1000,
    sample_dimension=4,
):
    """Fit oneDAL and scikit-learn DBSCAN on the same random data and compare labels.

    Data and weights come from ``generate_data``; the weights are dropped
    when ``use_weights`` is ``False``. The oneDAL estimator is fitted with
    the given ``queue``; the scikit-learn estimator is fitted without one.
    The resulting ``labels_`` are compared with ``check_labels_equals``.
    """
    data, weights = generate_data(
        low=low,
        high=high,
        samples_number=samples_number,
        sample_dimension=sample_dimension,
    )
    sample_weight = None if use_weights is False else weights

    onedal_model = ONEDAL_DBSCAN(eps=eps, min_samples=min_samples, metric=metric)
    onedal_model = onedal_model.fit(X=data, sample_weight=sample_weight, queue=queue)

    sklearn_model = DBSCAN_SKLEARN(metric=metric, eps=eps, min_samples=min_samples)
    sklearn_model = sklearn_model.fit(X=data, sample_weight=sample_weight)

    check_labels_equals(onedal_model.labels_, sklearn_model.labels_)
80
+
81
+
82
@pytest.mark.parametrize("metric", ["euclidean"])
@pytest.mark.parametrize("use_weights", [True, False])
@pytest.mark.parametrize("queue", get_queues())
def test_dbscan_big_data_numpy_gen(queue, metric, use_weights: bool):
    """Run the DBSCAN comparison once with eps=35.0 and min_samples=6."""
    _test_dbscan_big_data_numpy_gen(
        queue,
        eps=35.0,
        min_samples=6,
        metric=metric,
        use_weights=use_weights,
    )
96
+
97
+
98
def _test_across_grid_parameter_numpy_gen(queue, metric, use_weights: bool):
    """Run the DBSCAN comparison for every (eps, min_samples) pair of a fixed grid.

    ``eps`` runs from 0.05 towards 0.5 in steps of 0.05 (``np.arange``
    semantics); ``min_samples`` runs over ``range(5, 15, 1)``.
    """
    eps_grid = np.arange(0.05, 0.5, 0.05)
    min_samples_grid = range(5, 15, 1)
    for eps in eps_grid:
        for min_samples in min_samples_grid:
            _test_dbscan_big_data_numpy_gen(
                queue,
                eps=eps,
                min_samples=min_samples,
                metric=metric,
                use_weights=use_weights,
            )
114
+
115
+
116
@pytest.mark.parametrize("metric", ["euclidean"])
@pytest.mark.parametrize("use_weights", [True, False])
@pytest.mark.parametrize("queue", get_queues())
def test_across_grid_parameter_numpy_gen(queue, metric, use_weights: bool):
    """Parametrized entry point for the (eps, min_samples) grid comparison."""
    _test_across_grid_parameter_numpy_gen(queue, metric, use_weights)
onedal/cluster/tests/test_kmeans.py ADDED
@@ -0,0 +1,88 @@
1
+ # ==============================================================================
2
+ # Copyright 2023 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import numpy as np
18
+ import pytest
19
+ from numpy.testing import assert_array_equal
20
+
21
+ from daal4py.sklearn._utils import daal_check_version
22
+
23
if daal_check_version((2023, "P", 200)):
    from sklearn.cluster import kmeans_plusplus as init_external
    from sklearn.neighbors import NearestNeighbors

    from onedal.cluster import KMeans
    from onedal.cluster import kmeans_plusplus as init_internal
    from onedal.tests.utils._device_selection import get_queues

    def generate_dataset(n_dim, n_cluster, n_points=None, seed=777, dtype=np.float32):
        """Build a shuffled dataset of Gaussian blobs around random centers.

        Returns ``(cs, vs, data)``: the cluster centers, the per-cluster
        standard deviations, and the generated points cast to ``dtype``.
        ``n_points`` is the number of points drawn per cluster and defaults
        to ``n_dim * n_cluster``.
        """
        # A reference number of points per cluster is needed
        if n_points is None:
            n_points = n_dim * n_cluster

        # Seeded generator; cluster centers are drawn first
        gen = np.random.Generator(np.random.MT19937(seed))
        cs = gen.uniform(low=-1.0, high=+1.0, size=(n_cluster, n_dim))

        # Per-cluster spread from the 3 sigma criterion: one third of the
        # distance to the nearest other center, which keeps points inside
        # the Voronoi cell of their cluster
        nn = NearestNeighbors(n_neighbors=2)
        d, i = nn.fit(cs).kneighbors(cs)
        assert_array_equal(i[:, 0], np.arange(n_cluster))
        vs = d[:, 1] / 3

        # One normal blob per cluster, drawn in cluster order
        blobs = [
            gen.normal(loc=cs[c, :], scale=vs[c], size=(n_points, n_dim))
            for c in range(n_cluster)
        ]
        data = np.concatenate(blobs, axis=0)
        gen.shuffle(data, axis=0)

        return (cs, vs, data.astype(dtype))

    @pytest.mark.parametrize("queue", get_queues())
    @pytest.mark.parametrize("dtype", [np.float32, np.float64])
    @pytest.mark.parametrize("n_dim", [3, 4, 17, 24])
    @pytest.mark.parametrize("n_cluster", [9, 11, 32])
    @pytest.mark.parametrize("pipeline", ["implicit", "external", "internal"])
    def test_generated_dataset(queue, dtype, n_dim, n_cluster, pipeline):
        """Fit KMeans on generated blobs and check centroids land near the true centers.

        ``pipeline`` selects how the initial centroids are obtained:
        scikit-learn's ``kmeans_plusplus`` ("external"), oneDAL's
        ``kmeans_plusplus`` ("internal"), or the estimator's own
        ``"k-means++"`` option (any other value).
        """
        seed = 777 * n_dim * n_cluster
        cs, vs, X = generate_dataset(n_dim, n_cluster, seed=seed, dtype=dtype)

        if pipeline == "external":
            init_data, _ = init_external(X, n_cluster)
        elif pipeline == "internal":
            init_data, _ = init_internal(X, n_cluster, queue=queue)
        else:
            init_data = "k-means++"
        m = KMeans(n_cluster, init=init_data, max_iter=5)

        m.fit(X, queue=queue)

        # Distance from every true center to its nearest fitted centroid
        rs_centroids = m.cluster_centers_
        nn = NearestNeighbors(n_neighbors=1)
        d, _ = nn.fit(rs_centroids).kneighbors(cs)
        # We have applied 3 sigma rule once
        desired_accuracy = int(0.9973 * n_cluster)
        within_three_sigma = d.reshape(-1) <= (vs * 3)
        exp_accuracy = np.count_nonzero(within_three_sigma)

        # TODO: investigate accuracy with kmeans++ init and remove - 1
        assert desired_accuracy - 1 <= exp_accuracy
onedal/cluster/tests/test_kmeans_init.py ADDED
@@ -0,0 +1,93 @@
1
+ # ===============================================================================
2
+ # Copyright 2023 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ===============================================================================
16
+
17
+ import numpy as np
18
+ import pytest
19
+ from numpy.testing import assert_array_equal
20
+
21
+ from daal4py.sklearn._utils import daal_check_version
22
+
23
if daal_check_version((2023, "P", 200)):
    from sklearn.datasets import load_breast_cancer
    from sklearn.metrics import davies_bouldin_score
    from sklearn.neighbors import NearestNeighbors

    from onedal.cluster import KMeans, kmeans_plusplus
    from onedal.tests.utils._device_selection import get_queues

    @pytest.mark.parametrize("queue", get_queues())
    @pytest.mark.parametrize("dtype", [np.float32, np.float64])
    @pytest.mark.parametrize("n_cluster", [2, 5, 11, 128])
    def test_breast_cancer(queue, dtype, n_cluster):
        """Initialise with oneDAL k-means++ on breast-cancer data and check the Davies-Bouldin score.

        The score of a one-iteration KMeans fit must exceed 0.45 for fewer
        than 20 clusters and 0.55 otherwise.
        """
        X, _ = load_breast_cancer(return_X_y=True)
        X = np.asarray(X).astype(dtype=dtype)
        init_data, _ = kmeans_plusplus(X, n_cluster, random_state=777, queue=queue)
        m = KMeans(n_cluster, init=init_data, max_iter=1)
        # NOTE(review): `queue` is passed to kmeans_plusplus only, not to
        # fit/predict - confirm this is intended.
        res = davies_bouldin_score(X, m.fit(X).predict(X))
        thr = 0.45 if n_cluster < 20 else 0.55
        assert res > thr

    def generate_dataset(n_dim, n_cluster, n_points=None, seed=777, dtype=np.float32):
        """Build a shuffled dataset of Gaussian blobs around random centers.

        Returns ``(cs, vs, data)``: the cluster centers, the per-cluster
        standard deviations, and the generated points cast to ``dtype``.
        ``n_points`` is the number of points drawn per cluster and defaults
        to ``n_dim * n_cluster``.
        """
        # We need some reference value of points for each cluster
        n_points = (n_dim * n_cluster) if n_points is None else n_points

        # Creating generator and generating cluster points
        gen = np.random.Generator(np.random.MT19937(seed))
        cs = gen.uniform(low=-1.0, high=+1.0, size=(n_cluster, n_dim))

        # Finding variances for each cluster using 3 sigma criteria
        # It ensures that point is in the Voronoi cell of cluster
        nn = NearestNeighbors(n_neighbors=2)
        d, i = nn.fit(cs).kneighbors(cs)
        assert_array_equal(i[:, 0], np.arange(n_cluster))
        vs = d[:, 1] / 3

        # Generating dataset
        def gen_one(c):
            params = {"loc": cs[c, :], "scale": vs[c], "size": (n_points, n_dim)}
            return gen.normal(**params)

        data = [gen_one(c) for c in range(n_cluster)]
        data = np.concatenate(data, axis=0)
        gen.shuffle(data, axis=0)

        data = data.astype(dtype)

        return (cs, vs, data)

    @pytest.mark.parametrize("queue", get_queues())
    @pytest.mark.parametrize("dtype", [np.float32, np.float64])
    @pytest.mark.parametrize("n_dim", [3, 12, 17])
    @pytest.mark.parametrize("n_cluster", [2, 15, 61])
    def test_generated_dataset(queue, dtype, n_dim, n_cluster):
        """Check that k-means++ init plus three Lloyd iterations recovers the true centers.

        A true center counts as recovered when its nearest fitted centroid
        lies within three standard deviations of it.
        """
        seed = 777 * n_dim * n_cluster
        cs, vs, X = generate_dataset(n_dim, n_cluster, seed=seed, dtype=dtype)

        init_data, _ = kmeans_plusplus(X, n_cluster, random_state=seed, queue=queue)
        m = KMeans(n_cluster, init=init_data, max_iter=3, algorithm="lloyd").fit(X)

        rs_centroids = m.cluster_centers_
        nn = NearestNeighbors(n_neighbors=1)
        d, i = nn.fit(rs_centroids).kneighbors(cs)
        # We have applied 3 sigma rule once (0.9973 is the 3 sigma coverage,
        # and the tolerance below is vs * 3)
        desired_accuracy = int(0.9973 * n_cluster)
        if d.dtype == np.float64:
            desired_accuracy = desired_accuracy - 1
        correctness = d.reshape(-1) <= (vs * 3)
        exp_accuracy = np.count_nonzero(correctness)

        assert desired_accuracy <= exp_accuracy
onedal/common/_base.py ADDED
@@ -0,0 +1,38 @@
1
+ # ==============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ from abc import ABC
18
+
19
+ from onedal import _backend
20
+
21
+ from ._policy import _get_policy
22
+
23
+
24
+ def _get_backend(backend, module, submodule=None, method=None, *args, **kwargs):
25
+ result = getattr(backend, module)
26
+ if submodule:
27
+ result = getattr(result, submodule)
28
+ if method:
29
+ return getattr(result, method)(*args, **kwargs)
30
+ return result
31
+
32
+
33
class BaseEstimator(ABC):
    """Abstract base whose methods forward to the module-level backend and policy helpers."""

    def _get_backend(self, module, submodule=None, method=None, *args, **kwargs):
        """Delegate to the module-level ``_get_backend`` using the imported ``_backend``."""
        resolved = _get_backend(_backend, module, submodule, method, *args, **kwargs)
        return resolved

    def _get_policy(self, queue, *data):
        """Delegate to the ``_get_policy`` imported from ``._policy``."""
        policy = _get_policy(queue, *data)
        return policy
onedal/common/_estimator_checks.py ADDED
@@ -0,0 +1,47 @@
1
+ # ===============================================================================
2
+ # Copyright 2022 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ===============================================================================
16
+
17
+
18
+ def _check_is_fitted(estimator, attributes=None, *, msg=None):
19
+ if msg is None:
20
+ msg = (
21
+ "This %(name)s instance is not fitted yet. Call 'fit' with "
22
+ "appropriate arguments before using this estimator."
23
+ )
24
+
25
+ if not (
26
+ hasattr(estimator, "fit")
27
+ or (hasattr(estimator, "partial_fit") and hasattr(estimator, "finalize_fit"))
28
+ ):
29
+ raise TypeError("%s is not an estimator instance." % (estimator))
30
+
31
+ if attributes is not None:
32
+ if not isinstance(attributes, (list, tuple)):
33
+ attributes = [attributes]
34
+ attrs = all([hasattr(estimator, attr) for attr in attributes])
35
+ else:
36
+ attrs = [v for v in vars(estimator) if v.endswith("_") and not v.startswith("__")]
37
+
38
+ if not attrs:
39
+ raise AttributeError(msg % {"name": type(estimator).__name__})
40
+
41
+
42
+ def _is_classifier(estimator):
43
+ return getattr(estimator, "_estimator_type", None) == "classifier"
44
+
45
+
46
+ def _is_regressor(estimator):
47
+ return getattr(estimator, "_estimator_type", None) == "regressor"
@@ -0,0 +1,62 @@
1
+ # ==============================================================================
2
+ # Copyright 2021 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+
18
class ClusterMixin:
    """Mixin that tags an estimator as a clusterer and adds ``fit_predict``."""

    _estimator_type = "clusterer"

    def fit_predict(self, X, y=None, queue=None, **kwargs):
        """Fit on ``X`` and return the ``labels_`` attribute set by ``fit``.

        ``y`` is accepted but not forwarded to ``fit``; ``queue`` and any
        extra keyword arguments are passed through.
        """
        self.fit(X, queue=queue, **kwargs)
        labels = self.labels_
        return labels

    def _more_tags(self):
        """Return the estimator tags contributed by this mixin."""
        tags = {"preserves_dtype": []}
        return tags
27
+
28
+
29
class ClassifierMixin:
    """Mixin that tags an estimator as a classifier and adds accuracy scoring."""

    _estimator_type = "classifier"

    def score(self, X, y, sample_weight=None, queue=None):
        """Return the accuracy of ``self.predict(X, queue=queue)`` against ``y``.

        ``sample_weight`` is forwarded to ``sklearn.metrics.accuracy_score``.
        """
        # Imported at call time rather than at module import.
        from sklearn.metrics import accuracy_score

        predicted = self.predict(X, queue=queue)
        return accuracy_score(y, predicted, sample_weight=sample_weight)

    def _more_tags(self):
        """Return the estimator tags contributed by this mixin."""
        tags = {"requires_y": True}
        return tags
41
+
42
+
43
class RegressorMixin:
    """Mixin that tags an estimator as a regressor and adds R^2 scoring."""

    _estimator_type = "regressor"

    def score(self, X, y, sample_weight=None, queue=None):
        """Return the R^2 score of ``self.predict(X, queue=queue)`` against ``y``.

        ``sample_weight`` is forwarded to ``sklearn.metrics.r2_score``.
        """
        # Imported at call time rather than at module import.
        from sklearn.metrics import r2_score

        predicted = self.predict(X, queue=queue)
        return r2_score(y, predicted, sample_weight=sample_weight)

    def _more_tags(self):
        """Return the estimator tags contributed by this mixin."""
        tags = {"requires_y": True}
        return tags
53
+
54
+
55
class TransformerMixin:
    """Mixin that tags an estimator as a transformer and adds ``fit_transform``."""

    _estimator_type = "transformer"

    def fit_transform(self, X, y=None, queue=None, **fit_params):
        """Fit on ``X`` (and ``y`` when given), then transform ``X``.

        ``y`` is only passed to ``fit`` when it is not ``None``. ``queue``
        is forwarded to both ``fit`` and ``transform``; ``fit_params`` go to
        ``fit`` only. ``transform`` is called on whatever ``fit`` returns.
        """
        positional = (X,) if y is None else (X, y)
        fitted = self.fit(*positional, queue=queue, **fit_params)
        return fitted.transform(X, queue=queue)
@@ -0,0 +1,59 @@
1
+ # ==============================================================================
2
+ # Copyright 2021 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import sys
18
+
19
+ from onedal import _backend, _is_dpc_backend
20
+
21
+
22
def _get_policy(queue, *data):
    """Choose the interop policy for an explicit queue and/or input data.

    The queue carried by the data (see ``_get_queue``) is used only when no
    explicit ``queue`` is given. Without the DPC backend, any queue at all,
    explicit or data-carried, is an error.

    Raises
    ------
    RuntimeError
        If a queue is requested or implied but the DPC backend is absent.
    """
    data_queue = _get_queue(*data)

    if not _is_dpc_backend:
        if queue is not None or data_queue is not None:
            raise RuntimeError(
                "Operation using the requested SYCL queue requires the DPC backend"
            )
        return _HostInteropPolicy()

    # DPC backend: an explicit queue wins over the one found on the data.
    if queue is not None:
        return _DataParallelInteropPolicy(queue)
    if data_queue is not None:
        return _DataParallelInteropPolicy(data_queue)
    return _HostInteropPolicy()
36
+
37
+
38
+ def _get_queue(*data):
39
+ if len(data) > 0 and hasattr(data[0], "__sycl_usm_array_interface__"):
40
+ # Assume that all data reside on the same device
41
+ return data[0].__sycl_usm_array_interface__["syclobj"]
42
+ return None
43
+
44
+
45
class _HostInteropPolicy(_backend.host_policy):
    """Python-side subclass of the backend's ``host_policy``."""

    def __init__(self):
        """Initialise the backend base class; takes no arguments."""
        super().__init__()
48
+
49
+
50
# The data-parallel policy is only defined when the DPC backend is present.
if _is_dpc_backend:
    from onedal._device_offload import DummySyclQueue

    class _DataParallelInteropPolicy(_backend.data_parallel_policy):
        """Python-side subclass of the backend's ``data_parallel_policy``."""

        def __init__(self, queue):
            """Keep ``queue`` on the instance and initialise the backend policy.

            A ``DummySyclQueue`` is not handed to the backend directly; the
            filter string of its ``sycl_device`` is passed instead.
            """
            self._queue = queue
            if isinstance(queue, DummySyclQueue):
                policy_arg = self._queue.sycl_device.get_filter_string()
            else:
                policy_arg = self._queue
            super().__init__(policy_arg)
@@ -0,0 +1,30 @@
1
+ # ==============================================================================
2
+ # Copyright 2023 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ from onedal import _is_spmd_backend
18
+
19
# SPMD policy helpers are only defined when the SPMD backend is present.
if _is_spmd_backend:
    from onedal import _spmd_backend

    class _SPMDDataParallelInteropPolicy(_spmd_backend.spmd_data_parallel_policy):
        """Python-side subclass of the backend's ``spmd_data_parallel_policy``."""

        def __init__(self, queue):
            """Keep ``queue`` on the instance and pass it to the backend policy."""
            self._queue = queue
            super().__init__(self._queue)

    def _get_spmd_policy(queue):
        """Build an SPMD data-parallel policy for ``queue``."""
        # TODO:
        # cases when queue is None
        policy = _SPMDDataParallelInteropPolicy(queue)
        return policy
@@ -0,0 +1,125 @@
1
+ # ==============================================================================
2
+ # Copyright 2023 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import logging
18
+ from typing import Any, Dict, Tuple
19
+ from warnings import warn
20
+
21
+ from daal4py.sklearn._utils import daal_check_version
22
+ from onedal import _backend
23
+
24
# Hyperparameter support is gated on the oneDAL version check below.
if not daal_check_version((2024, "P", 0)):
    warn("Hyperparameters are supported in oneDAL starting from 2024.0.0 version.")
    # Empty map: get_hyperparameters() then returns None for every key.
    hyperparameters_map = {}
else:
    # Names that HyperParameters treats as its own plain attributes instead
    # of routing them through the backend getters/setters.
    _hparams_reserved_words = [
        "algorithm",
        "op",
        "setters",
        "getters",
        "backend",
        "is_default",
        "to_dict",
    ]
37
+
38
class HyperParameters:
    """Attribute-style facade over a oneDAL hyperparameters object.

    Overrides `__getattribute__` and `__setattr__` so that reading or
    assigning a hyperparameter name calls the matching getter or setter of
    the wrapped backend object. Names listed in `_hparams_reserved_words`
    are handled as ordinary instance attributes.

    Parameters
    ----------
    algorithm, op : str
        Identify the wrapped hyperparameters; used in log and error text.
    setters, getters : dict
        Map hyperparameter names to the backend setter / getter callables.
    backend : object
        The wrapped backend hyperparameters object.
    """

    def __init__(self, algorithm, op, setters, getters, backend):
        self.algorithm = algorithm
        self.op = op
        self.setters = setters
        self.getters = getters
        self.backend = backend
        # Flipped to False by the first successful hyperparameter assignment.
        self.is_default = True

    def __getattribute__(self, __name):
        """Resolve reserved names normally and hyperparameters via getters.

        Raises
        ------
        AttributeError
            If the name is neither reserved, a hyperparameter, nor a
            regular attribute of the object.
        """
        if __name in _hparams_reserved_words:
            if __name == "backend":
                # `backend` attribute accessed only for oneDAL kernel calls
                logger = logging.getLogger("sklearnex")
                # Build the message (which calls every getter) only when it
                # will actually be emitted.
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug(
                        "Using next hyperparameters for "
                        f"'{self.algorithm}.{self.op}': {self.to_dict()}"
                    )
            return super().__getattribute__(__name)
        elif __name in self.getters:
            return self.getters[__name]()
        try:
            # try to return attribute from base class
            # required to read builtin attributes like __class__, __doc__, etc.
            # which are used in debuggers
            return super().__getattribute__(__name)
        except AttributeError:
            # raise an AttributeError with a hyperparameter-specific message
            # for easier debugging; the generic original adds nothing, so it
            # is dropped from the traceback.
            raise AttributeError(
                f"Unknown attribute '{__name}' in "
                f"'{self.algorithm}.{self.op}' hyperparameters"
            ) from None

    def __setattr__(self, __name, __value):
        """Store reserved names normally and hyperparameters via setters.

        Raises
        ------
        ValueError
            If the name is neither reserved nor a known hyperparameter.
        """
        if __name in _hparams_reserved_words:
            super().__setattr__(__name, __value)
        elif __name in self.setters:
            # Call the setter first so a rejected value does not leave the
            # object marked as non-default.
            self.setters[__name](__value)
            self.is_default = False
        else:
            raise ValueError(
                f"Unknown attribute '{__name}' in "
                f"'{self.algorithm}.{self.op}' hyperparameters"
            )

    def to_dict(self):
        """Return ``{name: current value}`` by calling every getter."""
        return {name: getter() for name, getter in self.getters.items()}
90
+
91
def get_methods_with_prefix(obj, prefix):
    """Collect the attributes of ``obj`` whose names start with ``prefix``.

    Parameters
    ----------
    obj : object
        Object whose ``dir()`` listing is scanned.
    prefix : str
        Leading text that selects an attribute, e.g. ``"get_"``.

    Returns
    -------
    dict
        Maps each selected name, with the leading ``prefix`` removed, to
        the attribute itself.
    """
    # Strip only the leading prefix. ``str.replace`` would also delete later
    # occurrences, e.g. turning "get_target_size" into "tarsize".
    prefix_len = len(prefix)
    return {
        name[prefix_len:]: getattr(obj, name)
        for name in dir(obj)
        if name.startswith(prefix)
    }
96
+
97
# Backend hyperparameter objects, keyed by (algorithm, operation).
hyperparameters_backend: Dict[Tuple[str, str], Any] = {}
hyperparameters_backend[
    ("linear_regression", "train")
] = _backend.linear_model.regression.train_hyperparameters()
hyperparameters_backend[
    ("covariance", "compute")
] = _backend.covariance.compute_hyperparameters()
# Decision forest inference is registered only when this version check passes.
if daal_check_version((2024, "P", 300)):
    df_infer_hp = _backend.decision_forest.infer_hyperparameters
    hyperparameters_backend[("decision_forest", "infer")] = df_infer_hp()
hyperparameters_map = {}

# Wrap every backend object, refusing any whose getter and setter names differ.
for (algorithm, op), hyperparameters in hyperparameters_backend.items():
    setters = get_methods_with_prefix(hyperparameters, "set_")
    getters = get_methods_with_prefix(hyperparameters, "get_")

    if set(setters) ^ set(getters):
        raise ValueError(
            f"Setters and getters in '{algorithm}.{op}' "
            "hyperparameters wrapper do not correspond."
        )

    hyperparameters_map[(algorithm, op)] = HyperParameters(
        algorithm, op, setters, getters, hyperparameters
    )
122
+
123
+
124
def get_hyperparameters(algorithm, op):
    """Look up the entry stored in ``hyperparameters_map`` for ``(algorithm, op)``.

    Returns ``None`` when nothing is registered under that pair.
    """
    key = (algorithm, op)
    return hyperparameters_map.get(key)