scikit-learn-intelex 2025.0.0__py311-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (278)
  1. daal4py/__init__.py +73 -0
  2. daal4py/__main__.py +58 -0
  3. daal4py/_daal4py.cpython-311-x86_64-linux-gnu.so +0 -0
  4. daal4py/doc/third-party-programs.txt +424 -0
  5. daal4py/mb/__init__.py +19 -0
  6. daal4py/mb/model_builders.py +377 -0
  7. daal4py/mpi_transceiver.cpython-311-x86_64-linux-gnu.so +0 -0
  8. daal4py/sklearn/__init__.py +40 -0
  9. daal4py/sklearn/_n_jobs_support.py +242 -0
  10. daal4py/sklearn/_utils.py +241 -0
  11. daal4py/sklearn/cluster/__init__.py +20 -0
  12. daal4py/sklearn/cluster/dbscan.py +165 -0
  13. daal4py/sklearn/cluster/k_means.py +597 -0
  14. daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
  15. daal4py/sklearn/decomposition/__init__.py +19 -0
  16. daal4py/sklearn/decomposition/_pca.py +524 -0
  17. daal4py/sklearn/ensemble/AdaBoostClassifier.py +192 -0
  18. daal4py/sklearn/ensemble/GBTDAAL.py +318 -0
  19. daal4py/sklearn/ensemble/__init__.py +27 -0
  20. daal4py/sklearn/ensemble/_forest.py +1397 -0
  21. daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
  22. daal4py/sklearn/linear_model/__init__.py +29 -0
  23. daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
  24. daal4py/sklearn/linear_model/_linear.py +272 -0
  25. daal4py/sklearn/linear_model/_ridge.py +325 -0
  26. daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
  27. daal4py/sklearn/linear_model/linear.py +17 -0
  28. daal4py/sklearn/linear_model/logistic_loss.py +195 -0
  29. daal4py/sklearn/linear_model/logistic_path.py +1026 -0
  30. daal4py/sklearn/linear_model/ridge.py +17 -0
  31. daal4py/sklearn/linear_model/tests/test_linear.py +196 -0
  32. daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
  33. daal4py/sklearn/manifold/__init__.py +19 -0
  34. daal4py/sklearn/manifold/_t_sne.py +405 -0
  35. daal4py/sklearn/metrics/__init__.py +20 -0
  36. daal4py/sklearn/metrics/_pairwise.py +155 -0
  37. daal4py/sklearn/metrics/_ranking.py +210 -0
  38. daal4py/sklearn/model_selection/__init__.py +19 -0
  39. daal4py/sklearn/model_selection/_split.py +309 -0
  40. daal4py/sklearn/model_selection/tests/test_split.py +56 -0
  41. daal4py/sklearn/monkeypatch/__init__.py +0 -0
  42. daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
  43. daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
  44. daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
  45. daal4py/sklearn/monkeypatch/tests/test_patching.py +87 -0
  46. daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +118 -0
  47. daal4py/sklearn/neighbors/__init__.py +21 -0
  48. daal4py/sklearn/neighbors/_base.py +503 -0
  49. daal4py/sklearn/neighbors/_classification.py +139 -0
  50. daal4py/sklearn/neighbors/_regression.py +74 -0
  51. daal4py/sklearn/neighbors/_unsupervised.py +55 -0
  52. daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
  53. daal4py/sklearn/svm/__init__.py +19 -0
  54. daal4py/sklearn/svm/svm.py +734 -0
  55. daal4py/sklearn/utils/__init__.py +21 -0
  56. daal4py/sklearn/utils/base.py +75 -0
  57. daal4py/sklearn/utils/tests/test_utils.py +51 -0
  58. daal4py/sklearn/utils/validation.py +693 -0
  59. onedal/__init__.py +83 -0
  60. onedal/_config.py +53 -0
  61. onedal/_device_offload.py +229 -0
  62. onedal/_onedal_py_dpc.cpython-311-x86_64-linux-gnu.so +0 -0
  63. onedal/_onedal_py_host.cpython-311-x86_64-linux-gnu.so +0 -0
  64. onedal/_onedal_py_spmd_dpc.cpython-311-x86_64-linux-gnu.so +0 -0
  65. onedal/basic_statistics/__init__.py +20 -0
  66. onedal/basic_statistics/basic_statistics.py +107 -0
  67. onedal/basic_statistics/incremental_basic_statistics.py +160 -0
  68. onedal/basic_statistics/tests/test_basic_statistics.py +298 -0
  69. onedal/basic_statistics/tests/test_incremental_basic_statistics.py +196 -0
  70. onedal/cluster/__init__.py +27 -0
  71. onedal/cluster/dbscan.py +110 -0
  72. onedal/cluster/kmeans.py +560 -0
  73. onedal/cluster/kmeans_init.py +115 -0
  74. onedal/cluster/tests/test_dbscan.py +125 -0
  75. onedal/cluster/tests/test_kmeans.py +88 -0
  76. onedal/cluster/tests/test_kmeans_init.py +93 -0
  77. onedal/common/_base.py +38 -0
  78. onedal/common/_estimator_checks.py +47 -0
  79. onedal/common/_mixin.py +62 -0
  80. onedal/common/_policy.py +59 -0
  81. onedal/common/_spmd_policy.py +30 -0
  82. onedal/common/hyperparameters.py +116 -0
  83. onedal/common/tests/test_policy.py +75 -0
  84. onedal/covariance/__init__.py +20 -0
  85. onedal/covariance/covariance.py +125 -0
  86. onedal/covariance/incremental_covariance.py +146 -0
  87. onedal/covariance/tests/test_covariance.py +50 -0
  88. onedal/covariance/tests/test_incremental_covariance.py +122 -0
  89. onedal/datatypes/__init__.py +19 -0
  90. onedal/datatypes/_data_conversion.py +95 -0
  91. onedal/datatypes/tests/test_data.py +235 -0
  92. onedal/decomposition/__init__.py +20 -0
  93. onedal/decomposition/incremental_pca.py +204 -0
  94. onedal/decomposition/pca.py +186 -0
  95. onedal/decomposition/tests/test_incremental_pca.py +198 -0
  96. onedal/ensemble/__init__.py +29 -0
  97. onedal/ensemble/forest.py +720 -0
  98. onedal/ensemble/tests/test_random_forest.py +97 -0
  99. onedal/linear_model/__init__.py +27 -0
  100. onedal/linear_model/incremental_linear_model.py +258 -0
  101. onedal/linear_model/linear_model.py +329 -0
  102. onedal/linear_model/logistic_regression.py +249 -0
  103. onedal/linear_model/tests/test_incremental_linear_regression.py +168 -0
  104. onedal/linear_model/tests/test_incremental_ridge_regression.py +107 -0
  105. onedal/linear_model/tests/test_linear_regression.py +149 -0
  106. onedal/linear_model/tests/test_logistic_regression.py +95 -0
  107. onedal/linear_model/tests/test_ridge.py +95 -0
  108. onedal/neighbors/__init__.py +19 -0
  109. onedal/neighbors/neighbors.py +778 -0
  110. onedal/neighbors/tests/test_knn_classification.py +49 -0
  111. onedal/primitives/__init__.py +27 -0
  112. onedal/primitives/get_tree.py +25 -0
  113. onedal/primitives/kernel_functions.py +153 -0
  114. onedal/primitives/tests/test_kernel_functions.py +159 -0
  115. onedal/spmd/__init__.py +25 -0
  116. onedal/spmd/_base.py +30 -0
  117. onedal/spmd/basic_statistics/__init__.py +20 -0
  118. onedal/spmd/basic_statistics/basic_statistics.py +30 -0
  119. onedal/spmd/basic_statistics/incremental_basic_statistics.py +69 -0
  120. onedal/spmd/cluster/__init__.py +28 -0
  121. onedal/spmd/cluster/dbscan.py +23 -0
  122. onedal/spmd/cluster/kmeans.py +56 -0
  123. onedal/spmd/covariance/__init__.py +20 -0
  124. onedal/spmd/covariance/covariance.py +26 -0
  125. onedal/spmd/covariance/incremental_covariance.py +82 -0
  126. onedal/spmd/decomposition/__init__.py +20 -0
  127. onedal/spmd/decomposition/incremental_pca.py +117 -0
  128. onedal/spmd/decomposition/pca.py +26 -0
  129. onedal/spmd/ensemble/__init__.py +19 -0
  130. onedal/spmd/ensemble/forest.py +28 -0
  131. onedal/spmd/linear_model/__init__.py +21 -0
  132. onedal/spmd/linear_model/incremental_linear_model.py +97 -0
  133. onedal/spmd/linear_model/linear_model.py +30 -0
  134. onedal/spmd/linear_model/logistic_regression.py +38 -0
  135. onedal/spmd/neighbors/__init__.py +19 -0
  136. onedal/spmd/neighbors/neighbors.py +75 -0
  137. onedal/svm/__init__.py +19 -0
  138. onedal/svm/svm.py +556 -0
  139. onedal/svm/tests/test_csr_svm.py +351 -0
  140. onedal/svm/tests/test_nusvc.py +204 -0
  141. onedal/svm/tests/test_nusvr.py +210 -0
  142. onedal/svm/tests/test_svc.py +168 -0
  143. onedal/svm/tests/test_svr.py +243 -0
  144. onedal/tests/test_common.py +41 -0
  145. onedal/tests/utils/_dataframes_support.py +168 -0
  146. onedal/tests/utils/_device_selection.py +107 -0
  147. onedal/utils/__init__.py +49 -0
  148. onedal/utils/_array_api.py +91 -0
  149. onedal/utils/validation.py +432 -0
  150. scikit_learn_intelex-2025.0.0.dist-info/LICENSE.txt +202 -0
  151. scikit_learn_intelex-2025.0.0.dist-info/METADATA +231 -0
  152. scikit_learn_intelex-2025.0.0.dist-info/RECORD +278 -0
  153. scikit_learn_intelex-2025.0.0.dist-info/WHEEL +5 -0
  154. scikit_learn_intelex-2025.0.0.dist-info/top_level.txt +3 -0
  155. sklearnex/__init__.py +65 -0
  156. sklearnex/__main__.py +58 -0
  157. sklearnex/_config.py +98 -0
  158. sklearnex/_device_offload.py +121 -0
  159. sklearnex/_utils.py +109 -0
  160. sklearnex/basic_statistics/__init__.py +20 -0
  161. sklearnex/basic_statistics/basic_statistics.py +140 -0
  162. sklearnex/basic_statistics/incremental_basic_statistics.py +288 -0
  163. sklearnex/basic_statistics/tests/test_basic_statistics.py +251 -0
  164. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +384 -0
  165. sklearnex/cluster/__init__.py +20 -0
  166. sklearnex/cluster/dbscan.py +192 -0
  167. sklearnex/cluster/k_means.py +383 -0
  168. sklearnex/cluster/tests/test_dbscan.py +38 -0
  169. sklearnex/cluster/tests/test_kmeans.py +153 -0
  170. sklearnex/conftest.py +73 -0
  171. sklearnex/covariance/__init__.py +19 -0
  172. sklearnex/covariance/incremental_covariance.py +368 -0
  173. sklearnex/covariance/tests/test_incremental_covariance.py +226 -0
  174. sklearnex/decomposition/__init__.py +19 -0
  175. sklearnex/decomposition/pca.py +414 -0
  176. sklearnex/decomposition/tests/test_pca.py +58 -0
  177. sklearnex/dispatcher.py +543 -0
  178. sklearnex/doc/third-party-programs.txt +424 -0
  179. sklearnex/ensemble/__init__.py +29 -0
  180. sklearnex/ensemble/_forest.py +2016 -0
  181. sklearnex/ensemble/tests/test_forest.py +120 -0
  182. sklearnex/glob/__main__.py +72 -0
  183. sklearnex/glob/dispatcher.py +101 -0
  184. sklearnex/linear_model/__init__.py +32 -0
  185. sklearnex/linear_model/coordinate_descent.py +30 -0
  186. sklearnex/linear_model/incremental_linear.py +463 -0
  187. sklearnex/linear_model/incremental_ridge.py +418 -0
  188. sklearnex/linear_model/linear.py +302 -0
  189. sklearnex/linear_model/logistic_path.py +17 -0
  190. sklearnex/linear_model/logistic_regression.py +403 -0
  191. sklearnex/linear_model/ridge.py +24 -0
  192. sklearnex/linear_model/tests/test_incremental_linear.py +203 -0
  193. sklearnex/linear_model/tests/test_incremental_ridge.py +153 -0
  194. sklearnex/linear_model/tests/test_linear.py +142 -0
  195. sklearnex/linear_model/tests/test_logreg.py +134 -0
  196. sklearnex/manifold/__init__.py +19 -0
  197. sklearnex/manifold/t_sne.py +21 -0
  198. sklearnex/manifold/tests/test_tsne.py +26 -0
  199. sklearnex/metrics/__init__.py +23 -0
  200. sklearnex/metrics/pairwise.py +22 -0
  201. sklearnex/metrics/ranking.py +20 -0
  202. sklearnex/metrics/tests/test_metrics.py +39 -0
  203. sklearnex/model_selection/__init__.py +21 -0
  204. sklearnex/model_selection/split.py +22 -0
  205. sklearnex/model_selection/tests/test_model_selection.py +34 -0
  206. sklearnex/neighbors/__init__.py +27 -0
  207. sklearnex/neighbors/_lof.py +231 -0
  208. sklearnex/neighbors/common.py +310 -0
  209. sklearnex/neighbors/knn_classification.py +226 -0
  210. sklearnex/neighbors/knn_regression.py +203 -0
  211. sklearnex/neighbors/knn_unsupervised.py +170 -0
  212. sklearnex/neighbors/tests/test_neighbors.py +80 -0
  213. sklearnex/preview/__init__.py +17 -0
  214. sklearnex/preview/covariance/__init__.py +19 -0
  215. sklearnex/preview/covariance/covariance.py +133 -0
  216. sklearnex/preview/covariance/tests/test_covariance.py +66 -0
  217. sklearnex/preview/decomposition/__init__.py +19 -0
  218. sklearnex/preview/decomposition/incremental_pca.py +228 -0
  219. sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
  220. sklearnex/preview/linear_model/__init__.py +19 -0
  221. sklearnex/preview/linear_model/ridge.py +419 -0
  222. sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
  223. sklearnex/spmd/__init__.py +25 -0
  224. sklearnex/spmd/basic_statistics/__init__.py +20 -0
  225. sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
  226. sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
  227. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  228. sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +307 -0
  229. sklearnex/spmd/cluster/__init__.py +30 -0
  230. sklearnex/spmd/cluster/dbscan.py +50 -0
  231. sklearnex/spmd/cluster/kmeans.py +21 -0
  232. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  233. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
  234. sklearnex/spmd/covariance/__init__.py +20 -0
  235. sklearnex/spmd/covariance/covariance.py +21 -0
  236. sklearnex/spmd/covariance/incremental_covariance.py +37 -0
  237. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  238. sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
  239. sklearnex/spmd/decomposition/__init__.py +20 -0
  240. sklearnex/spmd/decomposition/incremental_pca.py +30 -0
  241. sklearnex/spmd/decomposition/pca.py +21 -0
  242. sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
  243. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  244. sklearnex/spmd/ensemble/__init__.py +19 -0
  245. sklearnex/spmd/ensemble/forest.py +71 -0
  246. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  247. sklearnex/spmd/linear_model/__init__.py +21 -0
  248. sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
  249. sklearnex/spmd/linear_model/linear_model.py +21 -0
  250. sklearnex/spmd/linear_model/logistic_regression.py +21 -0
  251. sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +329 -0
  252. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  253. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +166 -0
  254. sklearnex/spmd/neighbors/__init__.py +19 -0
  255. sklearnex/spmd/neighbors/neighbors.py +25 -0
  256. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  257. sklearnex/svm/__init__.py +29 -0
  258. sklearnex/svm/_common.py +328 -0
  259. sklearnex/svm/nusvc.py +332 -0
  260. sklearnex/svm/nusvr.py +148 -0
  261. sklearnex/svm/svc.py +360 -0
  262. sklearnex/svm/svr.py +149 -0
  263. sklearnex/svm/tests/test_svm.py +93 -0
  264. sklearnex/tests/_utils.py +328 -0
  265. sklearnex/tests/_utils_spmd.py +198 -0
  266. sklearnex/tests/test_common.py +54 -0
  267. sklearnex/tests/test_config.py +43 -0
  268. sklearnex/tests/test_memory_usage.py +291 -0
  269. sklearnex/tests/test_monkeypatch.py +276 -0
  270. sklearnex/tests/test_n_jobs_support.py +103 -0
  271. sklearnex/tests/test_parallel.py +48 -0
  272. sklearnex/tests/test_patching.py +385 -0
  273. sklearnex/tests/test_run_to_run_stability.py +296 -0
  274. sklearnex/utils/__init__.py +19 -0
  275. sklearnex/utils/_array_api.py +82 -0
  276. sklearnex/utils/parallel.py +59 -0
  277. sklearnex/utils/tests/test_finite.py +89 -0
  278. sklearnex/utils/validation.py +17 -0
@@ -0,0 +1,288 @@
1
+ # ==============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import numpy as np
18
+ import pytest
19
+ from numpy.testing import assert_allclose
20
+
21
+ from onedal.tests.utils._dataframes_support import (
22
+ _convert_to_dataframe,
23
+ get_dataframes_and_queues,
24
+ )
25
+ from sklearnex.tests._utils_spmd import (
26
+ _assert_unordered_allclose,
27
+ _generate_classification_data,
28
+ _generate_regression_data,
29
+ _get_local_tensor,
30
+ _mpi_libs_and_gpu_available,
31
+ _spmd_assert_allclose,
32
+ )
33
+
34
+
@pytest.mark.skipif(
    not _mpi_libs_and_gpu_available,
    reason="GPU device and MPI libs required for test",
)
@pytest.mark.parametrize(
    "dataframe,queue",
    get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
)
@pytest.mark.mpi
def test_knncls_spmd_gold(dataframe, queue):
    """Compare SPMD and batch ``KNeighborsClassifier`` on a small hand-written dataset.

    Both estimators are fitted with ``n_neighbors=1`` and ``algorithm="brute"``.
    The batch model sees the full NumPy arrays; the SPMD model sees whatever
    ``_get_local_tensor`` returns for each array, converted to the parametrized
    ``dataframe`` type on ``queue``. Neighbor indices, neighbor distances and
    predicted labels of the two models are then compared.
    """
    # Import spmd and batch algo
    from sklearnex.neighbors import KNeighborsClassifier as KNeighborsClassifier_Batch
    from sklearnex.spmd.neighbors import KNeighborsClassifier as KNeighborsClassifier_SPMD

    # Create gold data and convert to dataframe
    X_train = np.array(
        [
            [0.0, 0.0],
            [0.0, 1.0],
            [1.0, 0.0],
            [0.0, 2.0],
            [2.0, 0.0],
            [0.9, 1.0],
            [0.0, -1.0],
            [-1.0, 0.0],
            [-1.0, -1.0],
        ]
    )
    # TODO: handle situations where not all classes are present on all ranks?
    y_train = np.array([0, 1, 0, 1, 0, 1, 0, 1, 0])
    X_test = np.array(
        [
            [1.0, -0.5],
            [-5.0, 1.0],
            [0.0, 1.0],
            [10.0, -10.0],
        ]
    )

    # presumably _get_local_tensor picks this MPI rank's share of the array -
    # verify against sklearnex.tests._utils_spmd
    local_dpt_X_train = _convert_to_dataframe(
        _get_local_tensor(X_train), sycl_queue=queue, target_df=dataframe
    )
    local_dpt_y_train = _convert_to_dataframe(
        _get_local_tensor(y_train), sycl_queue=queue, target_df=dataframe
    )
    local_dpt_X_test = _convert_to_dataframe(
        _get_local_tensor(X_test), sycl_queue=queue, target_df=dataframe
    )

    # Ensure predictions of batch algo match spmd
    spmd_model = KNeighborsClassifier_SPMD(n_neighbors=1, algorithm="brute").fit(
        local_dpt_X_train, local_dpt_y_train
    )
    batch_model = KNeighborsClassifier_Batch(n_neighbors=1, algorithm="brute").fit(
        X_train, y_train
    )
    spmd_dists, spmd_indcs = spmd_model.kneighbors(local_dpt_X_test)
    batch_dists, batch_indcs = batch_model.kneighbors(X_test)
    spmd_result = spmd_model.predict(local_dpt_X_test)
    batch_result = batch_model.predict(X_test)

    _assert_unordered_allclose(spmd_indcs, batch_indcs, localize=True)
    _assert_unordered_allclose(spmd_dists, batch_dists, localize=True)
    _spmd_assert_allclose(spmd_result, batch_result)
99
+
100
+
@pytest.mark.skipif(
    not _mpi_libs_and_gpu_available,
    reason="GPU device and MPI libs required for test",
)
@pytest.mark.parametrize("n_samples", [200, 10000])
@pytest.mark.parametrize("n_features_and_classes", [(5, 2), (25, 2), (25, 10)])
@pytest.mark.parametrize("n_neighbors", [1, 5, 20])
@pytest.mark.parametrize("weights", ["uniform", "distance"])
@pytest.mark.parametrize(
    "dataframe,queue",
    get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
)
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.mpi
def test_knncls_spmd_synthetic(
    n_samples,
    n_features_and_classes,
    n_neighbors,
    weights,
    dataframe,
    queue,
    dtype,
    metric="euclidean",
):
    """Compare SPMD and batch ``KNeighborsClassifier`` on generated data.

    Data comes from ``_generate_classification_data`` for every parametrized
    combination of sample count, (feature count, class count), neighbor count,
    weighting, dataframe/queue and dtype. ``metric`` is not parametrized, so it
    keeps its ``"euclidean"`` default. Predictions are compared for every dtype;
    neighbor indices and distances are compared for ``np.float64`` only.
    """
    n_features, n_classes = n_features_and_classes
    # Import spmd and batch algo
    from sklearnex.neighbors import KNeighborsClassifier as KNeighborsClassifier_Batch
    from sklearnex.spmd.neighbors import KNeighborsClassifier as KNeighborsClassifier_SPMD

    # Generate data and convert to dataframe
    X_train, X_test, y_train, _ = _generate_classification_data(
        n_samples, n_features, n_classes, dtype=dtype
    )

    local_dpt_X_train = _convert_to_dataframe(
        _get_local_tensor(X_train), sycl_queue=queue, target_df=dataframe
    )
    local_dpt_y_train = _convert_to_dataframe(
        _get_local_tensor(y_train), sycl_queue=queue, target_df=dataframe
    )
    local_dpt_X_test = _convert_to_dataframe(
        _get_local_tensor(X_test), sycl_queue=queue, target_df=dataframe
    )

    # Ensure predictions of batch algo match spmd
    spmd_model = KNeighborsClassifier_SPMD(
        n_neighbors=n_neighbors, weights=weights, metric=metric, algorithm="brute"
    ).fit(local_dpt_X_train, local_dpt_y_train)
    batch_model = KNeighborsClassifier_Batch(
        n_neighbors=n_neighbors, weights=weights, metric=metric, algorithm="brute"
    ).fit(X_train, y_train)
    spmd_dists, spmd_indcs = spmd_model.kneighbors(local_dpt_X_test)
    batch_dists, batch_indcs = batch_model.kneighbors(X_test)
    spmd_result = spmd_model.predict(local_dpt_X_test)
    batch_result = batch_model.predict(X_test)

    tol = 1e-4
    # NOTE(review): the diff rendering dropped indentation; both neighbor checks are
    # assumed to sit inside this float64-only branch - confirm against upstream.
    if dtype == np.float64:
        _assert_unordered_allclose(spmd_indcs, batch_indcs, localize=True)
        _assert_unordered_allclose(
            spmd_dists, batch_dists, localize=True, rtol=tol, atol=tol
        )
    _spmd_assert_allclose(spmd_result, batch_result)
164
+
165
+
@pytest.mark.skipif(
    not _mpi_libs_and_gpu_available,
    reason="GPU device and MPI libs required for test",
)
@pytest.mark.parametrize(
    "dataframe,queue",
    get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
)
@pytest.mark.mpi
def test_knnreg_spmd_gold(dataframe, queue):
    """Compare SPMD and batch ``KNeighborsRegressor`` on a small hand-written dataset.

    Both estimators are fitted with ``n_neighbors=1`` and ``algorithm="brute"``.
    The batch model sees the full NumPy arrays; the SPMD model sees whatever
    ``_get_local_tensor`` returns for each array, converted to the parametrized
    ``dataframe`` type on ``queue``. Neighbor indices, neighbor distances and
    predicted targets of the two models are then compared.
    """
    # Import spmd and batch algo
    from sklearnex.neighbors import KNeighborsRegressor as KNeighborsRegressor_Batch
    from sklearnex.spmd.neighbors import KNeighborsRegressor as KNeighborsRegressor_SPMD

    # Create gold data and convert to dataframe
    X_train = np.array(
        [
            [0.0, 0.0],
            [0.0, 1.0],
            [1.0, 0.0],
            [0.0, 2.0],
            [2.0, 0.0],
            [1.0, 1.0],
            [0.0, -1.0],
            [-1.0, 0.0],
            [-1.0, -1.0],
        ]
    )
    y_train = np.array([3.0, 5.0, 4.0, 7.0, 5.0, 6.0, 1.0, 2.0, 0.0])
    X_test = np.array(
        [
            [1.0, -0.5],
            [-5.0, 1.0],
            [0.0, 1.0],
            [10.0, -10.0],
        ]
    )

    # presumably _get_local_tensor picks this MPI rank's share of the array -
    # verify against sklearnex.tests._utils_spmd
    local_dpt_X_train = _convert_to_dataframe(
        _get_local_tensor(X_train), sycl_queue=queue, target_df=dataframe
    )
    local_dpt_y_train = _convert_to_dataframe(
        _get_local_tensor(y_train), sycl_queue=queue, target_df=dataframe
    )
    local_dpt_X_test = _convert_to_dataframe(
        _get_local_tensor(X_test), sycl_queue=queue, target_df=dataframe
    )

    # Ensure predictions of batch algo match spmd
    spmd_model = KNeighborsRegressor_SPMD(n_neighbors=1, algorithm="brute").fit(
        local_dpt_X_train, local_dpt_y_train
    )
    batch_model = KNeighborsRegressor_Batch(n_neighbors=1, algorithm="brute").fit(
        X_train, y_train
    )
    spmd_dists, spmd_indcs = spmd_model.kneighbors(local_dpt_X_test)
    batch_dists, batch_indcs = batch_model.kneighbors(X_test)
    spmd_result = spmd_model.predict(local_dpt_X_test)
    batch_result = batch_model.predict(X_test)

    _assert_unordered_allclose(spmd_indcs, batch_indcs, localize=True)
    _assert_unordered_allclose(spmd_dists, batch_dists, localize=True)
    _spmd_assert_allclose(spmd_result, batch_result)
229
+
230
+
@pytest.mark.skipif(
    not _mpi_libs_and_gpu_available,
    reason="GPU device and MPI libs required for test",
)
@pytest.mark.parametrize("n_samples", [200, 10000])
@pytest.mark.parametrize("n_features", [5, 25])
@pytest.mark.parametrize("n_neighbors", [1, 5, 20])
@pytest.mark.parametrize("weights", ["uniform", "distance"])
@pytest.mark.parametrize(
    "metric", ["euclidean", "manhattan", "minkowski", "chebyshev", "cosine"]
)
@pytest.mark.parametrize(
    "dataframe,queue",
    get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
)
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.mpi
def test_knnreg_spmd_synthetic(
    n_samples, n_features, n_neighbors, weights, metric, dataframe, queue, dtype
):
    """Compare SPMD and batch ``KNeighborsRegressor`` on generated data.

    Data comes from ``_generate_regression_data`` for every parametrized
    combination of sample count, feature count, neighbor count, weighting,
    metric, dataframe/queue and dtype. Predictions are compared for every dtype
    with a dtype-dependent tolerance; neighbor indices and distances are
    compared for ``np.float64`` only.
    """
    # Import spmd and batch algo
    from sklearnex.neighbors import KNeighborsRegressor as KNeighborsRegressor_Batch
    from sklearnex.spmd.neighbors import KNeighborsRegressor as KNeighborsRegressor_SPMD

    # Generate data and convert to dataframe
    X_train, X_test, y_train, _ = _generate_regression_data(
        n_samples, n_features, dtype=dtype
    )

    local_dpt_X_train = _convert_to_dataframe(
        _get_local_tensor(X_train), sycl_queue=queue, target_df=dataframe
    )
    local_dpt_y_train = _convert_to_dataframe(
        _get_local_tensor(y_train), sycl_queue=queue, target_df=dataframe
    )
    local_dpt_X_test = _convert_to_dataframe(
        _get_local_tensor(X_test), sycl_queue=queue, target_df=dataframe
    )

    # Ensure predictions of batch algo match spmd
    spmd_model = KNeighborsRegressor_SPMD(
        n_neighbors=n_neighbors, weights=weights, metric=metric, algorithm="brute"
    ).fit(local_dpt_X_train, local_dpt_y_train)
    batch_model = KNeighborsRegressor_Batch(
        n_neighbors=n_neighbors, weights=weights, metric=metric, algorithm="brute"
    ).fit(X_train, y_train)
    spmd_dists, spmd_indcs = spmd_model.kneighbors(local_dpt_X_test)
    batch_dists, batch_indcs = batch_model.kneighbors(X_test)
    spmd_result = spmd_model.predict(local_dpt_X_test)
    batch_result = batch_model.predict(X_test)

    # Looser tolerance for single precision inputs
    tol = 0.005 if dtype == np.float32 else 1e-4
    # NOTE(review): the diff rendering dropped indentation; both neighbor checks are
    # assumed to sit inside this float64-only branch, and the prediction check outside
    # it (otherwise the float32 tolerance would never be used) - confirm upstream.
    if dtype == np.float64:
        _assert_unordered_allclose(spmd_indcs, batch_indcs, localize=True)
        _assert_unordered_allclose(
            spmd_dists, batch_dists, localize=True, rtol=tol, atol=tol
        )
    _spmd_assert_allclose(spmd_result, batch_result, rtol=tol, atol=tol)
@@ -0,0 +1,29 @@
1
+ # ==============================================================================
2
+ # Copyright 2021 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
from .._utils import get_sklearnex_version

# Choose the exported SVM estimators from a version check.
# presumably get_sklearnex_version is true when the installed backend is at least
# 2021.P.300 - verify against sklearnex._utils
if get_sklearnex_version((2021, "P", 300)):
    # Check passed: export all four estimators from this package's own modules
    from .nusvc import NuSVC
    from .nusvr import NuSVR
    from .svc import SVC
    from .svr import SVR

    __all__ = ["SVR", "SVC", "NuSVC", "NuSVR"]
else:
    # Check failed: only the daal4py SVC is exported
    from daal4py.sklearn.svm import SVC

    __all__ = ["SVC"]
@@ -0,0 +1,328 @@
1
+ # ==============================================================================
2
+ # Copyright 2021 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import warnings
18
+ from abc import ABC
19
+ from numbers import Number, Real
20
+
21
+ import numpy as np
22
+ from scipy import sparse as sp
23
+ from sklearn.base import BaseEstimator, ClassifierMixin
24
+ from sklearn.calibration import CalibratedClassifierCV
25
+ from sklearn.metrics import r2_score
26
+ from sklearn.preprocessing import LabelEncoder
27
+
28
+ from daal4py.sklearn._utils import sklearn_check_version
29
+ from onedal.utils import _check_array, _check_X_y, _column_or_1d
30
+
31
+ from .._config import config_context, get_config
32
+ from .._utils import PatchingConditionsChain
33
+
34
+
35
def _invalidate_onedal_model(self):
    """Drop the oneDAL model cached on ``self._onedal_estimator``.

    Nothing is dropped while ``self._is_in_fit`` is true. Outside of fit the
    cached ``_onedal_model`` attribute is deleted when it exists; a cache that
    was already removed by an earlier assignment is tolerated instead of
    raising ``AttributeError``.
    """
    if self._is_in_fit:
        return
    onedal_estimator = self._onedal_estimator
    if hasattr(onedal_estimator, "_onedal_model"):
        del onedal_estimator._onedal_model


def get_dual_coef(self):
    """Return the ``dual_coef_`` attribute of ``self``."""
    return self.dual_coef_


def set_dual_coef(self, value):
    """Store ``value`` as ``dual_coef_`` and mirror it to the oneDAL estimator.

    When ``self`` has an ``_onedal_estimator`` attribute, the value is copied
    to it as well and, outside of fit, its cached ``_onedal_model`` is dropped.
    """
    self.dual_coef_ = value
    if hasattr(self, "_onedal_estimator"):
        self._onedal_estimator.dual_coef_ = value
        _invalidate_onedal_model(self)


def get_intercept(self):
    """Return the private ``_intercept_`` attribute of ``self``."""
    return self._intercept_


def set_intercept(self, value):
    """Store ``value`` as ``_intercept_`` and mirror it to the oneDAL estimator.

    When ``self`` has an ``_onedal_estimator`` attribute, the value is copied
    to its ``intercept_`` and, outside of fit, its cached ``_onedal_model`` is
    dropped.
    """
    self._intercept_ = value
    if hasattr(self, "_onedal_estimator"):
        self._onedal_estimator.intercept_ = value
        _invalidate_onedal_model(self)
57
+
58
+
59
class BaseSVM(BaseEstimator, ABC):
    """Helpers shared by the SVM estimators that dispatch to oneDAL.

    The class bundles the CPU/GPU patching-condition checks, the resolution of
    the ``gamma`` hyper-parameter and the input / sample-weight validation
    that is run before fitting.
    """

    def _onedal_gpu_supported(self, method_name, *data):
        """Return a patching chain that always rejects GPU offloading.

        The chain is labelled ``sklearn.svm.<ClassName>.<method_name>``, the
        same scheme used by ``_onedal_cpu_supported``.
        """
        class_name = self.__class__.__name__
        patching_status = PatchingConditionsChain(
            f"sklearn.svm.{class_name}.{method_name}"
        )
        patching_status.and_conditions([(False, "GPU offloading is not supported.")])
        return patching_status

    def _onedal_cpu_supported(self, method_name, *data):
        """Return the patching chain deciding whether oneDAL can run on CPU.

        ``fit`` requires one of the "linear", "rbf", "poly" or "sigmoid"
        kernels. Inference methods require that ``_onedal_estimator`` exists.

        Raises
        ------
        RuntimeError
            If ``method_name`` is neither ``fit`` nor a known inference method.
        """
        class_name = self.__class__.__name__
        patching_status = PatchingConditionsChain(
            f"sklearn.svm.{class_name}.{method_name}"
        )
        if method_name == "fit":
            patching_status.and_conditions(
                [
                    (
                        self.kernel in ["linear", "rbf", "poly", "sigmoid"],
                        f'Kernel is "{self.kernel}" while '
                        '"linear", "rbf", "poly" and "sigmoid" are only supported.',
                    )
                ]
            )
            return patching_status

        # Classes whose name ends in "R" get the reduced method list
        # (no predict_proba / decision_function).
        inference_methods = (
            ["predict", "score"]
            if class_name.endswith("R")
            else ["predict", "predict_proba", "decision_function", "score"]
        )
        if method_name in inference_methods:
            patching_status.and_conditions(
                [(hasattr(self, "_onedal_estimator"), "oneDAL model was not trained.")]
            )
            return patching_status
        raise RuntimeError(f"Unknown method {method_name} in {class_name}")

    def _compute_gamma_sigma(self, X):
        """Resolve ``self.gamma`` into the value handed to the oneDAL estimator.

        Returns ``1.0`` for the linear kernel, the data-derived value for
        ``"scale"`` / ``"auto"``, and ``self.gamma`` itself otherwise.

        Raises
        ------
        ValueError
            If ``gamma`` is an unknown string, or (scikit-learn 1.1 only) is
            not a positive real number.
        """
        # only run extended conversion if kernel is not linear
        # set to a value = 1.0, so gamma will always be passed to
        # the onedal estimator as a float type
        if self.kernel == "linear":
            return 1.0

        if isinstance(self.gamma, str):
            if self.gamma == "scale":
                if sp.issparse(X):
                    # var = E[X^2] - E[X]^2
                    X_sc = (X.multiply(X)).mean() - (X.mean()) ** 2
                else:
                    X_sc = X.var()
                # Fall back to 1.0 when the data has zero variance.
                _gamma = 1.0 / (X.shape[1] * X_sc) if X_sc != 0 else 1.0
            elif self.gamma == "auto":
                _gamma = 1.0 / X.shape[1]
            else:
                raise ValueError(
                    "When 'gamma' is a string, it should be either 'scale' or "
                    f"'auto'. Got '{self.gamma}' instead."
                )
        else:
            # Explicit validation is performed here only for scikit-learn 1.1.x.
            if sklearn_check_version("1.1") and not sklearn_check_version("1.2"):
                if isinstance(self.gamma, Real):
                    if self.gamma <= 0:
                        msg = (
                            f"gamma value must be > 0; {self.gamma!r} is invalid. Use"
                            " a positive number or use 'auto' to set gamma to a"
                            " value of 1 / n_features."
                        )
                        raise ValueError(msg)
                    _gamma = self.gamma
                else:
                    msg = (
                        "The gamma value should be set to 'scale', 'auto' or a"
                        f" positive float value. {self.gamma!r} is not a valid option"
                    )
                    raise ValueError(msg)
            else:
                _gamma = self.gamma
        return _gamma

    def _onedal_fit_checks(self, X, y, sample_weight=None):
        """Validate the fit inputs and return ``(X, y, sample_weight)``.

        Raises
        ------
        ValueError
            If ``decision_function_shape`` is set to an unsupported value, or
            if ``y`` is None while the estimator tags require it.
        """
        if hasattr(self, "decision_function_shape"):
            if self.decision_function_shape not in ("ovr", "ovo", None):
                raise ValueError(
                    "decision_function_shape must be either 'ovr' or 'ovo', "
                    f"got {self.decision_function_shape}."
                )

        if y is None:
            if self._get_tags()["requires_y"]:
                raise ValueError(
                    f"This {self.__class__.__name__} estimator "
                    "requires y to be passed, but the target y is None."
                )
        # using onedal _check_X_y to ensure X and y are contiguous
        # finite check occurs in onedal estimator
        X, y = _check_X_y(
            X,
            y,
            dtype=[np.float64, np.float32],
            force_all_finite=False,
            accept_sparse="csr",
        )
        # _validate_targets is not defined in this class; presumably supplied
        # by the concrete estimator - verify against the subclasses.
        y = self._validate_targets(y)
        sample_weight = self._get_sample_weight(X, y, sample_weight)
        return X, y, sample_weight

    def _get_sample_weight(self, X, y, sample_weight):
        """Validate ``sample_weight`` against ``X`` and return it as an array.

        A scalar weight is broadcast to every sample. ``None`` or an empty
        sequence yields ``None``, except for classifiers with a non-None
        ``class_weight_``, which receive an array of ones. ``y`` is accepted
        for signature compatibility and is not used.

        Raises
        ------
        ValueError
            If ``X`` holds a single sample, if the weights do not form a 1D
            array with one entry per sample, or if all weights are <= 0.
        """
        n_samples = X.shape[0]
        dtype = X.dtype
        if n_samples == 1:
            raise ValueError("n_samples=1")

        if isinstance(sample_weight, Number):
            # Scalars must be recognised before any array conversion:
            # np.ascontiguousarray would turn them into a length-1 array that
            # then fails the shape comparison below.
            sample_weight = np.full(n_samples, sample_weight, dtype=dtype)
        else:
            sample_weight = np.ascontiguousarray(
                [] if sample_weight is None else sample_weight, dtype=np.float64
            )

            sample_weight_count = sample_weight.shape[0]
            if sample_weight_count != 0 and sample_weight_count != n_samples:
                raise ValueError(
                    "sample_weight and X have incompatible shapes: "
                    "%r vs %r\n"
                    "Note: Sparse matrices cannot be indexed w/"
                    "boolean masks (use `indices=True` in CV)."
                    % (len(sample_weight), X.shape)
                )

            if sample_weight_count == 0:
                if not isinstance(self, ClassifierMixin) or self.class_weight_ is None:
                    return None
                sample_weight = np.ones(n_samples, dtype=dtype)
            else:
                sample_weight = _check_array(
                    sample_weight,
                    accept_sparse=False,
                    ensure_2d=False,
                    dtype=dtype,
                    order="C",
                )
                if sample_weight.ndim != 1:
                    raise ValueError("Sample weights must be 1D array or scalar")

                if sample_weight.shape != (n_samples,):
                    raise ValueError(
                        "sample_weight.shape == {}, expected {}!".format(
                            sample_weight.shape, (n_samples,)
                        )
                    )

        if np.all(sample_weight <= 0):
            # The message differs for estimators whose module name contains "nusvc".
            if "nusvc" in self.__module__:
                raise ValueError("negative dimensions are not allowed")
            else:
                raise ValueError(
                    "Invalid input - all samples have zero or negative weights."
                )

        return sample_weight
217
+
218
+
219
class BaseSVC(BaseSVM):
    """Classifier-specific helpers layered on top of ``BaseSVM``."""

    def _compute_balanced_class_weight(self, y):
        """Return ``n_samples / (n_classes * count(class))`` for each class in ``y``.

        Raises
        ------
        ValueError
            If a class found in ``y`` is missing from the fitted label encoder.
        """
        y_ = _column_or_1d(y)
        classes = np.unique(y_)

        le = LabelEncoder()
        y_ind = le.fit_transform(y_)
        if not np.isin(classes, le.classes_).all():
            raise ValueError("classes should have valid labels that are in y")

        class_counts = np.bincount(y_ind).astype(np.float64)
        recip_freq = len(y_) / (len(le.classes_) * class_counts)
        return recip_freq[le.transform(classes)]

    def _fit_proba(self, X, y, sample_weight=None, queue=None):
        """Fit the sigmoid calibrator stored in ``self.clf_prob``.

        A clone of this estimator with ``probability=False`` and
        ``decision_function_shape="ovr"`` is fitted and wrapped in a prefit
        ``CalibratedClassifierCV``. ``sample_weight`` is forwarded to both fit
        calls, and ``queue`` is passed through ``config_context`` as
        ``target_offload``.

        Warns
        -----
        RuntimeWarning
            If ``random_state`` is set, since it does not influence the result.
        """
        # TODO: rewrite this method when probabilities output is implemented in oneDAL

        # LibSVM uses the random seed to control cross-validation for probability generation
        # CalibratedClassifierCV with "prefit" does not use an RNG nor a seed. This may
        # impact users without their knowledge, so display a warning.
        if self.random_state is not None:
            warnings.warn(
                "random_state does not influence oneDAL SVM results",
                RuntimeWarning,
            )

        params = self.get_params()
        params["probability"] = False
        params["decision_function_shape"] = "ovr"
        clf_base = self.__class__(**params)

        # We use stock metaestimators below, so the only way
        # to pass a queue is using config_context.
        cfg = get_config()
        cfg["target_offload"] = queue
        with config_context(**cfg):
            # Forward the weights so the base model and the calibrator see the
            # same weighting as the main fit; None keeps both calls unweighted.
            clf_base.fit(X, y, sample_weight=sample_weight)
            self.clf_prob = CalibratedClassifierCV(
                clf_base,
                ensemble=False,
                cv="prefit",
                method="sigmoid",
            ).fit(X, y, sample_weight=sample_weight)

    def _save_attributes(self):
        """Copy the fitted state from ``self._onedal_estimator`` onto ``self``."""
        self.support_vectors_ = self._onedal_estimator.support_vectors_
        self.n_features_in_ = self._onedal_estimator.n_features_in_
        self.fit_status_ = 0
        self.dual_coef_ = self._onedal_estimator.dual_coef_
        # Training-data shape, read from the same attribute BaseSVR uses
        # (previously class_weight_ was stored here by mistake).
        self.shape_fit_ = self._onedal_estimator.shape_fit_
        self.classes_ = self._onedal_estimator.classes_
        if isinstance(self, ClassifierMixin) or not sklearn_check_version("1.2"):
            self.class_weight_ = self._onedal_estimator.class_weight_
        self.support_ = self._onedal_estimator.support_

        self._intercept_ = self._onedal_estimator.intercept_
        self._n_support = self._onedal_estimator._n_support
        self._sparse = False
        self._gamma = self._onedal_estimator._gamma

        # Number of unordered class pairs.
        n_classes = len(self.classes_)
        n_class_pairs = n_classes * (n_classes - 1) // 2
        if self.probability:
            self._probA = np.zeros(n_class_pairs)
            self._probB = np.zeros(n_class_pairs)
        else:
            self._probA = np.empty(0)
            self._probB = np.empty(0)

        # NOTE(review): a property object stored as an instance attribute is
        # not invoked as a descriptor, and both attributes are reassigned just
        # below - confirm the intent of these two assignments.
        self._dual_coef_ = property(get_dual_coef, set_dual_coef)
        self.intercept_ = property(get_intercept, set_intercept)

        self._is_in_fit = True
        self._dual_coef_ = self.dual_coef_
        self.intercept_ = self._intercept_
        self._is_in_fit = False

        if sklearn_check_version("1.1"):
            # One entry per class pair, each set to the oneDAL iteration count.
            self.n_iter_ = np.full((n_class_pairs,), self._onedal_estimator.n_iter_)
296
+
297
+
298
class BaseSVR(BaseSVM):
    """Regressor-specific helpers layered on top of ``BaseSVM``."""

    def _save_attributes(self):
        """Copy the fitted state from ``self._onedal_estimator`` onto ``self``."""
        fitted = self._onedal_estimator

        # Public fitted attributes.
        self.support_vectors_ = fitted.support_vectors_
        self.n_features_in_ = fitted.n_features_in_
        self.fit_status_ = 0
        self.dual_coef_ = fitted.dual_coef_
        self.shape_fit_ = fitted.shape_fit_
        self.support_ = fitted.support_

        # Private state.
        self._intercept_ = fitted.intercept_
        self._n_support = [self.support_vectors_.shape[0]]
        self._sparse = False
        self._gamma = fitted._gamma
        self._probA = None
        self._probB = None

        self._dual_coef_ = property(get_dual_coef, set_dual_coef)
        self.intercept_ = property(get_intercept, set_intercept)

        # Both attributes are reassigned while the in-fit flag is raised.
        self._is_in_fit = True
        self._dual_coef_ = self.dual_coef_
        self.intercept_ = self._intercept_
        self._is_in_fit = False

        if sklearn_check_version("1.1"):
            self.n_iter_ = fitted.n_iter_

    def _onedal_score(self, X, y, sample_weight=None, queue=None):
        """Return the R^2 score of the oneDAL predictions for ``X`` against ``y``."""
        predictions = self._onedal_predict(X, queue=queue)
        return r2_score(y, predictions, sample_weight=sample_weight)