scikit-learn-intelex 2025.0.0__py311-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (278) hide show
  1. daal4py/__init__.py +73 -0
  2. daal4py/__main__.py +58 -0
  3. daal4py/_daal4py.cpython-311-x86_64-linux-gnu.so +0 -0
  4. daal4py/doc/third-party-programs.txt +424 -0
  5. daal4py/mb/__init__.py +19 -0
  6. daal4py/mb/model_builders.py +377 -0
  7. daal4py/mpi_transceiver.cpython-311-x86_64-linux-gnu.so +0 -0
  8. daal4py/sklearn/__init__.py +40 -0
  9. daal4py/sklearn/_n_jobs_support.py +242 -0
  10. daal4py/sklearn/_utils.py +241 -0
  11. daal4py/sklearn/cluster/__init__.py +20 -0
  12. daal4py/sklearn/cluster/dbscan.py +165 -0
  13. daal4py/sklearn/cluster/k_means.py +597 -0
  14. daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
  15. daal4py/sklearn/decomposition/__init__.py +19 -0
  16. daal4py/sklearn/decomposition/_pca.py +524 -0
  17. daal4py/sklearn/ensemble/AdaBoostClassifier.py +192 -0
  18. daal4py/sklearn/ensemble/GBTDAAL.py +318 -0
  19. daal4py/sklearn/ensemble/__init__.py +27 -0
  20. daal4py/sklearn/ensemble/_forest.py +1397 -0
  21. daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
  22. daal4py/sklearn/linear_model/__init__.py +29 -0
  23. daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
  24. daal4py/sklearn/linear_model/_linear.py +272 -0
  25. daal4py/sklearn/linear_model/_ridge.py +325 -0
  26. daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
  27. daal4py/sklearn/linear_model/linear.py +17 -0
  28. daal4py/sklearn/linear_model/logistic_loss.py +195 -0
  29. daal4py/sklearn/linear_model/logistic_path.py +1026 -0
  30. daal4py/sklearn/linear_model/ridge.py +17 -0
  31. daal4py/sklearn/linear_model/tests/test_linear.py +196 -0
  32. daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
  33. daal4py/sklearn/manifold/__init__.py +19 -0
  34. daal4py/sklearn/manifold/_t_sne.py +405 -0
  35. daal4py/sklearn/metrics/__init__.py +20 -0
  36. daal4py/sklearn/metrics/_pairwise.py +155 -0
  37. daal4py/sklearn/metrics/_ranking.py +210 -0
  38. daal4py/sklearn/model_selection/__init__.py +19 -0
  39. daal4py/sklearn/model_selection/_split.py +309 -0
  40. daal4py/sklearn/model_selection/tests/test_split.py +56 -0
  41. daal4py/sklearn/monkeypatch/__init__.py +0 -0
  42. daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
  43. daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
  44. daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
  45. daal4py/sklearn/monkeypatch/tests/test_patching.py +87 -0
  46. daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +118 -0
  47. daal4py/sklearn/neighbors/__init__.py +21 -0
  48. daal4py/sklearn/neighbors/_base.py +503 -0
  49. daal4py/sklearn/neighbors/_classification.py +139 -0
  50. daal4py/sklearn/neighbors/_regression.py +74 -0
  51. daal4py/sklearn/neighbors/_unsupervised.py +55 -0
  52. daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
  53. daal4py/sklearn/svm/__init__.py +19 -0
  54. daal4py/sklearn/svm/svm.py +734 -0
  55. daal4py/sklearn/utils/__init__.py +21 -0
  56. daal4py/sklearn/utils/base.py +75 -0
  57. daal4py/sklearn/utils/tests/test_utils.py +51 -0
  58. daal4py/sklearn/utils/validation.py +693 -0
  59. onedal/__init__.py +83 -0
  60. onedal/_config.py +53 -0
  61. onedal/_device_offload.py +229 -0
  62. onedal/_onedal_py_dpc.cpython-311-x86_64-linux-gnu.so +0 -0
  63. onedal/_onedal_py_host.cpython-311-x86_64-linux-gnu.so +0 -0
  64. onedal/_onedal_py_spmd_dpc.cpython-311-x86_64-linux-gnu.so +0 -0
  65. onedal/basic_statistics/__init__.py +20 -0
  66. onedal/basic_statistics/basic_statistics.py +107 -0
  67. onedal/basic_statistics/incremental_basic_statistics.py +160 -0
  68. onedal/basic_statistics/tests/test_basic_statistics.py +298 -0
  69. onedal/basic_statistics/tests/test_incremental_basic_statistics.py +196 -0
  70. onedal/cluster/__init__.py +27 -0
  71. onedal/cluster/dbscan.py +110 -0
  72. onedal/cluster/kmeans.py +560 -0
  73. onedal/cluster/kmeans_init.py +115 -0
  74. onedal/cluster/tests/test_dbscan.py +125 -0
  75. onedal/cluster/tests/test_kmeans.py +88 -0
  76. onedal/cluster/tests/test_kmeans_init.py +93 -0
  77. onedal/common/_base.py +38 -0
  78. onedal/common/_estimator_checks.py +47 -0
  79. onedal/common/_mixin.py +62 -0
  80. onedal/common/_policy.py +59 -0
  81. onedal/common/_spmd_policy.py +30 -0
  82. onedal/common/hyperparameters.py +116 -0
  83. onedal/common/tests/test_policy.py +75 -0
  84. onedal/covariance/__init__.py +20 -0
  85. onedal/covariance/covariance.py +125 -0
  86. onedal/covariance/incremental_covariance.py +146 -0
  87. onedal/covariance/tests/test_covariance.py +50 -0
  88. onedal/covariance/tests/test_incremental_covariance.py +122 -0
  89. onedal/datatypes/__init__.py +19 -0
  90. onedal/datatypes/_data_conversion.py +95 -0
  91. onedal/datatypes/tests/test_data.py +235 -0
  92. onedal/decomposition/__init__.py +20 -0
  93. onedal/decomposition/incremental_pca.py +204 -0
  94. onedal/decomposition/pca.py +186 -0
  95. onedal/decomposition/tests/test_incremental_pca.py +198 -0
  96. onedal/ensemble/__init__.py +29 -0
  97. onedal/ensemble/forest.py +720 -0
  98. onedal/ensemble/tests/test_random_forest.py +97 -0
  99. onedal/linear_model/__init__.py +27 -0
  100. onedal/linear_model/incremental_linear_model.py +258 -0
  101. onedal/linear_model/linear_model.py +329 -0
  102. onedal/linear_model/logistic_regression.py +249 -0
  103. onedal/linear_model/tests/test_incremental_linear_regression.py +168 -0
  104. onedal/linear_model/tests/test_incremental_ridge_regression.py +107 -0
  105. onedal/linear_model/tests/test_linear_regression.py +149 -0
  106. onedal/linear_model/tests/test_logistic_regression.py +95 -0
  107. onedal/linear_model/tests/test_ridge.py +95 -0
  108. onedal/neighbors/__init__.py +19 -0
  109. onedal/neighbors/neighbors.py +778 -0
  110. onedal/neighbors/tests/test_knn_classification.py +49 -0
  111. onedal/primitives/__init__.py +27 -0
  112. onedal/primitives/get_tree.py +25 -0
  113. onedal/primitives/kernel_functions.py +153 -0
  114. onedal/primitives/tests/test_kernel_functions.py +159 -0
  115. onedal/spmd/__init__.py +25 -0
  116. onedal/spmd/_base.py +30 -0
  117. onedal/spmd/basic_statistics/__init__.py +20 -0
  118. onedal/spmd/basic_statistics/basic_statistics.py +30 -0
  119. onedal/spmd/basic_statistics/incremental_basic_statistics.py +69 -0
  120. onedal/spmd/cluster/__init__.py +28 -0
  121. onedal/spmd/cluster/dbscan.py +23 -0
  122. onedal/spmd/cluster/kmeans.py +56 -0
  123. onedal/spmd/covariance/__init__.py +20 -0
  124. onedal/spmd/covariance/covariance.py +26 -0
  125. onedal/spmd/covariance/incremental_covariance.py +82 -0
  126. onedal/spmd/decomposition/__init__.py +20 -0
  127. onedal/spmd/decomposition/incremental_pca.py +117 -0
  128. onedal/spmd/decomposition/pca.py +26 -0
  129. onedal/spmd/ensemble/__init__.py +19 -0
  130. onedal/spmd/ensemble/forest.py +28 -0
  131. onedal/spmd/linear_model/__init__.py +21 -0
  132. onedal/spmd/linear_model/incremental_linear_model.py +97 -0
  133. onedal/spmd/linear_model/linear_model.py +30 -0
  134. onedal/spmd/linear_model/logistic_regression.py +38 -0
  135. onedal/spmd/neighbors/__init__.py +19 -0
  136. onedal/spmd/neighbors/neighbors.py +75 -0
  137. onedal/svm/__init__.py +19 -0
  138. onedal/svm/svm.py +556 -0
  139. onedal/svm/tests/test_csr_svm.py +351 -0
  140. onedal/svm/tests/test_nusvc.py +204 -0
  141. onedal/svm/tests/test_nusvr.py +210 -0
  142. onedal/svm/tests/test_svc.py +168 -0
  143. onedal/svm/tests/test_svr.py +243 -0
  144. onedal/tests/test_common.py +41 -0
  145. onedal/tests/utils/_dataframes_support.py +168 -0
  146. onedal/tests/utils/_device_selection.py +107 -0
  147. onedal/utils/__init__.py +49 -0
  148. onedal/utils/_array_api.py +91 -0
  149. onedal/utils/validation.py +432 -0
  150. scikit_learn_intelex-2025.0.0.dist-info/LICENSE.txt +202 -0
  151. scikit_learn_intelex-2025.0.0.dist-info/METADATA +231 -0
  152. scikit_learn_intelex-2025.0.0.dist-info/RECORD +278 -0
  153. scikit_learn_intelex-2025.0.0.dist-info/WHEEL +5 -0
  154. scikit_learn_intelex-2025.0.0.dist-info/top_level.txt +3 -0
  155. sklearnex/__init__.py +65 -0
  156. sklearnex/__main__.py +58 -0
  157. sklearnex/_config.py +98 -0
  158. sklearnex/_device_offload.py +121 -0
  159. sklearnex/_utils.py +109 -0
  160. sklearnex/basic_statistics/__init__.py +20 -0
  161. sklearnex/basic_statistics/basic_statistics.py +140 -0
  162. sklearnex/basic_statistics/incremental_basic_statistics.py +288 -0
  163. sklearnex/basic_statistics/tests/test_basic_statistics.py +251 -0
  164. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +384 -0
  165. sklearnex/cluster/__init__.py +20 -0
  166. sklearnex/cluster/dbscan.py +192 -0
  167. sklearnex/cluster/k_means.py +383 -0
  168. sklearnex/cluster/tests/test_dbscan.py +38 -0
  169. sklearnex/cluster/tests/test_kmeans.py +153 -0
  170. sklearnex/conftest.py +73 -0
  171. sklearnex/covariance/__init__.py +19 -0
  172. sklearnex/covariance/incremental_covariance.py +368 -0
  173. sklearnex/covariance/tests/test_incremental_covariance.py +226 -0
  174. sklearnex/decomposition/__init__.py +19 -0
  175. sklearnex/decomposition/pca.py +414 -0
  176. sklearnex/decomposition/tests/test_pca.py +58 -0
  177. sklearnex/dispatcher.py +543 -0
  178. sklearnex/doc/third-party-programs.txt +424 -0
  179. sklearnex/ensemble/__init__.py +29 -0
  180. sklearnex/ensemble/_forest.py +2016 -0
  181. sklearnex/ensemble/tests/test_forest.py +120 -0
  182. sklearnex/glob/__main__.py +72 -0
  183. sklearnex/glob/dispatcher.py +101 -0
  184. sklearnex/linear_model/__init__.py +32 -0
  185. sklearnex/linear_model/coordinate_descent.py +30 -0
  186. sklearnex/linear_model/incremental_linear.py +463 -0
  187. sklearnex/linear_model/incremental_ridge.py +418 -0
  188. sklearnex/linear_model/linear.py +302 -0
  189. sklearnex/linear_model/logistic_path.py +17 -0
  190. sklearnex/linear_model/logistic_regression.py +403 -0
  191. sklearnex/linear_model/ridge.py +24 -0
  192. sklearnex/linear_model/tests/test_incremental_linear.py +203 -0
  193. sklearnex/linear_model/tests/test_incremental_ridge.py +153 -0
  194. sklearnex/linear_model/tests/test_linear.py +142 -0
  195. sklearnex/linear_model/tests/test_logreg.py +134 -0
  196. sklearnex/manifold/__init__.py +19 -0
  197. sklearnex/manifold/t_sne.py +21 -0
  198. sklearnex/manifold/tests/test_tsne.py +26 -0
  199. sklearnex/metrics/__init__.py +23 -0
  200. sklearnex/metrics/pairwise.py +22 -0
  201. sklearnex/metrics/ranking.py +20 -0
  202. sklearnex/metrics/tests/test_metrics.py +39 -0
  203. sklearnex/model_selection/__init__.py +21 -0
  204. sklearnex/model_selection/split.py +22 -0
  205. sklearnex/model_selection/tests/test_model_selection.py +34 -0
  206. sklearnex/neighbors/__init__.py +27 -0
  207. sklearnex/neighbors/_lof.py +231 -0
  208. sklearnex/neighbors/common.py +310 -0
  209. sklearnex/neighbors/knn_classification.py +226 -0
  210. sklearnex/neighbors/knn_regression.py +203 -0
  211. sklearnex/neighbors/knn_unsupervised.py +170 -0
  212. sklearnex/neighbors/tests/test_neighbors.py +80 -0
  213. sklearnex/preview/__init__.py +17 -0
  214. sklearnex/preview/covariance/__init__.py +19 -0
  215. sklearnex/preview/covariance/covariance.py +133 -0
  216. sklearnex/preview/covariance/tests/test_covariance.py +66 -0
  217. sklearnex/preview/decomposition/__init__.py +19 -0
  218. sklearnex/preview/decomposition/incremental_pca.py +228 -0
  219. sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
  220. sklearnex/preview/linear_model/__init__.py +19 -0
  221. sklearnex/preview/linear_model/ridge.py +419 -0
  222. sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
  223. sklearnex/spmd/__init__.py +25 -0
  224. sklearnex/spmd/basic_statistics/__init__.py +20 -0
  225. sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
  226. sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
  227. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  228. sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +307 -0
  229. sklearnex/spmd/cluster/__init__.py +30 -0
  230. sklearnex/spmd/cluster/dbscan.py +50 -0
  231. sklearnex/spmd/cluster/kmeans.py +21 -0
  232. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  233. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
  234. sklearnex/spmd/covariance/__init__.py +20 -0
  235. sklearnex/spmd/covariance/covariance.py +21 -0
  236. sklearnex/spmd/covariance/incremental_covariance.py +37 -0
  237. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  238. sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
  239. sklearnex/spmd/decomposition/__init__.py +20 -0
  240. sklearnex/spmd/decomposition/incremental_pca.py +30 -0
  241. sklearnex/spmd/decomposition/pca.py +21 -0
  242. sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
  243. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  244. sklearnex/spmd/ensemble/__init__.py +19 -0
  245. sklearnex/spmd/ensemble/forest.py +71 -0
  246. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  247. sklearnex/spmd/linear_model/__init__.py +21 -0
  248. sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
  249. sklearnex/spmd/linear_model/linear_model.py +21 -0
  250. sklearnex/spmd/linear_model/logistic_regression.py +21 -0
  251. sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +329 -0
  252. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  253. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +166 -0
  254. sklearnex/spmd/neighbors/__init__.py +19 -0
  255. sklearnex/spmd/neighbors/neighbors.py +25 -0
  256. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  257. sklearnex/svm/__init__.py +29 -0
  258. sklearnex/svm/_common.py +328 -0
  259. sklearnex/svm/nusvc.py +332 -0
  260. sklearnex/svm/nusvr.py +148 -0
  261. sklearnex/svm/svc.py +360 -0
  262. sklearnex/svm/svr.py +149 -0
  263. sklearnex/svm/tests/test_svm.py +93 -0
  264. sklearnex/tests/_utils.py +328 -0
  265. sklearnex/tests/_utils_spmd.py +198 -0
  266. sklearnex/tests/test_common.py +54 -0
  267. sklearnex/tests/test_config.py +43 -0
  268. sklearnex/tests/test_memory_usage.py +291 -0
  269. sklearnex/tests/test_monkeypatch.py +276 -0
  270. sklearnex/tests/test_n_jobs_support.py +103 -0
  271. sklearnex/tests/test_parallel.py +48 -0
  272. sklearnex/tests/test_patching.py +385 -0
  273. sklearnex/tests/test_run_to_run_stability.py +296 -0
  274. sklearnex/utils/__init__.py +19 -0
  275. sklearnex/utils/_array_api.py +82 -0
  276. sklearnex/utils/parallel.py +59 -0
  277. sklearnex/utils/tests/test_finite.py +89 -0
  278. sklearnex/utils/validation.py +17 -0
@@ -0,0 +1,383 @@
1
+ # ==============================================================================
2
+ # Copyright 2021 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import logging
18
+
19
+ from daal4py.sklearn._utils import daal_check_version
20
+
21
+ if daal_check_version((2023, "P", 200)):
22
+
23
+ import numbers
24
+ import warnings
25
+
26
+ import numpy as np
27
+ from scipy.sparse import issparse
28
+ from sklearn.cluster import KMeans as sklearn_KMeans
29
+ from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
30
+ from sklearn.utils.validation import (
31
+ _check_sample_weight,
32
+ _num_samples,
33
+ check_is_fitted,
34
+ )
35
+
36
+ from daal4py.sklearn._n_jobs_support import control_n_jobs
37
+ from daal4py.sklearn._utils import sklearn_check_version
38
+ from onedal.cluster import KMeans as onedal_KMeans
39
+ from onedal.utils import _is_csr
40
+
41
+ from .._device_offload import dispatch, wrap_output_data
42
+ from .._utils import PatchingConditionsChain
43
+
44
@control_n_jobs(decorated_methods=["fit", "predict", "transform", "fit_transform"])
class KMeans(sklearn_KMeans):
    # oneDAL-backed subclass of scikit-learn's KMeans.
    #
    # Public methods hand an "onedal" and a "sklearn" implementation to
    # ``dispatch``; the ``_onedal_*_supported`` methods build the
    # PatchingConditionsChain describing whether the oneDAL path can be used.
    # User-facing docstrings are copied from scikit-learn (here and at the
    # bottom of the class body), so explanatory notes on public methods are
    # written as comments rather than docstrings.
    __doc__ = sklearn_KMeans.__doc__

    if sklearn_check_version("1.2"):
        _parameter_constraints: dict = {**sklearn_KMeans._parameter_constraints}

    def __init__(
        self,
        n_clusters=8,
        *,
        init="k-means++",
        n_init=(
            "auto"
            if sklearn_check_version("1.4")
            else "warn" if sklearn_check_version("1.2") else 10
        ),
        max_iter=300,
        tol=1e-4,
        verbose=0,
        random_state=None,
        copy_x=True,
        algorithm="lloyd" if sklearn_check_version("1.1") else "auto",
    ):
        # Defaults for ``n_init`` and ``algorithm`` depend on the installed
        # scikit-learn version; all arguments are forwarded unchanged.
        super().__init__(
            n_clusters=n_clusters,
            init=init,
            max_iter=max_iter,
            tol=tol,
            n_init=n_init,
            verbose=verbose,
            random_state=random_state,
            copy_x=copy_x,
            algorithm=algorithm,
        )

    def _initialize_onedal_estimator(self):
        """Create ``self._onedal_estimator`` from the current hyper-parameters.

        ``copy_x`` and ``algorithm`` are not forwarded to the oneDAL estimator.
        """
        onedal_params = {
            "n_clusters": self.n_clusters,
            "init": self.init,
            "max_iter": self.max_iter,
            "tol": self.tol,
            "n_init": self.n_init,
            "verbose": self.verbose,
            "random_state": self.random_state,
        }

        self._onedal_estimator = onedal_KMeans(**onedal_params)

    def _onedal_fit_supported(self, method_name, X, y=None, sample_weight=None):
        """Return the patching conditions for running ``fit`` through oneDAL.

        Parameters
        ----------
        method_name : str
            Must be ``"fit"``.
        X : array-like or sparse matrix
            Training data.
        y : ignored
        sample_weight : None, number or array-like
            Only None, a scalar, or all-equal weights are accepted.

        Returns
        -------
        PatchingConditionsChain
            Chain holding every condition together with the message that
            describes the cause when that condition is not met.
        """
        assert method_name == "fit"

        class_name = self.__class__.__name__
        patching_status = PatchingConditionsChain(f"sklearn.cluster.{class_name}.fit")

        sample_count = _num_samples(X)
        self._algorithm = self.algorithm
        supported_algs = ["auto", "full", "lloyd", "elkan"]
        if self.algorithm == "elkan":
            logging.getLogger("sklearnex").info(
                "oneDAL does not support 'elkan', using 'lloyd' algorithm instead."
            )
        correct_count = self.n_clusters < sample_count

        is_data_supported = (
            _is_csr(X) and daal_check_version((2024, "P", 700))
        ) or not issparse(X)

        _acceptable_sample_weights = self._validate_sample_weight(sample_weight, X)

        patching_status.and_conditions(
            [
                (
                    self.algorithm in supported_algs,
                    "Only 'lloyd' algorithm is supported, 'elkan' is computed using lloyd",
                ),
                # Like the sibling messages, this one states the failure cause
                # (the previous wording described the passing condition).
                (correct_count, "n_clusters must be smaller than number of samples"),
                (
                    _acceptable_sample_weights,
                    "oneDAL doesn't support sample_weight. Accepted options are None, constant, or equal weights.",
                ),
                (
                    is_data_supported,
                    "Supported data formats: Dense, CSR (oneDAL version >= 2024.7.0).",
                ),
            ]
        )

        return patching_status

    def fit(self, X, y=None, sample_weight=None):
        # Parameter validation via ``_validate_params`` exists for sklearn >= 1.2.
        if sklearn_check_version("1.2"):
            self._validate_params()

        dispatch(
            self,
            "fit",
            {
                "onedal": self.__class__._onedal_fit,
                "sklearn": sklearn_KMeans.fit,
            },
            X,
            y,
            sample_weight,
        )

        return self

    def _onedal_fit(self, X, _, sample_weight, queue=None):
        """Fit with the oneDAL estimator and copy its results onto ``self``.

        The second positional argument (``y``) and ``sample_weight`` are not
        used here; ``_onedal_fit_supported`` only accepts None, scalar or
        all-equal weights.
        """
        X = self._validate_data(
            X,
            accept_sparse="csr",
            dtype=[np.float64, np.float32],
            order="C",
            copy=self.copy_x,
            accept_large_sparse=False,
        )

        # The validation hook was renamed in scikit-learn 1.2.
        if sklearn_check_version("1.2"):
            self._check_params_vs_input(X)
        else:
            self._check_params(X)

        self._n_features_out = self.n_clusters

        self._initialize_onedal_estimator()
        self._n_threads = _openmp_effective_n_threads()
        self._onedal_estimator.fit(X, queue=queue)

        self._save_attributes()

    def _validate_sample_weight(self, sample_weight, X):
        """Return True when ``sample_weight`` is None, a scalar, or all equal."""
        if sample_weight is None or isinstance(sample_weight, numbers.Number):
            return True

        sample_weight = _check_sample_weight(
            sample_weight,
            X,
            dtype=X.dtype if hasattr(X, "dtype") else None,
        )
        # Uniform weights are acceptable: every entry equals the first one.
        return bool(np.all(sample_weight == sample_weight[0]))

    def _onedal_predict_supported(self, method_name, X, sample_weight=None):
        """Return the patching conditions for oneDAL ``predict`` / ``score``.

        ``_onedal_supported`` routes both ``"predict"`` and ``"score"`` here,
        so the reported scope uses ``method_name`` instead of a fixed
        ``predict`` suffix.
        """
        class_name = self.__class__.__name__
        is_data_supported = (
            _is_csr(X) and daal_check_version((2024, "P", 700))
        ) or not issparse(X)
        patching_status = PatchingConditionsChain(
            f"sklearn.cluster.{class_name}.{method_name}"
        )

        # algorithm "auto" has been deprecated since 1.1,
        # algorithm "full" has been replaced by "lloyd"
        supported_algs = ["auto", "full", "lloyd", "elkan"]
        if self.algorithm == "elkan":
            logging.getLogger("sklearnex").info(
                "oneDAL does not support 'elkan', using 'lloyd' algorithm instead."
            )

        # sample_weight is only inspected for scikit-learn < 1.5; the >= 1.5
        # ``predict`` defined below takes no sample_weight argument.
        _acceptable_sample_weights = True
        if not sklearn_check_version("1.5"):
            _acceptable_sample_weights = self._validate_sample_weight(
                sample_weight, X
            )

        patching_status.and_conditions(
            [
                (
                    self.algorithm in supported_algs,
                    "Only 'lloyd' algorithm is supported, 'elkan' is computed using lloyd.",
                ),
                (
                    is_data_supported,
                    "Supported data formats: Dense, CSR (oneDAL version >= 2024.7.0).",
                ),
                (
                    _acceptable_sample_weights,
                    "oneDAL doesn't support sample_weight. Acceptable options are None, constant, or equal weights.",
                ),
            ]
        )

        return patching_status

    # ``predict`` has a different signature depending on the scikit-learn
    # version: from 1.5 on it takes only ``X``.
    if sklearn_check_version("1.5"):

        @wrap_output_data
        def predict(self, X):
            self._validate_params()

            return dispatch(
                self,
                "predict",
                {
                    "onedal": self.__class__._onedal_predict,
                    "sklearn": sklearn_KMeans.predict,
                },
                X,
            )

    else:

        @wrap_output_data
        def predict(
            self,
            X,
            sample_weight="deprecated" if sklearn_check_version("1.3") else None,
        ):
            if sklearn_check_version("1.2"):
                self._validate_params()

            return dispatch(
                self,
                "predict",
                {
                    "onedal": self.__class__._onedal_predict,
                    "sklearn": sklearn_KMeans.predict,
                },
                X,
                sample_weight=sample_weight,
            )

    def _onedal_predict(self, X, sample_weight=None, queue=None):
        """Predict cluster labels for ``X`` with the oneDAL estimator."""
        check_is_fitted(self)

        X = self._validate_data(
            X,
            accept_sparse="csr",
            reset=False,
            dtype=[np.float64, np.float32],
        )

        # For scikit-learn 1.3 and 1.4 the "deprecated" sentinel means "not
        # given"; any other non-None value triggers the deprecation warning.
        if not sklearn_check_version("1.5") and sklearn_check_version("1.3"):
            if isinstance(sample_weight, str) and sample_weight == "deprecated":
                sample_weight = None

            if sample_weight is not None:
                warnings.warn(
                    "'sample_weight' was deprecated in version 1.3 and "
                    "will be removed in 1.5.",
                    FutureWarning,
                )

        # No oneDAL estimator attached yet (e.g. the model was fitted through
        # another path): build one and give it the fitted centers.
        if not hasattr(self, "_onedal_estimator"):
            self._initialize_onedal_estimator()
            self._onedal_estimator.cluster_centers_ = self.cluster_centers_

        return self._onedal_estimator.predict(X, queue=queue)

    def _onedal_supported(self, method_name, *data):
        """Route a support query to the fit or predict/score condition builder.

        Raises
        ------
        RuntimeError
            If ``method_name`` is not "fit", "predict" or "score".
        """
        if method_name == "fit":
            return self._onedal_fit_supported(method_name, *data)
        if method_name in ["predict", "score"]:
            return self._onedal_predict_supported(method_name, *data)
        raise RuntimeError(
            f"Unknown method {method_name} in {self.__class__.__name__}"
        )

    # CPU and GPU share the same support conditions.
    _onedal_gpu_supported = _onedal_supported
    _onedal_cpu_supported = _onedal_supported

    @wrap_output_data
    def fit_transform(self, X, y=None, sample_weight=None):
        return self.fit(X, sample_weight=sample_weight)._transform(X)

    @wrap_output_data
    def transform(self, X):
        check_is_fitted(self)

        X = self._check_test_data(X)
        return self._transform(X)

    @wrap_output_data
    def score(self, X, y=None, sample_weight=None):
        return dispatch(
            self,
            "score",
            {
                "onedal": self.__class__._onedal_score,
                "sklearn": sklearn_KMeans.score,
            },
            X,
            y,
            sample_weight=sample_weight,
        )

    def _onedal_score(self, X, y, sample_weight=None, queue=None):
        """Score ``X`` with the oneDAL estimator; ``y`` is unused."""
        check_is_fitted(self)

        X = self._validate_data(
            X,
            accept_sparse="csr",
            reset=False,
            dtype=[np.float64, np.float32],
        )

        # Same sample_weight deprecation handling as in ``_onedal_predict``.
        if not sklearn_check_version("1.5") and sklearn_check_version("1.3"):
            if isinstance(sample_weight, str) and sample_weight == "deprecated":
                sample_weight = None

            if sample_weight is not None:
                warnings.warn(
                    "'sample_weight' was deprecated in version 1.3 and "
                    "will be removed in 1.5.",
                    FutureWarning,
                )

        if not hasattr(self, "_onedal_estimator"):
            self._initialize_onedal_estimator()
            self._onedal_estimator.cluster_centers_ = self.cluster_centers_

        return self._onedal_estimator.score(X, queue=queue)

    def _save_attributes(self):
        """Copy the fitted attributes from the oneDAL estimator onto ``self``."""
        assert hasattr(self, "_onedal_estimator")
        self.cluster_centers_ = self._onedal_estimator.cluster_centers_
        self.labels_ = self._onedal_estimator.labels_
        self.inertia_ = self._onedal_estimator.inertia_
        self.n_iter_ = self._onedal_estimator.n_iter_
        self.n_features_in_ = self._onedal_estimator.n_features_in_

        self._n_init = self._onedal_estimator._n_init

    # Expose scikit-learn's documentation on the overridden public methods.
    fit.__doc__ = sklearn_KMeans.fit.__doc__
    predict.__doc__ = sklearn_KMeans.predict.__doc__
    transform.__doc__ = sklearn_KMeans.transform.__doc__
    fit_transform.__doc__ = sklearn_KMeans.fit_transform.__doc__
    score.__doc__ = sklearn_KMeans.score.__doc__
377
+
378
+ else:
379
+ from daal4py.sklearn.cluster import KMeans
380
+
381
+ logging.warning(
382
+ "Sklearnex KMeans requires oneDAL version >= 2023.2, falling back to daal4py."
383
+ )
@@ -0,0 +1,38 @@
1
+ # ===============================================================================
2
+ # Copyright 2021 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ===============================================================================
16
+
17
+ import numpy as np
18
+ import pytest
19
+ from numpy.testing import assert_allclose
20
+
21
+ from onedal.tests.utils._dataframes_support import (
22
+ _convert_to_dataframe,
23
+ get_dataframes_and_queues,
24
+ )
25
+
26
+
27
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
def test_sklearnex_import_dbscan(dataframe, queue):
    """DBSCAN imported from sklearnex fits a toy dataset with the expected labels."""
    from sklearnex.cluster import DBSCAN

    points = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]])
    points_df = _convert_to_dataframe(points, sycl_queue=queue, target_df=dataframe)

    estimator = DBSCAN(eps=3, min_samples=2)
    fitted = estimator.fit(points_df)

    # The fitted object must come from the sklearnex package.
    assert "sklearnex" in fitted.__module__

    # Two clusters and one noise point (-1).
    assert_allclose(np.array([0, 0, 0, 1, 1, -1], dtype=np.int32), fitted.labels_)
@@ -0,0 +1,153 @@
1
+ # ===============================================================================
2
+ # Copyright 2021 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ===============================================================================
16
+
17
+ import numpy as np
18
+ import pytest
19
+ from numpy.testing import assert_allclose
20
+ from scipy.sparse import csr_matrix
21
+ from sklearn.datasets import make_blobs
22
+
23
+ from daal4py.sklearn._utils import daal_check_version, sklearn_check_version
24
+ from onedal.tests.utils._dataframes_support import (
25
+ _as_numpy,
26
+ _convert_to_dataframe,
27
+ get_dataframes_and_queues,
28
+ get_queues,
29
+ )
30
+ from sklearnex import config_context
31
+
32
+
33
def generate_dense_dataset(n_samples, n_features, density, n_clusters):
    """Return blob data in which each entry is kept with probability ``density``.

    The blobs come from ``make_blobs`` with a fixed ``random_state``; the
    zeroing mask is drawn from NumPy's global generator, which is re-seeded
    from the requested sizes at the start of every call.
    """
    mask_seed = 2024 + n_samples + n_features + n_clusters
    np.random.seed(mask_seed)

    blobs, _ = make_blobs(
        n_samples=n_samples,
        n_features=n_features,
        centers=n_clusters,
        cluster_std=1.0,
        random_state=42,
    )

    # 1 keeps an entry, 0 zeroes it out.
    keep = np.random.binomial(1, density, (n_samples, n_features))
    return blobs * keep
45
+
46
+
47
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
@pytest.mark.parametrize("init", ["k-means++", "random"])
def test_sklearnex_import_for_dense_data(dataframe, queue, algorithm, init):
    """KMeans fitted on dense data comes from the module expected for this oneDAL."""
    lloyd_unavailable = not sklearn_check_version("1.1")
    if lloyd_unavailable and algorithm == "lloyd":
        pytest.skip("lloyd requires sklearn>=1.1.")
    from sklearnex.cluster import KMeans

    dense = generate_dense_dataset(1000, 10, 0.5, 3)
    dense_df = _convert_to_dataframe(dense, sycl_queue=queue, target_df=dataframe)

    estimator = KMeans(n_clusters=3, random_state=0, algorithm=algorithm, init=init)
    fitted = estimator.fit(dense_df)

    # Older oneDAL builds fall back to the daal4py implementation.
    if daal_check_version((2023, "P", 200)):
        expected_origin = "sklearnex"
    else:
        expected_origin = "daal4py"
    assert expected_origin in fitted.__module__
66
+
67
+
68
@pytest.mark.skipif(
    not daal_check_version((2024, "P", 700)),
    reason="Sparse data requires oneDAL>=2024.7.0",
)
@pytest.mark.parametrize("queue", get_queues())
@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
@pytest.mark.parametrize("init", ["k-means++", "random"])
def test_sklearnex_import_for_sparse_data(queue, algorithm, init):
    """KMeans fitted on CSR input comes from the sklearnex package."""
    from sklearnex.cluster import KMeans

    # Build the CSR input from the same generator used by the dense tests.
    sparse_input = csr_matrix(generate_dense_dataset(1000, 10, 0.5, 3))

    estimator = KMeans(n_clusters=3, random_state=0, algorithm=algorithm, init=init)
    fitted = estimator.fit(sparse_input)

    assert "sklearnex" in fitted.__module__
86
+
87
+
88
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
def test_results_on_dense_gold_data(dataframe, queue, algorithm):
    """Check labels, centers and inertia on a tiny hand-computed dataset.

    The training points form two groups (x == 1 and x == 10); the expected
    cluster order differs between the CPU and GPU initialisation.
    """
    if not sklearn_check_version("1.1") and algorithm == "lloyd":
        pytest.skip("lloyd requires sklearn>=1.1.")

    from sklearnex.cluster import KMeans

    X_train = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])
    X_test = np.array([[0, 0], [12, 3]])
    X_train_df = _convert_to_dataframe(X_train, sycl_queue=queue, target_df=dataframe)
    X_test_df = _convert_to_dataframe(X_test, sycl_queue=queue, target_df=dataframe)

    kmeans = KMeans(n_clusters=2, random_state=0, algorithm=algorithm).fit(X_train_df)

    # Inertia does not depend on how the clusters are numbered.
    expected_inertia = 16.0
    if queue and queue.sycl_device.is_gpu:
        # KMeans Init Dense GPU implementation is different from CPU
        expected_cluster_labels = np.array([0, 1], dtype=np.int32)
        expected_cluster_centers = np.array([[1.0, 2.0], [10.0, 2.0]], dtype=np.float32)
    else:
        expected_cluster_labels = np.array([1, 0], dtype=np.int32)
        expected_cluster_centers = np.array([[10.0, 2.0], [1.0, 2.0]], dtype=np.float32)

    assert_allclose(expected_cluster_labels, _as_numpy(kmeans.predict(X_test_df)))
    assert_allclose(expected_cluster_centers, _as_numpy(kmeans.cluster_centers_))
    # Compare the floating-point inertia with a tolerance, not exact equality.
    assert_allclose(expected_inertia, float(kmeans.inertia_))
116
+
117
+
118
@pytest.mark.skipif(
    not daal_check_version((2024, "P", 700)),
    reason="Sparse data requires oneDAL>=2024.7.0",
)
@pytest.mark.parametrize("queue", get_queues())
@pytest.mark.parametrize("init", ["k-means++", "random", "arraylike"])
@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
@pytest.mark.parametrize(
    "dims", [(1000, 10, 0.95, 3), (50000, 100, 0.75, 10), (10000, 10, 0.8, 5)]
)
def test_dense_vs_sparse(queue, init, algorithm, dims):
    """Dense and CSR fits of the same data must give matching cluster centers."""
    from sklearnex.cluster import KMeans

    if init == "random":
        pytest.skip("Random initialization in sparse K-means is buggy.")

    # For higher level of sparsity (smaller density) the test may fail
    n_samples, n_features, density, n_clusters = dims
    X_dense = generate_dense_dataset(n_samples, n_features, density, n_clusters)
    X_sparse = csr_matrix(X_dense)

    if init == "arraylike":
        # Use reproducibly chosen rows of the data as explicit initial centers.
        np.random.seed(2024 + n_samples + n_features + n_clusters)
        chosen_rows = np.random.choice(n_samples, size=n_clusters, replace=False)
        init = X_dense[chosen_rows]

    # Both estimators are configured identically; only the input format differs.
    shared_params = dict(
        n_clusters=n_clusters, random_state=0, init=init, algorithm=algorithm
    )
    dense_model = KMeans(**shared_params).fit(X_dense)
    sparse_model = KMeans(**shared_params).fit(X_sparse)

    assert_allclose(dense_model.cluster_centers_, sparse_model.cluster_centers_)
sklearnex/conftest.py ADDED
@@ -0,0 +1,73 @@
1
+ # ==============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import io
18
+ import logging
19
+
20
+ import pytest
21
+
22
+ from daal4py.sklearn._utils import sklearn_check_version
23
+ from sklearnex import config_context, patch_sklearn, unpatch_sklearn
24
+
25
+
26
def pytest_configure(config):
    """Register the ``allow_sklearn_fallback`` marker in the "markers" ini list.

    Parameters
    ----------
    config : object exposing ``addinivalue_line(name, line)``
        The configuration object handed to this hook.
    """
    marker_description = (
        "allow_sklearn_fallback: mark test to not check for sklearnex usage"
    )
    config.addinivalue_line("markers", marker_description)
30
+
31
+
32
@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_call(item):
    """Fail tests whose sklearnex log reports a fallback to scikit-learn.

    Unless the test carries the ``allow_sklearn_fallback`` marker, the
    "sklearnex" logger is temporarily redirected into an in-memory buffer at
    INFO level while the test call runs. Afterwards the logger's previous
    handlers and level are put back and the captured text is inspected.

    Parameters
    ----------
    item : the test item being called; only ``get_closest_marker`` is used.

    Raises
    ------
    TypeError
        If the captured log contains "fallback to original Scikit-learn".
    """
    if item.get_closest_marker("allow_sklearn_fallback"):
        # Test opted out of the fallback check: run it untouched.
        yield
        return

    # setup logger to check for sklearn fallback
    log_stream = io.StringIO()
    log_handler = logging.StreamHandler(log_stream)
    log_handler.setLevel(logging.INFO)

    sklearnex_logger = logging.getLogger("sklearnex")
    saved_level = sklearnex_logger.level
    saved_handlers = sklearnex_logger.handlers
    # Swap in a fresh handler list so only the capture handler sees records.
    sklearnex_logger.handlers = []
    sklearnex_logger.addHandler(log_handler)
    sklearnex_logger.setLevel(logging.INFO)

    try:
        yield
    finally:
        # Detach the capture handler before restoring the saved list, and do
        # it in ``finally`` so the logger is restored even if the generator
        # is closed instead of resumed.
        sklearnex_logger.removeHandler(log_handler)
        sklearnex_logger.handlers = saved_handlers
        sklearnex_logger.setLevel(saved_level)

    text = log_stream.getvalue()
    if "fallback to original Scikit-learn" in text:
        raise TypeError(
            f"test did not properly evaluate sklearnex functionality and fell back to sklearn:\n{text}"
        )
58
+
59
+
60
@pytest.fixture
def with_sklearnex():
    """Fixture that brackets a test with ``patch_sklearn``/``unpatch_sklearn``.

    ``patch_sklearn()`` is called before control is handed to the requesting
    test, and ``unpatch_sklearn()`` is called once the fixture is resumed.
    The fixture yields no value.
    """
    # Setup: apply the patch before the test body executes.
    patch_sklearn()

    yield

    # Teardown: undo the patch applied above.
    unpatch_sklearn()
65
+
66
+
67
@pytest.fixture
def with_array_api():
    """Fixture that enables ``array_api_dispatch`` when it is available.

    When ``sklearn_check_version("1.2")`` is true the test runs inside
    ``config_context(array_api_dispatch=True)``; otherwise the test runs
    with no configuration change. The fixture yields no value.
    """
    if not sklearn_check_version("1.2"):
        # Version check failed: run the test without touching the config.
        yield
        return

    with config_context(array_api_dispatch=True):
        yield
@@ -0,0 +1,19 @@
1
+ # ===============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ===============================================================================
16
+
17
+ from .incremental_covariance import IncrementalEmpiricalCovariance
18
+
19
+ __all__ = ["IncrementalEmpiricalCovariance"]