scikit-learn-intelex 2025.4.0__py313-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (282):
  1. daal4py/__init__.py +73 -0
  2. daal4py/__main__.py +58 -0
  3. daal4py/_daal4py.cpython-313-x86_64-linux-gnu.so +0 -0
  4. daal4py/doc/third-party-programs.txt +424 -0
  5. daal4py/mb/__init__.py +19 -0
  6. daal4py/mb/model_builders.py +377 -0
  7. daal4py/mpi_transceiver.cpython-313-x86_64-linux-gnu.so +0 -0
  8. daal4py/sklearn/__init__.py +40 -0
  9. daal4py/sklearn/_n_jobs_support.py +248 -0
  10. daal4py/sklearn/_utils.py +245 -0
  11. daal4py/sklearn/cluster/__init__.py +20 -0
  12. daal4py/sklearn/cluster/dbscan.py +165 -0
  13. daal4py/sklearn/cluster/k_means.py +597 -0
  14. daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
  15. daal4py/sklearn/decomposition/__init__.py +19 -0
  16. daal4py/sklearn/decomposition/_pca.py +524 -0
  17. daal4py/sklearn/ensemble/AdaBoostClassifier.py +196 -0
  18. daal4py/sklearn/ensemble/GBTDAAL.py +337 -0
  19. daal4py/sklearn/ensemble/__init__.py +27 -0
  20. daal4py/sklearn/ensemble/_forest.py +1397 -0
  21. daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
  22. daal4py/sklearn/linear_model/__init__.py +29 -0
  23. daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
  24. daal4py/sklearn/linear_model/_linear.py +272 -0
  25. daal4py/sklearn/linear_model/_ridge.py +325 -0
  26. daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
  27. daal4py/sklearn/linear_model/linear.py +17 -0
  28. daal4py/sklearn/linear_model/logistic_loss.py +195 -0
  29. daal4py/sklearn/linear_model/logistic_path.py +1026 -0
  30. daal4py/sklearn/linear_model/ridge.py +17 -0
  31. daal4py/sklearn/linear_model/tests/test_linear.py +208 -0
  32. daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
  33. daal4py/sklearn/manifold/__init__.py +19 -0
  34. daal4py/sklearn/manifold/_t_sne.py +405 -0
  35. daal4py/sklearn/metrics/__init__.py +20 -0
  36. daal4py/sklearn/metrics/_pairwise.py +236 -0
  37. daal4py/sklearn/metrics/_ranking.py +210 -0
  38. daal4py/sklearn/model_selection/__init__.py +19 -0
  39. daal4py/sklearn/model_selection/_split.py +309 -0
  40. daal4py/sklearn/model_selection/tests/test_split.py +56 -0
  41. daal4py/sklearn/monkeypatch/__init__.py +0 -0
  42. daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
  43. daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
  44. daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
  45. daal4py/sklearn/monkeypatch/tests/test_patching.py +90 -0
  46. daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +117 -0
  47. daal4py/sklearn/neighbors/__init__.py +21 -0
  48. daal4py/sklearn/neighbors/_base.py +503 -0
  49. daal4py/sklearn/neighbors/_classification.py +139 -0
  50. daal4py/sklearn/neighbors/_regression.py +74 -0
  51. daal4py/sklearn/neighbors/_unsupervised.py +55 -0
  52. daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
  53. daal4py/sklearn/svm/__init__.py +19 -0
  54. daal4py/sklearn/svm/svm.py +734 -0
  55. daal4py/sklearn/utils/__init__.py +21 -0
  56. daal4py/sklearn/utils/base.py +75 -0
  57. daal4py/sklearn/utils/tests/test_utils.py +51 -0
  58. daal4py/sklearn/utils/validation.py +696 -0
  59. onedal/__init__.py +83 -0
  60. onedal/_config.py +54 -0
  61. onedal/_device_offload.py +204 -0
  62. onedal/_onedal_py_dpc.cpython-313-x86_64-linux-gnu.so +0 -0
  63. onedal/_onedal_py_host.cpython-313-x86_64-linux-gnu.so +0 -0
  64. onedal/_onedal_py_spmd_dpc.cpython-313-x86_64-linux-gnu.so +0 -0
  65. onedal/basic_statistics/__init__.py +20 -0
  66. onedal/basic_statistics/basic_statistics.py +107 -0
  67. onedal/basic_statistics/incremental_basic_statistics.py +175 -0
  68. onedal/basic_statistics/tests/test_basic_statistics.py +242 -0
  69. onedal/basic_statistics/tests/test_incremental_basic_statistics.py +279 -0
  70. onedal/basic_statistics/tests/utils.py +50 -0
  71. onedal/cluster/__init__.py +27 -0
  72. onedal/cluster/dbscan.py +105 -0
  73. onedal/cluster/kmeans.py +557 -0
  74. onedal/cluster/kmeans_init.py +112 -0
  75. onedal/cluster/tests/test_dbscan.py +125 -0
  76. onedal/cluster/tests/test_kmeans.py +88 -0
  77. onedal/cluster/tests/test_kmeans_init.py +93 -0
  78. onedal/common/_base.py +38 -0
  79. onedal/common/_estimator_checks.py +47 -0
  80. onedal/common/_mixin.py +62 -0
  81. onedal/common/_policy.py +55 -0
  82. onedal/common/_spmd_policy.py +30 -0
  83. onedal/common/hyperparameters.py +125 -0
  84. onedal/common/tests/test_policy.py +76 -0
  85. onedal/common/tests/test_sycl.py +128 -0
  86. onedal/covariance/__init__.py +20 -0
  87. onedal/covariance/covariance.py +122 -0
  88. onedal/covariance/incremental_covariance.py +161 -0
  89. onedal/covariance/tests/test_covariance.py +50 -0
  90. onedal/covariance/tests/test_incremental_covariance.py +190 -0
  91. onedal/datatypes/__init__.py +19 -0
  92. onedal/datatypes/_data_conversion.py +121 -0
  93. onedal/datatypes/tests/common.py +126 -0
  94. onedal/datatypes/tests/test_data.py +475 -0
  95. onedal/decomposition/__init__.py +20 -0
  96. onedal/decomposition/incremental_pca.py +214 -0
  97. onedal/decomposition/pca.py +186 -0
  98. onedal/decomposition/tests/test_incremental_pca.py +285 -0
  99. onedal/ensemble/__init__.py +29 -0
  100. onedal/ensemble/forest.py +736 -0
  101. onedal/ensemble/tests/test_random_forest.py +97 -0
  102. onedal/linear_model/__init__.py +27 -0
  103. onedal/linear_model/incremental_linear_model.py +292 -0
  104. onedal/linear_model/linear_model.py +325 -0
  105. onedal/linear_model/logistic_regression.py +247 -0
  106. onedal/linear_model/tests/test_incremental_linear_regression.py +213 -0
  107. onedal/linear_model/tests/test_incremental_ridge_regression.py +171 -0
  108. onedal/linear_model/tests/test_linear_regression.py +259 -0
  109. onedal/linear_model/tests/test_logistic_regression.py +95 -0
  110. onedal/linear_model/tests/test_ridge.py +95 -0
  111. onedal/neighbors/__init__.py +19 -0
  112. onedal/neighbors/neighbors.py +763 -0
  113. onedal/neighbors/tests/test_knn_classification.py +49 -0
  114. onedal/primitives/__init__.py +27 -0
  115. onedal/primitives/get_tree.py +25 -0
  116. onedal/primitives/kernel_functions.py +152 -0
  117. onedal/primitives/tests/test_kernel_functions.py +159 -0
  118. onedal/spmd/__init__.py +25 -0
  119. onedal/spmd/_base.py +30 -0
  120. onedal/spmd/basic_statistics/__init__.py +20 -0
  121. onedal/spmd/basic_statistics/basic_statistics.py +30 -0
  122. onedal/spmd/basic_statistics/incremental_basic_statistics.py +71 -0
  123. onedal/spmd/cluster/__init__.py +28 -0
  124. onedal/spmd/cluster/dbscan.py +23 -0
  125. onedal/spmd/cluster/kmeans.py +56 -0
  126. onedal/spmd/covariance/__init__.py +20 -0
  127. onedal/spmd/covariance/covariance.py +26 -0
  128. onedal/spmd/covariance/incremental_covariance.py +83 -0
  129. onedal/spmd/decomposition/__init__.py +20 -0
  130. onedal/spmd/decomposition/incremental_pca.py +124 -0
  131. onedal/spmd/decomposition/pca.py +26 -0
  132. onedal/spmd/ensemble/__init__.py +19 -0
  133. onedal/spmd/ensemble/forest.py +28 -0
  134. onedal/spmd/linear_model/__init__.py +21 -0
  135. onedal/spmd/linear_model/incremental_linear_model.py +101 -0
  136. onedal/spmd/linear_model/linear_model.py +30 -0
  137. onedal/spmd/linear_model/logistic_regression.py +38 -0
  138. onedal/spmd/neighbors/__init__.py +19 -0
  139. onedal/spmd/neighbors/neighbors.py +75 -0
  140. onedal/svm/__init__.py +19 -0
  141. onedal/svm/svm.py +556 -0
  142. onedal/svm/tests/test_csr_svm.py +351 -0
  143. onedal/svm/tests/test_nusvc.py +204 -0
  144. onedal/svm/tests/test_nusvr.py +210 -0
  145. onedal/svm/tests/test_svc.py +176 -0
  146. onedal/svm/tests/test_svr.py +243 -0
  147. onedal/tests/test_common.py +57 -0
  148. onedal/tests/utils/_dataframes_support.py +162 -0
  149. onedal/tests/utils/_device_selection.py +102 -0
  150. onedal/utils/__init__.py +49 -0
  151. onedal/utils/_array_api.py +81 -0
  152. onedal/utils/_dpep_helpers.py +56 -0
  153. onedal/utils/tests/test_validation.py +142 -0
  154. onedal/utils/validation.py +464 -0
  155. scikit_learn_intelex-2025.4.0.dist-info/LICENSE.txt +202 -0
  156. scikit_learn_intelex-2025.4.0.dist-info/METADATA +190 -0
  157. scikit_learn_intelex-2025.4.0.dist-info/RECORD +282 -0
  158. scikit_learn_intelex-2025.4.0.dist-info/WHEEL +5 -0
  159. scikit_learn_intelex-2025.4.0.dist-info/top_level.txt +3 -0
  160. sklearnex/__init__.py +66 -0
  161. sklearnex/__main__.py +58 -0
  162. sklearnex/_config.py +116 -0
  163. sklearnex/_device_offload.py +126 -0
  164. sklearnex/_utils.py +177 -0
  165. sklearnex/basic_statistics/__init__.py +20 -0
  166. sklearnex/basic_statistics/basic_statistics.py +261 -0
  167. sklearnex/basic_statistics/incremental_basic_statistics.py +352 -0
  168. sklearnex/basic_statistics/tests/test_basic_statistics.py +405 -0
  169. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +455 -0
  170. sklearnex/cluster/__init__.py +20 -0
  171. sklearnex/cluster/dbscan.py +197 -0
  172. sklearnex/cluster/k_means.py +397 -0
  173. sklearnex/cluster/tests/test_dbscan.py +38 -0
  174. sklearnex/cluster/tests/test_kmeans.py +157 -0
  175. sklearnex/conftest.py +82 -0
  176. sklearnex/covariance/__init__.py +19 -0
  177. sklearnex/covariance/incremental_covariance.py +405 -0
  178. sklearnex/covariance/tests/test_incremental_covariance.py +287 -0
  179. sklearnex/decomposition/__init__.py +19 -0
  180. sklearnex/decomposition/pca.py +427 -0
  181. sklearnex/decomposition/tests/test_pca.py +58 -0
  182. sklearnex/dispatcher.py +534 -0
  183. sklearnex/doc/third-party-programs.txt +424 -0
  184. sklearnex/ensemble/__init__.py +29 -0
  185. sklearnex/ensemble/_forest.py +2029 -0
  186. sklearnex/ensemble/tests/test_forest.py +140 -0
  187. sklearnex/glob/__main__.py +72 -0
  188. sklearnex/glob/dispatcher.py +101 -0
  189. sklearnex/linear_model/__init__.py +32 -0
  190. sklearnex/linear_model/coordinate_descent.py +30 -0
  191. sklearnex/linear_model/incremental_linear.py +495 -0
  192. sklearnex/linear_model/incremental_ridge.py +432 -0
  193. sklearnex/linear_model/linear.py +346 -0
  194. sklearnex/linear_model/logistic_regression.py +415 -0
  195. sklearnex/linear_model/ridge.py +390 -0
  196. sklearnex/linear_model/tests/test_incremental_linear.py +267 -0
  197. sklearnex/linear_model/tests/test_incremental_ridge.py +214 -0
  198. sklearnex/linear_model/tests/test_linear.py +142 -0
  199. sklearnex/linear_model/tests/test_logreg.py +134 -0
  200. sklearnex/linear_model/tests/test_ridge.py +256 -0
  201. sklearnex/manifold/__init__.py +19 -0
  202. sklearnex/manifold/t_sne.py +26 -0
  203. sklearnex/manifold/tests/test_tsne.py +250 -0
  204. sklearnex/metrics/__init__.py +23 -0
  205. sklearnex/metrics/pairwise.py +22 -0
  206. sklearnex/metrics/ranking.py +20 -0
  207. sklearnex/metrics/tests/test_metrics.py +39 -0
  208. sklearnex/model_selection/__init__.py +21 -0
  209. sklearnex/model_selection/split.py +22 -0
  210. sklearnex/model_selection/tests/test_model_selection.py +34 -0
  211. sklearnex/neighbors/__init__.py +27 -0
  212. sklearnex/neighbors/_lof.py +236 -0
  213. sklearnex/neighbors/common.py +310 -0
  214. sklearnex/neighbors/knn_classification.py +231 -0
  215. sklearnex/neighbors/knn_regression.py +207 -0
  216. sklearnex/neighbors/knn_unsupervised.py +178 -0
  217. sklearnex/neighbors/tests/test_neighbors.py +82 -0
  218. sklearnex/preview/__init__.py +17 -0
  219. sklearnex/preview/covariance/__init__.py +19 -0
  220. sklearnex/preview/covariance/covariance.py +142 -0
  221. sklearnex/preview/covariance/tests/test_covariance.py +66 -0
  222. sklearnex/preview/decomposition/__init__.py +19 -0
  223. sklearnex/preview/decomposition/incremental_pca.py +244 -0
  224. sklearnex/preview/decomposition/tests/test_incremental_pca.py +336 -0
  225. sklearnex/spmd/__init__.py +25 -0
  226. sklearnex/spmd/basic_statistics/__init__.py +20 -0
  227. sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
  228. sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
  229. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  230. sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +306 -0
  231. sklearnex/spmd/cluster/__init__.py +30 -0
  232. sklearnex/spmd/cluster/dbscan.py +50 -0
  233. sklearnex/spmd/cluster/kmeans.py +21 -0
  234. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  235. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +173 -0
  236. sklearnex/spmd/covariance/__init__.py +20 -0
  237. sklearnex/spmd/covariance/covariance.py +21 -0
  238. sklearnex/spmd/covariance/incremental_covariance.py +37 -0
  239. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  240. sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
  241. sklearnex/spmd/decomposition/__init__.py +20 -0
  242. sklearnex/spmd/decomposition/incremental_pca.py +30 -0
  243. sklearnex/spmd/decomposition/pca.py +21 -0
  244. sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
  245. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  246. sklearnex/spmd/ensemble/__init__.py +19 -0
  247. sklearnex/spmd/ensemble/forest.py +71 -0
  248. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  249. sklearnex/spmd/linear_model/__init__.py +21 -0
  250. sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
  251. sklearnex/spmd/linear_model/linear_model.py +21 -0
  252. sklearnex/spmd/linear_model/logistic_regression.py +21 -0
  253. sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +331 -0
  254. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  255. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +162 -0
  256. sklearnex/spmd/neighbors/__init__.py +19 -0
  257. sklearnex/spmd/neighbors/neighbors.py +25 -0
  258. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  259. sklearnex/svm/__init__.py +29 -0
  260. sklearnex/svm/_common.py +339 -0
  261. sklearnex/svm/nusvc.py +371 -0
  262. sklearnex/svm/nusvr.py +170 -0
  263. sklearnex/svm/svc.py +399 -0
  264. sklearnex/svm/svr.py +167 -0
  265. sklearnex/svm/tests/test_svm.py +93 -0
  266. sklearnex/tests/test_common.py +491 -0
  267. sklearnex/tests/test_config.py +123 -0
  268. sklearnex/tests/test_hyperparameters.py +43 -0
  269. sklearnex/tests/test_memory_usage.py +347 -0
  270. sklearnex/tests/test_monkeypatch.py +269 -0
  271. sklearnex/tests/test_n_jobs_support.py +108 -0
  272. sklearnex/tests/test_parallel.py +48 -0
  273. sklearnex/tests/test_patching.py +377 -0
  274. sklearnex/tests/test_run_to_run_stability.py +326 -0
  275. sklearnex/tests/utils/__init__.py +48 -0
  276. sklearnex/tests/utils/base.py +436 -0
  277. sklearnex/tests/utils/spmd.py +198 -0
  278. sklearnex/utils/__init__.py +19 -0
  279. sklearnex/utils/_array_api.py +82 -0
  280. sklearnex/utils/parallel.py +59 -0
  281. sklearnex/utils/tests/test_validation.py +238 -0
  282. sklearnex/utils/validation.py +208 -0
@@ -0,0 +1,436 @@
1
+ # ==============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import platform
18
+ import subprocess
19
+ from functools import partial
20
+ from inspect import Parameter, getattr_static, isclass, signature
21
+
22
+ import numpy as np
23
+ from scipy import sparse as sp
24
+ from sklearn import clone
25
+ from sklearn.base import (
26
+ BaseEstimator,
27
+ ClassifierMixin,
28
+ ClusterMixin,
29
+ OutlierMixin,
30
+ RegressorMixin,
31
+ TransformerMixin,
32
+ )
33
+ from sklearn.datasets import load_diabetes, load_iris
34
+ from sklearn.neighbors._base import KNeighborsMixin
35
+ from sklearn.utils.validation import check_is_fitted
36
+
37
+ from onedal.datatypes import from_table, to_table
38
+ from onedal.tests.utils._dataframes_support import _convert_to_dataframe
39
+ from onedal.utils._array_api import _get_sycl_namespace
40
+ from sklearnex import get_patch_map, patch_sklearn, sklearn_is_patched, unpatch_sklearn
41
+ from sklearnex.basic_statistics import BasicStatistics, IncrementalBasicStatistics
42
+ from sklearnex.linear_model import LogisticRegression
43
+ from sklearnex.neighbors import (
44
+ KNeighborsClassifier,
45
+ KNeighborsRegressor,
46
+ LocalOutlierFactor,
47
+ NearestNeighbors,
48
+ )
49
+ from sklearnex.svm import SVC, NuSVC
50
+
51
+
52
def _load_all_models(with_sklearnex=True, estimator=True):
    """Build a name -> object dictionary from the sklearnex patch map.

    Parameters
    ----------
    with_sklearnex: bool (default=True)
        Resolve the patch map entries with sklearnex patching enabled (True)
        or disabled (False).

    estimator: bool (default=True)
        Collect estimator classes (True) or plain functions (False).

    Returns
    -------
    dict: {name: estimator}
        Each value is a class or function taken from sklearn or sklearnex.
    """
    # Ensure the patch state matches what was requested, and restore the
    # state found on entry no matter what happens in between.
    previous_state = sklearn_is_patched(return_map=True)
    was_patched = any(previous_state.values())
    try:
        if with_sklearnex:
            patch_sklearn()
        elif was_patched:
            unpatch_sklearn()

        models = {}
        for patch_infos in get_patch_map().values():
            module = patch_infos[0][0][0]
            name = patch_infos[0][0][1]
            candidate = getattr(module, name, None)
            # Skip missing attributes and objects of the wrong kind
            # (class vs. function).
            if candidate is None or isclass(candidate) != estimator:
                continue
            # When collecting estimators, only BaseEstimator subclasses count.
            if estimator and not issubclass(candidate, BaseEstimator):
                continue
            models[name] = candidate
    finally:
        if with_sklearnex:
            unpatch_sklearn()
        # Both branches are now unpatched; re-apply whatever was patched
        # when this function was entered.
        if was_patched:
            patch_sklearn(
                name=[key for key, flag in previous_state.items() if flag]
            )

    return models
93
+
94
+
95
# Estimator classes from the patch map, resolved with and without patching.
PATCHED_MODELS = _load_all_models(with_sklearnex=True, estimator=True)
UNPATCHED_MODELS = _load_all_models(with_sklearnex=False, estimator=True)

# Non-class (function) entries from the patch map, resolved the same way.
PATCHED_FUNCTIONS = _load_all_models(with_sklearnex=True, estimator=False)
UNPATCHED_FUNCTIONS = _load_all_models(with_sklearnex=False, estimator=False)

# Each entry is [mixin class, method names, dataset type]; a dataset type of
# None means the mixin does not influence dataset selection.
_classifier_methods = [
    "decision_function",
    "predict",
    "predict_proba",
    "predict_log_proba",
    "score",
]
mixin_map = [
    [ClassifierMixin, _classifier_methods, "classification"],
    [RegressorMixin, ["predict", "score"], "regression"],
    [ClusterMixin, ["fit_predict"], "classification"],
    [TransformerMixin, ["fit_transform", "transform", "score"], "classification"],
    [OutlierMixin, ["fit_predict", "predict"], "classification"],
    [KNeighborsMixin, ["kneighbors"], None],
]
del _classifier_methods
113
+
114
+
115
class sklearn_clone_dict(dict):
    """Dictionary whose item lookup returns a fresh ``clone`` of the stored
    sklearn/sklearnex estimator, i.e. an unfitted copy with the same
    parameters."""

    def __getitem__(self, key):
        stored = super().__getitem__(key)
        return clone(stored)
121
+
122
+
123
# sklearnex estimators that need dedicated testing. Reasons include supported
# non-default parameters, methods gated by sklearn's 'available_if' decorator,
# or not being a native sklearn estimator (i.e. absent from the default
# PATCHED_MODELS dictionary). Keys are the string form of each instance.
SPECIAL_INSTANCES = sklearn_clone_dict(
    (str(instance), instance)
    for instance in (
        LocalOutlierFactor(novelty=True),
        SVC(probability=True),
        NuSVC(probability=True),
        KNeighborsClassifier(algorithm="brute"),
        KNeighborsRegressor(algorithm="brute"),
        NearestNeighbors(algorithm="brute"),
        LogisticRegression(solver="newton-cg"),
        BasicStatistics(),
        IncrementalBasicStatistics(),
    )
)
143
+
144
+
145
def gen_models_info(algorithms, required_inputs=["X", "y"], fit=False, daal4py=True):
    """Generate estimator-attribute pairs for pytest test collection.

    Parameters
    ----------
    algorithms : iterable (list, tuple, 1D array-like object)
        Iterable of keys from PATCHED_MODELS or SPECIAL_INSTANCES. Any other
        key is looked up as ``algorithms[key]`` and must map to a
        BaseEstimator instance.

    required_inputs : list, tuple of strings or None
        Names of args/kwargs a callable attribute must accept (only
        non-private, non-BaseEstimator attributes are considered). At least
        one of them must be present. None (or empty) means taking all
        non-private attributes, callable or not. The default list is only
        read, never mutated.

    fit: bool (default False)
        Include "fit" method as an estimator-attribute pair

    daal4py: bool (default True)
        Include daal4py estimators in estimator-attribute list

    Returns
    -------
    list of 2-element tuples: (estimator, string)
        Valid methods or attributes ("fit" only when ``fit`` is True), in
        alphabetical order per estimator so that the result is reproducible
        between processes. An estimator with no matching attribute yields a
        single ``(estimator, None)`` entry.

    Raises
    ------
    KeyError
        If an entry of ``algorithms`` cannot be resolved to an estimator.
    """
    output = []
    for estimator in algorithms:
        if estimator in PATCHED_MODELS:
            est = PATCHED_MODELS[estimator]
        elif estimator in SPECIAL_INSTANCES:
            est = SPECIAL_INSTANCES[estimator].__class__
        elif isinstance(algorithms[estimator], BaseEstimator):
            est = algorithms[estimator].__class__
        else:
            raise KeyError(f"Unrecognized sklearnex estimator: {estimator}")

        if not daal4py and est.__module__.startswith("daal4py"):
            continue

        # Drop BaseEstimator attributes (get_params, set_params, ...) and
        # everything private.
        candidates = {
            attr
            for attr in set(dir(est)) - set(dir(BaseEstimator))
            if not attr.startswith("_")
        }
        # "fit" is required to enable the other methods, so it is only
        # reported on explicit request.
        if not fit:
            candidates.discard("fit")

        # Sets iterate in hash order, which changes between interpreter runs
        # for strings; sort so that test collection is deterministic.
        ordered = sorted(candidates)

        if required_inputs:
            # Keep only callables accepting at least one required input.
            methods = []
            for attr in ordered:
                attribute = getattr_static(est, attr)
                if callable(attribute):
                    params = signature(attribute).parameters
                    if any(inp in params for inp in required_inputs):
                        methods.append(attr)
        else:
            methods = ordered

        # In the case that no methods are available, set method to None.
        # This will allow estimators without mixins to still test the fit
        # method in various tests.
        if methods:
            output.extend((estimator, method) for method in methods)
        else:
            output.append((estimator, None))

    return output
214
+
215
+
216
def call_method(estimator, method, X, y, **kwargs):
    """Call an estimator method through one uniform interface.

    Useful for repository-wide testing where the method is chosen by name.

    Parameters
    ----------
    estimator : sklearn or sklearnex estimator instance

    method: string
        Name of a callable attribute of ``estimator``

    X: array-like
        data

    y: array-like (for 'score', 'partial-fit', and 'path')
        X-dependent data

    **kwargs: keyword dict
        keyword arguments forwarded to estimator.method

    Returns
    -------
    return value from estimator.method
    """
    bound = getattr(estimator, method)
    parameters = signature(bound).parameters
    # Count the parameters that either have no default or are named X / y;
    # that many leading items of (X, y) are passed positionally.
    positional_count = sum(
        1
        for name in parameters
        if parameters[name].default == Parameter.empty or name in ["X", "y"]
    )

    if method != "inverse_transform":
        data = (X, y)[:positional_count]
    elif X.shape[1] != estimator.n_components_:
        # PCA's inverse_transform takes (n_samples, n_components)
        data = (X[:, : estimator.n_components_],)
    else:
        data = (X,)

    return bound(*data, **kwargs)
258
+
259
+
260
def _gen_dataset_type(est):
    """Return the dataset type name for an estimator or estimator class.

    Private helper. The result is "classification" unless ``est`` subclasses
    a mixin from ``mixin_map`` that carries a dataset type; when several
    mixins match, the last matching entry of ``mixin_map`` wins.
    """
    estimator = est.__class__ if isinstance(est, BaseEstimator) else est

    matched = [
        data
        for mixin, _, data in mixin_map
        if issubclass(estimator, mixin) and data is not None
    ]
    return matched[-1] if matched else "classification"
271
+
272
+
273
# Default dataset loaders per dataset type; each callable returns (X, y).
_dataset_dict = dict(
    classification=[partial(load_iris, return_X_y=True)],
    regression=[partial(load_diabetes, return_X_y=True)],
)
277
+
278
+
279
def gen_dataset(
    est,
    datasets=_dataset_dict,
    sparse=False,
    queue=None,
    target_df=None,
    dtype=None,
):
    """Generate dataset for pytest testing.

    Parameters
    ----------
    est : sklearn or sklearnex estimator class
        Must inherit an sklearn Mixin or sklearn's BaseEstimator

    datasets: dataset dict
        Dictionary with keys "classification" and/or "regression".
        Each value must be a list of objects which yield X, y array
        objects when called, ideally using a lambda or
        functools.partial.

    sparse: bool (default False)
        Convert X data to a scipy.sparse csr_matrix format. In that
        case y is returned exactly as the loader produced it.

    queue: SYCL queue or None
        Queue necessary for device offloading following the
        SYCL 2020 standard, usually generated by dpctl.

    target_df: string or None
        dataframe type for returned dataset, as dictated by
        onedal's _convert_to_dataframe.

    dtype: numpy dtype or None
        target datatype for returned datasets (see DTYPES). When None,
        the dtype of each loaded X is used (np.float64 if X has none).

    Returns
    -------
    list of 2-element list X,y: (array-like, array-like)
        list of datasets for analysis
    """
    infer_dtype = dtype is None
    convert = partial(_convert_to_dataframe, sycl_queue=queue, target_df=target_df)
    loaders = datasets[_gen_dataset_type(est)]

    output = []
    for loader in loaders:
        X, y = loader()
        if infer_dtype:
            dtype = X.dtype if hasattr(X, "dtype") else np.float64

        if sparse:
            X = sp.csr_matrix(X)
        else:
            X = convert(X, dtype=dtype)
            y = convert(y, dtype=dtype)
        output.append([X, y])
    return output
340
+
341
+
342
def gen_sparse_dataset(row_count, column_count, **kwargs):
    """Generate sparse dataset for pytest testing.

    Parameters
    ----------
    row_count : number of rows in dataset

    column_count: number of columns in dataset

    kwargs: keyword arguments for scipy.sparse.random_array or scipy.sparse.random

    Returns
    -------
    scipy.sparse random matrix or array depending on scipy version
    """
    # Use random_array when this scipy provides it, otherwise fall back to
    # the older random() function.
    if not hasattr(sp, "random_array"):
        return sp.random(row_count, column_count, **kwargs)
    shape = (row_count, column_count)
    return sp.random_array(shape, **kwargs)
361
+
362
+
363
# NumPy scalar types used as target dtypes in tests: signed integers,
# floating point, then unsigned integers.
DTYPES = [
    np.int8, np.int16, np.int32, np.int64,
    np.float16, np.float32, np.float64,
    np.uint8, np.uint16, np.uint32, np.uint64,
]
376
+
377
+
378
def _get_processor_info():
    """Return a text description of the host processor.

    Returns
    -------
    str
        On Linux, the stripped contents of ``/proc/cpuinfo``; on Windows,
        ``platform.processor()``; on macOS, the stripped output of
        ``sysctl -n machdep.cpu.brand_string``; an empty string on any
        other system.
    """
    import shutil

    system = platform.system()
    proc = ""
    if system == "Linux":
        # Read the pseudo-file directly instead of spawning a hard-coded
        # /usr/bin/cat, which is not present at that path on every distro.
        with open("/proc/cpuinfo", "rb") as cpuinfo:
            proc = cpuinfo.read().strip().decode("utf-8")
    elif system == "Windows":
        proc = platform.processor()
    elif system == "Darwin":
        # Resolve sysctl through PATH; fall back to /usr/sbin/sysctl.
        sysctl = shutil.which("sysctl") or "/usr/sbin/sysctl"
        proc = (
            subprocess.check_output([sysctl, "-n", "machdep.cpu.brand_string"])
            .strip()
            .decode("utf-8")
        )

    return proc
396
+
397
+
398
class DummyEstimator(BaseEstimator):
    """Minimal estimator that passes its inputs through ``to_table`` and
    ``from_table``.

    ``fit`` stores the converted copies of X and y as fitted attributes and
    ``predict`` returns a converted copy of its input.
    """

    def fit(self, X, y=None):
        """Convert X and y to tables and back, storing the results.

        Parameters
        ----------
        X : array-like
            Input data; when ``_get_sycl_namespace`` reports a SYCL USM
            array interface, ``X.sycl_queue`` is used for the conversion.

        y : array-like or None
            Target data, handled the same way as X.

        Returns
        -------
        self
        """
        sua_iface, xp, _ = _get_sycl_namespace(X)
        X_table = to_table(X)
        y_table = to_table(y)
        # The presence of the fitted attributes (ending with a trailing
        # underscore) is required for check_is_fitted in predict. The
        # cleanup of the memory will occur at the estimator instance
        # deletion.
        if sua_iface:
            self.x_attr_ = from_table(
                X_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp
            )
            self.y_attr_ = from_table(
                y_table,
                sua_iface=sua_iface,
                sycl_queue=X.sycl_queue if y is None else y.sycl_queue,
                xp=xp,
            )
        else:
            # Use the trailing-underscore names here as well; without them
            # predict raised NotFittedError after a host (non-SYCL) fit.
            self.x_attr_ = from_table(X_table)
            self.y_attr_ = from_table(y_table)
            # Legacy names kept as aliases of the same objects for
            # backward compatibility.
            self.x_attr = self.x_attr_
            self.y_attr = self.y_attr_

        return self

    def predict(self, X):
        """Return X after a round trip through ``to_table``/``from_table``.

        Raises whatever ``check_is_fitted`` raises when the estimator has no
        fitted attributes (names ending with a trailing underscore).
        """
        check_is_fitted(self)
        sua_iface, xp, _ = _get_sycl_namespace(X)
        X_table = to_table(X)
        if sua_iface:
            returned_X = from_table(
                X_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp
            )
        else:
            returned_X = from_table(X_table)

        return returned_X
@@ -0,0 +1,198 @@
1
+ # ==============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import numpy as np
18
+ from numpy.testing import assert_allclose
19
+ from sklearn.datasets import make_blobs, make_classification, make_regression
20
+ from sklearn.model_selection import train_test_split
21
+
22
+ from onedal.tests.utils._dataframes_support import _as_numpy
23
+
24
# Optional SPMD test dependencies. Both flags are always bound, so other
# modules can read either of them whether or not the imports succeed.
try:
    import dpctl
    from dpctl import SyclQueue
    from mpi4py import MPI
except ImportError:
    # ModuleNotFoundError is a subclass of ImportError, so this single
    # clause covers a missing package as well as a failing import.
    mpi_libs_available = False
    gpu_is_available = False
else:
    mpi_libs_available = True
    gpu_is_available = dpctl.has_gpu_devices()

_mpi_libs_and_gpu_available = mpi_libs_available and gpu_is_available
35
+
36
+
37
def _get_local_tensor(full_data):
    """Splits data across ranks.

    Called on each rank to extract the subset of data assigned to that rank.
    The rows (first axis) are divided into contiguous chunks, one per rank of
    ``MPI.COMM_WORLD``; rank ``r`` of ``size`` gets rows
    ``[r * n // size, (r + 1) * n // size)`` where ``n`` is the row count.

    The data is only sliced: no SYCL queue is created and nothing is moved
    to another container type.

    Args:
        full_data (numpy or dpctl array): The entire set of data

    Returns:
        local_data (numpy or dpctl array): The subset of data used by the rank
    """
    # gather communicator details
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    # divide data across ranks
    data_rows = full_data.shape[0]
    local_start = rank * data_rows // size
    local_end = (1 + rank) * data_rows // size

    return full_data[local_start:local_end]
62
+
63
+
64
def _generate_regression_data(n_samples, n_features, dtype=np.float64, random_state=42):
    """Create a regression dataset and split it into train and test parts.

    The data comes from ``make_regression``; features and targets are cast
    to ``dtype`` and then split with ``train_test_split``. The same
    ``random_state`` is used for generation and for the split.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)``.
    """
    features, target = make_regression(
        n_samples=n_samples, n_features=n_features, random_state=random_state
    )
    split = train_test_split(
        features.astype(dtype), target.astype(dtype), random_state=random_state
    )
    X_train, X_test, y_train, y_test = split
    return X_train, X_test, y_train, y_test
73
+
74
+
75
def _generate_classification_data(
    n_samples, n_features, n_classes=2, dtype=np.float64, random_state=42
):
    """Create a classification dataset and split it into train and test parts.

    The data comes from ``make_classification`` with
    ``n_informative = int(0.5 * n_classes + 1)``; features and labels are
    cast to ``dtype`` and then split with ``train_test_split``. The same
    ``random_state`` is used for generation and for the split.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)``.
    """
    informative = int(0.5 * n_classes + 1)
    features, labels = make_classification(
        n_samples=n_samples,
        n_features=n_features,
        n_classes=n_classes,
        n_informative=informative,
        random_state=random_state,
    )
    split = train_test_split(
        features.astype(dtype), labels.astype(dtype), random_state=random_state
    )
    X_train, X_test, y_train, y_test = split
    return X_train, X_test, y_train, y_test
90
+
91
+
92
def _generate_statistic_data(
    n_samples, n_features=None, dtype=np.float64, random_state=42
):
    """Draw uniform samples from [-0.3, 0.7) for statistics tests.

    The result has shape ``(n_samples, n_features)``, or ``(n_samples,)``
    when ``n_features`` is ``None``, and is cast to ``dtype``. Values come
    from ``np.random.default_rng(random_state)``.
    """
    if n_features is None:
        shape = (n_samples,)
    else:
        shape = (n_samples, n_features)

    rng = np.random.default_rng(random_state)
    samples = rng.uniform(low=-0.3, high=+0.7, size=shape)
    return samples.astype(dtype)
103
+
104
+
105
def _generate_clustering_data(
    n_samples, n_features, centers=None, dtype=np.float64, random_state=42
):
    """Create clustering data and split it into train and test parts.

    The points come from ``make_blobs`` (its labels are discarded), are cast
    to ``dtype`` and then split with ``train_test_split``. The same
    ``random_state`` is used for generation and for the split.

    Returns:
        tuple: ``(X_train, X_test)``.
    """
    blob_kwargs = dict(
        n_samples=n_samples,
        centers=centers,
        n_features=n_features,
        random_state=random_state,
    )
    points = make_blobs(**blob_kwargs)[0].astype(dtype)
    X_train, X_test = train_test_split(points, random_state=random_state)
    return X_train, X_test
118
+
119
+
120
def _spmd_assert_allclose(spmd_result, batch_result, **kwargs):
    """Compare a rank-local spmd result with its share of the batch result.

    Called on each rank: the batch result is cut down to the chunk belonging
    to the calling rank and compared to the spmd result with
    ``assert_allclose``.

    Args:
        spmd_result (numpy or dpctl array): The result for the subset of data
            on the calling rank, computed by the spmd estimator
        batch_result (numpy array): The result for all data, computed by the
            batch estimator
        **kwargs: Forwarded to ``assert_allclose``.

    Raises:
        AssertionError: If all results are not adequately close.
    """
    # Only this rank's chunk of the batch result is comparable.
    rank_chunk = _get_local_tensor(batch_result)

    actual = _as_numpy(spmd_result)
    expected = _as_numpy(rank_chunk)
    assert_allclose(actual, expected, **kwargs)
138
+
139
+
140
def _assert_unordered_allclose(spmd_result, batch_result, localize=False, **kwargs):
    """Check that spmd and batch results hold the same rows in any order.

    Useful for results where row order does not matter, such as KMeans
    centers or KNN neighbors. Both inputs are sorted by the Euclidean norm of
    their rows before being compared with ``assert_allclose``.

    Args:
        spmd_result (numpy or dpctl array): Result computed by the spmd estimator
        batch_result (numpy array): Result computed by batch estimator
        localize (bool): Whether or not the spmd result is specific to the
            rank, in which case the batch result is localized first
        **kwargs: Forwarded to ``assert_allclose``.

    Raises:
        AssertionError: If results do not match.
    """

    def _rows_by_norm(rows):
        # Reorder rows by ascending row norm.
        order = np.argsort(np.linalg.norm(rows, axis=1))
        return rows[order]

    ordered_spmd = _rows_by_norm(_as_numpy(spmd_result))

    reference = _get_local_tensor(batch_result) if localize else batch_result
    ordered_batch = _rows_by_norm(reference)

    assert_allclose(ordered_spmd, ordered_batch, **kwargs)
171
+
172
+
173
def _assert_kmeans_labels_allclose(
    spmd_labels, batch_labels, spmd_centers, batch_centers, **kwargs
):
    """Check that spmd and batch labels agree even if cluster indices differ.

    Raw label values may not match between the two estimators, so each label
    is replaced by the center it indexes and the resulting center
    coordinates are compared with ``assert_allclose``.

    Args:
        spmd_labels (numpy or dpctl array): The labels for the subset of data
            on the calling rank, computed by the spmd estimator
        batch_labels (numpy array): The labels for all data, computed by the
            batch estimator
        spmd_centers (numpy or dpctl array): Centers computed by the spmd estimator
        batch_centers (numpy array): Centers computed by batch estimator
        **kwargs: Forwarded to ``assert_allclose``.

    Raises:
        AssertionError: If clusters are not correctly assigned.
    """
    labels = _as_numpy(spmd_labels)
    centers = _as_numpy(spmd_centers)
    rank_batch_labels = _get_local_tensor(batch_labels)

    # Map every label to the coordinates of the center it refers to.
    spmd_assigned = centers[labels]
    batch_assigned = batch_centers[rank_batch_labels]

    assert_allclose(spmd_assigned, batch_assigned, **kwargs)
@@ -0,0 +1,19 @@
1
+ # ===============================================================================
2
+ # Copyright 2022 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ===============================================================================
16
+
17
# Re-export the validation helper so it can be imported from this package.
from .validation import assert_all_finite

# Names exported by ``from <package> import *``.
__all__ = ["assert_all_finite"]