scikit-learn-intelex 2025.4.0__py313-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (282) hide show
  1. daal4py/__init__.py +73 -0
  2. daal4py/__main__.py +58 -0
  3. daal4py/_daal4py.cpython-313-x86_64-linux-gnu.so +0 -0
  4. daal4py/doc/third-party-programs.txt +424 -0
  5. daal4py/mb/__init__.py +19 -0
  6. daal4py/mb/model_builders.py +377 -0
  7. daal4py/mpi_transceiver.cpython-313-x86_64-linux-gnu.so +0 -0
  8. daal4py/sklearn/__init__.py +40 -0
  9. daal4py/sklearn/_n_jobs_support.py +248 -0
  10. daal4py/sklearn/_utils.py +245 -0
  11. daal4py/sklearn/cluster/__init__.py +20 -0
  12. daal4py/sklearn/cluster/dbscan.py +165 -0
  13. daal4py/sklearn/cluster/k_means.py +597 -0
  14. daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
  15. daal4py/sklearn/decomposition/__init__.py +19 -0
  16. daal4py/sklearn/decomposition/_pca.py +524 -0
  17. daal4py/sklearn/ensemble/AdaBoostClassifier.py +196 -0
  18. daal4py/sklearn/ensemble/GBTDAAL.py +337 -0
  19. daal4py/sklearn/ensemble/__init__.py +27 -0
  20. daal4py/sklearn/ensemble/_forest.py +1397 -0
  21. daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
  22. daal4py/sklearn/linear_model/__init__.py +29 -0
  23. daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
  24. daal4py/sklearn/linear_model/_linear.py +272 -0
  25. daal4py/sklearn/linear_model/_ridge.py +325 -0
  26. daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
  27. daal4py/sklearn/linear_model/linear.py +17 -0
  28. daal4py/sklearn/linear_model/logistic_loss.py +195 -0
  29. daal4py/sklearn/linear_model/logistic_path.py +1026 -0
  30. daal4py/sklearn/linear_model/ridge.py +17 -0
  31. daal4py/sklearn/linear_model/tests/test_linear.py +208 -0
  32. daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
  33. daal4py/sklearn/manifold/__init__.py +19 -0
  34. daal4py/sklearn/manifold/_t_sne.py +405 -0
  35. daal4py/sklearn/metrics/__init__.py +20 -0
  36. daal4py/sklearn/metrics/_pairwise.py +236 -0
  37. daal4py/sklearn/metrics/_ranking.py +210 -0
  38. daal4py/sklearn/model_selection/__init__.py +19 -0
  39. daal4py/sklearn/model_selection/_split.py +309 -0
  40. daal4py/sklearn/model_selection/tests/test_split.py +56 -0
  41. daal4py/sklearn/monkeypatch/__init__.py +0 -0
  42. daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
  43. daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
  44. daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
  45. daal4py/sklearn/monkeypatch/tests/test_patching.py +90 -0
  46. daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +117 -0
  47. daal4py/sklearn/neighbors/__init__.py +21 -0
  48. daal4py/sklearn/neighbors/_base.py +503 -0
  49. daal4py/sklearn/neighbors/_classification.py +139 -0
  50. daal4py/sklearn/neighbors/_regression.py +74 -0
  51. daal4py/sklearn/neighbors/_unsupervised.py +55 -0
  52. daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
  53. daal4py/sklearn/svm/__init__.py +19 -0
  54. daal4py/sklearn/svm/svm.py +734 -0
  55. daal4py/sklearn/utils/__init__.py +21 -0
  56. daal4py/sklearn/utils/base.py +75 -0
  57. daal4py/sklearn/utils/tests/test_utils.py +51 -0
  58. daal4py/sklearn/utils/validation.py +696 -0
  59. onedal/__init__.py +83 -0
  60. onedal/_config.py +54 -0
  61. onedal/_device_offload.py +204 -0
  62. onedal/_onedal_py_dpc.cpython-313-x86_64-linux-gnu.so +0 -0
  63. onedal/_onedal_py_host.cpython-313-x86_64-linux-gnu.so +0 -0
  64. onedal/_onedal_py_spmd_dpc.cpython-313-x86_64-linux-gnu.so +0 -0
  65. onedal/basic_statistics/__init__.py +20 -0
  66. onedal/basic_statistics/basic_statistics.py +107 -0
  67. onedal/basic_statistics/incremental_basic_statistics.py +175 -0
  68. onedal/basic_statistics/tests/test_basic_statistics.py +242 -0
  69. onedal/basic_statistics/tests/test_incremental_basic_statistics.py +279 -0
  70. onedal/basic_statistics/tests/utils.py +50 -0
  71. onedal/cluster/__init__.py +27 -0
  72. onedal/cluster/dbscan.py +105 -0
  73. onedal/cluster/kmeans.py +557 -0
  74. onedal/cluster/kmeans_init.py +112 -0
  75. onedal/cluster/tests/test_dbscan.py +125 -0
  76. onedal/cluster/tests/test_kmeans.py +88 -0
  77. onedal/cluster/tests/test_kmeans_init.py +93 -0
  78. onedal/common/_base.py +38 -0
  79. onedal/common/_estimator_checks.py +47 -0
  80. onedal/common/_mixin.py +62 -0
  81. onedal/common/_policy.py +55 -0
  82. onedal/common/_spmd_policy.py +30 -0
  83. onedal/common/hyperparameters.py +125 -0
  84. onedal/common/tests/test_policy.py +76 -0
  85. onedal/common/tests/test_sycl.py +128 -0
  86. onedal/covariance/__init__.py +20 -0
  87. onedal/covariance/covariance.py +122 -0
  88. onedal/covariance/incremental_covariance.py +161 -0
  89. onedal/covariance/tests/test_covariance.py +50 -0
  90. onedal/covariance/tests/test_incremental_covariance.py +190 -0
  91. onedal/datatypes/__init__.py +19 -0
  92. onedal/datatypes/_data_conversion.py +121 -0
  93. onedal/datatypes/tests/common.py +126 -0
  94. onedal/datatypes/tests/test_data.py +475 -0
  95. onedal/decomposition/__init__.py +20 -0
  96. onedal/decomposition/incremental_pca.py +214 -0
  97. onedal/decomposition/pca.py +186 -0
  98. onedal/decomposition/tests/test_incremental_pca.py +285 -0
  99. onedal/ensemble/__init__.py +29 -0
  100. onedal/ensemble/forest.py +736 -0
  101. onedal/ensemble/tests/test_random_forest.py +97 -0
  102. onedal/linear_model/__init__.py +27 -0
  103. onedal/linear_model/incremental_linear_model.py +292 -0
  104. onedal/linear_model/linear_model.py +325 -0
  105. onedal/linear_model/logistic_regression.py +247 -0
  106. onedal/linear_model/tests/test_incremental_linear_regression.py +213 -0
  107. onedal/linear_model/tests/test_incremental_ridge_regression.py +171 -0
  108. onedal/linear_model/tests/test_linear_regression.py +259 -0
  109. onedal/linear_model/tests/test_logistic_regression.py +95 -0
  110. onedal/linear_model/tests/test_ridge.py +95 -0
  111. onedal/neighbors/__init__.py +19 -0
  112. onedal/neighbors/neighbors.py +763 -0
  113. onedal/neighbors/tests/test_knn_classification.py +49 -0
  114. onedal/primitives/__init__.py +27 -0
  115. onedal/primitives/get_tree.py +25 -0
  116. onedal/primitives/kernel_functions.py +152 -0
  117. onedal/primitives/tests/test_kernel_functions.py +159 -0
  118. onedal/spmd/__init__.py +25 -0
  119. onedal/spmd/_base.py +30 -0
  120. onedal/spmd/basic_statistics/__init__.py +20 -0
  121. onedal/spmd/basic_statistics/basic_statistics.py +30 -0
  122. onedal/spmd/basic_statistics/incremental_basic_statistics.py +71 -0
  123. onedal/spmd/cluster/__init__.py +28 -0
  124. onedal/spmd/cluster/dbscan.py +23 -0
  125. onedal/spmd/cluster/kmeans.py +56 -0
  126. onedal/spmd/covariance/__init__.py +20 -0
  127. onedal/spmd/covariance/covariance.py +26 -0
  128. onedal/spmd/covariance/incremental_covariance.py +83 -0
  129. onedal/spmd/decomposition/__init__.py +20 -0
  130. onedal/spmd/decomposition/incremental_pca.py +124 -0
  131. onedal/spmd/decomposition/pca.py +26 -0
  132. onedal/spmd/ensemble/__init__.py +19 -0
  133. onedal/spmd/ensemble/forest.py +28 -0
  134. onedal/spmd/linear_model/__init__.py +21 -0
  135. onedal/spmd/linear_model/incremental_linear_model.py +101 -0
  136. onedal/spmd/linear_model/linear_model.py +30 -0
  137. onedal/spmd/linear_model/logistic_regression.py +38 -0
  138. onedal/spmd/neighbors/__init__.py +19 -0
  139. onedal/spmd/neighbors/neighbors.py +75 -0
  140. onedal/svm/__init__.py +19 -0
  141. onedal/svm/svm.py +556 -0
  142. onedal/svm/tests/test_csr_svm.py +351 -0
  143. onedal/svm/tests/test_nusvc.py +204 -0
  144. onedal/svm/tests/test_nusvr.py +210 -0
  145. onedal/svm/tests/test_svc.py +176 -0
  146. onedal/svm/tests/test_svr.py +243 -0
  147. onedal/tests/test_common.py +57 -0
  148. onedal/tests/utils/_dataframes_support.py +162 -0
  149. onedal/tests/utils/_device_selection.py +102 -0
  150. onedal/utils/__init__.py +49 -0
  151. onedal/utils/_array_api.py +81 -0
  152. onedal/utils/_dpep_helpers.py +56 -0
  153. onedal/utils/tests/test_validation.py +142 -0
  154. onedal/utils/validation.py +464 -0
  155. scikit_learn_intelex-2025.4.0.dist-info/LICENSE.txt +202 -0
  156. scikit_learn_intelex-2025.4.0.dist-info/METADATA +190 -0
  157. scikit_learn_intelex-2025.4.0.dist-info/RECORD +282 -0
  158. scikit_learn_intelex-2025.4.0.dist-info/WHEEL +5 -0
  159. scikit_learn_intelex-2025.4.0.dist-info/top_level.txt +3 -0
  160. sklearnex/__init__.py +66 -0
  161. sklearnex/__main__.py +58 -0
  162. sklearnex/_config.py +116 -0
  163. sklearnex/_device_offload.py +126 -0
  164. sklearnex/_utils.py +177 -0
  165. sklearnex/basic_statistics/__init__.py +20 -0
  166. sklearnex/basic_statistics/basic_statistics.py +261 -0
  167. sklearnex/basic_statistics/incremental_basic_statistics.py +352 -0
  168. sklearnex/basic_statistics/tests/test_basic_statistics.py +405 -0
  169. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +455 -0
  170. sklearnex/cluster/__init__.py +20 -0
  171. sklearnex/cluster/dbscan.py +197 -0
  172. sklearnex/cluster/k_means.py +397 -0
  173. sklearnex/cluster/tests/test_dbscan.py +38 -0
  174. sklearnex/cluster/tests/test_kmeans.py +157 -0
  175. sklearnex/conftest.py +82 -0
  176. sklearnex/covariance/__init__.py +19 -0
  177. sklearnex/covariance/incremental_covariance.py +405 -0
  178. sklearnex/covariance/tests/test_incremental_covariance.py +287 -0
  179. sklearnex/decomposition/__init__.py +19 -0
  180. sklearnex/decomposition/pca.py +427 -0
  181. sklearnex/decomposition/tests/test_pca.py +58 -0
  182. sklearnex/dispatcher.py +534 -0
  183. sklearnex/doc/third-party-programs.txt +424 -0
  184. sklearnex/ensemble/__init__.py +29 -0
  185. sklearnex/ensemble/_forest.py +2029 -0
  186. sklearnex/ensemble/tests/test_forest.py +140 -0
  187. sklearnex/glob/__main__.py +72 -0
  188. sklearnex/glob/dispatcher.py +101 -0
  189. sklearnex/linear_model/__init__.py +32 -0
  190. sklearnex/linear_model/coordinate_descent.py +30 -0
  191. sklearnex/linear_model/incremental_linear.py +495 -0
  192. sklearnex/linear_model/incremental_ridge.py +432 -0
  193. sklearnex/linear_model/linear.py +346 -0
  194. sklearnex/linear_model/logistic_regression.py +415 -0
  195. sklearnex/linear_model/ridge.py +390 -0
  196. sklearnex/linear_model/tests/test_incremental_linear.py +267 -0
  197. sklearnex/linear_model/tests/test_incremental_ridge.py +214 -0
  198. sklearnex/linear_model/tests/test_linear.py +142 -0
  199. sklearnex/linear_model/tests/test_logreg.py +134 -0
  200. sklearnex/linear_model/tests/test_ridge.py +256 -0
  201. sklearnex/manifold/__init__.py +19 -0
  202. sklearnex/manifold/t_sne.py +26 -0
  203. sklearnex/manifold/tests/test_tsne.py +250 -0
  204. sklearnex/metrics/__init__.py +23 -0
  205. sklearnex/metrics/pairwise.py +22 -0
  206. sklearnex/metrics/ranking.py +20 -0
  207. sklearnex/metrics/tests/test_metrics.py +39 -0
  208. sklearnex/model_selection/__init__.py +21 -0
  209. sklearnex/model_selection/split.py +22 -0
  210. sklearnex/model_selection/tests/test_model_selection.py +34 -0
  211. sklearnex/neighbors/__init__.py +27 -0
  212. sklearnex/neighbors/_lof.py +236 -0
  213. sklearnex/neighbors/common.py +310 -0
  214. sklearnex/neighbors/knn_classification.py +231 -0
  215. sklearnex/neighbors/knn_regression.py +207 -0
  216. sklearnex/neighbors/knn_unsupervised.py +178 -0
  217. sklearnex/neighbors/tests/test_neighbors.py +82 -0
  218. sklearnex/preview/__init__.py +17 -0
  219. sklearnex/preview/covariance/__init__.py +19 -0
  220. sklearnex/preview/covariance/covariance.py +142 -0
  221. sklearnex/preview/covariance/tests/test_covariance.py +66 -0
  222. sklearnex/preview/decomposition/__init__.py +19 -0
  223. sklearnex/preview/decomposition/incremental_pca.py +244 -0
  224. sklearnex/preview/decomposition/tests/test_incremental_pca.py +336 -0
  225. sklearnex/spmd/__init__.py +25 -0
  226. sklearnex/spmd/basic_statistics/__init__.py +20 -0
  227. sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
  228. sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
  229. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  230. sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +306 -0
  231. sklearnex/spmd/cluster/__init__.py +30 -0
  232. sklearnex/spmd/cluster/dbscan.py +50 -0
  233. sklearnex/spmd/cluster/kmeans.py +21 -0
  234. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  235. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +173 -0
  236. sklearnex/spmd/covariance/__init__.py +20 -0
  237. sklearnex/spmd/covariance/covariance.py +21 -0
  238. sklearnex/spmd/covariance/incremental_covariance.py +37 -0
  239. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  240. sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
  241. sklearnex/spmd/decomposition/__init__.py +20 -0
  242. sklearnex/spmd/decomposition/incremental_pca.py +30 -0
  243. sklearnex/spmd/decomposition/pca.py +21 -0
  244. sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
  245. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  246. sklearnex/spmd/ensemble/__init__.py +19 -0
  247. sklearnex/spmd/ensemble/forest.py +71 -0
  248. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  249. sklearnex/spmd/linear_model/__init__.py +21 -0
  250. sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
  251. sklearnex/spmd/linear_model/linear_model.py +21 -0
  252. sklearnex/spmd/linear_model/logistic_regression.py +21 -0
  253. sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +331 -0
  254. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  255. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +162 -0
  256. sklearnex/spmd/neighbors/__init__.py +19 -0
  257. sklearnex/spmd/neighbors/neighbors.py +25 -0
  258. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  259. sklearnex/svm/__init__.py +29 -0
  260. sklearnex/svm/_common.py +339 -0
  261. sklearnex/svm/nusvc.py +371 -0
  262. sklearnex/svm/nusvr.py +170 -0
  263. sklearnex/svm/svc.py +399 -0
  264. sklearnex/svm/svr.py +167 -0
  265. sklearnex/svm/tests/test_svm.py +93 -0
  266. sklearnex/tests/test_common.py +491 -0
  267. sklearnex/tests/test_config.py +123 -0
  268. sklearnex/tests/test_hyperparameters.py +43 -0
  269. sklearnex/tests/test_memory_usage.py +347 -0
  270. sklearnex/tests/test_monkeypatch.py +269 -0
  271. sklearnex/tests/test_n_jobs_support.py +108 -0
  272. sklearnex/tests/test_parallel.py +48 -0
  273. sklearnex/tests/test_patching.py +377 -0
  274. sklearnex/tests/test_run_to_run_stability.py +326 -0
  275. sklearnex/tests/utils/__init__.py +48 -0
  276. sklearnex/tests/utils/base.py +436 -0
  277. sklearnex/tests/utils/spmd.py +198 -0
  278. sklearnex/utils/__init__.py +19 -0
  279. sklearnex/utils/_array_api.py +82 -0
  280. sklearnex/utils/parallel.py +59 -0
  281. sklearnex/utils/tests/test_validation.py +238 -0
  282. sklearnex/utils/validation.py +208 -0
@@ -0,0 +1,347 @@
1
+ # ==============================================================================
2
+ # Copyright 2021 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import gc
18
+ import logging
19
+ import os
20
+ import tracemalloc
21
+ import types
22
+ import warnings
23
+ from inspect import isclass
24
+
25
+ import numpy as np
26
+ import pytest
27
+ from scipy.stats import pearsonr
28
+ from sklearn.base import BaseEstimator, clone
29
+ from sklearn.datasets import make_classification
30
+ from sklearn.model_selection import KFold
31
+
32
+ from onedal import _is_dpc_backend
33
+ from onedal.tests.utils._dataframes_support import (
34
+ _convert_to_dataframe,
35
+ get_dataframes_and_queues,
36
+ )
37
+ from onedal.tests.utils._device_selection import get_queues, is_dpctl_device_available
38
+ from onedal.utils._dpep_helpers import dpctl_available, dpnp_available
39
+ from sklearnex import config_context
40
+ from sklearnex.tests.utils import (
41
+ PATCHED_FUNCTIONS,
42
+ PATCHED_MODELS,
43
+ SPECIAL_INSTANCES,
44
+ DummyEstimator,
45
+ )
46
+ from sklearnex.utils._array_api import get_namespace
47
+
48
+ if dpctl_available:
49
+ from dpctl.tensor import usm_ndarray
50
+
51
+ if dpnp_available:
52
+ import dpnp
53
+
54
+ if _is_dpc_backend:
55
+ from onedal import _backend
56
+
57
+
58
# Names left out of the CPU memory-leak sweep. Entries that share a reason are
# grouped under a single comment; the tuple's contents and order are unchanged.
CPU_SKIP_LIST = (
    # too slow for using in testing on common data size
    "TSNE",
    # configuration helpers: do not malloc
    "config_context",
    "get_config",
    "set_config",
    # memory leak fortran numpy (investigate _fit_proba)
    "SVC(probability=True)",
    "NuSVC(probability=True)",
    # dataframe_f issues
    "IncrementalEmpiricalCovariance",
    # TODO fix memory leak issue in private CI for
    # data_shape = (1000, 100), data_transform_function = dataframe_f
    "IncrementalLinearRegression",
    "IncrementalPCA",
    "IncrementalRidge",
    # memory leak fortran (1000, 100)
    "LogisticRegression(solver='newton-cg')",
)
71
+
72
# Names left out of the GPU memory-leak sweep. Entries that share a reason are
# grouped under a single comment; the tuple's contents and order are unchanged.
GPU_SKIP_LIST = (
    # too slow for using in testing on common data size
    "TSNE",
    "RandomForestRegressor",
    # does not support GPU offloading
    "KMeans",
    # configuration helpers: do not malloc
    "config_context",
    "get_config",
    "set_config",
    # does not support GPU offloading (fails silently)
    "ElasticNet",
    "Lasso",
    "SVR",
    "NuSVR",
    "NuSVC",
    # default parameters not supported, see solver=newton-cg
    "LogisticRegression",
    # does not support GPU offloading (fails silently)
    "NuSVC(probability=True)",
    # issue with potrf with the specific dataset
    "IncrementalLinearRegression",
    "LinearRegression",
)
89
+
90
+
91
def gen_functions(functions):
    """Return a copy of ``functions`` whose special entries accept ``(x, y)``.

    The input mapping is not modified. Three entries are removed from the copy
    and re-inserted (at the end, in this order) as two-argument lambdas:

    * ``"roc_auc_score"``: called as ``roc_auc_score(y, y)``; ``x`` is ignored.
    * ``"pairwise_distances"``: replaced by two entries, one per metric
      (``cosine`` and ``correlation``), each called on ``x`` only.
    * ``"_assert_all_finite"``: called once on ``x`` and once on ``y``; the
      two results are returned as a list.

    Raises ``KeyError`` if any of the three names is missing from ``functions``.
    """
    wrapped = functions.copy()

    # Detach the raw callables first; they are re-registered below under the
    # adapted names, which places them after every untouched entry.
    auc = wrapped.pop("roc_auc_score")
    distances = wrapped.pop("pairwise_distances")
    check_finite = wrapped.pop("_assert_all_finite")

    # Lambdas are kept on purpose: callers report ``__name__`` of these objects.
    wrapped["roc_auc_score"] = lambda x, y: auc(y, y)
    wrapped["pairwise_distances(metric='cosine')"] = lambda x, y: distances(
        x, metric="cosine"
    )
    wrapped["pairwise_distances(metric='correlation')"] = lambda x, y: distances(
        x, metric="correlation"
    )
    wrapped["_assert_all_finite"] = lambda x, y: [check_finite(x), check_finite(y)]

    return wrapped
111
+
112
+
113
# Patched functions adapted by gen_functions so each can be called as f(x, y).
FUNCTIONS = gen_functions(PATCHED_FUNCTIONS)

# Everything exercised by the CPU test: patched models, special instances and
# the adapted functions, minus the names listed in CPU_SKIP_LIST.
CPU_ESTIMATORS = {
    k: v
    for k, v in {**PATCHED_MODELS, **SPECIAL_INSTANCES, **FUNCTIONS}.items()
    if k not in CPU_SKIP_LIST
}

# Everything exercised by the GPU test: patched models and special instances
# only (no plain functions), minus the names listed in GPU_SKIP_LIST.
GPU_ESTIMATORS = {
    k: v
    for k, v in {**PATCHED_MODELS, **SPECIAL_INSTANCES}.items()
    if k not in GPU_SKIP_LIST
}

# (n_samples, n_features) pairs used to parametrize the tests.
data_shapes = [
    pytest.param((1000, 100), id="(1000, 100)"),
    pytest.param((2000, 50), id="(2000, 50)"),
]

# Allowed extra memory after the run, as a fraction of the input data size.
# The pandas value is used when the dataframe parameter equals "pandas".
EXTRA_MEMORY_THRESHOLD = 0.15
EXTRA_MEMORY_THRESHOLD_PANDAS = 0.25
# Number of KFold splits, i.e. fit/inference iterations per test case.
N_SPLITS = 10
# Maps the "order" test parameter to the numpy memory-layout conversion.
ORDER_DICT = {"F": np.asfortranarray, "C": np.ascontiguousarray}
136
+
137
+
138
def gen_clsf_data(n_samples, n_features, dtype=None):
    """Build a fixed-seed two-class dataset and report its size in bytes.

    Parameters
    ----------
    n_samples, n_features : int
        Forwarded to ``make_classification``.
    dtype : optional
        When truthy, both the data and the labels are cast to this dtype.

    Returns
    -------
    tuple
        ``(data, label, nbytes)`` where ``nbytes`` is the combined
        ``size * itemsize`` of both arrays (after any cast).
    """
    features, targets = make_classification(
        n_classes=2, n_samples=n_samples, n_features=n_features, random_state=777
    )
    if dtype:
        features = features.astype(dtype)
        targets = targets.astype(dtype)

    feature_bytes = features.size * features.dtype.itemsize
    target_bytes = targets.size * targets.dtype.itemsize
    return features, targets, feature_bytes + target_bytes
149
+
150
+
151
def get_traced_memory(queue=None):
    """Return the amount of memory currently in use.

    With a DPC backend and a queue whose SYCL device is a GPU, the value comes
    from ``_backend.get_used_memory(queue)``. In every other case it is the
    current (first) value reported by ``tracemalloc.get_traced_memory()``.
    """
    measure_on_gpu = _is_dpc_backend and queue and queue.sycl_device.is_gpu
    if not measure_on_gpu:
        return tracemalloc.get_traced_memory()[0]
    return _backend.get_used_memory(queue)
156
+
157
+
158
def take(x, index, axis=0, queue=None):
    """Select the entries of ``x`` at ``index`` along ``axis``.

    Dispatches on the container type of ``x``:

    * ``dpnp.ndarray`` / ``usm_ndarray``: the index is moved to the device on
      the same SYCL queue as ``x`` before calling the namespace's ``take``.
    * other array-API inputs: the index is placed on ``x.device``.
    * anything else: falls back to ``x.take(index, axis=axis)``.

    ``queue`` is accepted for call-site symmetry but is not used here.
    """
    xp, array_api = get_namespace(x)

    is_dpnp_array = dpnp_available and isinstance(x, dpnp.ndarray)
    if is_dpnp_array or (dpctl_available and isinstance(x, usm_ndarray)):
        # Using the same sycl queue for dpnp.ndarray or usm_ndarray.
        device_index = xp.asarray(index, usm_type="device", sycl_queue=x.sycl_queue)
        return xp.take(x, device_index, axis=axis)

    if array_api:
        return xp.take(x, xp.asarray(index, device=x.device), axis=axis)

    return x.take(index, axis=axis)
174
+
175
+
176
def split_train_inference(kf, x, y, estimator, queue=None):
    """Run one fit/inference cycle per fold and sample memory after each.

    ``estimator`` may be a ``BaseEstimator`` subclass (instantiated with no
    arguments), a ``BaseEstimator`` instance (cloned), or any other callable,
    which is simply invoked as ``estimator(x_train, y_train)``.

    For estimators, ``fit`` is followed by the first available method among
    ``predict``, ``transform`` and ``kneighbors`` on the held-out fold.

    Returns the list of ``get_traced_memory(queue)`` readings, one per fold.
    """
    readings = []
    for train_index, test_index in kf.split(x):
        x_train = take(x, train_index, queue=queue)
        y_train = take(y, train_index, queue=queue)
        x_test = take(x, test_index, queue=queue)
        y_test = take(y, test_index, queue=queue)

        # ``None`` marks "plain callable, not an estimator".
        if isclass(estimator) and issubclass(estimator, BaseEstimator):
            model = estimator()
        elif isinstance(estimator, BaseEstimator):
            model = clone(estimator)
        else:
            model = None

        if model is None:
            estimator(x_train, y_train)
        else:
            model.fit(x_train, y_train)
            if hasattr(model, "predict"):
                model.predict(x_test)
            elif hasattr(model, "transform"):
                model.transform(x_test)
            elif hasattr(model, "kneighbors"):
                model.kneighbors(x_test)

        # Drop every per-fold object before sampling so that only memory that
        # outlives the iteration is counted.
        del model
        del x_train, x_test, y_train, y_test
        readings.append(get_traced_memory(queue))
    return readings
208
+
209
+
210
def _kfold_function_template(
    estimator, dataframe, data_shape, queue=None, func=None, dtype=None
):
    """Run ``estimator`` over KFold splits and assert it does not leak memory.

    Parameters
    ----------
    estimator : BaseEstimator subclass, BaseEstimator instance or callable
        Object under test, forwarded to ``split_train_inference``.
    dataframe : str or None
        Target container passed to ``_convert_to_dataframe``; the value
        ``"pandas"`` selects the larger ``EXTRA_MEMORY_THRESHOLD_PANDAS``.
    data_shape : tuple
        ``(n_samples, n_features)`` of the generated dataset.
    queue : optional
        SYCL queue used for data placement and for memory measurement.
    func : callable, optional
        Applied to ``X`` before conversion (e.g. a memory-layout conversion).
    dtype : optional
        Forwarded to ``gen_clsf_data``.

    Raises
    ------
    AssertionError
        If, after garbage collection, the extra memory is not below the
        threshold fraction of the input size. Only checked when ``queue`` is
        ``None`` or its device is a CPU.

    Notes
    -----
    Tracing is stopped in a ``finally`` clause so that a failure inside the
    measured section does not leave ``tracemalloc`` enabled for later tests.
    """
    tracemalloc.start()
    try:
        n_samples, n_features = data_shape
        X, y, data_memory_size = gen_clsf_data(n_samples, n_features, dtype=dtype)
        kf = KFold(n_splits=N_SPLITS)
        if func:
            X = func(X)

        X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
        y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)

        mem_before = get_traced_memory(queue)
        mem_tracks = split_train_inference(kf, X, y, estimator, queue=queue)
        # Per-iteration growth statistics, reported only in the warning below.
        mem_iter_diffs = np.array(mem_tracks[1:]) - np.array(mem_tracks[:-1])
        mem_incr_mean, mem_incr_std = mem_iter_diffs.mean(), mem_iter_diffs.std()
        mem_incr_mean, mem_incr_std = round(mem_incr_mean), round(mem_incr_std)
        with warnings.catch_warnings():
            # In the case that the memory usage is constant, this will raise
            # a ConstantInputWarning error in pearsonr from scipy, this can
            # be ignored.
            warnings.filterwarnings(
                "ignore",
                message="An input array is constant; the correlation coefficient is not defined",
            )
            mem_iter_corr, _ = pearsonr(mem_tracks, list(range(len(mem_tracks))))

        if mem_iter_corr > 0.95:
            logging.warning(
                "Memory usage is steadily increasing with iterations "
                "(Pearson correlation coefficient between "
                f"memory tracks and iterations is {mem_iter_corr})\n"
                "Memory usage increase per iteration: "
                f"{mem_incr_mean}±{mem_incr_std} bytes"
            )
        mem_before_gc = get_traced_memory(queue)
        mem_diff = mem_before_gc - mem_before
        if isinstance(estimator, BaseEstimator):
            name = str(estimator)
        else:
            name = estimator.__name__

        threshold = (
            EXTRA_MEMORY_THRESHOLD_PANDAS
            if dataframe == "pandas"
            else EXTRA_MEMORY_THRESHOLD
        )
        # The two remaining "{}" slots are filled later via str.format with
        # "before"/"after", the byte count and the percentage.
        message = (
            "Size of extra allocated memory {} using garbage collector "
            f"is greater than {threshold * 100}% of input data"
            f"\n\tAlgorithm: {name}"
            f"\n\tInput data size: {data_memory_size} bytes"
            "\n\tExtra allocated memory size: {} bytes"
            " / {} %"
        )
        # Exceeding the threshold before collection is only worth a warning.
        if mem_diff >= threshold * data_memory_size:
            logging.warning(
                message.format(
                    "before", mem_diff, round((mem_diff) / data_memory_size * 100, 2)
                )
            )
        gc.collect()
        mem_after = get_traced_memory(queue)
    finally:
        # Always switch tracing off, even when the estimator under test fails.
        tracemalloc.stop()
    mem_diff = mem_after - mem_before

    # GPU offloading with SYCL contains a program/kernel cache which should
    # be controllable via a KernelProgramCache object in the SYCL context.
    # The programs and kernels are stored on the GPU, but cannot be cleared
    # as this class is not available for access in all oneDAL DPC++ runtimes.
    # Therefore, until this is implemented this test must be skipped for gpu
    # as it looks like a memory leak (at least there is no way to discern a
    # leak on the first run).
    if queue is None or queue.sycl_device.is_cpu:
        assert mem_diff < threshold * data_memory_size, message.format(
            "after", mem_diff, round((mem_diff) / data_memory_size * 100, 2)
        )
287
+
288
+
289
@pytest.mark.parametrize("order", ["F", "C"])
@pytest.mark.parametrize(
    "dataframe,queue", get_dataframes_and_queues("numpy,pandas,dpctl", "cpu")
)
@pytest.mark.parametrize("estimator", CPU_ESTIMATORS.keys())
@pytest.mark.parametrize("data_shape", data_shapes)
def test_memory_leaks(estimator, dataframe, queue, order, data_shape):
    """Check every CPU estimator/function for leaks on each container type."""
    layout_converter = ORDER_DICT[order]

    # _assert_all_finite is skipped whenever a queue is supplied.
    if queue is not None and estimator == "_assert_all_finite":
        pytest.skip(f"{estimator} is not designed for device offloading")

    target = CPU_ESTIMATORS[estimator]
    _kfold_function_template(target, dataframe, data_shape, queue, layout_converter)
303
+
304
+
305
@pytest.mark.skipif(
    os.getenv("ZES_ENABLE_SYSMAN") is None or not is_dpctl_device_available("gpu"),
    reason="SYCL device memory leak check requires the level zero sysman",
)
@pytest.mark.parametrize("queue", get_queues("gpu"))
@pytest.mark.parametrize("estimator", GPU_ESTIMATORS.keys())
@pytest.mark.parametrize("order", ["F", "C"])
@pytest.mark.parametrize("data_shape", data_shapes)
def test_gpu_memory_leaks(estimator, queue, order, data_shape):
    """Check every GPU estimator for leaks while offloading to ``queue``."""
    layout_converter = ORDER_DICT[order]

    is_extra_trees = "ExtraTrees" in estimator
    if is_extra_trees and data_shape == (2000, 50):
        pytest.skip("Avoid a segmentation fault in Extra Trees algorithms")

    target = GPU_ESTIMATORS[estimator]
    with config_context(target_offload=queue):
        # No dataframe conversion target is requested for the GPU run.
        _kfold_function_template(target, None, data_shape, queue, layout_converter)
320
+
321
+
322
@pytest.mark.skipif(
    not _is_dpc_backend,
    reason="__sycl_usm_array_interface__ support requires DPC backend.",
)
@pytest.mark.parametrize(
    "dataframe,queue", get_dataframes_and_queues("dpctl,dpnp", "cpu,gpu")
)
@pytest.mark.parametrize("order", ["F", "C"])
@pytest.mark.parametrize("data_shape", data_shapes)
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_table_conversions_memory_leaks(dataframe, queue, order, data_shape, dtype):
    """Check table conversions for leaks by running ``DummyEstimator``."""
    layout_converter = ORDER_DICT[order]

    # GPU measurements need both the sysman environment variable and an
    # available GPU device; otherwise the GPU case is skipped.
    if queue.sycl_device.is_gpu and not (
        os.getenv("ZES_ENABLE_SYSMAN") is not None and is_dpctl_device_available("gpu")
    ):
        pytest.skip("SYCL device memory leak check requires the level zero sysman")

    _kfold_function_template(
        DummyEstimator, dataframe, data_shape, queue, layout_converter, dtype
    )
@@ -0,0 +1,269 @@
1
+ # ===============================================================================
2
+ # Copyright 2021 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ===============================================================================
16
+
17
+ import sklearnex
18
+ from daal4py.sklearn._utils import daal_check_version
19
+
20
+ # General use of patch_sklearn and unpatch_sklearn in pytest is not recommended.
21
+ # It changes global state and can impact the operation of other tests. This file
22
+ # specifically tests patch_sklearn and unpatch_sklearn and is exempt from this.
23
+ # If sklearnex patching is necessary in testing, use the 'with_sklearnex' pytest
24
+ # fixture.
25
+
26
+
27
def test_monkey_patching():
    """Check global and per-token patching/unpatching of every patch target.

    Four phases are exercised, and the unpatched state is restored afterwards:

    1. patch everything: every target must live in ``daal4py`` or ``sklearnex``;
    2. unpatch token by token: every target must be back in ``sklearn``;
    3. after a global unpatch: every target must still be in ``sklearn``;
    4. patch token by token: every target must be accelerated again.
    """
    tokens = sklearnex.get_patch_names()
    # Each patch-map value is a list of descriptors; element 0 of a descriptor
    # begins with the owner object and the attribute name that gets replaced.
    targets = [
        descriptor[0]
        for descriptors in sklearnex.get_patch_map().values()
        for descriptor in descriptors
    ]
    # Tokens and targets are paired by position. Indexing (rather than zip)
    # still raises IndexError if there are fewer targets than tokens.
    entries = [
        (token, targets[idx][0], targets[idx][1]) for idx, token in enumerate(tokens)
    ]

    def assert_patched(owner, attr):
        module_name = getattr(owner, attr).__module__
        assert module_name.startswith(
            ("daal4py", "sklearnex")
        ), "Patching has completed with error."

    def assert_unpatched(owner, attr):
        restored = getattr(owner, attr, None)
        # A missing (or None) attribute leaves nothing to verify.
        if restored is None:
            return
        # Compare the top-level package exactly: a plain startswith("sklearn")
        # would also accept "sklearnex..." and hide a failed unpatch.
        top_level_package = restored.__module__.split(".", 1)[0]
        assert top_level_package == "sklearn", "Unpatching has completed with error."

    try:
        sklearnex.patch_sklearn()

        for _, owner, attr in entries:
            assert_patched(owner, attr)

        for token, owner, attr in entries:
            sklearnex.unpatch_sklearn(token)
            assert_unpatched(owner, attr)

    finally:
        sklearnex.unpatch_sklearn()

    try:
        for _, owner, attr in entries:
            assert_unpatched(owner, attr)

    finally:
        sklearnex.unpatch_sklearn()

    try:
        for token, owner, attr in entries:
            sklearnex.patch_sklearn(token)
            assert_patched(owner, attr)
    finally:
        sklearnex.unpatch_sklearn()
95
+
96
+
97
def test_patch_by_list_simple():
    """Patch only ``LogisticRegression`` and check the other estimators stay stock."""
    try:
        sklearnex.patch_sklearn(["LogisticRegression"])

        # Imported after patching so the names resolve to the current classes.
        from sklearn.ensemble import RandomForestRegressor
        from sklearn.linear_model import LogisticRegression
        from sklearn.neighbors import KNeighborsRegressor
        from sklearn.svm import SVC

        # The "sklearn." prefix (with the dot) is required: a bare "sklearn"
        # prefix would also match "sklearnex..." and never detect a stray patch.
        assert RandomForestRegressor.__module__.startswith("sklearn.")
        assert KNeighborsRegressor.__module__.startswith("sklearn.")
        # The providing package depends on the daal_check_version result.
        if daal_check_version((2024, "P", 1)):
            assert LogisticRegression.__module__.startswith("sklearnex")
        else:
            assert LogisticRegression.__module__.startswith("daal4py")
        assert SVC.__module__.startswith("sklearn.")
    finally:
        sklearnex.unpatch_sklearn()
115
+
116
+
117
def test_patch_by_list_many_estimators():
    """Patch ``LogisticRegression`` and ``SVC`` only; the rest must stay stock."""
    try:
        sklearnex.patch_sklearn(["LogisticRegression", "SVC"])

        # Imported after patching so the names resolve to the current classes.
        from sklearn.ensemble import RandomForestRegressor
        from sklearn.linear_model import LogisticRegression
        from sklearn.neighbors import KNeighborsRegressor
        from sklearn.svm import SVC

        # The "sklearn." prefix (with the dot) is required: a bare "sklearn"
        # prefix would also match "sklearnex..." and never detect a stray patch.
        assert RandomForestRegressor.__module__.startswith("sklearn.")
        assert KNeighborsRegressor.__module__.startswith("sklearn.")
        # The providing package depends on the daal_check_version result.
        if daal_check_version((2024, "P", 1)):
            assert LogisticRegression.__module__.startswith("sklearnex")
        else:
            assert LogisticRegression.__module__.startswith("daal4py")
        assert SVC.__module__.startswith(("daal4py", "sklearnex"))

    finally:
        sklearnex.unpatch_sklearn()
138
+
139
+
140
def test_unpatch_by_list_many_estimators():
    """Patch everything, unpatch two estimators by list, and check both states."""
    accelerated = ("daal4py", "sklearnex")
    try:
        sklearnex.patch_sklearn()

        from sklearn.ensemble import RandomForestRegressor
        from sklearn.linear_model import LogisticRegression
        from sklearn.neighbors import KNeighborsRegressor
        from sklearn.svm import SVC

        assert RandomForestRegressor.__module__.startswith("sklearnex")
        assert KNeighborsRegressor.__module__.startswith(accelerated)
        # The providing package depends on the daal_check_version result.
        if daal_check_version((2024, "P", 1)):
            assert LogisticRegression.__module__.startswith("sklearnex")
        else:
            assert LogisticRegression.__module__.startswith("daal4py")
        assert SVC.__module__.startswith(accelerated)

        sklearnex.unpatch_sklearn(["KNeighborsRegressor", "RandomForestRegressor"])

        # Re-import so the local names pick up the classes now installed in
        # the sklearn modules.
        from sklearn.ensemble import RandomForestRegressor
        from sklearn.linear_model import LogisticRegression
        from sklearn.neighbors import KNeighborsRegressor
        from sklearn.svm import SVC

        # The "sklearn." prefix (with the dot) is required: a bare "sklearn"
        # prefix would also match "sklearnex..." and never detect a failed unpatch.
        assert RandomForestRegressor.__module__.startswith("sklearn.")
        assert KNeighborsRegressor.__module__.startswith("sklearn.")
        # Estimators that were not in the unpatch list must remain accelerated.
        if daal_check_version((2024, "P", 1)):
            assert LogisticRegression.__module__.startswith("sklearnex")
        else:
            assert LogisticRegression.__module__.startswith("daal4py")

        assert SVC.__module__.startswith(accelerated)
    finally:
        sklearnex.unpatch_sklearn()
180
+
181
+
182
def test_patching_checker():
    """Check ``sklearn_is_patched`` for named and global patching.

    For each name (``None``, ``"SVC"``, ``"PCA"``) the named patch/unpatch
    round-trip is verified, followed by a global patch/unpatch whose
    per-entry status map is inspected.
    """

    def assert_every_status(expected):
        # The status map must have one entry per patch name, and every entry
        # must have the expected truthiness.
        status_map = sklearnex.sklearn_is_patched(return_map=True)
        assert len(status_map) == len(sklearnex.get_patch_names())
        for is_patched in status_map.values():
            assert bool(is_patched) is expected

    for name in (None, "SVC", "PCA"):
        try:
            sklearnex.patch_sklearn(name=name)
            assert sklearnex.sklearn_is_patched(name=name)

        finally:
            sklearnex.unpatch_sklearn(name=name)
            assert not sklearnex.sklearn_is_patched(name=name)

        try:
            sklearnex.patch_sklearn()
            assert_every_status(True)
        finally:
            sklearnex.unpatch_sklearn()

        assert_every_status(False)
204
+
205
+
206
def test_preview_namespace():
    """Check estimator origins with preview patching, no patching, and default patching."""

    def build_estimators():
        # Import inside the helper so each call sees the classes currently
        # installed in the sklearn modules.
        from sklearn.cluster import DBSCAN
        from sklearn.decomposition import PCA
        from sklearn.ensemble import RandomForestClassifier
        from sklearn.linear_model import LinearRegression
        from sklearn.svm import SVC

        return LinearRegression(), PCA(), DBSCAN(), SVC(), RandomForestClassifier()

    from sklearnex.dispatcher import _is_preview_enabled

    # preview patching behavior
    try:
        sklearnex.patch_sklearn(preview=True)

        assert _is_preview_enabled()

        linreg, pca, dbscan, svc, forest = build_estimators()
        assert "sklearnex" in forest.__module__

        # The package providing LinearRegression depends on daal_check_version.
        if daal_check_version((2023, "P", 100)):
            assert "sklearnex" in linreg.__module__
        else:
            assert "daal4py" in linreg.__module__

        for estimator in (pca, dbscan, svc):
            assert "sklearnex" in estimator.__module__

    finally:
        sklearnex.unpatch_sklearn()

    # no patching behavior
    for estimator in build_estimators():
        origin = estimator.__module__
        assert "sklearn." in origin and "daal4py" not in origin

    # default patching behavior
    try:
        sklearnex.patch_sklearn()
        assert not _is_preview_enabled()

        linreg, pca, dbscan, svc, forest = build_estimators()

        if daal_check_version((2023, "P", 100)):
            assert "sklearnex" in linreg.__module__
        else:
            assert "daal4py" in linreg.__module__

        for estimator in (pca, forest, dbscan, svc):
            assert "sklearnex" in estimator.__module__
    finally:
        sklearnex.unpatch_sklearn()