scikit-learn-intelex 2025.0.0__py39-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic.

Files changed (278)
  1. daal4py/__init__.py +73 -0
  2. daal4py/__main__.py +58 -0
  3. daal4py/_daal4py.cpython-39-x86_64-linux-gnu.so +0 -0
  4. daal4py/doc/third-party-programs.txt +424 -0
  5. daal4py/mb/__init__.py +19 -0
  6. daal4py/mb/model_builders.py +377 -0
  7. daal4py/mpi_transceiver.cpython-39-x86_64-linux-gnu.so +0 -0
  8. daal4py/sklearn/__init__.py +40 -0
  9. daal4py/sklearn/_n_jobs_support.py +242 -0
  10. daal4py/sklearn/_utils.py +241 -0
  11. daal4py/sklearn/cluster/__init__.py +20 -0
  12. daal4py/sklearn/cluster/dbscan.py +165 -0
  13. daal4py/sklearn/cluster/k_means.py +597 -0
  14. daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
  15. daal4py/sklearn/decomposition/__init__.py +19 -0
  16. daal4py/sklearn/decomposition/_pca.py +524 -0
  17. daal4py/sklearn/ensemble/AdaBoostClassifier.py +192 -0
  18. daal4py/sklearn/ensemble/GBTDAAL.py +318 -0
  19. daal4py/sklearn/ensemble/__init__.py +27 -0
  20. daal4py/sklearn/ensemble/_forest.py +1397 -0
  21. daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
  22. daal4py/sklearn/linear_model/__init__.py +29 -0
  23. daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
  24. daal4py/sklearn/linear_model/_linear.py +272 -0
  25. daal4py/sklearn/linear_model/_ridge.py +325 -0
  26. daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
  27. daal4py/sklearn/linear_model/linear.py +17 -0
  28. daal4py/sklearn/linear_model/logistic_loss.py +195 -0
  29. daal4py/sklearn/linear_model/logistic_path.py +1026 -0
  30. daal4py/sklearn/linear_model/ridge.py +17 -0
  31. daal4py/sklearn/linear_model/tests/test_linear.py +196 -0
  32. daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
  33. daal4py/sklearn/manifold/__init__.py +19 -0
  34. daal4py/sklearn/manifold/_t_sne.py +405 -0
  35. daal4py/sklearn/metrics/__init__.py +20 -0
  36. daal4py/sklearn/metrics/_pairwise.py +155 -0
  37. daal4py/sklearn/metrics/_ranking.py +210 -0
  38. daal4py/sklearn/model_selection/__init__.py +19 -0
  39. daal4py/sklearn/model_selection/_split.py +309 -0
  40. daal4py/sklearn/model_selection/tests/test_split.py +56 -0
  41. daal4py/sklearn/monkeypatch/__init__.py +0 -0
  42. daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
  43. daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
  44. daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
  45. daal4py/sklearn/monkeypatch/tests/test_patching.py +87 -0
  46. daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +118 -0
  47. daal4py/sklearn/neighbors/__init__.py +21 -0
  48. daal4py/sklearn/neighbors/_base.py +503 -0
  49. daal4py/sklearn/neighbors/_classification.py +139 -0
  50. daal4py/sklearn/neighbors/_regression.py +74 -0
  51. daal4py/sklearn/neighbors/_unsupervised.py +55 -0
  52. daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
  53. daal4py/sklearn/svm/__init__.py +19 -0
  54. daal4py/sklearn/svm/svm.py +734 -0
  55. daal4py/sklearn/utils/__init__.py +21 -0
  56. daal4py/sklearn/utils/base.py +75 -0
  57. daal4py/sklearn/utils/tests/test_utils.py +51 -0
  58. daal4py/sklearn/utils/validation.py +693 -0
  59. onedal/__init__.py +83 -0
  60. onedal/_config.py +53 -0
  61. onedal/_device_offload.py +229 -0
  62. onedal/_onedal_py_dpc.cpython-39-x86_64-linux-gnu.so +0 -0
  63. onedal/_onedal_py_host.cpython-39-x86_64-linux-gnu.so +0 -0
  64. onedal/_onedal_py_spmd_dpc.cpython-39-x86_64-linux-gnu.so +0 -0
  65. onedal/basic_statistics/__init__.py +20 -0
  66. onedal/basic_statistics/basic_statistics.py +107 -0
  67. onedal/basic_statistics/incremental_basic_statistics.py +160 -0
  68. onedal/basic_statistics/tests/test_basic_statistics.py +298 -0
  69. onedal/basic_statistics/tests/test_incremental_basic_statistics.py +196 -0
  70. onedal/cluster/__init__.py +27 -0
  71. onedal/cluster/dbscan.py +110 -0
  72. onedal/cluster/kmeans.py +560 -0
  73. onedal/cluster/kmeans_init.py +115 -0
  74. onedal/cluster/tests/test_dbscan.py +125 -0
  75. onedal/cluster/tests/test_kmeans.py +88 -0
  76. onedal/cluster/tests/test_kmeans_init.py +93 -0
  77. onedal/common/_base.py +38 -0
  78. onedal/common/_estimator_checks.py +47 -0
  79. onedal/common/_mixin.py +62 -0
  80. onedal/common/_policy.py +59 -0
  81. onedal/common/_spmd_policy.py +30 -0
  82. onedal/common/hyperparameters.py +116 -0
  83. onedal/common/tests/test_policy.py +75 -0
  84. onedal/covariance/__init__.py +20 -0
  85. onedal/covariance/covariance.py +125 -0
  86. onedal/covariance/incremental_covariance.py +146 -0
  87. onedal/covariance/tests/test_covariance.py +50 -0
  88. onedal/covariance/tests/test_incremental_covariance.py +122 -0
  89. onedal/datatypes/__init__.py +19 -0
  90. onedal/datatypes/_data_conversion.py +95 -0
  91. onedal/datatypes/tests/test_data.py +235 -0
  92. onedal/decomposition/__init__.py +20 -0
  93. onedal/decomposition/incremental_pca.py +204 -0
  94. onedal/decomposition/pca.py +186 -0
  95. onedal/decomposition/tests/test_incremental_pca.py +198 -0
  96. onedal/ensemble/__init__.py +29 -0
  97. onedal/ensemble/forest.py +720 -0
  98. onedal/ensemble/tests/test_random_forest.py +97 -0
  99. onedal/linear_model/__init__.py +27 -0
  100. onedal/linear_model/incremental_linear_model.py +258 -0
  101. onedal/linear_model/linear_model.py +329 -0
  102. onedal/linear_model/logistic_regression.py +249 -0
  103. onedal/linear_model/tests/test_incremental_linear_regression.py +168 -0
  104. onedal/linear_model/tests/test_incremental_ridge_regression.py +107 -0
  105. onedal/linear_model/tests/test_linear_regression.py +149 -0
  106. onedal/linear_model/tests/test_logistic_regression.py +95 -0
  107. onedal/linear_model/tests/test_ridge.py +95 -0
  108. onedal/neighbors/__init__.py +19 -0
  109. onedal/neighbors/neighbors.py +778 -0
  110. onedal/neighbors/tests/test_knn_classification.py +49 -0
  111. onedal/primitives/__init__.py +27 -0
  112. onedal/primitives/get_tree.py +25 -0
  113. onedal/primitives/kernel_functions.py +153 -0
  114. onedal/primitives/tests/test_kernel_functions.py +159 -0
  115. onedal/spmd/__init__.py +25 -0
  116. onedal/spmd/_base.py +30 -0
  117. onedal/spmd/basic_statistics/__init__.py +20 -0
  118. onedal/spmd/basic_statistics/basic_statistics.py +30 -0
  119. onedal/spmd/basic_statistics/incremental_basic_statistics.py +69 -0
  120. onedal/spmd/cluster/__init__.py +28 -0
  121. onedal/spmd/cluster/dbscan.py +23 -0
  122. onedal/spmd/cluster/kmeans.py +56 -0
  123. onedal/spmd/covariance/__init__.py +20 -0
  124. onedal/spmd/covariance/covariance.py +26 -0
  125. onedal/spmd/covariance/incremental_covariance.py +82 -0
  126. onedal/spmd/decomposition/__init__.py +20 -0
  127. onedal/spmd/decomposition/incremental_pca.py +117 -0
  128. onedal/spmd/decomposition/pca.py +26 -0
  129. onedal/spmd/ensemble/__init__.py +19 -0
  130. onedal/spmd/ensemble/forest.py +28 -0
  131. onedal/spmd/linear_model/__init__.py +21 -0
  132. onedal/spmd/linear_model/incremental_linear_model.py +97 -0
  133. onedal/spmd/linear_model/linear_model.py +30 -0
  134. onedal/spmd/linear_model/logistic_regression.py +38 -0
  135. onedal/spmd/neighbors/__init__.py +19 -0
  136. onedal/spmd/neighbors/neighbors.py +75 -0
  137. onedal/svm/__init__.py +19 -0
  138. onedal/svm/svm.py +556 -0
  139. onedal/svm/tests/test_csr_svm.py +351 -0
  140. onedal/svm/tests/test_nusvc.py +204 -0
  141. onedal/svm/tests/test_nusvr.py +210 -0
  142. onedal/svm/tests/test_svc.py +168 -0
  143. onedal/svm/tests/test_svr.py +243 -0
  144. onedal/tests/test_common.py +41 -0
  145. onedal/tests/utils/_dataframes_support.py +168 -0
  146. onedal/tests/utils/_device_selection.py +107 -0
  147. onedal/utils/__init__.py +49 -0
  148. onedal/utils/_array_api.py +91 -0
  149. onedal/utils/validation.py +432 -0
  150. scikit_learn_intelex-2025.0.0.dist-info/LICENSE.txt +202 -0
  151. scikit_learn_intelex-2025.0.0.dist-info/METADATA +231 -0
  152. scikit_learn_intelex-2025.0.0.dist-info/RECORD +278 -0
  153. scikit_learn_intelex-2025.0.0.dist-info/WHEEL +5 -0
  154. scikit_learn_intelex-2025.0.0.dist-info/top_level.txt +3 -0
  155. sklearnex/__init__.py +65 -0
  156. sklearnex/__main__.py +58 -0
  157. sklearnex/_config.py +98 -0
  158. sklearnex/_device_offload.py +121 -0
  159. sklearnex/_utils.py +109 -0
  160. sklearnex/basic_statistics/__init__.py +20 -0
  161. sklearnex/basic_statistics/basic_statistics.py +140 -0
  162. sklearnex/basic_statistics/incremental_basic_statistics.py +288 -0
  163. sklearnex/basic_statistics/tests/test_basic_statistics.py +251 -0
  164. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +384 -0
  165. sklearnex/cluster/__init__.py +20 -0
  166. sklearnex/cluster/dbscan.py +192 -0
  167. sklearnex/cluster/k_means.py +383 -0
  168. sklearnex/cluster/tests/test_dbscan.py +38 -0
  169. sklearnex/cluster/tests/test_kmeans.py +153 -0
  170. sklearnex/conftest.py +73 -0
  171. sklearnex/covariance/__init__.py +19 -0
  172. sklearnex/covariance/incremental_covariance.py +368 -0
  173. sklearnex/covariance/tests/test_incremental_covariance.py +226 -0
  174. sklearnex/decomposition/__init__.py +19 -0
  175. sklearnex/decomposition/pca.py +414 -0
  176. sklearnex/decomposition/tests/test_pca.py +58 -0
  177. sklearnex/dispatcher.py +543 -0
  178. sklearnex/doc/third-party-programs.txt +424 -0
  179. sklearnex/ensemble/__init__.py +29 -0
  180. sklearnex/ensemble/_forest.py +2016 -0
  181. sklearnex/ensemble/tests/test_forest.py +120 -0
  182. sklearnex/glob/__main__.py +72 -0
  183. sklearnex/glob/dispatcher.py +101 -0
  184. sklearnex/linear_model/__init__.py +32 -0
  185. sklearnex/linear_model/coordinate_descent.py +30 -0
  186. sklearnex/linear_model/incremental_linear.py +463 -0
  187. sklearnex/linear_model/incremental_ridge.py +418 -0
  188. sklearnex/linear_model/linear.py +302 -0
  189. sklearnex/linear_model/logistic_path.py +17 -0
  190. sklearnex/linear_model/logistic_regression.py +403 -0
  191. sklearnex/linear_model/ridge.py +24 -0
  192. sklearnex/linear_model/tests/test_incremental_linear.py +203 -0
  193. sklearnex/linear_model/tests/test_incremental_ridge.py +153 -0
  194. sklearnex/linear_model/tests/test_linear.py +142 -0
  195. sklearnex/linear_model/tests/test_logreg.py +134 -0
  196. sklearnex/manifold/__init__.py +19 -0
  197. sklearnex/manifold/t_sne.py +21 -0
  198. sklearnex/manifold/tests/test_tsne.py +26 -0
  199. sklearnex/metrics/__init__.py +23 -0
  200. sklearnex/metrics/pairwise.py +22 -0
  201. sklearnex/metrics/ranking.py +20 -0
  202. sklearnex/metrics/tests/test_metrics.py +39 -0
  203. sklearnex/model_selection/__init__.py +21 -0
  204. sklearnex/model_selection/split.py +22 -0
  205. sklearnex/model_selection/tests/test_model_selection.py +34 -0
  206. sklearnex/neighbors/__init__.py +27 -0
  207. sklearnex/neighbors/_lof.py +231 -0
  208. sklearnex/neighbors/common.py +310 -0
  209. sklearnex/neighbors/knn_classification.py +226 -0
  210. sklearnex/neighbors/knn_regression.py +203 -0
  211. sklearnex/neighbors/knn_unsupervised.py +170 -0
  212. sklearnex/neighbors/tests/test_neighbors.py +80 -0
  213. sklearnex/preview/__init__.py +17 -0
  214. sklearnex/preview/covariance/__init__.py +19 -0
  215. sklearnex/preview/covariance/covariance.py +133 -0
  216. sklearnex/preview/covariance/tests/test_covariance.py +66 -0
  217. sklearnex/preview/decomposition/__init__.py +19 -0
  218. sklearnex/preview/decomposition/incremental_pca.py +228 -0
  219. sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
  220. sklearnex/preview/linear_model/__init__.py +19 -0
  221. sklearnex/preview/linear_model/ridge.py +419 -0
  222. sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
  223. sklearnex/spmd/__init__.py +25 -0
  224. sklearnex/spmd/basic_statistics/__init__.py +20 -0
  225. sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
  226. sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
  227. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  228. sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +307 -0
  229. sklearnex/spmd/cluster/__init__.py +30 -0
  230. sklearnex/spmd/cluster/dbscan.py +50 -0
  231. sklearnex/spmd/cluster/kmeans.py +21 -0
  232. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  233. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
  234. sklearnex/spmd/covariance/__init__.py +20 -0
  235. sklearnex/spmd/covariance/covariance.py +21 -0
  236. sklearnex/spmd/covariance/incremental_covariance.py +37 -0
  237. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  238. sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
  239. sklearnex/spmd/decomposition/__init__.py +20 -0
  240. sklearnex/spmd/decomposition/incremental_pca.py +30 -0
  241. sklearnex/spmd/decomposition/pca.py +21 -0
  242. sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
  243. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  244. sklearnex/spmd/ensemble/__init__.py +19 -0
  245. sklearnex/spmd/ensemble/forest.py +71 -0
  246. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  247. sklearnex/spmd/linear_model/__init__.py +21 -0
  248. sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
  249. sklearnex/spmd/linear_model/linear_model.py +21 -0
  250. sklearnex/spmd/linear_model/logistic_regression.py +21 -0
  251. sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +329 -0
  252. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  253. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +166 -0
  254. sklearnex/spmd/neighbors/__init__.py +19 -0
  255. sklearnex/spmd/neighbors/neighbors.py +25 -0
  256. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  257. sklearnex/svm/__init__.py +29 -0
  258. sklearnex/svm/_common.py +328 -0
  259. sklearnex/svm/nusvc.py +332 -0
  260. sklearnex/svm/nusvr.py +148 -0
  261. sklearnex/svm/svc.py +360 -0
  262. sklearnex/svm/svr.py +149 -0
  263. sklearnex/svm/tests/test_svm.py +93 -0
  264. sklearnex/tests/_utils.py +328 -0
  265. sklearnex/tests/_utils_spmd.py +198 -0
  266. sklearnex/tests/test_common.py +54 -0
  267. sklearnex/tests/test_config.py +43 -0
  268. sklearnex/tests/test_memory_usage.py +291 -0
  269. sklearnex/tests/test_monkeypatch.py +276 -0
  270. sklearnex/tests/test_n_jobs_support.py +103 -0
  271. sklearnex/tests/test_parallel.py +48 -0
  272. sklearnex/tests/test_patching.py +385 -0
  273. sklearnex/tests/test_run_to_run_stability.py +296 -0
  274. sklearnex/utils/__init__.py +19 -0
  275. sklearnex/utils/_array_api.py +82 -0
  276. sklearnex/utils/parallel.py +59 -0
  277. sklearnex/utils/tests/test_finite.py +89 -0
  278. sklearnex/utils/validation.py +17 -0
sklearnex/tests/test_memory_usage.py
@@ -0,0 +1,291 @@
+ # ==============================================================================
+ # Copyright 2021 Intel Corporation
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+ import gc
+ import logging
+ import os
+ import tracemalloc
+ import types
+ import warnings
+ from inspect import isclass
+
+ import numpy as np
+ import pandas as pd
+ import pytest
+ from scipy.stats import pearsonr
+ from sklearn.base import BaseEstimator, clone
+ from sklearn.datasets import make_classification
+ from sklearn.model_selection import KFold
+
+ from onedal import _is_dpc_backend
+ from onedal.tests.utils._dataframes_support import (
+     _convert_to_dataframe,
+     get_dataframes_and_queues,
+ )
+ from onedal.tests.utils._device_selection import get_queues, is_dpctl_available
+ from sklearnex import config_context
+ from sklearnex.tests._utils import PATCHED_FUNCTIONS, PATCHED_MODELS, SPECIAL_INSTANCES
+ from sklearnex.utils._array_api import get_namespace
+
+ if _is_dpc_backend:
+     from onedal import _backend
+
+
+ CPU_SKIP_LIST = (
+     "TSNE",  # too slow for using in testing on common data size
+     "config_context",  # does not malloc
+     "get_config",  # does not malloc
+     "set_config",  # does not malloc
+     "SVC(probability=True)",  # memory leak fortran numpy (investigate _fit_proba)
+     "NuSVC(probability=True)",  # memory leak fortran numpy (investigate _fit_proba)
+     "IncrementalEmpiricalCovariance",  # dataframe_f issues
+     "IncrementalLinearRegression",  # TODO fix memory leak issue in private CI for data_shape = (1000, 100), data_transform_function = dataframe_f
+     "IncrementalPCA",  # TODO fix memory leak issue in private CI for data_shape = (1000, 100), data_transform_function = dataframe_f
+     "IncrementalRidge",  # TODO fix memory leak issue in private CI for data_shape = (1000, 100), data_transform_function = dataframe_f
+     "LogisticRegression(solver='newton-cg')",  # memory leak fortran (1000, 100)
+ )
+
+ GPU_SKIP_LIST = (
+     "TSNE",  # too slow for using in testing on common data size
+     "RandomForestRegressor",  # too slow for using in testing on common data size
+     "KMeans",  # does not support GPU offloading
+     "config_context",  # does not malloc
+     "get_config",  # does not malloc
+     "set_config",  # does not malloc
+     "Ridge",  # does not support GPU offloading (fails silently)
+     "ElasticNet",  # does not support GPU offloading (fails silently)
+     "Lasso",  # does not support GPU offloading (fails silently)
+     "SVR",  # does not support GPU offloading (fails silently)
+     "NuSVR",  # does not support GPU offloading (fails silently)
+     "NuSVC",  # does not support GPU offloading (fails silently)
+     "LogisticRegression",  # default parameters not supported, see solver=newton-cg
+     "NuSVC(probability=True)",  # does not support GPU offloading (fails silently)
+     "IncrementalLinearRegression",  # issue with potrf with the specific dataset
+     "LinearRegression",  # issue with potrf with the specific dataset
+ )
+
+
+ def gen_functions(functions):
+     func_dict = functions.copy()
+
+     roc_auc_score = func_dict.pop("roc_auc_score")
+     func_dict["roc_auc_score"] = lambda x, y: roc_auc_score(y, y)
+
+     pairwise_distances = func_dict.pop("pairwise_distances")
+     func_dict["pairwise_distances(metric='cosine')"] = lambda x, y: pairwise_distances(
+         x, metric="cosine"
+     )
+     func_dict["pairwise_distances(metric='correlation')"] = (
+         lambda x, y: pairwise_distances(x, metric="correlation")
+     )
+
+     _assert_all_finite = func_dict.pop("_assert_all_finite")
+     func_dict["_assert_all_finite"] = lambda x, y: [
+         _assert_all_finite(x),
+         _assert_all_finite(y),
+     ]
+     return func_dict
+
+
+ FUNCTIONS = gen_functions(PATCHED_FUNCTIONS)
+
+ CPU_ESTIMATORS = {
+     k: v
+     for k, v in {**PATCHED_MODELS, **SPECIAL_INSTANCES, **FUNCTIONS}.items()
+     if not k in CPU_SKIP_LIST
+ }
+
+ GPU_ESTIMATORS = {
+     k: v
+     for k, v in {**PATCHED_MODELS, **SPECIAL_INSTANCES}.items()
+     if not k in GPU_SKIP_LIST
+ }
+
+ data_shapes = [
+     pytest.param((1000, 100), id="(1000, 100)"),
+     pytest.param((2000, 50), id="(2000, 50)"),
+ ]
+
+ EXTRA_MEMORY_THRESHOLD = 0.15
+ N_SPLITS = 10
+ ORDER_DICT = {"F": np.asfortranarray, "C": np.ascontiguousarray}
+
+
+ def gen_clsf_data(n_samples, n_features):
+     data, label = make_classification(
+         n_classes=2, n_samples=n_samples, n_features=n_features, random_state=777
+     )
+     return (
+         data,
+         label,
+         data.size * data.dtype.itemsize + label.size * label.dtype.itemsize,
+     )
+
+
+ def get_traced_memory(queue=None):
+     if _is_dpc_backend and queue and queue.sycl_device.is_gpu:
+         return _backend.get_used_memory(queue)
+     else:
+         return tracemalloc.get_traced_memory()[0]
+
+
+ def take(x, index, axis=0, queue=None):
+     xp, array_api = get_namespace(x)
+     if array_api:
+         return xp.take(x, xp.asarray(index, device=queue), axis=axis)
+     else:
+         return x.take(index, axis=axis)
+
+
+ def split_train_inference(kf, x, y, estimator, queue=None):
+     mem_tracks = []
+     for train_index, test_index in kf.split(x):
+         x_train = take(x, train_index, queue=queue)
+         y_train = take(y, train_index, queue=queue)
+         x_test = take(x, test_index, queue=queue)
+         y_test = take(y, test_index, queue=queue)
+
+         if isclass(estimator) and issubclass(estimator, BaseEstimator):
+             alg = estimator()
+             flag = True
+         elif isinstance(estimator, BaseEstimator):
+             alg = clone(estimator)
+             flag = True
+         else:
+             flag = False
+
+         if flag:
+             alg.fit(x_train, y_train)
+             if hasattr(alg, "predict"):
+                 alg.predict(x_test)
+             elif hasattr(alg, "transform"):
+                 alg.transform(x_test)
+             elif hasattr(alg, "kneighbors"):
+                 alg.kneighbors(x_test)
+             del alg
+         else:
+             estimator(x_train, y_train)
+
+         del x_train, x_test, y_train, y_test, flag
+         mem_tracks.append(get_traced_memory(queue))
+     return mem_tracks
+
+
+ def _kfold_function_template(estimator, dataframe, data_shape, queue=None, func=None):
+     tracemalloc.start()
+
+     n_samples, n_features = data_shape
+     X, y, data_memory_size = gen_clsf_data(n_samples, n_features)
+     kf = KFold(n_splits=N_SPLITS)
+     if func:
+         X = func(X)
+
+     X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+     y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
+
+     mem_before = get_traced_memory(queue)
+     mem_tracks = split_train_inference(kf, X, y, estimator, queue=queue)
+     mem_iter_diffs = np.array(mem_tracks[1:]) - np.array(mem_tracks[:-1])
+     mem_incr_mean, mem_incr_std = mem_iter_diffs.mean(), mem_iter_diffs.std()
+     mem_incr_mean, mem_incr_std = round(mem_incr_mean), round(mem_incr_std)
+     with warnings.catch_warnings():
+         # In the case that the memory usage is constant, this will raise
+         # a ConstantInputWarning error in pearsonr from scipy, this can
+         # be ignored.
+         warnings.filterwarnings(
+             "ignore",
+             message="An input array is constant; the correlation coefficient is not defined",
+         )
+         mem_iter_corr, _ = pearsonr(mem_tracks, list(range(len(mem_tracks))))
+
+     if mem_iter_corr > 0.95:
+         logging.warning(
+             "Memory usage is steadily increasing with iterations "
+             "(Pearson correlation coefficient between "
+             f"memory tracks and iterations is {mem_iter_corr})\n"
+             "Memory usage increase per iteration: "
+             f"{mem_incr_mean}±{mem_incr_std} bytes"
+         )
+     mem_before_gc = get_traced_memory(queue)
+     mem_diff = mem_before_gc - mem_before
+     if isinstance(estimator, BaseEstimator):
+         name = str(estimator)
+     else:
+         name = estimator.__name__
+
+     message = (
+         "Size of extra allocated memory {} using garbage collector "
+         f"is greater than {EXTRA_MEMORY_THRESHOLD * 100}% of input data"
+         f"\n\tAlgorithm: {name}"
+         f"\n\tInput data size: {data_memory_size} bytes"
+         "\n\tExtra allocated memory size: {} bytes"
+         " / {} %"
+     )
+     if mem_diff >= EXTRA_MEMORY_THRESHOLD * data_memory_size:
+         logging.warning(
+             message.format(
+                 "before", mem_diff, round((mem_diff) / data_memory_size * 100, 2)
+             )
+         )
+     gc.collect()
+     mem_after = get_traced_memory(queue)
+     tracemalloc.stop()
+     mem_diff = mem_after - mem_before
+
+     # GPU offloading with SYCL contains a program/kernel cache which should
+     # be controllable via a KernelProgramCache object in the SYCL context.
+     # The programs and kernels are stored on the GPU, but cannot be cleared
+     # as this class is not available for access in all oneDAL DPC++ runtimes.
+     # Therefore, until this is implemented this test must be skipped for gpu
+     # as it looks like a memory leak (at least there is no way to discern a
+     # leak on the first run).
+     if queue is None or queue.sycl_device.is_cpu:
+         assert mem_diff < EXTRA_MEMORY_THRESHOLD * data_memory_size, message.format(
+             "after", mem_diff, round((mem_diff) / data_memory_size * 100, 2)
+         )
+
+
+ @pytest.mark.parametrize("order", ["F", "C"])
+ @pytest.mark.parametrize(
+     "dataframe,queue", get_dataframes_and_queues("numpy,pandas,dpctl", "cpu")
+ )
+ @pytest.mark.parametrize("estimator", CPU_ESTIMATORS.keys())
+ @pytest.mark.parametrize("data_shape", data_shapes)
+ def test_memory_leaks(estimator, dataframe, queue, order, data_shape):
+     func = ORDER_DICT[order]
+     if estimator == "_assert_all_finite" and queue is not None:
+         pytest.skip(f"{estimator} is not designed for device offloading")
+
+     _kfold_function_template(
+         CPU_ESTIMATORS[estimator], dataframe, data_shape, queue, func
+     )
+
+
+ @pytest.mark.skipif(
+     os.getenv("ZES_ENABLE_SYSMAN") is None or not is_dpctl_available("gpu"),
+     reason="SYCL device memory leak check requires the level zero sysman",
+ )
+ @pytest.mark.parametrize("queue", get_queues("gpu"))
+ @pytest.mark.parametrize("estimator", GPU_ESTIMATORS.keys())
+ @pytest.mark.parametrize("order", ["F", "C"])
+ @pytest.mark.parametrize("data_shape", data_shapes)
+ def test_gpu_memory_leaks(estimator, queue, order, data_shape):
+     func = ORDER_DICT[order]
+     if "ExtraTrees" in estimator and data_shape == (2000, 50):
+         pytest.skip("Avoid a segmentation fault in Extra Trees algorithms")
+
+     with config_context(target_offload=queue):
+         _kfold_function_template(GPU_ESTIMATORS[estimator], None, data_shape, queue, func)
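
Editorial note: the test file above runs the same leak check for every patched estimator; the sketch below reduces it to a single estimator to show the pattern and is not part of the packaged wheel. It assumes scikit-learn and scikit-learn-intelex are installed; KNeighborsClassifier is just one of the patched estimators from the file list, and the 0.15 threshold mirrors EXTRA_MEMORY_THRESHOLD above.

import gc
import tracemalloc

from sklearn.datasets import make_classification
from sklearn.model_selection import KFold

from sklearnex.neighbors import KNeighborsClassifier  # any patched estimator works here

X, y = make_classification(n_samples=1000, n_features=100, random_state=777)
data_bytes = X.size * X.dtype.itemsize + y.size * y.dtype.itemsize

tracemalloc.start()
baseline = tracemalloc.get_traced_memory()[0]
for train_idx, test_idx in KFold(n_splits=10).split(X):
    # fit/predict on each fold, then drop the estimator so only leaked memory remains
    est = KNeighborsClassifier().fit(X[train_idx], y[train_idx])
    est.predict(X[test_idx])
    del est
gc.collect()
leaked = tracemalloc.get_traced_memory()[0] - baseline
tracemalloc.stop()

# mirrors EXTRA_MEMORY_THRESHOLD = 0.15 from the test file above
assert leaked < 0.15 * data_bytes, f"retained {leaked} bytes after gc"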
sklearnex/tests/test_monkeypatch.py
@@ -0,0 +1,276 @@
+ # ===============================================================================
+ # Copyright 2021 Intel Corporation
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ===============================================================================
+
+ import sklearnex
+ from daal4py.sklearn._utils import daal_check_version
+
+ # General use of patch_sklearn and unpatch_sklearn in pytest is not recommended.
+ # It changes global state and can impact the operation of other tests. This file
+ # specifically tests patch_sklearn and unpatch_sklearn and is exempt from this.
+ # If sklearnex patching is necessary in testing, use the 'with_sklearnex' pytest
+ # fixture.
+
+
+ def test_monkey_patching():
+     _tokens = sklearnex.get_patch_names()
+     _values = sklearnex.get_patch_map().values()
+     _classes = list()
+
+     for v in _values:
+         for c in v:
+             _classes.append(c[0])
+
+     try:
+         sklearnex.patch_sklearn()
+
+         for i, _ in enumerate(_tokens):
+             t = _tokens[i]
+             p = _classes[i][0]
+             n = _classes[i][1]
+
+             class_module = getattr(p, n).__module__
+             assert class_module.startswith("daal4py") or class_module.startswith(
+                 "sklearnex"
+             ), "Patching has completed with error."
+
+         for i, _ in enumerate(_tokens):
+             t = _tokens[i]
+             p = _classes[i][0]
+             n = _classes[i][1]
+
+             sklearnex.unpatch_sklearn(t)
+             sklearn_class = getattr(p, n, None)
+             if sklearn_class is not None:
+                 sklearn_class = sklearn_class.__module__
+             assert sklearn_class is None or sklearn_class.startswith(
+                 "sklearn"
+             ), "Unpatching has completed with error."
+
+     finally:
+         sklearnex.unpatch_sklearn()
+
+     try:
+         for i, _ in enumerate(_tokens):
+             t = _tokens[i]
+             p = _classes[i][0]
+             n = _classes[i][1]
+
+             sklearn_class = getattr(p, n, None)
+             if sklearn_class is not None:
+                 sklearn_class = sklearn_class.__module__
+             assert sklearn_class is None or sklearn_class.startswith(
+                 "sklearn"
+             ), "Unpatching has completed with error."
+
+     finally:
+         sklearnex.unpatch_sklearn()
+
+     try:
+         for i, _ in enumerate(_tokens):
+             t = _tokens[i]
+             p = _classes[i][0]
+             n = _classes[i][1]
+
+             sklearnex.patch_sklearn(t)
+
+             class_module = getattr(p, n).__module__
+             assert class_module.startswith("daal4py") or class_module.startswith(
+                 "sklearnex"
+             ), "Patching has completed with error."
+     finally:
+         sklearnex.unpatch_sklearn()
+
+
+ def test_patch_by_list_simple():
+     try:
+         sklearnex.patch_sklearn(["LogisticRegression"])
+
+         from sklearn.ensemble import RandomForestRegressor
+         from sklearn.linear_model import LogisticRegression
+         from sklearn.neighbors import KNeighborsRegressor
+         from sklearn.svm import SVC
+
+         assert RandomForestRegressor.__module__.startswith("sklearn")
+         assert KNeighborsRegressor.__module__.startswith("sklearn")
+         if daal_check_version((2024, "P", 1)):
+             assert LogisticRegression.__module__.startswith("sklearnex")
+         else:
+             assert LogisticRegression.__module__.startswith("daal4py")
+         assert SVC.__module__.startswith("sklearn")
+     finally:
+         sklearnex.unpatch_sklearn()
+
+
+ def test_patch_by_list_many_estimators():
+     try:
+         sklearnex.patch_sklearn(["LogisticRegression", "SVC"])
+
+         from sklearn.ensemble import RandomForestRegressor
+         from sklearn.linear_model import LogisticRegression
+         from sklearn.neighbors import KNeighborsRegressor
+         from sklearn.svm import SVC
+
+         assert RandomForestRegressor.__module__.startswith("sklearn")
+         assert KNeighborsRegressor.__module__.startswith("sklearn")
+         if daal_check_version((2024, "P", 1)):
+             assert LogisticRegression.__module__.startswith("sklearnex")
+         else:
+             assert LogisticRegression.__module__.startswith("daal4py")
+         assert SVC.__module__.startswith("daal4py") or SVC.__module__.startswith(
+             "sklearnex"
+         )
+
+     finally:
+         sklearnex.unpatch_sklearn()
+
+
+ def test_unpatch_by_list_many_estimators():
+     try:
+         sklearnex.patch_sklearn()
+
+         from sklearn.ensemble import RandomForestRegressor
+         from sklearn.linear_model import LogisticRegression
+         from sklearn.neighbors import KNeighborsRegressor
+         from sklearn.svm import SVC
+
+         assert RandomForestRegressor.__module__.startswith("sklearnex")
+         assert KNeighborsRegressor.__module__.startswith(
+             "daal4py"
+         ) or KNeighborsRegressor.__module__.startswith("sklearnex")
+         if daal_check_version((2024, "P", 1)):
+             assert LogisticRegression.__module__.startswith("sklearnex")
+         else:
+             assert LogisticRegression.__module__.startswith("daal4py")
+         assert SVC.__module__.startswith("daal4py") or SVC.__module__.startswith(
+             "sklearnex"
+         )
+
+         sklearnex.unpatch_sklearn(["KNeighborsRegressor", "RandomForestRegressor"])
+
+         from sklearn.ensemble import RandomForestRegressor
+         from sklearn.linear_model import LogisticRegression
+         from sklearn.neighbors import KNeighborsRegressor
+         from sklearn.svm import SVC
+
+         assert RandomForestRegressor.__module__.startswith("sklearn")
+         assert KNeighborsRegressor.__module__.startswith("sklearn")
+         if daal_check_version((2024, "P", 1)):
+             assert LogisticRegression.__module__.startswith("sklearnex")
+         else:
+             assert LogisticRegression.__module__.startswith("daal4py")
+
+         assert SVC.__module__.startswith("daal4py") or SVC.__module__.startswith(
+             "sklearnex"
+         )
+     finally:
+         sklearnex.unpatch_sklearn()
+
+
+ def test_patching_checker():
+     for name in [None, "SVC", "PCA"]:
+         try:
+             sklearnex.patch_sklearn(name=name)
+             assert sklearnex.sklearn_is_patched(name=name)
+
+         finally:
+             sklearnex.unpatch_sklearn(name=name)
+             assert not sklearnex.sklearn_is_patched(name=name)
+     try:
+         sklearnex.patch_sklearn()
+         patching_status_map = sklearnex.sklearn_is_patched(return_map=True)
+         assert len(patching_status_map) == len(sklearnex.get_patch_names())
+         for status in patching_status_map.values():
+             assert status
+     finally:
+         sklearnex.unpatch_sklearn()
+
+     patching_status_map = sklearnex.sklearn_is_patched(return_map=True)
+     assert len(patching_status_map) == len(sklearnex.get_patch_names())
+     for status in patching_status_map.values():
+         assert not status
+
+
+ def test_preview_namespace():
+     def get_estimators():
+         from sklearn.cluster import DBSCAN
+         from sklearn.decomposition import PCA
+         from sklearn.ensemble import RandomForestClassifier
+         from sklearn.linear_model import LinearRegression, Ridge
+         from sklearn.svm import SVC
+
+         return (
+             Ridge(),
+             LinearRegression(),
+             PCA(),
+             DBSCAN(),
+             SVC(),
+             RandomForestClassifier(),
+         )
+
+     from sklearnex.dispatcher import _is_preview_enabled
+
+     try:
+         sklearnex.patch_sklearn(preview=True)
+
+         assert _is_preview_enabled()
+
+         ridge, lr, pca, dbscan, svc, rfc = get_estimators()
+         assert "sklearnex" in rfc.__module__
+
+         if daal_check_version((2024, "P", 600)):
+             assert "sklearnex.preview" in ridge.__module__
+
+         if daal_check_version((2023, "P", 100)):
+             assert "sklearnex" in lr.__module__
+         else:
+             assert "daal4py" in lr.__module__
+
+         assert "sklearnex" in pca.__module__
+         assert "sklearnex" in dbscan.__module__
+         assert "sklearnex" in svc.__module__
+
+     finally:
+         sklearnex.unpatch_sklearn()
+
+     # no patching behavior
+     ridge, lr, pca, dbscan, svc, rfc = get_estimators()
+     assert "sklearn." in ridge.__module__ and "daal4py" not in ridge.__module__
+     assert "sklearn." in lr.__module__ and "daal4py" not in lr.__module__
+     assert "sklearn." in pca.__module__ and "daal4py" not in pca.__module__
+     assert "sklearn." in dbscan.__module__ and "daal4py" not in dbscan.__module__
+     assert "sklearn." in svc.__module__ and "daal4py" not in svc.__module__
+     assert "sklearn." in rfc.__module__ and "daal4py" not in rfc.__module__
+
+     # default patching behavior
+     try:
+         sklearnex.patch_sklearn()
+         assert not _is_preview_enabled()
+
+         ridge, lr, pca, dbscan, svc, rfc = get_estimators()
+
+         assert "daal4py" in ridge.__module__
+
+         if daal_check_version((2023, "P", 100)):
+             assert "sklearnex" in lr.__module__
+         else:
+             assert "daal4py" in lr.__module__
+
+         assert "sklearnex" in pca.__module__
+         assert "sklearnex" in rfc.__module__
+         assert "sklearnex" in dbscan.__module__
+         assert "sklearnex" in svc.__module__
+     finally:
+         sklearnex.unpatch_sklearn()
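
Editorial note: a minimal usage sketch (not part of the packaged wheel) of the patching API that the test above exercises, assuming scikit-learn and scikit-learn-intelex are both installed.

from sklearnex import patch_sklearn, sklearn_is_patched, unpatch_sklearn

patch_sklearn(["LogisticRegression"])  # patch a single estimator by name
from sklearn.linear_model import LogisticRegression  # import after patching

# module starts with "sklearnex" or "daal4py" depending on the oneDAL version,
# exactly as the version checks in the test above expect
print(LogisticRegression.__module__)
print(sklearn_is_patched(name="LogisticRegression"))  # True

unpatch_sklearn()  # restore the stock scikit-learn classes
print(sklearn_is_patched(name="LogisticRegression"))  # False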
sklearnex/tests/test_n_jobs_support.py
@@ -0,0 +1,103 @@
+ # ==============================================================================
+ # Copyright 2023 Intel Corporation
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+ import inspect
+ import logging
+ from multiprocessing import cpu_count
+
+ import pytest
+ from sklearn.base import BaseEstimator
+ from sklearn.datasets import make_classification
+
+ from sklearnex.decomposition import PCA
+ from sklearnex.dispatcher import get_patch_map
+ from sklearnex.svm import SVC, NuSVC
+
+ ESTIMATORS = set(
+     filter(
+         lambda x: inspect.isclass(x) and issubclass(x, BaseEstimator),
+         [value[0][0][2] for value in get_patch_map().values()],
+     )
+ )
+
+ X, Y = make_classification(n_samples=40, n_features=4, random_state=42)
+
+
+ @pytest.mark.parametrize("estimator_class", ESTIMATORS)
+ @pytest.mark.parametrize("n_jobs", [None, -1, 1, 2])
+ def test_n_jobs_support(caplog, estimator_class, n_jobs):
+     def check_estimator_doc(estimator):
+         if estimator.__doc__ is not None:
+             assert "n_jobs" in estimator.__doc__
+
+     def check_n_jobs_entry_in_logs(caplog, function_name, n_jobs):
+         for rec in caplog.records:
+             if function_name in rec.message and "threads" in rec.message:
+                 expected_n_jobs = n_jobs if n_jobs > 0 else cpu_count() + 1 + n_jobs
+                 logging.info(f"{function_name}: setting {expected_n_jobs} threads")
+                 if f"{function_name}: setting {expected_n_jobs} threads" in rec.message:
+                     return True
+         # False if n_jobs is set and not found in logs
+         return n_jobs is None
+
+     def check_method(*args, method, caplog):
+         method(*args)
+         assert check_n_jobs_entry_in_logs(caplog, method.__name__, n_jobs)
+
+     def check_methods_decoration(estimator):
+         funcs = {
+             i: getattr(estimator, i)
+             for i in dir(estimator)
+             if hasattr(estimator, i) and callable(getattr(estimator, i))
+         }
+
+         for func_name, func in funcs.items():
+             assert hasattr(func, "__onedal_n_jobs_decorated__") == (
+                 func_name in estimator._n_jobs_supported_onedal_methods
+             ), f"{estimator}.{func_name} n_jobs decoration does not match {estimator} n_jobs supported methods"
+
+     caplog.set_level(logging.DEBUG, logger="sklearnex")
+     estimator_kwargs = {"n_jobs": n_jobs}
+     # by default, [Nu]SVC.predict_proba is restricted by @available_if decorator
+     if estimator_class in [SVC, NuSVC]:
+         estimator_kwargs["probability"] = True
+     # explicitly request oneDAL's PCA-Covariance algorithm
+     if estimator_class == PCA:
+         estimator_kwargs["svd_solver"] = "covariance_eigh"
+     estimator_instance = estimator_class(**estimator_kwargs)
+     # check `n_jobs` parameter doc entry
+     check_estimator_doc(estimator_class)
+     check_estimator_doc(estimator_instance)
+     # check `n_jobs` log entry for supported methods
+     # `fit` call is required before other methods
+     check_method(X, Y, method=estimator_instance.fit, caplog=caplog)
+     for method_name in estimator_instance._n_jobs_supported_onedal_methods:
+         if method_name == "fit":
+             continue
+         method = getattr(estimator_instance, method_name)
+         argdict = inspect.signature(method).parameters
+         argnum = len(
+             [i for i in argdict if argdict[i].default == inspect.Parameter.empty]
+         )
+         if argnum == 0:
+             check_method(method=method, caplog=caplog)
+         elif argnum == 1:
+             check_method(X, method=method, caplog=caplog)
+         else:
+             check_method(X, Y, method=method, caplog=caplog)
+     # check if correct methods were decorated
+     check_methods_decoration(estimator_class)
+     check_methods_decoration(estimator_instance)
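
Editorial note: a minimal sketch (not part of the packaged wheel) of the behavior the test above asserts on: patched estimators accept n_jobs and report the chosen thread count on the "sklearnex" logger at DEBUG level. The message wording is inferred from the check above (f"{method}: setting {n} threads"), and DBSCAN is just one patched estimator from the parametrized set.

import logging

from sklearn.datasets import make_classification

from sklearnex.cluster import DBSCAN  # any patched estimator with n_jobs support

logging.basicConfig()  # attach a handler so the debug records are printed
logging.getLogger("sklearnex").setLevel(logging.DEBUG)

X, _ = make_classification(n_samples=40, n_features=4, random_state=42)
# expect a DEBUG record containing "fit: setting 2 threads" (per the assertion above)
DBSCAN(n_jobs=2).fit(X)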