scikit-learn-intelex 2025.1.0__py311-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (280) hide show
  1. daal4py/__init__.py +73 -0
  2. daal4py/__main__.py +58 -0
  3. daal4py/_daal4py.cpython-311-x86_64-linux-gnu.so +0 -0
  4. daal4py/doc/third-party-programs.txt +424 -0
  5. daal4py/mb/__init__.py +19 -0
  6. daal4py/mb/model_builders.py +377 -0
  7. daal4py/mpi_transceiver.cpython-311-x86_64-linux-gnu.so +0 -0
  8. daal4py/sklearn/__init__.py +40 -0
  9. daal4py/sklearn/_n_jobs_support.py +248 -0
  10. daal4py/sklearn/_utils.py +245 -0
  11. daal4py/sklearn/cluster/__init__.py +20 -0
  12. daal4py/sklearn/cluster/dbscan.py +165 -0
  13. daal4py/sklearn/cluster/k_means.py +597 -0
  14. daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
  15. daal4py/sklearn/decomposition/__init__.py +19 -0
  16. daal4py/sklearn/decomposition/_pca.py +524 -0
  17. daal4py/sklearn/ensemble/AdaBoostClassifier.py +196 -0
  18. daal4py/sklearn/ensemble/GBTDAAL.py +337 -0
  19. daal4py/sklearn/ensemble/__init__.py +27 -0
  20. daal4py/sklearn/ensemble/_forest.py +1397 -0
  21. daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
  22. daal4py/sklearn/linear_model/__init__.py +29 -0
  23. daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
  24. daal4py/sklearn/linear_model/_linear.py +272 -0
  25. daal4py/sklearn/linear_model/_ridge.py +325 -0
  26. daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
  27. daal4py/sklearn/linear_model/linear.py +17 -0
  28. daal4py/sklearn/linear_model/logistic_loss.py +195 -0
  29. daal4py/sklearn/linear_model/logistic_path.py +1026 -0
  30. daal4py/sklearn/linear_model/ridge.py +17 -0
  31. daal4py/sklearn/linear_model/tests/test_linear.py +208 -0
  32. daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
  33. daal4py/sklearn/manifold/__init__.py +19 -0
  34. daal4py/sklearn/manifold/_t_sne.py +405 -0
  35. daal4py/sklearn/metrics/__init__.py +20 -0
  36. daal4py/sklearn/metrics/_pairwise.py +236 -0
  37. daal4py/sklearn/metrics/_ranking.py +210 -0
  38. daal4py/sklearn/model_selection/__init__.py +19 -0
  39. daal4py/sklearn/model_selection/_split.py +309 -0
  40. daal4py/sklearn/model_selection/tests/test_split.py +56 -0
  41. daal4py/sklearn/monkeypatch/__init__.py +0 -0
  42. daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
  43. daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
  44. daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
  45. daal4py/sklearn/monkeypatch/tests/test_patching.py +90 -0
  46. daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +117 -0
  47. daal4py/sklearn/neighbors/__init__.py +21 -0
  48. daal4py/sklearn/neighbors/_base.py +503 -0
  49. daal4py/sklearn/neighbors/_classification.py +139 -0
  50. daal4py/sklearn/neighbors/_regression.py +74 -0
  51. daal4py/sklearn/neighbors/_unsupervised.py +55 -0
  52. daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
  53. daal4py/sklearn/svm/__init__.py +19 -0
  54. daal4py/sklearn/svm/svm.py +734 -0
  55. daal4py/sklearn/utils/__init__.py +21 -0
  56. daal4py/sklearn/utils/base.py +75 -0
  57. daal4py/sklearn/utils/tests/test_utils.py +51 -0
  58. daal4py/sklearn/utils/validation.py +693 -0
  59. onedal/__init__.py +83 -0
  60. onedal/_config.py +54 -0
  61. onedal/_device_offload.py +222 -0
  62. onedal/_onedal_py_dpc.cpython-311-x86_64-linux-gnu.so +0 -0
  63. onedal/_onedal_py_host.cpython-311-x86_64-linux-gnu.so +0 -0
  64. onedal/_onedal_py_spmd_dpc.cpython-311-x86_64-linux-gnu.so +0 -0
  65. onedal/basic_statistics/__init__.py +20 -0
  66. onedal/basic_statistics/basic_statistics.py +107 -0
  67. onedal/basic_statistics/incremental_basic_statistics.py +160 -0
  68. onedal/basic_statistics/tests/test_basic_statistics.py +298 -0
  69. onedal/basic_statistics/tests/test_incremental_basic_statistics.py +196 -0
  70. onedal/cluster/__init__.py +27 -0
  71. onedal/cluster/dbscan.py +110 -0
  72. onedal/cluster/kmeans.py +564 -0
  73. onedal/cluster/kmeans_init.py +115 -0
  74. onedal/cluster/tests/test_dbscan.py +125 -0
  75. onedal/cluster/tests/test_kmeans.py +88 -0
  76. onedal/cluster/tests/test_kmeans_init.py +93 -0
  77. onedal/common/_base.py +38 -0
  78. onedal/common/_estimator_checks.py +47 -0
  79. onedal/common/_mixin.py +62 -0
  80. onedal/common/_policy.py +59 -0
  81. onedal/common/_spmd_policy.py +30 -0
  82. onedal/common/hyperparameters.py +125 -0
  83. onedal/common/tests/test_policy.py +76 -0
  84. onedal/covariance/__init__.py +20 -0
  85. onedal/covariance/covariance.py +125 -0
  86. onedal/covariance/incremental_covariance.py +146 -0
  87. onedal/covariance/tests/test_covariance.py +50 -0
  88. onedal/covariance/tests/test_incremental_covariance.py +122 -0
  89. onedal/datatypes/__init__.py +19 -0
  90. onedal/datatypes/_data_conversion.py +154 -0
  91. onedal/datatypes/tests/common.py +126 -0
  92. onedal/datatypes/tests/test_data.py +414 -0
  93. onedal/decomposition/__init__.py +20 -0
  94. onedal/decomposition/incremental_pca.py +204 -0
  95. onedal/decomposition/pca.py +186 -0
  96. onedal/decomposition/tests/test_incremental_pca.py +198 -0
  97. onedal/ensemble/__init__.py +29 -0
  98. onedal/ensemble/forest.py +727 -0
  99. onedal/ensemble/tests/test_random_forest.py +97 -0
  100. onedal/linear_model/__init__.py +27 -0
  101. onedal/linear_model/incremental_linear_model.py +258 -0
  102. onedal/linear_model/linear_model.py +329 -0
  103. onedal/linear_model/logistic_regression.py +249 -0
  104. onedal/linear_model/tests/test_incremental_linear_regression.py +168 -0
  105. onedal/linear_model/tests/test_incremental_ridge_regression.py +107 -0
  106. onedal/linear_model/tests/test_linear_regression.py +250 -0
  107. onedal/linear_model/tests/test_logistic_regression.py +95 -0
  108. onedal/linear_model/tests/test_ridge.py +95 -0
  109. onedal/neighbors/__init__.py +19 -0
  110. onedal/neighbors/neighbors.py +767 -0
  111. onedal/neighbors/tests/test_knn_classification.py +49 -0
  112. onedal/primitives/__init__.py +27 -0
  113. onedal/primitives/get_tree.py +25 -0
  114. onedal/primitives/kernel_functions.py +153 -0
  115. onedal/primitives/tests/test_kernel_functions.py +159 -0
  116. onedal/spmd/__init__.py +25 -0
  117. onedal/spmd/_base.py +30 -0
  118. onedal/spmd/basic_statistics/__init__.py +20 -0
  119. onedal/spmd/basic_statistics/basic_statistics.py +30 -0
  120. onedal/spmd/basic_statistics/incremental_basic_statistics.py +69 -0
  121. onedal/spmd/cluster/__init__.py +28 -0
  122. onedal/spmd/cluster/dbscan.py +23 -0
  123. onedal/spmd/cluster/kmeans.py +56 -0
  124. onedal/spmd/covariance/__init__.py +20 -0
  125. onedal/spmd/covariance/covariance.py +26 -0
  126. onedal/spmd/covariance/incremental_covariance.py +82 -0
  127. onedal/spmd/decomposition/__init__.py +20 -0
  128. onedal/spmd/decomposition/incremental_pca.py +117 -0
  129. onedal/spmd/decomposition/pca.py +26 -0
  130. onedal/spmd/ensemble/__init__.py +19 -0
  131. onedal/spmd/ensemble/forest.py +28 -0
  132. onedal/spmd/linear_model/__init__.py +21 -0
  133. onedal/spmd/linear_model/incremental_linear_model.py +97 -0
  134. onedal/spmd/linear_model/linear_model.py +30 -0
  135. onedal/spmd/linear_model/logistic_regression.py +38 -0
  136. onedal/spmd/neighbors/__init__.py +19 -0
  137. onedal/spmd/neighbors/neighbors.py +75 -0
  138. onedal/svm/__init__.py +19 -0
  139. onedal/svm/svm.py +556 -0
  140. onedal/svm/tests/test_csr_svm.py +351 -0
  141. onedal/svm/tests/test_nusvc.py +204 -0
  142. onedal/svm/tests/test_nusvr.py +210 -0
  143. onedal/svm/tests/test_svc.py +176 -0
  144. onedal/svm/tests/test_svr.py +243 -0
  145. onedal/tests/test_common.py +57 -0
  146. onedal/tests/utils/_dataframes_support.py +162 -0
  147. onedal/tests/utils/_device_selection.py +102 -0
  148. onedal/utils/__init__.py +49 -0
  149. onedal/utils/_array_api.py +81 -0
  150. onedal/utils/_dpep_helpers.py +56 -0
  151. onedal/utils/validation.py +440 -0
  152. scikit_learn_intelex-2025.1.0.dist-info/LICENSE.txt +202 -0
  153. scikit_learn_intelex-2025.1.0.dist-info/METADATA +231 -0
  154. scikit_learn_intelex-2025.1.0.dist-info/RECORD +280 -0
  155. scikit_learn_intelex-2025.1.0.dist-info/WHEEL +5 -0
  156. scikit_learn_intelex-2025.1.0.dist-info/top_level.txt +3 -0
  157. sklearnex/__init__.py +66 -0
  158. sklearnex/__main__.py +58 -0
  159. sklearnex/_config.py +116 -0
  160. sklearnex/_device_offload.py +126 -0
  161. sklearnex/_utils.py +132 -0
  162. sklearnex/basic_statistics/__init__.py +20 -0
  163. sklearnex/basic_statistics/basic_statistics.py +230 -0
  164. sklearnex/basic_statistics/incremental_basic_statistics.py +345 -0
  165. sklearnex/basic_statistics/tests/test_basic_statistics.py +270 -0
  166. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +404 -0
  167. sklearnex/cluster/__init__.py +20 -0
  168. sklearnex/cluster/dbscan.py +197 -0
  169. sklearnex/cluster/k_means.py +395 -0
  170. sklearnex/cluster/tests/test_dbscan.py +38 -0
  171. sklearnex/cluster/tests/test_kmeans.py +159 -0
  172. sklearnex/conftest.py +82 -0
  173. sklearnex/covariance/__init__.py +19 -0
  174. sklearnex/covariance/incremental_covariance.py +398 -0
  175. sklearnex/covariance/tests/test_incremental_covariance.py +237 -0
  176. sklearnex/decomposition/__init__.py +19 -0
  177. sklearnex/decomposition/pca.py +425 -0
  178. sklearnex/decomposition/tests/test_pca.py +58 -0
  179. sklearnex/dispatcher.py +543 -0
  180. sklearnex/doc/third-party-programs.txt +424 -0
  181. sklearnex/ensemble/__init__.py +29 -0
  182. sklearnex/ensemble/_forest.py +2029 -0
  183. sklearnex/ensemble/tests/test_forest.py +135 -0
  184. sklearnex/glob/__main__.py +72 -0
  185. sklearnex/glob/dispatcher.py +101 -0
  186. sklearnex/linear_model/__init__.py +32 -0
  187. sklearnex/linear_model/coordinate_descent.py +30 -0
  188. sklearnex/linear_model/incremental_linear.py +482 -0
  189. sklearnex/linear_model/incremental_ridge.py +425 -0
  190. sklearnex/linear_model/linear.py +341 -0
  191. sklearnex/linear_model/logistic_regression.py +413 -0
  192. sklearnex/linear_model/ridge.py +24 -0
  193. sklearnex/linear_model/tests/test_incremental_linear.py +207 -0
  194. sklearnex/linear_model/tests/test_incremental_ridge.py +153 -0
  195. sklearnex/linear_model/tests/test_linear.py +167 -0
  196. sklearnex/linear_model/tests/test_logreg.py +134 -0
  197. sklearnex/manifold/__init__.py +19 -0
  198. sklearnex/manifold/t_sne.py +21 -0
  199. sklearnex/manifold/tests/test_tsne.py +26 -0
  200. sklearnex/metrics/__init__.py +23 -0
  201. sklearnex/metrics/pairwise.py +22 -0
  202. sklearnex/metrics/ranking.py +20 -0
  203. sklearnex/metrics/tests/test_metrics.py +39 -0
  204. sklearnex/model_selection/__init__.py +21 -0
  205. sklearnex/model_selection/split.py +22 -0
  206. sklearnex/model_selection/tests/test_model_selection.py +34 -0
  207. sklearnex/neighbors/__init__.py +27 -0
  208. sklearnex/neighbors/_lof.py +236 -0
  209. sklearnex/neighbors/common.py +310 -0
  210. sklearnex/neighbors/knn_classification.py +231 -0
  211. sklearnex/neighbors/knn_regression.py +207 -0
  212. sklearnex/neighbors/knn_unsupervised.py +178 -0
  213. sklearnex/neighbors/tests/test_neighbors.py +82 -0
  214. sklearnex/preview/__init__.py +17 -0
  215. sklearnex/preview/covariance/__init__.py +19 -0
  216. sklearnex/preview/covariance/covariance.py +138 -0
  217. sklearnex/preview/covariance/tests/test_covariance.py +66 -0
  218. sklearnex/preview/decomposition/__init__.py +19 -0
  219. sklearnex/preview/decomposition/incremental_pca.py +233 -0
  220. sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
  221. sklearnex/preview/linear_model/__init__.py +19 -0
  222. sklearnex/preview/linear_model/ridge.py +424 -0
  223. sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
  224. sklearnex/spmd/__init__.py +25 -0
  225. sklearnex/spmd/basic_statistics/__init__.py +20 -0
  226. sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
  227. sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
  228. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  229. sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +307 -0
  230. sklearnex/spmd/cluster/__init__.py +30 -0
  231. sklearnex/spmd/cluster/dbscan.py +50 -0
  232. sklearnex/spmd/cluster/kmeans.py +21 -0
  233. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  234. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
  235. sklearnex/spmd/covariance/__init__.py +20 -0
  236. sklearnex/spmd/covariance/covariance.py +21 -0
  237. sklearnex/spmd/covariance/incremental_covariance.py +37 -0
  238. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  239. sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
  240. sklearnex/spmd/decomposition/__init__.py +20 -0
  241. sklearnex/spmd/decomposition/incremental_pca.py +30 -0
  242. sklearnex/spmd/decomposition/pca.py +21 -0
  243. sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
  244. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  245. sklearnex/spmd/ensemble/__init__.py +19 -0
  246. sklearnex/spmd/ensemble/forest.py +71 -0
  247. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  248. sklearnex/spmd/linear_model/__init__.py +21 -0
  249. sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
  250. sklearnex/spmd/linear_model/linear_model.py +21 -0
  251. sklearnex/spmd/linear_model/logistic_regression.py +21 -0
  252. sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +329 -0
  253. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  254. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +162 -0
  255. sklearnex/spmd/neighbors/__init__.py +19 -0
  256. sklearnex/spmd/neighbors/neighbors.py +25 -0
  257. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  258. sklearnex/svm/__init__.py +29 -0
  259. sklearnex/svm/_common.py +339 -0
  260. sklearnex/svm/nusvc.py +371 -0
  261. sklearnex/svm/nusvr.py +170 -0
  262. sklearnex/svm/svc.py +399 -0
  263. sklearnex/svm/svr.py +167 -0
  264. sklearnex/svm/tests/test_svm.py +93 -0
  265. sklearnex/tests/test_common.py +390 -0
  266. sklearnex/tests/test_config.py +123 -0
  267. sklearnex/tests/test_memory_usage.py +379 -0
  268. sklearnex/tests/test_monkeypatch.py +276 -0
  269. sklearnex/tests/test_n_jobs_support.py +108 -0
  270. sklearnex/tests/test_parallel.py +48 -0
  271. sklearnex/tests/test_patching.py +385 -0
  272. sklearnex/tests/test_run_to_run_stability.py +321 -0
  273. sklearnex/tests/utils/__init__.py +44 -0
  274. sklearnex/tests/utils/base.py +371 -0
  275. sklearnex/tests/utils/spmd.py +198 -0
  276. sklearnex/utils/__init__.py +19 -0
  277. sklearnex/utils/_array_api.py +82 -0
  278. sklearnex/utils/parallel.py +59 -0
  279. sklearnex/utils/tests/test_finite.py +89 -0
  280. sklearnex/utils/validation.py +17 -0
@@ -0,0 +1,123 @@
1
+ # ==============================================================================
2
+ # Copyright 2021 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import sklearn
18
+
19
+ import onedal
20
+ import sklearnex
21
+
22
+
23
def test_get_config_contains_sklearn_params():
    """Check that every scikit-learn config key is also exposed by sklearnex."""
    skex_config = sklearnex.get_config()
    sk_config = sklearn.get_config()

    # Collect the scikit-learn keys that sklearnex fails to report; the test
    # passes only when there are none.
    missing = [name for name in sk_config.keys() if name not in skex_config.keys()]
    assert not missing
28
+
29
+
30
def test_set_config_works():
    """Validate that ``set_config`` applies settings to sklearnex and onedal.

    The configuration is global state, so the current values are captured
    first and restored in a ``finally`` clause however the test ends.
    """
    # Snapshot of the current settings, used to restore them afterwards.
    default_config = sklearnex.get_config()

    # New settings that will be applied and then verified.
    assume_finite = True
    target_offload = "cpu:0"
    allow_fallback_to_host = True
    allow_sklearn_after_onedal = False

    # Everything from the first mutation onwards lives inside the ``try``:
    # a failure in ``set_config`` itself, in reading the configs back, or in
    # any assertion would otherwise leave the *modified* configuration active
    # for the tests that run afterwards.
    try:
        sklearnex.set_config(
            assume_finite=assume_finite,
            target_offload=target_offload,
            allow_fallback_to_host=allow_fallback_to_host,
            allow_sklearn_after_onedal=allow_sklearn_after_onedal,
        )

        config = sklearnex.get_config()
        onedal_config = onedal._config._get_config()

        # The sklearnex-level config must reflect all four settings.
        assert config["target_offload"] == target_offload
        assert config["allow_fallback_to_host"] == allow_fallback_to_host
        assert config["allow_sklearn_after_onedal"] == allow_sklearn_after_onedal
        assert config["assume_finite"] == assume_finite
        # The onedal-level config is checked for the two offload settings.
        assert onedal_config["target_offload"] == target_offload
        assert onedal_config["allow_fallback_to_host"] == allow_fallback_to_host
    finally:
        # Restore the original configuration whether the checks passed or not.
        sklearnex.set_config(**default_config)
70
+
71
+
72
def test_config_context_works():
    """Validate that nested ``config_context`` managers apply their settings
    and that the previous settings are back in place once they exit.
    """
    from sklearnex import config_context, get_config

    config_before = get_config()
    onedal_config_before = onedal._config._get_config()

    # Settings under test, and the keys checked on each config object
    # (listed in the order the checks are performed).
    expected = {
        "assume_finite": True,
        "target_offload": "cpu:0",
        "allow_fallback_to_host": True,
        "allow_sklearn_after_onedal": False,
    }
    sklearnex_params = (
        "target_offload",
        "allow_fallback_to_host",
        "allow_sklearn_after_onedal",
        "assume_finite",
    )
    onedal_params = ("target_offload", "allow_fallback_to_host")

    # One context manager per setting, nested, so that the innermost scope
    # sees all four values at once.
    with config_context(assume_finite=expected["assume_finite"]):
        with config_context(target_offload=expected["target_offload"]):
            with config_context(
                allow_fallback_to_host=expected["allow_fallback_to_host"]
            ):
                with config_context(
                    allow_sklearn_after_onedal=expected["allow_sklearn_after_onedal"]
                ):
                    config_inside = sklearnex.get_config()
                    onedal_config_inside = onedal._config._get_config()

    for name in sklearnex_params:
        assert config_inside[name] == expected[name]
    for name in onedal_params:
        assert onedal_config_inside[name] == expected[name]

    # Outside of the context managers the earlier settings must be in effect.
    config_after = get_config()
    onedal_config_after = onedal._config._get_config()
    for name in sklearnex_params:
        assert config_after[name] == config_before[name]

    for name in onedal_params:
        assert onedal_config_after[name] == onedal_config_before[name]
@@ -0,0 +1,379 @@
1
+ # ==============================================================================
2
+ # Copyright 2021 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import gc
18
+ import logging
19
+ import os
20
+ import tracemalloc
21
+ import types
22
+ import warnings
23
+ from inspect import isclass
24
+
25
+ import numpy as np
26
+ import pytest
27
+ from scipy.stats import pearsonr
28
+ from sklearn.base import BaseEstimator, clone
29
+ from sklearn.datasets import make_classification
30
+ from sklearn.model_selection import KFold
31
+
32
+ from onedal import _is_dpc_backend
33
+ from onedal.tests.utils._dataframes_support import (
34
+ _convert_to_dataframe,
35
+ get_dataframes_and_queues,
36
+ )
37
+ from onedal.tests.utils._device_selection import get_queues, is_dpctl_device_available
38
+ from onedal.utils._array_api import _get_sycl_namespace
39
+ from onedal.utils._dpep_helpers import dpctl_available, dpnp_available
40
+ from sklearnex import config_context
41
+ from sklearnex.tests.utils import PATCHED_FUNCTIONS, PATCHED_MODELS, SPECIAL_INSTANCES
42
+ from sklearnex.utils._array_api import get_namespace
43
+
44
+ if dpctl_available:
45
+ from dpctl.tensor import usm_ndarray
46
+
47
+ if dpnp_available:
48
+ import dpnp
49
+
50
+ if _is_dpc_backend:
51
+ from onedal import _backend
52
+
53
+
54
# Names excluded from the CPU estimator set built further down in this module
# (entries are matched against the dictionary keys by exact string equality).
# Each entry carries the reason it is skipped.
CPU_SKIP_LIST = (
    "TSNE",  # too slow for using in testing on common data size
    "config_context",  # does not malloc
    "get_config",  # does not malloc
    "set_config",  # does not malloc
    "SVC(probability=True)",  # memory leak fortran numpy (investigate _fit_proba)
    "NuSVC(probability=True)",  # memory leak fortran numpy (investigate _fit_proba)
    "IncrementalEmpiricalCovariance",  # dataframe_f issues
    "IncrementalLinearRegression",  # TODO fix memory leak issue in private CI for data_shape = (1000, 100), data_transform_function = dataframe_f
    "IncrementalPCA",  # TODO fix memory leak issue in private CI for data_shape = (1000, 100), data_transform_function = dataframe_f
    "IncrementalRidge",  # TODO fix memory leak issue in private CI for data_shape = (1000, 100), data_transform_function = dataframe_f
    "LogisticRegression(solver='newton-cg')",  # memory leak fortran (1000, 100)
)

# Names excluded from the GPU estimator set built further down in this module
# (same exact-match rule as above). Each entry carries the reason it is skipped.
GPU_SKIP_LIST = (
    "TSNE",  # too slow for using in testing on common data size
    "RandomForestRegressor",  # too slow for using in testing on common data size
    "KMeans",  # does not support GPU offloading
    "config_context",  # does not malloc
    "get_config",  # does not malloc
    "set_config",  # does not malloc
    "Ridge",  # does not support GPU offloading (fails silently)
    "ElasticNet",  # does not support GPU offloading (fails silently)
    "Lasso",  # does not support GPU offloading (fails silently)
    "SVR",  # does not support GPU offloading (fails silently)
    "NuSVR",  # does not support GPU offloading (fails silently)
    "NuSVC",  # does not support GPU offloading (fails silently)
    "LogisticRegression",  # default parameters not supported, see solver=newton-cg
    "NuSVC(probability=True)",  # does not support GPU offloading (fails silently)
    "IncrementalLinearRegression",  # issue with potrf with the specific dataset
    "LinearRegression",  # issue with potrf with the specific dataset
)
86
+
87
+
88
def gen_functions(functions):
    """Wrap selected patched functions so each can be called as ``f(x, y)``.

    Works on a copy of ``functions``; the input mapping is left untouched.
    Three entries are replaced by two-argument wrappers:

    * ``roc_auc_score`` is called with ``y`` as both arguments.
    * ``pairwise_distances`` is removed and re-registered twice, once per
      metric (``cosine`` and ``correlation``), each called on ``x`` only.
    * ``_assert_all_finite`` is applied to ``x`` and then ``y``; the wrapper
      returns both results in a list.

    Every other entry is passed through unchanged.
    """
    wrapped = functions.copy()

    score_fn = wrapped.pop("roc_auc_score")
    wrapped["roc_auc_score"] = lambda x, y: score_fn(y, y)

    distance_fn = wrapped.pop("pairwise_distances")

    def _distances_with(metric):
        # Bind one metric; the returned wrapper ignores its second argument.
        return lambda x, y: distance_fn(x, metric=metric)

    wrapped["pairwise_distances(metric='cosine')"] = _distances_with("cosine")
    wrapped["pairwise_distances(metric='correlation')"] = _distances_with(
        "correlation"
    )

    finite_check = wrapped.pop("_assert_all_finite")
    wrapped["_assert_all_finite"] = lambda x, y: [finite_check(x), finite_check(y)]
    return wrapped
108
+
109
+
110
# Patched functions, wrapped by gen_functions.
FUNCTIONS = gen_functions(PATCHED_FUNCTIONS)

# Everything exercised by the CPU test, minus the entries in CPU_SKIP_LIST.
CPU_ESTIMATORS = {
    name: candidate
    for name, candidate in {
        **PATCHED_MODELS,
        **SPECIAL_INSTANCES,
        **FUNCTIONS,
    }.items()
    if name not in CPU_SKIP_LIST
}

# Everything exercised by the GPU test (no wrapped functions), minus the
# entries in GPU_SKIP_LIST.
GPU_ESTIMATORS = {
    name: candidate
    for name, candidate in {**PATCHED_MODELS, **SPECIAL_INSTANCES}.items()
    if name not in GPU_SKIP_LIST
}

# (n_samples, n_features) combinations; the pytest id is the tuple's repr.
data_shapes = [
    pytest.param(shape, id=str(shape)) for shape in ((1000, 100), (2000, 50))
]

# Allowed extra memory after a run, as a fraction of the input data size.
EXTRA_MEMORY_THRESHOLD = 0.15
EXTRA_MEMORY_THRESHOLD_PANDAS = 0.25
# Number of KFold splits, i.e. fit/inference rounds per measurement.
N_SPLITS = 10
# Memory-layout converters selected by the ``order`` test parameter.
ORDER_DICT = {"F": np.asfortranarray, "C": np.ascontiguousarray}
133
+
134
+
135
# The dummy estimator below is only defined when the DPC backend is available.
if _is_dpc_backend:

    from sklearn.utils.validation import check_is_fitted

    from onedal.datatypes import from_table, to_table

    class DummyEstimatorWithTableConversions(BaseEstimator):
        """Estimator whose fit/predict only convert the inputs with
        ``to_table`` and back with ``from_table``, so that these conversions
        can be run through the memory-leak check in this module.
        """

        def fit(self, X, y=None):
            """Convert ``X`` and ``y`` to tables and back; store the results.

            NOTE(review): ``X.sycl_queue`` is read for both conversions, so
            ``X`` is presumably always a SYCL-backed array here - confirm.
            """
            sua_iface, xp, _ = _get_sycl_namespace(X)
            X_table = to_table(X)
            y_table = to_table(y)
            # The presence of the fitted attributes (ending with a trailing
            # underscore) is required for the correct check. The cleanup of
            # the memory will occur at the estimator instance deletion.
            self.x_attr_ = from_table(
                X_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp
            )
            self.y_attr_ = from_table(
                y_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp
            )
            return self

        def predict(self, X):
            """Convert ``X`` to a table and back, and return the result."""
            # Checks if the estimator is fitted by verifying the presence of
            # fitted attributes (ending with a trailing underscore).
            check_is_fitted(self)
            sua_iface, xp, _ = _get_sycl_namespace(X)
            X_table = to_table(X)
            returned_X = from_table(
                X_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp
            )
            return returned_X
168
+
169
+
170
def gen_clsf_data(n_samples, n_features, dtype=None):
    """Generate a two-class dataset and report its size in bytes.

    Parameters
    ----------
    n_samples, n_features : int
        Forwarded to ``make_classification`` (with ``random_state=777``).
    dtype : optional
        When truthy, both arrays are cast to this dtype.

    Returns
    -------
    tuple
        ``(data, label, n_bytes)`` where ``n_bytes`` is the combined
        ``size * itemsize`` of both arrays after any cast.
    """
    features, targets = make_classification(
        n_classes=2, n_samples=n_samples, n_features=n_features, random_state=777
    )
    if dtype:
        features = features.astype(dtype)
        targets = targets.astype(dtype)

    features_bytes = features.size * features.dtype.itemsize
    targets_bytes = targets.size * targets.dtype.itemsize
    return features, targets, features_bytes + targets_bytes
181
+
182
+
183
def get_traced_memory(queue=None):
    """Return the amount of memory currently in use.

    With the DPC backend and a queue whose device is a GPU, the value comes
    from ``_backend.get_used_memory(queue)``. Otherwise it is the current
    (first) value reported by ``tracemalloc.get_traced_memory()``.
    """
    measure_on_gpu = _is_dpc_backend and queue and queue.sycl_device.is_gpu
    if not measure_on_gpu:
        current, _peak = tracemalloc.get_traced_memory()
        return current
    return _backend.get_used_memory(queue)
188
+
189
+
190
def take(x, index, axis=0, queue=None):
    """Select the entries of ``x`` at ``index`` along ``axis``.

    Three input kinds are handled:

    * ``dpnp.ndarray`` / ``usm_ndarray``: the index is moved to the device
      using the sycl queue of ``x`` before calling ``xp.take``.
    * other array-API inputs: the index is created on ``x.device``.
    * anything else: ``x.take`` is called directly.

    ``queue`` is accepted but not used by the body.
    """
    xp, array_api = get_namespace(x)

    is_sycl_array = (dpnp_available and isinstance(x, dpnp.ndarray)) or (
        dpctl_available and isinstance(x, usm_ndarray)
    )
    if is_sycl_array:
        # Using the same sycl queue for dpnp.ndarray or usm_ndarray.
        device_index = xp.asarray(index, usm_type="device", sycl_queue=x.sycl_queue)
        return xp.take(x, device_index, axis=axis)

    if array_api:
        return xp.take(x, xp.asarray(index, device=x.device), axis=axis)

    return x.take(index, axis=axis)
206
+
207
+
208
def split_train_inference(kf, x, y, estimator, queue=None):
    """Run one fit/inference round per split and record memory after each.

    ``estimator`` may be a ``BaseEstimator`` subclass (instantiated with
    defaults), a ``BaseEstimator`` instance (cloned), or any other callable,
    which is then called as ``estimator(x_train, y_train)``.

    Returns the list of ``get_traced_memory(queue)`` readings, one per split,
    taken after the per-split data and model have been deleted.
    """
    mem_tracks = []
    for train_index, test_index in kf.split(x):
        x_train = take(x, train_index, queue=queue)
        y_train = take(y, train_index, queue=queue)
        x_test = take(x, test_index, queue=queue)
        y_test = take(y, test_index, queue=queue)

        # ``alg`` stays None when ``estimator`` is a plain function.
        if isclass(estimator) and issubclass(estimator, BaseEstimator):
            alg = estimator()
        elif isinstance(estimator, BaseEstimator):
            alg = clone(estimator)
        else:
            alg = None

        if alg is None:
            estimator(x_train, y_train)
        else:
            alg.fit(x_train, y_train)
            # Only the first available inference method is exercised.
            for method_name in ("predict", "transform", "kneighbors"):
                if hasattr(alg, method_name):
                    getattr(alg, method_name)(x_test)
                    break
            del alg

        # Drop the per-split data before measuring.
        del x_train, x_test, y_train, y_test
        mem_tracks.append(get_traced_memory(queue))
    return mem_tracks
240
+
241
+
242
def _kfold_function_template(
    estimator, dataframe, data_shape, queue=None, func=None, dtype=None
):
    """Run ``estimator`` over KFold splits and check for leftover memory.

    Parameters
    ----------
    estimator
        Estimator class, estimator instance or callable; see
        ``split_train_inference`` for how each is invoked.
    dataframe
        Target container passed to ``_convert_to_dataframe``. The value
        ``"pandas"`` selects the larger memory threshold.
    data_shape : tuple
        ``(n_samples, n_features)`` of the generated dataset.
    queue : optional
        SYCL queue used for data placement and memory measurement.
    func : callable, optional
        Applied to ``X`` before conversion (e.g. a memory-order converter).
    dtype : optional
        Forwarded to ``gen_clsf_data``.

    Raises
    ------
    AssertionError
        When, on CPU (or without a queue), the memory still in use after
        garbage collection exceeds the threshold fraction of the input size.
    """
    tracemalloc.start()
    # tracemalloc tracing is process-wide state: stop it in ``finally`` so an
    # exception anywhere in the measured section cannot leave tracing enabled
    # for the tests that run afterwards.
    try:
        n_samples, n_features = data_shape
        X, y, data_memory_size = gen_clsf_data(n_samples, n_features, dtype=dtype)
        kf = KFold(n_splits=N_SPLITS)
        if func:
            X = func(X)

        X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
        y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)

        mem_before = get_traced_memory(queue)
        mem_tracks = split_train_inference(kf, X, y, estimator, queue=queue)
        # Per-iteration growth statistics, used only for the warning below.
        mem_iter_diffs = np.array(mem_tracks[1:]) - np.array(mem_tracks[:-1])
        mem_incr_mean, mem_incr_std = mem_iter_diffs.mean(), mem_iter_diffs.std()
        mem_incr_mean, mem_incr_std = round(mem_incr_mean), round(mem_incr_std)
        with warnings.catch_warnings():
            # In the case that the memory usage is constant, this will raise
            # a ConstantInputWarning error in pearsonr from scipy, this can
            # be ignored.
            warnings.filterwarnings(
                "ignore",
                message="An input array is constant; the correlation coefficient is not defined",
            )
            mem_iter_corr, _ = pearsonr(mem_tracks, list(range(len(mem_tracks))))

        if mem_iter_corr > 0.95:
            logging.warning(
                "Memory usage is steadily increasing with iterations "
                "(Pearson correlation coefficient between "
                f"memory tracks and iterations is {mem_iter_corr})\n"
                "Memory usage increase per iteration: "
                f"{mem_incr_mean}±{mem_incr_std} bytes"
            )
        mem_before_gc = get_traced_memory(queue)
        mem_diff = mem_before_gc - mem_before
        if isinstance(estimator, BaseEstimator):
            name = str(estimator)
        else:
            name = estimator.__name__

        threshold = (
            EXTRA_MEMORY_THRESHOLD_PANDAS
            if dataframe == "pandas"
            else EXTRA_MEMORY_THRESHOLD
        )
        # The three ``{}`` placeholders are filled via ``str.format`` below:
        # "before"/"after", the byte count and the percentage.
        message = (
            "Size of extra allocated memory {} using garbage collector "
            f"is greater than {threshold * 100}% of input data"
            f"\n\tAlgorithm: {name}"
            f"\n\tInput data size: {data_memory_size} bytes"
            "\n\tExtra allocated memory size: {} bytes"
            " / {} %"
        )
        # Exceeding the threshold before garbage collection is only logged.
        if mem_diff >= threshold * data_memory_size:
            logging.warning(
                message.format(
                    "before", mem_diff, round((mem_diff) / data_memory_size * 100, 2)
                )
            )
        gc.collect()
        mem_after = get_traced_memory(queue)
    finally:
        tracemalloc.stop()
    mem_diff = mem_after - mem_before

    # GPU offloading with SYCL contains a program/kernel cache which should
    # be controllable via a KernelProgramCache object in the SYCL context.
    # The programs and kernels are stored on the GPU, but cannot be cleared
    # as this class is not available for access in all oneDAL DPC++ runtimes.
    # Therefore, until this is implemented this test must be skipped for gpu
    # as it looks like a memory leak (at least there is no way to discern a
    # leak on the first run).
    if queue is None or queue.sycl_device.is_cpu:
        assert mem_diff < threshold * data_memory_size, message.format(
            "after", mem_diff, round((mem_diff) / data_memory_size * 100, 2)
        )
319
+
320
+
321
@pytest.mark.parametrize("order", ["F", "C"])
@pytest.mark.parametrize(
    "dataframe,queue", get_dataframes_and_queues("numpy,pandas,dpctl", "cpu")
)
@pytest.mark.parametrize("estimator", CPU_ESTIMATORS.keys())
@pytest.mark.parametrize("data_shape", data_shapes)
def test_memory_leaks(estimator, dataframe, queue, order, data_shape):
    """Run the memory check for every CPU estimator, container, memory order
    and data shape combination.
    """
    if queue is not None and estimator == "_assert_all_finite":
        pytest.skip(f"{estimator} is not designed for device offloading")

    layout_converter = ORDER_DICT[order]
    _kfold_function_template(
        CPU_ESTIMATORS[estimator], dataframe, data_shape, queue, layout_converter
    )
335
+
336
+
337
@pytest.mark.skipif(
    os.getenv("ZES_ENABLE_SYSMAN") is None or not is_dpctl_device_available("gpu"),
    reason="SYCL device memory leak check requires the level zero sysman",
)
@pytest.mark.parametrize("queue", get_queues("gpu"))
@pytest.mark.parametrize("estimator", GPU_ESTIMATORS.keys())
@pytest.mark.parametrize("order", ["F", "C"])
@pytest.mark.parametrize("data_shape", data_shapes)
def test_gpu_memory_leaks(estimator, queue, order, data_shape):
    """Run the memory check for every GPU estimator with the computation
    offloaded to ``queue`` through ``config_context``.
    """
    is_large_extra_trees = "ExtraTrees" in estimator and data_shape == (2000, 50)
    if is_large_extra_trees:
        pytest.skip("Avoid a segmentation fault in Extra Trees algorithms")

    layout_converter = ORDER_DICT[order]
    with config_context(target_offload=queue):
        _kfold_function_template(
            GPU_ESTIMATORS[estimator], None, data_shape, queue, layout_converter
        )
352
+
353
+
354
@pytest.mark.skipif(
    not _is_dpc_backend,
    reason="__sycl_usm_array_interface__ support requires DPC backend.",
)
@pytest.mark.parametrize(
    "dataframe,queue", get_dataframes_and_queues("dpctl,dpnp", "cpu,gpu")
)
@pytest.mark.parametrize("order", ["F", "C"])
@pytest.mark.parametrize("data_shape", data_shapes)
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_table_conversions_memory_leaks(dataframe, queue, order, data_shape, dtype):
    """Run the memory check on the dummy estimator that only performs
    table conversions, for dpctl and dpnp inputs.
    """
    if queue.sycl_device.is_gpu:
        # Measuring GPU memory needs both the sysman environment variable
        # and an available GPU device.
        sysman_ready = os.getenv(
            "ZES_ENABLE_SYSMAN"
        ) is not None and is_dpctl_device_available("gpu")
        if not sysman_ready:
            pytest.skip("SYCL device memory leak check requires the level zero sysman")

    layout_converter = ORDER_DICT[order]
    _kfold_function_template(
        DummyEstimatorWithTableConversions,
        dataframe,
        data_shape,
        queue,
        layout_converter,
        dtype,
    )