scikit-learn-intelex 2025.4.0 (scikit_learn_intelex-2025.4.0-py313-none-manylinux_2_28_x86_64.whl)

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as published in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic.

Files changed (282)
  1. daal4py/__init__.py +73 -0
  2. daal4py/__main__.py +58 -0
  3. daal4py/_daal4py.cpython-313-x86_64-linux-gnu.so +0 -0
  4. daal4py/doc/third-party-programs.txt +424 -0
  5. daal4py/mb/__init__.py +19 -0
  6. daal4py/mb/model_builders.py +377 -0
  7. daal4py/mpi_transceiver.cpython-313-x86_64-linux-gnu.so +0 -0
  8. daal4py/sklearn/__init__.py +40 -0
  9. daal4py/sklearn/_n_jobs_support.py +248 -0
  10. daal4py/sklearn/_utils.py +245 -0
  11. daal4py/sklearn/cluster/__init__.py +20 -0
  12. daal4py/sklearn/cluster/dbscan.py +165 -0
  13. daal4py/sklearn/cluster/k_means.py +597 -0
  14. daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
  15. daal4py/sklearn/decomposition/__init__.py +19 -0
  16. daal4py/sklearn/decomposition/_pca.py +524 -0
  17. daal4py/sklearn/ensemble/AdaBoostClassifier.py +196 -0
  18. daal4py/sklearn/ensemble/GBTDAAL.py +337 -0
  19. daal4py/sklearn/ensemble/__init__.py +27 -0
  20. daal4py/sklearn/ensemble/_forest.py +1397 -0
  21. daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
  22. daal4py/sklearn/linear_model/__init__.py +29 -0
  23. daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
  24. daal4py/sklearn/linear_model/_linear.py +272 -0
  25. daal4py/sklearn/linear_model/_ridge.py +325 -0
  26. daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
  27. daal4py/sklearn/linear_model/linear.py +17 -0
  28. daal4py/sklearn/linear_model/logistic_loss.py +195 -0
  29. daal4py/sklearn/linear_model/logistic_path.py +1026 -0
  30. daal4py/sklearn/linear_model/ridge.py +17 -0
  31. daal4py/sklearn/linear_model/tests/test_linear.py +208 -0
  32. daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
  33. daal4py/sklearn/manifold/__init__.py +19 -0
  34. daal4py/sklearn/manifold/_t_sne.py +405 -0
  35. daal4py/sklearn/metrics/__init__.py +20 -0
  36. daal4py/sklearn/metrics/_pairwise.py +236 -0
  37. daal4py/sklearn/metrics/_ranking.py +210 -0
  38. daal4py/sklearn/model_selection/__init__.py +19 -0
  39. daal4py/sklearn/model_selection/_split.py +309 -0
  40. daal4py/sklearn/model_selection/tests/test_split.py +56 -0
  41. daal4py/sklearn/monkeypatch/__init__.py +0 -0
  42. daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
  43. daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
  44. daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
  45. daal4py/sklearn/monkeypatch/tests/test_patching.py +90 -0
  46. daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +117 -0
  47. daal4py/sklearn/neighbors/__init__.py +21 -0
  48. daal4py/sklearn/neighbors/_base.py +503 -0
  49. daal4py/sklearn/neighbors/_classification.py +139 -0
  50. daal4py/sklearn/neighbors/_regression.py +74 -0
  51. daal4py/sklearn/neighbors/_unsupervised.py +55 -0
  52. daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
  53. daal4py/sklearn/svm/__init__.py +19 -0
  54. daal4py/sklearn/svm/svm.py +734 -0
  55. daal4py/sklearn/utils/__init__.py +21 -0
  56. daal4py/sklearn/utils/base.py +75 -0
  57. daal4py/sklearn/utils/tests/test_utils.py +51 -0
  58. daal4py/sklearn/utils/validation.py +696 -0
  59. onedal/__init__.py +83 -0
  60. onedal/_config.py +54 -0
  61. onedal/_device_offload.py +204 -0
  62. onedal/_onedal_py_dpc.cpython-313-x86_64-linux-gnu.so +0 -0
  63. onedal/_onedal_py_host.cpython-313-x86_64-linux-gnu.so +0 -0
  64. onedal/_onedal_py_spmd_dpc.cpython-313-x86_64-linux-gnu.so +0 -0
  65. onedal/basic_statistics/__init__.py +20 -0
  66. onedal/basic_statistics/basic_statistics.py +107 -0
  67. onedal/basic_statistics/incremental_basic_statistics.py +175 -0
  68. onedal/basic_statistics/tests/test_basic_statistics.py +242 -0
  69. onedal/basic_statistics/tests/test_incremental_basic_statistics.py +279 -0
  70. onedal/basic_statistics/tests/utils.py +50 -0
  71. onedal/cluster/__init__.py +27 -0
  72. onedal/cluster/dbscan.py +105 -0
  73. onedal/cluster/kmeans.py +557 -0
  74. onedal/cluster/kmeans_init.py +112 -0
  75. onedal/cluster/tests/test_dbscan.py +125 -0
  76. onedal/cluster/tests/test_kmeans.py +88 -0
  77. onedal/cluster/tests/test_kmeans_init.py +93 -0
  78. onedal/common/_base.py +38 -0
  79. onedal/common/_estimator_checks.py +47 -0
  80. onedal/common/_mixin.py +62 -0
  81. onedal/common/_policy.py +55 -0
  82. onedal/common/_spmd_policy.py +30 -0
  83. onedal/common/hyperparameters.py +125 -0
  84. onedal/common/tests/test_policy.py +76 -0
  85. onedal/common/tests/test_sycl.py +128 -0
  86. onedal/covariance/__init__.py +20 -0
  87. onedal/covariance/covariance.py +122 -0
  88. onedal/covariance/incremental_covariance.py +161 -0
  89. onedal/covariance/tests/test_covariance.py +50 -0
  90. onedal/covariance/tests/test_incremental_covariance.py +190 -0
  91. onedal/datatypes/__init__.py +19 -0
  92. onedal/datatypes/_data_conversion.py +121 -0
  93. onedal/datatypes/tests/common.py +126 -0
  94. onedal/datatypes/tests/test_data.py +475 -0
  95. onedal/decomposition/__init__.py +20 -0
  96. onedal/decomposition/incremental_pca.py +214 -0
  97. onedal/decomposition/pca.py +186 -0
  98. onedal/decomposition/tests/test_incremental_pca.py +285 -0
  99. onedal/ensemble/__init__.py +29 -0
  100. onedal/ensemble/forest.py +736 -0
  101. onedal/ensemble/tests/test_random_forest.py +97 -0
  102. onedal/linear_model/__init__.py +27 -0
  103. onedal/linear_model/incremental_linear_model.py +292 -0
  104. onedal/linear_model/linear_model.py +325 -0
  105. onedal/linear_model/logistic_regression.py +247 -0
  106. onedal/linear_model/tests/test_incremental_linear_regression.py +213 -0
  107. onedal/linear_model/tests/test_incremental_ridge_regression.py +171 -0
  108. onedal/linear_model/tests/test_linear_regression.py +259 -0
  109. onedal/linear_model/tests/test_logistic_regression.py +95 -0
  110. onedal/linear_model/tests/test_ridge.py +95 -0
  111. onedal/neighbors/__init__.py +19 -0
  112. onedal/neighbors/neighbors.py +763 -0
  113. onedal/neighbors/tests/test_knn_classification.py +49 -0
  114. onedal/primitives/__init__.py +27 -0
  115. onedal/primitives/get_tree.py +25 -0
  116. onedal/primitives/kernel_functions.py +152 -0
  117. onedal/primitives/tests/test_kernel_functions.py +159 -0
  118. onedal/spmd/__init__.py +25 -0
  119. onedal/spmd/_base.py +30 -0
  120. onedal/spmd/basic_statistics/__init__.py +20 -0
  121. onedal/spmd/basic_statistics/basic_statistics.py +30 -0
  122. onedal/spmd/basic_statistics/incremental_basic_statistics.py +71 -0
  123. onedal/spmd/cluster/__init__.py +28 -0
  124. onedal/spmd/cluster/dbscan.py +23 -0
  125. onedal/spmd/cluster/kmeans.py +56 -0
  126. onedal/spmd/covariance/__init__.py +20 -0
  127. onedal/spmd/covariance/covariance.py +26 -0
  128. onedal/spmd/covariance/incremental_covariance.py +83 -0
  129. onedal/spmd/decomposition/__init__.py +20 -0
  130. onedal/spmd/decomposition/incremental_pca.py +124 -0
  131. onedal/spmd/decomposition/pca.py +26 -0
  132. onedal/spmd/ensemble/__init__.py +19 -0
  133. onedal/spmd/ensemble/forest.py +28 -0
  134. onedal/spmd/linear_model/__init__.py +21 -0
  135. onedal/spmd/linear_model/incremental_linear_model.py +101 -0
  136. onedal/spmd/linear_model/linear_model.py +30 -0
  137. onedal/spmd/linear_model/logistic_regression.py +38 -0
  138. onedal/spmd/neighbors/__init__.py +19 -0
  139. onedal/spmd/neighbors/neighbors.py +75 -0
  140. onedal/svm/__init__.py +19 -0
  141. onedal/svm/svm.py +556 -0
  142. onedal/svm/tests/test_csr_svm.py +351 -0
  143. onedal/svm/tests/test_nusvc.py +204 -0
  144. onedal/svm/tests/test_nusvr.py +210 -0
  145. onedal/svm/tests/test_svc.py +176 -0
  146. onedal/svm/tests/test_svr.py +243 -0
  147. onedal/tests/test_common.py +57 -0
  148. onedal/tests/utils/_dataframes_support.py +162 -0
  149. onedal/tests/utils/_device_selection.py +102 -0
  150. onedal/utils/__init__.py +49 -0
  151. onedal/utils/_array_api.py +81 -0
  152. onedal/utils/_dpep_helpers.py +56 -0
  153. onedal/utils/tests/test_validation.py +142 -0
  154. onedal/utils/validation.py +464 -0
  155. scikit_learn_intelex-2025.4.0.dist-info/LICENSE.txt +202 -0
  156. scikit_learn_intelex-2025.4.0.dist-info/METADATA +190 -0
  157. scikit_learn_intelex-2025.4.0.dist-info/RECORD +282 -0
  158. scikit_learn_intelex-2025.4.0.dist-info/WHEEL +5 -0
  159. scikit_learn_intelex-2025.4.0.dist-info/top_level.txt +3 -0
  160. sklearnex/__init__.py +66 -0
  161. sklearnex/__main__.py +58 -0
  162. sklearnex/_config.py +116 -0
  163. sklearnex/_device_offload.py +126 -0
  164. sklearnex/_utils.py +177 -0
  165. sklearnex/basic_statistics/__init__.py +20 -0
  166. sklearnex/basic_statistics/basic_statistics.py +261 -0
  167. sklearnex/basic_statistics/incremental_basic_statistics.py +352 -0
  168. sklearnex/basic_statistics/tests/test_basic_statistics.py +405 -0
  169. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +455 -0
  170. sklearnex/cluster/__init__.py +20 -0
  171. sklearnex/cluster/dbscan.py +197 -0
  172. sklearnex/cluster/k_means.py +397 -0
  173. sklearnex/cluster/tests/test_dbscan.py +38 -0
  174. sklearnex/cluster/tests/test_kmeans.py +157 -0
  175. sklearnex/conftest.py +82 -0
  176. sklearnex/covariance/__init__.py +19 -0
  177. sklearnex/covariance/incremental_covariance.py +405 -0
  178. sklearnex/covariance/tests/test_incremental_covariance.py +287 -0
  179. sklearnex/decomposition/__init__.py +19 -0
  180. sklearnex/decomposition/pca.py +427 -0
  181. sklearnex/decomposition/tests/test_pca.py +58 -0
  182. sklearnex/dispatcher.py +534 -0
  183. sklearnex/doc/third-party-programs.txt +424 -0
  184. sklearnex/ensemble/__init__.py +29 -0
  185. sklearnex/ensemble/_forest.py +2029 -0
  186. sklearnex/ensemble/tests/test_forest.py +140 -0
  187. sklearnex/glob/__main__.py +72 -0
  188. sklearnex/glob/dispatcher.py +101 -0
  189. sklearnex/linear_model/__init__.py +32 -0
  190. sklearnex/linear_model/coordinate_descent.py +30 -0
  191. sklearnex/linear_model/incremental_linear.py +495 -0
  192. sklearnex/linear_model/incremental_ridge.py +432 -0
  193. sklearnex/linear_model/linear.py +346 -0
  194. sklearnex/linear_model/logistic_regression.py +415 -0
  195. sklearnex/linear_model/ridge.py +390 -0
  196. sklearnex/linear_model/tests/test_incremental_linear.py +267 -0
  197. sklearnex/linear_model/tests/test_incremental_ridge.py +214 -0
  198. sklearnex/linear_model/tests/test_linear.py +142 -0
  199. sklearnex/linear_model/tests/test_logreg.py +134 -0
  200. sklearnex/linear_model/tests/test_ridge.py +256 -0
  201. sklearnex/manifold/__init__.py +19 -0
  202. sklearnex/manifold/t_sne.py +26 -0
  203. sklearnex/manifold/tests/test_tsne.py +250 -0
  204. sklearnex/metrics/__init__.py +23 -0
  205. sklearnex/metrics/pairwise.py +22 -0
  206. sklearnex/metrics/ranking.py +20 -0
  207. sklearnex/metrics/tests/test_metrics.py +39 -0
  208. sklearnex/model_selection/__init__.py +21 -0
  209. sklearnex/model_selection/split.py +22 -0
  210. sklearnex/model_selection/tests/test_model_selection.py +34 -0
  211. sklearnex/neighbors/__init__.py +27 -0
  212. sklearnex/neighbors/_lof.py +236 -0
  213. sklearnex/neighbors/common.py +310 -0
  214. sklearnex/neighbors/knn_classification.py +231 -0
  215. sklearnex/neighbors/knn_regression.py +207 -0
  216. sklearnex/neighbors/knn_unsupervised.py +178 -0
  217. sklearnex/neighbors/tests/test_neighbors.py +82 -0
  218. sklearnex/preview/__init__.py +17 -0
  219. sklearnex/preview/covariance/__init__.py +19 -0
  220. sklearnex/preview/covariance/covariance.py +142 -0
  221. sklearnex/preview/covariance/tests/test_covariance.py +66 -0
  222. sklearnex/preview/decomposition/__init__.py +19 -0
  223. sklearnex/preview/decomposition/incremental_pca.py +244 -0
  224. sklearnex/preview/decomposition/tests/test_incremental_pca.py +336 -0
  225. sklearnex/spmd/__init__.py +25 -0
  226. sklearnex/spmd/basic_statistics/__init__.py +20 -0
  227. sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
  228. sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
  229. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  230. sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +306 -0
  231. sklearnex/spmd/cluster/__init__.py +30 -0
  232. sklearnex/spmd/cluster/dbscan.py +50 -0
  233. sklearnex/spmd/cluster/kmeans.py +21 -0
  234. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  235. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +173 -0
  236. sklearnex/spmd/covariance/__init__.py +20 -0
  237. sklearnex/spmd/covariance/covariance.py +21 -0
  238. sklearnex/spmd/covariance/incremental_covariance.py +37 -0
  239. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  240. sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
  241. sklearnex/spmd/decomposition/__init__.py +20 -0
  242. sklearnex/spmd/decomposition/incremental_pca.py +30 -0
  243. sklearnex/spmd/decomposition/pca.py +21 -0
  244. sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
  245. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  246. sklearnex/spmd/ensemble/__init__.py +19 -0
  247. sklearnex/spmd/ensemble/forest.py +71 -0
  248. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  249. sklearnex/spmd/linear_model/__init__.py +21 -0
  250. sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
  251. sklearnex/spmd/linear_model/linear_model.py +21 -0
  252. sklearnex/spmd/linear_model/logistic_regression.py +21 -0
  253. sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +331 -0
  254. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  255. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +162 -0
  256. sklearnex/spmd/neighbors/__init__.py +19 -0
  257. sklearnex/spmd/neighbors/neighbors.py +25 -0
  258. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  259. sklearnex/svm/__init__.py +29 -0
  260. sklearnex/svm/_common.py +339 -0
  261. sklearnex/svm/nusvc.py +371 -0
  262. sklearnex/svm/nusvr.py +170 -0
  263. sklearnex/svm/svc.py +399 -0
  264. sklearnex/svm/svr.py +167 -0
  265. sklearnex/svm/tests/test_svm.py +93 -0
  266. sklearnex/tests/test_common.py +491 -0
  267. sklearnex/tests/test_config.py +123 -0
  268. sklearnex/tests/test_hyperparameters.py +43 -0
  269. sklearnex/tests/test_memory_usage.py +347 -0
  270. sklearnex/tests/test_monkeypatch.py +269 -0
  271. sklearnex/tests/test_n_jobs_support.py +108 -0
  272. sklearnex/tests/test_parallel.py +48 -0
  273. sklearnex/tests/test_patching.py +377 -0
  274. sklearnex/tests/test_run_to_run_stability.py +326 -0
  275. sklearnex/tests/utils/__init__.py +48 -0
  276. sklearnex/tests/utils/base.py +436 -0
  277. sklearnex/tests/utils/spmd.py +198 -0
  278. sklearnex/utils/__init__.py +19 -0
  279. sklearnex/utils/_array_api.py +82 -0
  280. sklearnex/utils/parallel.py +59 -0
  281. sklearnex/utils/tests/test_validation.py +238 -0
  282. sklearnex/utils/validation.py +208 -0
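The listing covers the package's three import roots (daal4py, onedal, and sklearnex, matching the three entries in top_level.txt). For orientation, a minimal sketch of how the sklearnex layer is normally enabled; patch_sklearn and unpatch_sklearn are this package's documented entry points, but the snippet is illustrative and not taken from the diff below:

    # Illustrative sketch: enable the accelerated estimators shipped in this wheel,
    # then use scikit-learn as usual. Assumes scikit-learn itself is installed.
    from sklearnex import patch_sklearn, unpatch_sklearn

    patch_sklearn()  # swap supported scikit-learn estimators for sklearnex versions

    from sklearn.cluster import DBSCAN  # now dispatched through sklearnex/onedal

    labels = DBSCAN(eps=0.5).fit_predict([[0.0, 0.0], [0.1, 0.0], [5.0, 5.0]])

    unpatch_sklearn()  # restore the stock scikit-learn implementations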
onedal/basic_statistics/tests/test_basic_statistics.py
@@ -0,0 +1,242 @@
+ # ==============================================================================
+ # Copyright 2023 Intel Corporation
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+ import numpy as np
+ import pytest
+ from numpy.testing import assert_allclose
+ from scipy import sparse as sp
+
+ from daal4py.sklearn._utils import daal_check_version
+ from onedal.basic_statistics import BasicStatistics
+ from onedal.basic_statistics.tests.utils import options_and_tests
+ from onedal.tests.utils._device_selection import get_queues
+
+ options_and_tests_csr = [
+     ("sum", "sum", (5e-6, 1e-9)),
+     ("min", "min", (0, 0)),
+     ("max", "max", (0, 0)),
+     ("mean", "mean", (5e-6, 1e-9)),
+ ]
+
+
+ @pytest.mark.parametrize("queue", get_queues())
+ @pytest.mark.parametrize("result_option", options_and_tests.keys())
+ @pytest.mark.parametrize("row_count", [100, 1000])
+ @pytest.mark.parametrize("column_count", [10, 100])
+ @pytest.mark.parametrize("weighted", [True, False])
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ def test_single_option_on_random_data(
+     queue, result_option, row_count, column_count, weighted, dtype
+ ):
+     function, tols = options_and_tests[result_option]
+     fp32tol, fp64tol = tols
+     seed = 77
+     gen = np.random.default_rng(seed)
+     data = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+     data = data.astype(dtype=dtype)
+     if weighted:
+         weights = gen.uniform(low=-0.5, high=+1.0, size=row_count)
+         weights = weights.astype(dtype=dtype)
+     else:
+         weights = None
+
+     basicstat = BasicStatistics(result_options=result_option)
+
+     result = basicstat.fit(data, sample_weight=weights, queue=queue)
+
+     res = getattr(result, result_option)
+     if weighted:
+         weighted_data = np.diag(weights) @ data
+         gtr = function(weighted_data)
+     else:
+         gtr = function(data)
+
+     tol = fp32tol if res.dtype == np.float32 else fp64tol
+     assert_allclose(gtr, res, atol=tol)
+
+
+ @pytest.mark.parametrize("queue", get_queues())
+ @pytest.mark.parametrize("row_count", [100, 1000])
+ @pytest.mark.parametrize("column_count", [10, 100])
+ @pytest.mark.parametrize("weighted", [True, False])
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ def test_multiple_options_on_random_data(queue, row_count, column_count, weighted, dtype):
+     seed = 42
+     gen = np.random.default_rng(seed)
+     data = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+     data = data.astype(dtype=dtype)
+
+     if weighted:
+         weights = gen.uniform(low=-0.5, high=+1.0, size=row_count)
+         weights = weights.astype(dtype=dtype)
+     else:
+         weights = None
+
+     basicstat = BasicStatistics(result_options=["mean", "max", "sum"])
+
+     result = basicstat.fit(data, sample_weight=weights, queue=queue)
+
+     res_mean, res_max, res_sum = result.mean, result.max, result.sum
+     if weighted:
+         weighted_data = np.diag(weights) @ data
+         gtr_mean, gtr_max, gtr_sum = (
+             options_and_tests["mean"][0](weighted_data),
+             options_and_tests["max"][0](weighted_data),
+             options_and_tests["sum"][0](weighted_data),
+         )
+     else:
+         gtr_mean, gtr_max, gtr_sum = (
+             options_and_tests["mean"][0](data),
+             options_and_tests["max"][0](data),
+             options_and_tests["sum"][0](data),
+         )
+
+     tol = 5e-4 if res_mean.dtype == np.float32 else 1e-7
+     assert_allclose(gtr_mean, res_mean, atol=tol)
+     assert_allclose(gtr_max, res_max, atol=tol)
+     assert_allclose(gtr_sum, res_sum, atol=tol)
+
+
+ @pytest.mark.parametrize("queue", get_queues())
+ @pytest.mark.parametrize("row_count", [100, 1000])
+ @pytest.mark.parametrize("column_count", [10, 100])
+ @pytest.mark.parametrize("weighted", [True, False])
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ def test_all_option_on_random_data(queue, row_count, column_count, weighted, dtype):
+     seed = 77
+     gen = np.random.default_rng(seed)
+     data = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+     data = data.astype(dtype=dtype)
+     if weighted:
+         weights = gen.uniform(low=-0.5, high=+1.0, size=row_count)
+         weights = weights.astype(dtype=dtype)
+     else:
+         weights = None
+
+     basicstat = BasicStatistics(result_options="all")
+
+     result = basicstat.fit(data, sample_weight=weights, queue=queue)
+
+     if weighted:
+         weighted_data = np.diag(weights) @ data
+
+     for result_option in options_and_tests:
+         function, tols = options_and_tests[result_option]
+         fp32tol, fp64tol = tols
+         res = getattr(result, result_option)
+         if weighted:
+             gtr = function(weighted_data)
+         else:
+             gtr = function(data)
+         tol = fp32tol if res.dtype == np.float32 else fp64tol
+         assert_allclose(gtr, res, atol=tol)
+
+
+ @pytest.mark.parametrize("queue", get_queues())
+ @pytest.mark.parametrize("result_option", options_and_tests.keys())
+ @pytest.mark.parametrize("data_size", [100, 1000])
+ @pytest.mark.parametrize("weighted", [True, False])
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ def test_1d_input_on_random_data(queue, result_option, data_size, weighted, dtype):
+
+     function, tols = options_and_tests[result_option]
+     fp32tol, fp64tol = tols
+     seed = 77
+     gen = np.random.default_rng(seed)
+     data = gen.uniform(low=-0.3, high=+0.7, size=data_size)
+     data = data.astype(dtype=dtype)
+     if weighted:
+         weights = gen.uniform(low=-0.5, high=+1.0, size=data_size)
+         weights = weights.astype(dtype=dtype)
+     else:
+         weights = None
+
+     basicstat = BasicStatistics(result_options=result_option)
+
+     result = basicstat.fit(data, sample_weight=weights, queue=queue)
+
+     res = getattr(result, result_option)
+     if weighted:
+         weighted_data = weights * data
+         gtr = function(weighted_data)
+     else:
+         gtr = function(data)
+
+     tol = fp32tol if res.dtype == np.float32 else fp64tol
+     assert_allclose(gtr, res, atol=tol)
+
+
+ @pytest.mark.skipif(not hasattr(sp, "random_array"), reason="requires scipy>=1.12.0")
+ @pytest.mark.parametrize("queue", get_queues())
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ def test_basic_csr(queue, dtype):
+     seed = 42
+     row_count, column_count = 5000, 3008
+
+     gen = np.random.default_rng(seed)
+
+     data = sp.random_array(
+         shape=(row_count, column_count),
+         density=0.01,
+         format="csr",
+         dtype=dtype,
+         random_state=gen,
+     )
+
+     basicstat = BasicStatistics(result_options="mean")
+     result = basicstat.fit(data, queue=queue)
+
+     res_mean = result.mean
+     gtr_mean = data.mean(axis=0)
+     tol = 5e-6 if res_mean.dtype == np.float32 else 1e-9
+     assert_allclose(gtr_mean, res_mean, rtol=tol)
+
+
+ @pytest.mark.skipif(not hasattr(sp, "random_array"), reason="requires scipy>=1.12.0")
+ @pytest.mark.parametrize("queue", get_queues())
+ @pytest.mark.parametrize("option", options_and_tests_csr)
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ def test_options_csr(queue, option, dtype):
+     result_option, function, tols = option
+     fp32tol, fp64tol = tols
+
+     if result_option == "max":
+         pytest.skip("There is a bug in oneDAL's max computations on GPU")
+
+     seed = 42
+     row_count, column_count = 20046, 4007
+
+     gen = np.random.default_rng(seed)
+
+     data = sp.random_array(
+         shape=(row_count, column_count),
+         density=0.002,
+         format="csr",
+         dtype=dtype,
+         random_state=gen,
+     )
+
+     basicstat = BasicStatistics(result_options=result_option)
+     result = basicstat.fit(data, queue=queue)
+
+     res = getattr(result, result_option)
+     func = getattr(data, function)
+     gtr = func(axis=0)
+     if type(gtr).__name__ != "ndarray":
+         gtr = gtr.toarray().flatten()
+     tol = fp32tol if res.dtype == np.float32 else fp64tol
+
+     assert_allclose(gtr, res, rtol=tol)
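The file above exercises the low-level onedal.basic_statistics.BasicStatistics estimator directly rather than going through the sklearnex layer. A minimal sketch of the same call pattern outside of pytest, assuming a host (CPU) run where queue=None is accepted (an assumption; the tests obtain their queues from get_queues()):

    import numpy as np

    from onedal.basic_statistics import BasicStatistics

    # Random dense data, mirroring the tests above.
    gen = np.random.default_rng(0)
    data = gen.uniform(low=-0.3, high=0.7, size=(1000, 10))

    # result_options accepts a single name, a list of names, or "all", as the tests show.
    basicstat = BasicStatistics(result_options=["mean", "max", "sum"])
    result = basicstat.fit(data, sample_weight=None, queue=None)

    # Each requested option becomes an attribute holding one value per column.
    print(result.mean.shape, result.max.shape, result.sum.shape)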
onedal/basic_statistics/tests/test_incremental_basic_statistics.py
@@ -0,0 +1,279 @@
+ # ==============================================================================
+ # Copyright 2024 Intel Corporation
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+ import numpy as np
+ import pytest
+ from numpy.testing import assert_allclose
+
+ from onedal.basic_statistics import IncrementalBasicStatistics
+ from onedal.basic_statistics.tests.utils import options_and_tests
+ from onedal.datatypes import from_table
+ from onedal.tests.utils._device_selection import get_queues
+
+
+ @pytest.mark.parametrize("queue", get_queues())
+ @pytest.mark.parametrize("weighted", [True, False])
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ def test_multiple_options_on_gold_data(queue, weighted, dtype):
+     X = np.array([[0, 0], [1, 1]])
+     X = X.astype(dtype=dtype)
+     X_split = np.array_split(X, 2)
+     if weighted:
+         weights = np.array([1, 0.5])
+         weights = weights.astype(dtype=dtype)
+         weights_split = np.array_split(weights, 2)
+
+     incbs = IncrementalBasicStatistics()
+     for i in range(2):
+         if weighted:
+             incbs.partial_fit(X_split[i], weights_split[i], queue=queue)
+         else:
+             incbs.partial_fit(X_split[i], queue=queue)
+
+     result = incbs.finalize_fit()
+
+     if weighted:
+         expected_weighted_mean = np.array([0.25, 0.25])
+         expected_weighted_min = np.array([0, 0])
+         expected_weighted_max = np.array([0.5, 0.5])
+         assert_allclose(expected_weighted_mean, result.mean)
+         assert_allclose(expected_weighted_max, result.max)
+         assert_allclose(expected_weighted_min, result.min)
+     else:
+         expected_mean = np.array([0.5, 0.5])
+         expected_min = np.array([0, 0])
+         expected_max = np.array([1, 1])
+         assert_allclose(expected_mean, result.mean)
+         assert_allclose(expected_max, result.max)
+         assert_allclose(expected_min, result.min)
+
+
+ @pytest.mark.parametrize("queue", get_queues())
+ @pytest.mark.parametrize("num_batches", [2, 10])
+ @pytest.mark.parametrize("result_option", options_and_tests.keys())
+ @pytest.mark.parametrize("row_count", [100, 1000])
+ @pytest.mark.parametrize("column_count", [10, 100])
+ @pytest.mark.parametrize("weighted", [True, False])
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ def test_single_option_on_random_data(
+     queue, num_batches, result_option, row_count, column_count, weighted, dtype
+ ):
+     function, tols = options_and_tests[result_option]
+     fp32tol, fp64tol = tols
+     seed = 77
+     gen = np.random.default_rng(seed)
+     data = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+     data = data.astype(dtype=dtype)
+     data_split = np.array_split(data, num_batches)
+     if weighted:
+         weights = gen.uniform(low=-0.5, high=+1.0, size=row_count)
+         weights = weights.astype(dtype=dtype)
+         weights_split = np.array_split(weights, num_batches)
+     incbs = IncrementalBasicStatistics(result_options=result_option)
+
+     for i in range(num_batches):
+         if weighted:
+             incbs.partial_fit(data_split[i], weights_split[i], queue=queue)
+         else:
+             incbs.partial_fit(data_split[i], queue=queue)
+     result = incbs.finalize_fit()
+
+     res = getattr(result, result_option)
+     if weighted:
+         weighted_data = np.diag(weights) @ data
+         gtr = function(weighted_data)
+     else:
+         gtr = function(data)
+
+     tol = fp32tol if res.dtype == np.float32 else fp64tol
+     assert_allclose(gtr, res, atol=tol)
+
+
+ @pytest.mark.parametrize("queue", get_queues())
+ @pytest.mark.parametrize("num_batches", [2, 10])
+ @pytest.mark.parametrize("row_count", [100, 1000])
+ @pytest.mark.parametrize("column_count", [10, 100])
+ @pytest.mark.parametrize("weighted", [True, False])
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ def test_multiple_options_on_random_data(
+     queue, num_batches, row_count, column_count, weighted, dtype
+ ):
+     seed = 42
+     gen = np.random.default_rng(seed)
+     data = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+     data = data.astype(dtype=dtype)
+     data_split = np.array_split(data, num_batches)
+     if weighted:
+         weights = gen.uniform(low=-0.5, high=+1.0, size=row_count)
+         weights = weights.astype(dtype=dtype)
+         weights_split = np.array_split(weights, num_batches)
+     incbs = IncrementalBasicStatistics(result_options=["mean", "max", "sum"])
+
+     for i in range(num_batches):
+         if weighted:
+             incbs.partial_fit(data_split[i], weights_split[i], queue=queue)
+         else:
+             incbs.partial_fit(data_split[i], queue=queue)
+     result = incbs.finalize_fit()
+
+     res_mean, res_max, res_sum = result.mean, result.max, result.sum
+     if weighted:
+         weighted_data = np.diag(weights) @ data
+         gtr_mean, gtr_max, gtr_sum = (
+             options_and_tests["mean"][0](weighted_data),
+             options_and_tests["max"][0](weighted_data),
+             options_and_tests["sum"][0](weighted_data),
+         )
+     else:
+         gtr_mean, gtr_max, gtr_sum = (
+             options_and_tests["mean"][0](data),
+             options_and_tests["max"][0](data),
+             options_and_tests["sum"][0](data),
+         )
+
+     tol = 3e-4 if res_mean.dtype == np.float32 else 1e-7
+     assert_allclose(gtr_mean, res_mean, atol=tol)
+     assert_allclose(gtr_max, res_max, atol=tol)
+     assert_allclose(gtr_sum, res_sum, atol=tol)
+
+
+ @pytest.mark.parametrize("queue", get_queues())
+ @pytest.mark.parametrize("num_batches", [2, 10])
+ @pytest.mark.parametrize("row_count", [100, 1000])
+ @pytest.mark.parametrize("column_count", [10, 100])
+ @pytest.mark.parametrize("weighted", [True, False])
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ def test_all_option_on_random_data(
+     queue, num_batches, row_count, column_count, weighted, dtype
+ ):
+     seed = 77
+     gen = np.random.default_rng(seed)
+     data = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+     data = data.astype(dtype=dtype)
+     data_split = np.array_split(data, num_batches)
+     if weighted:
+         weights = gen.uniform(low=-0.5, high=+1.0, size=row_count)
+         weights = weights.astype(dtype=dtype)
+         weights_split = np.array_split(weights, num_batches)
+     incbs = IncrementalBasicStatistics(result_options="all")
+
+     for i in range(num_batches):
+         if weighted:
+             incbs.partial_fit(data_split[i], weights_split[i], queue=queue)
+         else:
+             incbs.partial_fit(data_split[i], queue=queue)
+     result = incbs.finalize_fit()
+
+     if weighted:
+         weighted_data = np.diag(weights) @ data
+
+     for result_option in options_and_tests:
+         function, tols = options_and_tests[result_option]
+         fp32tol, fp64tol = tols
+         res = getattr(result, result_option)
+         if weighted:
+             gtr = function(weighted_data)
+         else:
+             gtr = function(data)
+         tol = fp32tol if res.dtype == np.float32 else fp64tol
+         assert_allclose(gtr, res, atol=tol)
+
+
+ @pytest.mark.parametrize("queue", get_queues())
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ def test_incremental_estimator_pickle(queue, dtype):
+     import pickle
+
+     from onedal.basic_statistics import IncrementalBasicStatistics
+
+     incbs = IncrementalBasicStatistics()
+
+     # Check that estimator can be serialized without any data.
+     dump = pickle.dumps(incbs)
+     incbs_loaded = pickle.loads(dump)
+     seed = 77
+     gen = np.random.default_rng(seed)
+     X = gen.uniform(low=-0.3, high=+0.7, size=(10, 10))
+     X = X.astype(dtype)
+     X_split = np.array_split(X, 2)
+     incbs.partial_fit(X_split[0], queue=queue)
+     incbs_loaded.partial_fit(X_split[0], queue=queue)
+
+     assert incbs._need_to_finalize == True
+     assert incbs_loaded._need_to_finalize == True
+
+     # Check that estimator can be serialized after partial_fit call.
+     dump = pickle.dumps(incbs)
+     incbs_loaded = pickle.loads(dump)
+     assert incbs._need_to_finalize == False
+     # Finalize is called during serialization to make sure partial results are finalized correctly.
+     assert incbs_loaded._need_to_finalize == False
+
+     partial_n_rows = from_table(incbs._partial_result.partial_n_rows)
+     partial_n_rows_loaded = from_table(incbs_loaded._partial_result.partial_n_rows)
+     assert_allclose(partial_n_rows, partial_n_rows_loaded)
+
+     partial_min = from_table(incbs._partial_result.partial_min)
+     partial_min_loaded = from_table(incbs_loaded._partial_result.partial_min)
+     assert_allclose(partial_min, partial_min_loaded)
+
+     partial_max = from_table(incbs._partial_result.partial_max)
+     partial_max_loaded = from_table(incbs_loaded._partial_result.partial_max)
+     assert_allclose(partial_max, partial_max_loaded)
+
+     partial_sum = from_table(incbs._partial_result.partial_sum)
+     partial_sum_loaded = from_table(incbs_loaded._partial_result.partial_sum)
+     assert_allclose(partial_sum, partial_sum_loaded)
+
+     partial_sum_squares = from_table(incbs._partial_result.partial_sum_squares)
+     partial_sum_squares_loaded = from_table(
+         incbs_loaded._partial_result.partial_sum_squares
+     )
+     assert_allclose(partial_sum_squares, partial_sum_squares_loaded)
+
+     partial_sum_squares_centered = from_table(
+         incbs._partial_result.partial_sum_squares_centered
+     )
+     partial_sum_squares_centered_loaded = from_table(
+         incbs_loaded._partial_result.partial_sum_squares_centered
+     )
+     assert_allclose(partial_sum_squares_centered, partial_sum_squares_centered_loaded)
+
+     incbs.partial_fit(X_split[1], queue=queue)
+     incbs_loaded.partial_fit(X_split[1], queue=queue)
+     assert incbs._need_to_finalize == True
+     assert incbs_loaded._need_to_finalize == True
+
+     dump = pickle.dumps(incbs_loaded)
+     incbs_loaded = pickle.loads(dump)
+
+     assert incbs._need_to_finalize == True
+     assert incbs_loaded._need_to_finalize == False
+
+     incbs.finalize_fit()
+     incbs_loaded.finalize_fit()
+
+     # Check that finalized estimator can be serialized.
+     dump = pickle.dumps(incbs_loaded)
+     incbs_loaded = pickle.loads(dump)
+
+     for result_option in options_and_tests:
+         _, tols = options_and_tests[result_option]
+         fp32tol, fp64tol = tols
+         res = getattr(incbs, result_option)
+         res_loaded = getattr(incbs_loaded, result_option)
+         tol = fp32tol if res.dtype == np.float32 else fp64tol
+         assert_allclose(res, res_loaded, atol=tol)
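The incremental variant accumulates partial results across batches and only produces final statistics when finalize_fit is called; the pickle test above also relies on serialization finalizing any pending partial results, per the in-code comment. A minimal sketch of the partial_fit/finalize_fit flow, under the same assumption that queue=None selects host execution:

    import numpy as np

    from onedal.basic_statistics import IncrementalBasicStatistics

    gen = np.random.default_rng(0)
    data = gen.uniform(low=-0.3, high=0.7, size=(1000, 10))

    # Feed the data in batches, then finalize, as in the tests above.
    incbs = IncrementalBasicStatistics(result_options=["mean", "min", "max"])
    for batch in np.array_split(data, 4):
        incbs.partial_fit(batch, queue=None)
    result = incbs.finalize_fit()

    print(result.mean, result.min, result.max)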
onedal/basic_statistics/tests/utils.py
@@ -0,0 +1,50 @@
+ # ===============================================================================
+ # Copyright 2024 Intel Corporation
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ===============================================================================
+
+ import numpy as np
+
+
+ # Compute unbiased variation for the columns of array-like X
+ def variation(X):
+     X_mean = np.mean(X, axis=0)
+     if np.all(X_mean):
+         # Avoid division by zero
+         return np.std(X, axis=0, ddof=1) / X_mean
+     else:
+         return np.array(
+             [
+                 x / y if y != 0 else np.nan
+                 for x, y in zip(np.std(X, axis=0, ddof=1), X_mean)
+             ]
+         )
+
+
+ options_and_tests = {
+     "sum": (lambda X: np.sum(X, axis=0), (5e-4, 1e-7)),
+     "min": (lambda X: np.min(X, axis=0), (1e-7, 1e-7)),
+     "max": (lambda X: np.max(X, axis=0), (1e-7, 1e-7)),
+     "mean": (lambda X: np.mean(X, axis=0), (5e-7, 1e-7)),
+     # sklearnex computes unbiased variance and standard deviation that is why ddof=1
+     "variance": (lambda X: np.var(X, axis=0, ddof=1), (2e-4, 1e-7)),
+     "variation": (lambda X: variation(X), (1e-3, 1e-6)),
+     "sum_squares": (lambda X: np.sum(np.square(X), axis=0), (2e-4, 1e-7)),
+     "sum_squares_centered": (
+         lambda X: np.sum(np.square(X - np.mean(X, axis=0)), axis=0),
+         (1e-3, 1e-7),
+     ),
+     "standard_deviation": (lambda X: np.std(X, axis=0, ddof=1), (2e-3, 1e-7)),
+     "second_order_raw_moment": (lambda X: np.mean(np.square(X), axis=0), (1e-6, 1e-7)),
+ }
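Each entry in options_and_tests maps a result option to a NumPy reference function and a pair of (float32, float64) absolute tolerances, which the test files above unpack as function, (fp32tol, fp64tol). A small worked lookup:

    import numpy as np

    from onedal.basic_statistics.tests.utils import options_and_tests

    X = np.array([[1.0, 2.0], [3.0, 6.0]])

    # Reference function and tolerances for the unbiased (ddof=1) column variance.
    ref_fn, (fp32tol, fp64tol) = options_and_tests["variance"]
    print(ref_fn(X))         # [2. 8.]
    print(fp32tol, fp64tol)  # 0.0002 1e-07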
onedal/cluster/__init__.py
@@ -0,0 +1,27 @@
+ # ==============================================================================
+ # Copyright 2023 Intel Corporation
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+ from daal4py.sklearn._utils import daal_check_version
+
+ from .dbscan import DBSCAN
+ from .kmeans import KMeans, k_means
+
+ __all__ = ["DBSCAN", "KMeans", "k_means"]
+
+ if daal_check_version((2023, "P", 200)):
+     from .kmeans_init import KMeansInit, kmeans_plusplus
+
+     __all__ += ["KMeansInit", "kmeans_plusplus"]
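The final hunk shows that KMeansInit and kmeans_plusplus are exported only when daal_check_version((2023, "P", 200)) reports a sufficiently recent oneDAL backend. Code that needs kmeans_plusplus unconditionally can guard its import the same way; the fallback to stock scikit-learn below is an illustrative choice, not something prescribed by the package:

    # Sketch: reuse the same version gate as onedal/cluster/__init__.py above.
    from daal4py.sklearn._utils import daal_check_version

    if daal_check_version((2023, "P", 200)):
        from onedal.cluster import kmeans_plusplus
    else:
        from sklearn.cluster import kmeans_plusplus  # stock scikit-learn fallback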