scikit-learn-intelex 2025.4.0__py313-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (282) hide show
  1. daal4py/__init__.py +73 -0
  2. daal4py/__main__.py +58 -0
  3. daal4py/_daal4py.cpython-313-x86_64-linux-gnu.so +0 -0
  4. daal4py/doc/third-party-programs.txt +424 -0
  5. daal4py/mb/__init__.py +19 -0
  6. daal4py/mb/model_builders.py +377 -0
  7. daal4py/mpi_transceiver.cpython-313-x86_64-linux-gnu.so +0 -0
  8. daal4py/sklearn/__init__.py +40 -0
  9. daal4py/sklearn/_n_jobs_support.py +248 -0
  10. daal4py/sklearn/_utils.py +245 -0
  11. daal4py/sklearn/cluster/__init__.py +20 -0
  12. daal4py/sklearn/cluster/dbscan.py +165 -0
  13. daal4py/sklearn/cluster/k_means.py +597 -0
  14. daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
  15. daal4py/sklearn/decomposition/__init__.py +19 -0
  16. daal4py/sklearn/decomposition/_pca.py +524 -0
  17. daal4py/sklearn/ensemble/AdaBoostClassifier.py +196 -0
  18. daal4py/sklearn/ensemble/GBTDAAL.py +337 -0
  19. daal4py/sklearn/ensemble/__init__.py +27 -0
  20. daal4py/sklearn/ensemble/_forest.py +1397 -0
  21. daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
  22. daal4py/sklearn/linear_model/__init__.py +29 -0
  23. daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
  24. daal4py/sklearn/linear_model/_linear.py +272 -0
  25. daal4py/sklearn/linear_model/_ridge.py +325 -0
  26. daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
  27. daal4py/sklearn/linear_model/linear.py +17 -0
  28. daal4py/sklearn/linear_model/logistic_loss.py +195 -0
  29. daal4py/sklearn/linear_model/logistic_path.py +1026 -0
  30. daal4py/sklearn/linear_model/ridge.py +17 -0
  31. daal4py/sklearn/linear_model/tests/test_linear.py +208 -0
  32. daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
  33. daal4py/sklearn/manifold/__init__.py +19 -0
  34. daal4py/sklearn/manifold/_t_sne.py +405 -0
  35. daal4py/sklearn/metrics/__init__.py +20 -0
  36. daal4py/sklearn/metrics/_pairwise.py +236 -0
  37. daal4py/sklearn/metrics/_ranking.py +210 -0
  38. daal4py/sklearn/model_selection/__init__.py +19 -0
  39. daal4py/sklearn/model_selection/_split.py +309 -0
  40. daal4py/sklearn/model_selection/tests/test_split.py +56 -0
  41. daal4py/sklearn/monkeypatch/__init__.py +0 -0
  42. daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
  43. daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
  44. daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
  45. daal4py/sklearn/monkeypatch/tests/test_patching.py +90 -0
  46. daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +117 -0
  47. daal4py/sklearn/neighbors/__init__.py +21 -0
  48. daal4py/sklearn/neighbors/_base.py +503 -0
  49. daal4py/sklearn/neighbors/_classification.py +139 -0
  50. daal4py/sklearn/neighbors/_regression.py +74 -0
  51. daal4py/sklearn/neighbors/_unsupervised.py +55 -0
  52. daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
  53. daal4py/sklearn/svm/__init__.py +19 -0
  54. daal4py/sklearn/svm/svm.py +734 -0
  55. daal4py/sklearn/utils/__init__.py +21 -0
  56. daal4py/sklearn/utils/base.py +75 -0
  57. daal4py/sklearn/utils/tests/test_utils.py +51 -0
  58. daal4py/sklearn/utils/validation.py +696 -0
  59. onedal/__init__.py +83 -0
  60. onedal/_config.py +54 -0
  61. onedal/_device_offload.py +204 -0
  62. onedal/_onedal_py_dpc.cpython-313-x86_64-linux-gnu.so +0 -0
  63. onedal/_onedal_py_host.cpython-313-x86_64-linux-gnu.so +0 -0
  64. onedal/_onedal_py_spmd_dpc.cpython-313-x86_64-linux-gnu.so +0 -0
  65. onedal/basic_statistics/__init__.py +20 -0
  66. onedal/basic_statistics/basic_statistics.py +107 -0
  67. onedal/basic_statistics/incremental_basic_statistics.py +175 -0
  68. onedal/basic_statistics/tests/test_basic_statistics.py +242 -0
  69. onedal/basic_statistics/tests/test_incremental_basic_statistics.py +279 -0
  70. onedal/basic_statistics/tests/utils.py +50 -0
  71. onedal/cluster/__init__.py +27 -0
  72. onedal/cluster/dbscan.py +105 -0
  73. onedal/cluster/kmeans.py +557 -0
  74. onedal/cluster/kmeans_init.py +112 -0
  75. onedal/cluster/tests/test_dbscan.py +125 -0
  76. onedal/cluster/tests/test_kmeans.py +88 -0
  77. onedal/cluster/tests/test_kmeans_init.py +93 -0
  78. onedal/common/_base.py +38 -0
  79. onedal/common/_estimator_checks.py +47 -0
  80. onedal/common/_mixin.py +62 -0
  81. onedal/common/_policy.py +55 -0
  82. onedal/common/_spmd_policy.py +30 -0
  83. onedal/common/hyperparameters.py +125 -0
  84. onedal/common/tests/test_policy.py +76 -0
  85. onedal/common/tests/test_sycl.py +128 -0
  86. onedal/covariance/__init__.py +20 -0
  87. onedal/covariance/covariance.py +122 -0
  88. onedal/covariance/incremental_covariance.py +161 -0
  89. onedal/covariance/tests/test_covariance.py +50 -0
  90. onedal/covariance/tests/test_incremental_covariance.py +190 -0
  91. onedal/datatypes/__init__.py +19 -0
  92. onedal/datatypes/_data_conversion.py +121 -0
  93. onedal/datatypes/tests/common.py +126 -0
  94. onedal/datatypes/tests/test_data.py +475 -0
  95. onedal/decomposition/__init__.py +20 -0
  96. onedal/decomposition/incremental_pca.py +214 -0
  97. onedal/decomposition/pca.py +186 -0
  98. onedal/decomposition/tests/test_incremental_pca.py +285 -0
  99. onedal/ensemble/__init__.py +29 -0
  100. onedal/ensemble/forest.py +736 -0
  101. onedal/ensemble/tests/test_random_forest.py +97 -0
  102. onedal/linear_model/__init__.py +27 -0
  103. onedal/linear_model/incremental_linear_model.py +292 -0
  104. onedal/linear_model/linear_model.py +325 -0
  105. onedal/linear_model/logistic_regression.py +247 -0
  106. onedal/linear_model/tests/test_incremental_linear_regression.py +213 -0
  107. onedal/linear_model/tests/test_incremental_ridge_regression.py +171 -0
  108. onedal/linear_model/tests/test_linear_regression.py +259 -0
  109. onedal/linear_model/tests/test_logistic_regression.py +95 -0
  110. onedal/linear_model/tests/test_ridge.py +95 -0
  111. onedal/neighbors/__init__.py +19 -0
  112. onedal/neighbors/neighbors.py +763 -0
  113. onedal/neighbors/tests/test_knn_classification.py +49 -0
  114. onedal/primitives/__init__.py +27 -0
  115. onedal/primitives/get_tree.py +25 -0
  116. onedal/primitives/kernel_functions.py +152 -0
  117. onedal/primitives/tests/test_kernel_functions.py +159 -0
  118. onedal/spmd/__init__.py +25 -0
  119. onedal/spmd/_base.py +30 -0
  120. onedal/spmd/basic_statistics/__init__.py +20 -0
  121. onedal/spmd/basic_statistics/basic_statistics.py +30 -0
  122. onedal/spmd/basic_statistics/incremental_basic_statistics.py +71 -0
  123. onedal/spmd/cluster/__init__.py +28 -0
  124. onedal/spmd/cluster/dbscan.py +23 -0
  125. onedal/spmd/cluster/kmeans.py +56 -0
  126. onedal/spmd/covariance/__init__.py +20 -0
  127. onedal/spmd/covariance/covariance.py +26 -0
  128. onedal/spmd/covariance/incremental_covariance.py +83 -0
  129. onedal/spmd/decomposition/__init__.py +20 -0
  130. onedal/spmd/decomposition/incremental_pca.py +124 -0
  131. onedal/spmd/decomposition/pca.py +26 -0
  132. onedal/spmd/ensemble/__init__.py +19 -0
  133. onedal/spmd/ensemble/forest.py +28 -0
  134. onedal/spmd/linear_model/__init__.py +21 -0
  135. onedal/spmd/linear_model/incremental_linear_model.py +101 -0
  136. onedal/spmd/linear_model/linear_model.py +30 -0
  137. onedal/spmd/linear_model/logistic_regression.py +38 -0
  138. onedal/spmd/neighbors/__init__.py +19 -0
  139. onedal/spmd/neighbors/neighbors.py +75 -0
  140. onedal/svm/__init__.py +19 -0
  141. onedal/svm/svm.py +556 -0
  142. onedal/svm/tests/test_csr_svm.py +351 -0
  143. onedal/svm/tests/test_nusvc.py +204 -0
  144. onedal/svm/tests/test_nusvr.py +210 -0
  145. onedal/svm/tests/test_svc.py +176 -0
  146. onedal/svm/tests/test_svr.py +243 -0
  147. onedal/tests/test_common.py +57 -0
  148. onedal/tests/utils/_dataframes_support.py +162 -0
  149. onedal/tests/utils/_device_selection.py +102 -0
  150. onedal/utils/__init__.py +49 -0
  151. onedal/utils/_array_api.py +81 -0
  152. onedal/utils/_dpep_helpers.py +56 -0
  153. onedal/utils/tests/test_validation.py +142 -0
  154. onedal/utils/validation.py +464 -0
  155. scikit_learn_intelex-2025.4.0.dist-info/LICENSE.txt +202 -0
  156. scikit_learn_intelex-2025.4.0.dist-info/METADATA +190 -0
  157. scikit_learn_intelex-2025.4.0.dist-info/RECORD +282 -0
  158. scikit_learn_intelex-2025.4.0.dist-info/WHEEL +5 -0
  159. scikit_learn_intelex-2025.4.0.dist-info/top_level.txt +3 -0
  160. sklearnex/__init__.py +66 -0
  161. sklearnex/__main__.py +58 -0
  162. sklearnex/_config.py +116 -0
  163. sklearnex/_device_offload.py +126 -0
  164. sklearnex/_utils.py +177 -0
  165. sklearnex/basic_statistics/__init__.py +20 -0
  166. sklearnex/basic_statistics/basic_statistics.py +261 -0
  167. sklearnex/basic_statistics/incremental_basic_statistics.py +352 -0
  168. sklearnex/basic_statistics/tests/test_basic_statistics.py +405 -0
  169. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +455 -0
  170. sklearnex/cluster/__init__.py +20 -0
  171. sklearnex/cluster/dbscan.py +197 -0
  172. sklearnex/cluster/k_means.py +397 -0
  173. sklearnex/cluster/tests/test_dbscan.py +38 -0
  174. sklearnex/cluster/tests/test_kmeans.py +157 -0
  175. sklearnex/conftest.py +82 -0
  176. sklearnex/covariance/__init__.py +19 -0
  177. sklearnex/covariance/incremental_covariance.py +405 -0
  178. sklearnex/covariance/tests/test_incremental_covariance.py +287 -0
  179. sklearnex/decomposition/__init__.py +19 -0
  180. sklearnex/decomposition/pca.py +427 -0
  181. sklearnex/decomposition/tests/test_pca.py +58 -0
  182. sklearnex/dispatcher.py +534 -0
  183. sklearnex/doc/third-party-programs.txt +424 -0
  184. sklearnex/ensemble/__init__.py +29 -0
  185. sklearnex/ensemble/_forest.py +2029 -0
  186. sklearnex/ensemble/tests/test_forest.py +140 -0
  187. sklearnex/glob/__main__.py +72 -0
  188. sklearnex/glob/dispatcher.py +101 -0
  189. sklearnex/linear_model/__init__.py +32 -0
  190. sklearnex/linear_model/coordinate_descent.py +30 -0
  191. sklearnex/linear_model/incremental_linear.py +495 -0
  192. sklearnex/linear_model/incremental_ridge.py +432 -0
  193. sklearnex/linear_model/linear.py +346 -0
  194. sklearnex/linear_model/logistic_regression.py +415 -0
  195. sklearnex/linear_model/ridge.py +390 -0
  196. sklearnex/linear_model/tests/test_incremental_linear.py +267 -0
  197. sklearnex/linear_model/tests/test_incremental_ridge.py +214 -0
  198. sklearnex/linear_model/tests/test_linear.py +142 -0
  199. sklearnex/linear_model/tests/test_logreg.py +134 -0
  200. sklearnex/linear_model/tests/test_ridge.py +256 -0
  201. sklearnex/manifold/__init__.py +19 -0
  202. sklearnex/manifold/t_sne.py +26 -0
  203. sklearnex/manifold/tests/test_tsne.py +250 -0
  204. sklearnex/metrics/__init__.py +23 -0
  205. sklearnex/metrics/pairwise.py +22 -0
  206. sklearnex/metrics/ranking.py +20 -0
  207. sklearnex/metrics/tests/test_metrics.py +39 -0
  208. sklearnex/model_selection/__init__.py +21 -0
  209. sklearnex/model_selection/split.py +22 -0
  210. sklearnex/model_selection/tests/test_model_selection.py +34 -0
  211. sklearnex/neighbors/__init__.py +27 -0
  212. sklearnex/neighbors/_lof.py +236 -0
  213. sklearnex/neighbors/common.py +310 -0
  214. sklearnex/neighbors/knn_classification.py +231 -0
  215. sklearnex/neighbors/knn_regression.py +207 -0
  216. sklearnex/neighbors/knn_unsupervised.py +178 -0
  217. sklearnex/neighbors/tests/test_neighbors.py +82 -0
  218. sklearnex/preview/__init__.py +17 -0
  219. sklearnex/preview/covariance/__init__.py +19 -0
  220. sklearnex/preview/covariance/covariance.py +142 -0
  221. sklearnex/preview/covariance/tests/test_covariance.py +66 -0
  222. sklearnex/preview/decomposition/__init__.py +19 -0
  223. sklearnex/preview/decomposition/incremental_pca.py +244 -0
  224. sklearnex/preview/decomposition/tests/test_incremental_pca.py +336 -0
  225. sklearnex/spmd/__init__.py +25 -0
  226. sklearnex/spmd/basic_statistics/__init__.py +20 -0
  227. sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
  228. sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
  229. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  230. sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +306 -0
  231. sklearnex/spmd/cluster/__init__.py +30 -0
  232. sklearnex/spmd/cluster/dbscan.py +50 -0
  233. sklearnex/spmd/cluster/kmeans.py +21 -0
  234. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  235. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +173 -0
  236. sklearnex/spmd/covariance/__init__.py +20 -0
  237. sklearnex/spmd/covariance/covariance.py +21 -0
  238. sklearnex/spmd/covariance/incremental_covariance.py +37 -0
  239. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  240. sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
  241. sklearnex/spmd/decomposition/__init__.py +20 -0
  242. sklearnex/spmd/decomposition/incremental_pca.py +30 -0
  243. sklearnex/spmd/decomposition/pca.py +21 -0
  244. sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
  245. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  246. sklearnex/spmd/ensemble/__init__.py +19 -0
  247. sklearnex/spmd/ensemble/forest.py +71 -0
  248. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  249. sklearnex/spmd/linear_model/__init__.py +21 -0
  250. sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
  251. sklearnex/spmd/linear_model/linear_model.py +21 -0
  252. sklearnex/spmd/linear_model/logistic_regression.py +21 -0
  253. sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +331 -0
  254. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  255. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +162 -0
  256. sklearnex/spmd/neighbors/__init__.py +19 -0
  257. sklearnex/spmd/neighbors/neighbors.py +25 -0
  258. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  259. sklearnex/svm/__init__.py +29 -0
  260. sklearnex/svm/_common.py +339 -0
  261. sklearnex/svm/nusvc.py +371 -0
  262. sklearnex/svm/nusvr.py +170 -0
  263. sklearnex/svm/svc.py +399 -0
  264. sklearnex/svm/svr.py +167 -0
  265. sklearnex/svm/tests/test_svm.py +93 -0
  266. sklearnex/tests/test_common.py +491 -0
  267. sklearnex/tests/test_config.py +123 -0
  268. sklearnex/tests/test_hyperparameters.py +43 -0
  269. sklearnex/tests/test_memory_usage.py +347 -0
  270. sklearnex/tests/test_monkeypatch.py +269 -0
  271. sklearnex/tests/test_n_jobs_support.py +108 -0
  272. sklearnex/tests/test_parallel.py +48 -0
  273. sklearnex/tests/test_patching.py +377 -0
  274. sklearnex/tests/test_run_to_run_stability.py +326 -0
  275. sklearnex/tests/utils/__init__.py +48 -0
  276. sklearnex/tests/utils/base.py +436 -0
  277. sklearnex/tests/utils/spmd.py +198 -0
  278. sklearnex/utils/__init__.py +19 -0
  279. sklearnex/utils/_array_api.py +82 -0
  280. sklearnex/utils/parallel.py +59 -0
  281. sklearnex/utils/tests/test_validation.py +238 -0
  282. sklearnex/utils/validation.py +208 -0
@@ -0,0 +1,405 @@
1
+ # ===============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ===============================================================================
16
+
17
+ import numbers
18
+ import warnings
19
+
20
+ import numpy as np
21
+ from scipy import linalg
22
+ from sklearn.base import BaseEstimator, clone
23
+ from sklearn.covariance import EmpiricalCovariance as _sklearn_EmpiricalCovariance
24
+ from sklearn.covariance import log_likelihood
25
+ from sklearn.utils import check_array, gen_batches
26
+ from sklearn.utils.validation import _num_features, check_is_fitted
27
+
28
+ from daal4py.sklearn._n_jobs_support import control_n_jobs
29
+ from daal4py.sklearn._utils import daal_check_version, sklearn_check_version
30
+ from onedal.covariance import (
31
+ IncrementalEmpiricalCovariance as onedal_IncrementalEmpiricalCovariance,
32
+ )
33
+ from sklearnex import config_context
34
+
35
+ from .._device_offload import dispatch, wrap_output_data
36
+ from .._utils import IntelEstimator, PatchingConditionsChain, register_hyperparameters
37
+ from ..metrics import pairwise_distances
38
+ from ..utils._array_api import get_namespace
39
+
40
+ if sklearn_check_version("1.2"):
41
+ from sklearn.utils._param_validation import Interval
42
+
43
+ if sklearn_check_version("1.6"):
44
+ from sklearn.utils.validation import validate_data
45
+ else:
46
+ validate_data = BaseEstimator._validate_data
47
+
48
+
49
@control_n_jobs(decorated_methods=["partial_fit", "fit", "_onedal_finalize_fit"])
class IncrementalEmpiricalCovariance(IntelEstimator, BaseEstimator):
    """
    Maximum likelihood covariance estimator that allows for the estimation when the data are split into
    batches. The user can use the ``partial_fit`` method to provide a single batch of data or use the ``fit`` method to provide
    the entire dataset.

    Parameters
    ----------
    store_precision : bool, default=False
        Specifies if the estimated precision is stored.

    assume_centered : bool, default=False
        If True, data are not centered before computation.
        Useful when working with data whose mean is almost, but not exactly
        zero.
        If False (default), data are centered before computation.

    batch_size : int, default=None
        The number of samples to use for each batch. Only used when calling
        ``fit``. If ``batch_size`` is ``None``, then ``batch_size``
        is inferred from the data and set to ``5 * n_features``, to provide a
        balance between approximation accuracy and memory consumption.

    copy : bool, default=True
        If False, X will be overwritten. ``copy=False`` can be used to
        save memory but is unsafe for general use.

    Attributes
    ----------
    location_ : ndarray of shape (n_features,)
        Estimated location, i.e. the estimated mean.

    covariance_ : ndarray of shape (n_features, n_features)
        Estimated covariance matrix.

    precision_ : ndarray of shape (n_features, n_features) or None
        Pseudo-inverse of ``covariance_`` computed when the fit is finalized.
        ``None`` when ``store_precision`` is False.

    n_samples_seen_ : int
        The number of samples processed by the estimator. Will be reset on
        new calls to ``fit``, but increments across ``partial_fit`` calls.

    batch_size_ : int
        Inferred batch size from ``batch_size``.

    n_features_in_ : int
        Number of features seen during ``fit`` or ``partial_fit``.

    Note
    ----
    Serializing instances of this class will trigger a forced finalization of calculations.
    Since finalize_fit can't be dispatched without directly provided queue
    and the dispatching policy can't be serialized, the computation is finalized
    during serialization and the policy is not saved in serialized data.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearnex.covariance import IncrementalEmpiricalCovariance
    >>> inccov = IncrementalEmpiricalCovariance(batch_size=1)
    >>> X = np.array([[1, 2], [3, 4]])
    >>> inccov.partial_fit(X[:1])
    >>> inccov.partial_fit(X[1:])
    >>> inccov.covariance_
    np.array([[1., 1.],[1., 1.]])
    >>> inccov.location_
    np.array([2., 3.])
    >>> inccov.fit(X)
    >>> inccov.covariance_
    np.array([[1., 1.],[1., 1.]])
    >>> inccov.location_
    np.array([2., 3.])
    """

    # Backend estimator class; stored as a staticmethod so that attribute
    # access through an instance does not bind ``self`` to the constructor.
    _onedal_incremental_covariance = staticmethod(onedal_IncrementalEmpiricalCovariance)

    if sklearn_check_version("1.2"):
        _parameter_constraints: dict = {
            "store_precision": ["boolean"],
            "assume_centered": ["boolean"],
            "batch_size": [Interval(numbers.Integral, 1, None, closed="left"), None],
            "copy": ["boolean"],
        }

    error_norm = wrap_output_data(_sklearn_EmpiricalCovariance.error_norm)

    def __init__(
        self, *, store_precision=False, assume_centered=False, batch_size=None, copy=True
    ):
        self.assume_centered = assume_centered
        self.store_precision = store_precision
        self.batch_size = batch_size
        self.copy = copy

    def get_precision(self):
        # ``precision_`` is only (re)computed in ``_onedal_finalize_fit``, while
        # ``partial_fit`` merely flags that a finalization is pending. Without
        # this step, ``store_precision=True`` would hand back a stale matrix
        # (or raise AttributeError if no finalization ever happened) when the
        # precision is requested right after ``partial_fit``.
        if getattr(self, "_need_to_finalize", False):
            self._onedal_finalize_fit()
        return _sklearn_EmpiricalCovariance.get_precision(self)

    def _onedal_supported(self, method_name, *data):
        """Return the patching status for ``method_name``.

        No conditions are added to the chain here; it is returned as created.
        """
        patching_status = PatchingConditionsChain(
            f"sklearn.covariance.{self.__class__.__name__}.{method_name}"
        )
        return patching_status

    def _onedal_finalize_fit(self, queue=None):
        """Finalize the accumulated partial results on the backend estimator.

        Clears the pending-finalization flag and refreshes ``precision_``
        according to ``store_precision``.
        """
        assert hasattr(self, "_onedal_estimator")
        self._onedal_estimator.finalize_fit(queue=queue)
        self._need_to_finalize = False

        if not daal_check_version((2024, "P", 400)) and self.assume_centered:
            # For oneDAL older than 2024.P.400: add the outer product of the
            # returned location back onto the covariance and report a zero
            # location (presumably because those backend versions always
            # center the data -- TODO confirm).
            location = self._onedal_estimator.location_[None, :]
            self._onedal_estimator.covariance_ += np.dot(location.T, location)
            self._onedal_estimator.location_ = np.zeros_like(np.squeeze(location))
        if self.store_precision:
            self.precision_ = linalg.pinvh(
                self._onedal_estimator.covariance_, check_finite=False
            )
        else:
            self.precision_ = None

    @property
    def covariance_(self):
        """Covariance from the backend estimator, finalizing first if needed."""
        if hasattr(self, "_onedal_estimator"):
            if self._need_to_finalize:
                self._onedal_finalize_fit()
            return self._onedal_estimator.covariance_
        else:
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute 'covariance_'"
            )

    @property
    def location_(self):
        """Location from the backend estimator, finalizing first if needed."""
        if hasattr(self, "_onedal_estimator"):
            if self._need_to_finalize:
                self._onedal_finalize_fit()
            return self._onedal_estimator.location_
        else:
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute 'location_'"
            )

    def _onedal_partial_fit(self, X, queue=None, check_input=True):
        """Feed one batch ``X`` to the backend estimator.

        When ``check_input`` is True, parameters and ``X`` are validated
        first. The backend estimator is created on first use. Finalization is
        deferred: the call only marks the estimator as needing it.
        """
        # A batch counts as the first one if nothing has been seen yet; this
        # also holds right after ``_onedal_fit`` resets the counter to 0.
        first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0

        # finite check occurs on onedal side
        if check_input:
            if sklearn_check_version("1.2"):
                self._validate_params()

            if sklearn_check_version("1.0"):
                X = validate_data(
                    self,
                    X,
                    dtype=[np.float64, np.float32],
                    reset=first_pass,
                    copy=self.copy,
                    force_all_finite=False,
                )
            else:
                X = check_array(
                    X,
                    dtype=[np.float64, np.float32],
                    copy=self.copy,
                    force_all_finite=False,
                )

        onedal_params = {
            "method": "dense",
            "bias": True,
            "assume_centered": self.assume_centered,
        }
        if not hasattr(self, "_onedal_estimator"):
            self._onedal_estimator = self._onedal_incremental_covariance(**onedal_params)
        try:
            if first_pass:
                self.n_samples_seen_ = X.shape[0]
                self.n_features_in_ = X.shape[1]
            else:
                self.n_samples_seen_ += X.shape[0]

            self._onedal_estimator.partial_fit(X, queue=queue)
        finally:
            # Set even if the backend call raised.
            self._need_to_finalize = True

        return self

    # No docstring here: ``score.__doc__`` is taken from sklearn at the end of
    # the class body.
    @wrap_output_data
    def score(self, X_test, y=None):
        xp, _ = get_namespace(X_test)

        check_is_fitted(self)
        # Reading ``location_`` also triggers any pending finalization.
        location = self.location_
        if sklearn_check_version("1.0"):
            X = validate_data(
                self,
                X_test,
                dtype=[np.float64, np.float32],
                reset=False,
            )
        else:
            X = check_array(
                X_test,
                dtype=[np.float64, np.float32],
            )

        if "numpy" not in xp.__name__:
            location = xp.asarray(location, device=X_test.device)
            # depending on the sklearn version, check_array
            # and validate_data will return only numpy arrays
            # which will break dpnp/dpctl support. If the
            # array namespace isn't from numpy and the data
            # is now a numpy array, it has been validated and
            # the original can be used.
            if isinstance(X, np.ndarray):
                X = X_test

        # Fit an unfitted copy on the data shifted by the fitted location,
        # with centering disabled so the shift is not undone.
        est = clone(self)
        est.set_params(**{"assume_centered": True})

        # test_cov is a numpy array, but calculated on device
        test_cov = est.fit(X - location).covariance_
        res = log_likelihood(test_cov, self.get_precision())

        return res

    def partial_fit(self, X, y=None, check_input=True):
        """
        Incremental fit with X. All of X is processed as a single batch.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        check_input : bool, default=True
            Run check_array on X.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        return dispatch(
            self,
            "partial_fit",
            {
                "onedal": self.__class__._onedal_partial_fit,
                "sklearn": None,
            },
            X,
            check_input=check_input,
        )

    def fit(self, X, y=None):
        """
        Fit the model with X, using minibatches of size batch_size.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Returns the instance itself.
        """

        return dispatch(
            self,
            "fit",
            {
                "onedal": self.__class__._onedal_fit,
                "sklearn": None,
            },
            X,
        )

    def _onedal_fit(self, X, queue=None):
        """Fit from scratch on ``X`` by feeding it to the backend in batches.

        Resets the sample counter and any existing backend state, validates
        the input once, processes it in chunks of ``batch_size_`` rows and
        finalizes the result before returning ``self``.
        """
        self.n_samples_seen_ = 0
        if hasattr(self, "_onedal_estimator"):
            self._onedal_estimator._reset()

        if sklearn_check_version("1.2"):
            self._validate_params()

        # finite check occurs on onedal side
        if sklearn_check_version("1.0"):
            X = validate_data(
                self,
                X,
                dtype=[np.float64, np.float32],
                copy=self.copy,
                force_all_finite=False,
            )
        else:
            X = check_array(
                X, dtype=[np.float64, np.float32], copy=self.copy, force_all_finite=False
            )
            self.n_features_in_ = X.shape[1]

        self.batch_size_ = self.batch_size if self.batch_size else 5 * self.n_features_in_

        if X.shape[0] == 1:
            warnings.warn(
                "Only one sample available. You may want to reshape your data array"
            )

        for batch in gen_batches(X.shape[0], self.batch_size_):
            X_batch = X[batch]
            # Input was validated above, so per-batch validation is skipped.
            self._onedal_partial_fit(X_batch, queue=queue, check_input=False)

        self._onedal_finalize_fit(queue=queue)

        return self

    # expose sklearnex pairwise_distances if mahalanobis distance eventually supported
    # No docstring here: ``mahalanobis.__doc__`` is taken from sklearn at the
    # end of the class body.
    def mahalanobis(self, X):
        if sklearn_check_version("1.0"):
            self._check_feature_names(X, reset=False)

        xp, _ = get_namespace(X)
        precision = self.get_precision()
        # compute mahalanobis distances
        # pairwise_distances will check n_features (via n_feature matching with
        # self.location_) , and will check for finiteness via check array
        # check_feature_names will match _validate_data functionally
        location = self.location_[np.newaxis, :]
        if "numpy" not in xp.__name__:
            # Guarantee that inputs to pairwise_distances match in type and location
            location = xp.asarray(location, device=X.device)

        try:
            dist = pairwise_distances(X, location, metric="mahalanobis", VI=precision)
        except ValueError as e:
            # Throw the expected sklearn error in an n_feature length violation
            if "Incompatible dimension for X and Y matrices: X.shape[1] ==" in str(e):
                raise ValueError(
                    f"X has {_num_features(X)} features, but {self.__class__.__name__} "
                    f"is expecting {self.n_features_in_} features as input."
                ) from e
            # Any other ValueError propagates with its original traceback.
            raise

        return (xp.reshape(dist, (-1,))) ** 2

    _onedal_cpu_supported = _onedal_supported
    _onedal_gpu_supported = _onedal_supported

    # Reuse the upstream scikit-learn docstrings for the mirrored public API.
    get_precision.__doc__ = _sklearn_EmpiricalCovariance.get_precision.__doc__
    mahalanobis.__doc__ = _sklearn_EmpiricalCovariance.mahalanobis.__doc__
    error_norm.__doc__ = _sklearn_EmpiricalCovariance.error_norm.__doc__
    score.__doc__ = _sklearn_EmpiricalCovariance.score.__doc__
@@ -0,0 +1,287 @@
1
+ # ===============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ===============================================================================
16
+
17
+ from os import environ
18
+
19
+ from daal4py.sklearn._utils import sklearn_check_version
20
+
21
+ # sklearn requires manual enabling of Scipy array API support
22
+ # if `array-api-compat` package is present in environment
23
+ # TODO: create generic approach to handle this for all tests
24
+ if sklearn_check_version("1.6"):
25
+ environ["SCIPY_ARRAY_API"] = "1"
26
+
27
+
28
+ import numpy as np
29
+ import pytest
30
+ from numpy.linalg import slogdet
31
+ from numpy.testing import assert_allclose
32
+ from scipy.linalg import pinvh
33
+ from sklearn.covariance.tests.test_covariance import (
34
+ test_covariance,
35
+ test_EmpiricalCovariance_validates_mahalanobis,
36
+ )
37
+ from sklearn.datasets import load_diabetes
38
+ from sklearn.decomposition import PCA
39
+
40
+ from daal4py.sklearn._utils import daal_check_version
41
+ from onedal.tests.utils._dataframes_support import (
42
+ _as_numpy,
43
+ _convert_to_dataframe,
44
+ get_dataframes_and_queues,
45
+ )
46
+
47
+
48
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("assume_centered", [True, False])
def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype, assume_centered):
    """Feed two tiny hand-checked datasets through ``partial_fit`` row by row.

    Each dataset has two rows and is split into two one-row chunks; after the
    last chunk the estimator's ``covariance_`` and ``location_`` are compared
    with values worked out by hand for both ``assume_centered`` settings.
    """
    on_gpu = queue is not None and queue.sycl_device.is_gpu
    if assume_centered and on_gpu and not daal_check_version((2025, "P", 0)):
        pytest.skip(
            "Due to a bug on oneDAL side, means are not set to zero when assume_centered=True"
        )
    from sklearnex.covariance import IncrementalEmpiricalCovariance

    # Each case: samples, (covariance, location) when centering is assumed,
    # and (covariance, location) when it is not.
    gold_cases = [
        (
            [[0, 1], [0, 1]],
            ([[0, 0], [0, 1]], [0, 0]),
            ([[0, 0], [0, 0]], [0, 1]),
        ),
        (
            [[1, 2], [3, 6]],
            ([[5, 10], [10, 20]], [0, 0]),
            ([[1, 2], [2, 4]], [2, 4]),
        ),
    ]

    for samples, centered_gold, uncentered_gold in gold_cases:
        data = np.array(samples).astype(dtype)
        estimator = IncrementalEmpiricalCovariance(assume_centered=assume_centered)

        for chunk in np.array_split(data, 2):
            chunk_df = _convert_to_dataframe(
                chunk, sycl_queue=queue, target_df=dataframe
            )
            fitted = estimator.partial_fit(chunk_df)

        gold_cov, gold_loc = centered_gold if assume_centered else uncentered_gold

        assert_allclose(np.array(gold_cov), fitted.covariance_)
        assert_allclose(np.array(gold_loc), fitted.location_)
100
+
101
+
102
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("batch_size", [2, 4])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_sklearnex_fit_on_gold_data(dataframe, queue, batch_size, dtype):
    """Fit a hand-checked 4x4 dataset in one ``fit`` call for each batch size."""
    from sklearnex.covariance import IncrementalEmpiricalCovariance

    samples = np.array(
        [[0, 1, 2, 3], [0, -1, -2, -3], [0, 1, 2, 3], [0, 1, 2, 3]]
    ).astype(dtype)
    samples_df = _convert_to_dataframe(samples, sycl_queue=queue, target_df=dataframe)

    estimator = IncrementalEmpiricalCovariance(batch_size=batch_size)
    fitted = estimator.fit(samples_df)

    # Reference values computed by hand for the four rows above.
    gold_location = np.array([0, 0.5, 1, 1.5])
    gold_covariance = np.array(
        [[0, 0, 0, 0], [0, 0.75, 1.5, 2.25], [0, 1.5, 3, 4.5], [0, 2.25, 4.5, 6.75]]
    )

    assert_allclose(gold_covariance, fitted.covariance_)
    assert_allclose(gold_location, fitted.location_)
122
+
123
+
124
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("num_batches", [2, 10])
@pytest.mark.parametrize("row_count", [100, 1000])
@pytest.mark.parametrize("column_count", [10, 100])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_sklearnex_partial_fit_on_random_data(
    dataframe, queue, num_batches, row_count, column_count, dtype
):
    """Stream seeded uniform data through ``partial_fit`` and compare with NumPy.

    The data is split into ``num_batches`` chunks; after the last chunk the
    estimator must agree with ``np.cov(..., bias=1)`` and ``np.mean``.
    """
    from sklearnex.covariance import IncrementalEmpiricalCovariance

    rng = np.random.default_rng(77)
    data = rng.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
    data = data.astype(dtype)
    estimator = IncrementalEmpiricalCovariance()

    for chunk in np.array_split(data, num_batches):
        chunk_df = _convert_to_dataframe(chunk, sycl_queue=queue, target_df=dataframe)
        fitted = estimator.partial_fit(chunk_df)

    reference_covariance = np.cov(data.T, bias=1)
    reference_location = np.mean(data, axis=0)

    assert_allclose(reference_covariance, fitted.covariance_, atol=1e-6)
    assert_allclose(reference_location, fitted.location_, atol=1e-6)
152
+
153
+
154
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("num_batches", [2, 10])
@pytest.mark.parametrize("row_count", [100, 1000])
@pytest.mark.parametrize("column_count", [10, 100])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("assume_centered", [True, False])
def test_sklearnex_fit_on_random_data(
    dataframe, queue, num_batches, row_count, column_count, dtype, assume_centered
):
    """Fit seeded uniform data in one ``fit`` call and compare with NumPy.

    The estimator is given ``batch_size=row_count // num_batches``. The
    reference is the second-moment matrix with a zero location when
    ``assume_centered`` is set, and the biased covariance with the column
    means otherwise.
    """
    on_gpu = queue is not None and queue.sycl_device.is_gpu
    if assume_centered and on_gpu and not daal_check_version((2025, "P", 0)):
        pytest.skip(
            "Due to a bug on oneDAL side, means are not set to zero when assume_centered=True"
        )
    from sklearnex.covariance import IncrementalEmpiricalCovariance

    rng = np.random.default_rng(77)
    data = rng.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
    data = data.astype(dtype)
    data_df = _convert_to_dataframe(data, sycl_queue=queue, target_df=dataframe)

    estimator = IncrementalEmpiricalCovariance(
        batch_size=row_count // num_batches, assume_centered=assume_centered
    )
    fitted = estimator.fit(data_df)

    if not assume_centered:
        reference_covariance = np.cov(data.T, bias=1)
        reference_location = np.mean(data, axis=0)
    else:
        # No mean is subtracted: plain second moments and a zero location.
        reference_covariance = np.dot(data.T, data) / data.shape[0]
        reference_location = np.zeros_like(data[0])

    assert_allclose(reference_covariance, fitted.covariance_, atol=1e-6)
    assert_allclose(reference_location, fitted.location_, atol=1e-6)
191
+
192
+
193
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
def test_whitened_toy_score(dataframe, queue):
    """Compare ``score`` on whitened diabetes data with a NumPy/SciPy reference."""
    from sklearnex.covariance import IncrementalEmpiricalCovariance

    # A sklearn toy dataset with enough samples for a stable estimate.
    features, _ = load_diabetes(return_X_y=True)
    n_features = features.shape[1]

    # Whitening yields uncorrelated components with unit variance.
    whitened = PCA(whiten=True).fit_transform(features)

    # Move the data into the dataframe type under test.
    whitened_df = _convert_to_dataframe(whitened, sycl_queue=queue, target_df=dataframe)

    estimator = IncrementalEmpiricalCovariance()
    estimator.fit(whitened_df)
    # location_ attribute approximately zero (10,), covariance_ identity (10,10)

    # The reference log-likelihood is built only from scipy.linalg.pinvh,
    # np.linalg.slogdet and np.cov so that it stays independent of the estimator.
    logdet_precision = slogdet(pinvh(np.cov(whitened.T, bias=1)))[1]
    reference_score = (
        -(n_features - logdet_precision + n_features * np.log(2 * np.pi)) / 2
    )
    # reference_score = -14.1780602988
    observed_score = _as_numpy(estimator.score(whitened_df))
    assert_allclose(reference_score, observed_score, atol=1e-6)
221
+
222
+
223
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_sklearnex_incremental_estimatior_pickle(dataframe, queue, dtype):
    """Pickle the estimator before any data, after ``partial_fit`` and after finalization."""
    import pickle

    from sklearnex.covariance import IncrementalEmpiricalCovariance

    def _roundtrip(estimator):
        # Serialize and immediately restore; only test-local objects pass through.
        return pickle.loads(pickle.dumps(estimator))

    inccov = IncrementalEmpiricalCovariance()

    # An estimator that has not seen any data must be serializable.
    inccov_loaded = _roundtrip(inccov)

    rng = np.random.default_rng(77)
    data = rng.uniform(low=-0.3, high=+0.7, size=(10, 10)).astype(dtype)
    first_half, second_half = np.array_split(data, 2)

    first_df = _convert_to_dataframe(first_half, sycl_queue=queue, target_df=dataframe)
    inccov.partial_fit(first_df)
    inccov_loaded.partial_fit(first_df)

    # The estimator must be serializable after a partial_fit call.
    inccov_loaded = _roundtrip(inccov_loaded)

    for attribute in ("batch_size", "n_features_in_", "n_samples_seen_"):
        assert getattr(inccov, attribute) == getattr(inccov_loaded, attribute)
    if hasattr(inccov, "_parameter_constraints"):
        assert inccov._parameter_constraints == inccov_loaded._parameter_constraints
    assert inccov.n_jobs == inccov_loaded.n_jobs

    second_df = _convert_to_dataframe(second_half, sycl_queue=queue, target_df=dataframe)
    inccov.partial_fit(second_df)
    inccov_loaded.partial_fit(second_df)
    # NOTE(review): the reload below is taken from `inccov`, so the state that
    # `inccov_loaded` just accumulated from the second batch is discarded
    # without being compared - confirm this is intended.
    inccov_loaded = _roundtrip(inccov)

    assert_allclose(inccov.location_, inccov_loaded.location_, atol=1e-6)
    assert_allclose(inccov.covariance_, inccov_loaded.covariance_, atol=1e-6)

    # A finalized estimator must be serializable as well.
    inccov_loaded = _roundtrip(inccov_loaded)

    assert_allclose(inccov.location_, inccov_loaded.location_, atol=1e-6)
    assert_allclose(inccov.covariance_, inccov_loaded.covariance_, atol=1e-6)
271
+
272
+
273
+ # Monkeypatch IncrementalEmpiricalCovariance into relevant sklearn.covariance tests
274
@pytest.mark.allow_sklearn_fallback
@pytest.mark.parametrize(
    "sklearn_test",
    [
        test_covariance,
        test_EmpiricalCovariance_validates_mahalanobis,
    ],
)
def test_IncrementalEmpiricalCovariance_against_sklearn(monkeypatch, sklearn_test):
    """Run a stock scikit-learn covariance test with the estimator class swapped."""
    from sklearnex.covariance import IncrementalEmpiricalCovariance

    # Replace `EmpiricalCovariance` in the module that defines the sklearn test.
    patch_target = f"{sklearn_test.__module__}.EmpiricalCovariance"
    monkeypatch.setattr(patch_target, IncrementalEmpiricalCovariance)
    sklearn_test()