scikit-learn-intelex 2025.1.0 (py39-none-manylinux_2_28_x86_64.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of scikit-learn-intelex has been flagged as potentially problematic.

Files changed (280)
  1. daal4py/__init__.py +73 -0
  2. daal4py/__main__.py +58 -0
  3. daal4py/_daal4py.cpython-39-x86_64-linux-gnu.so +0 -0
  4. daal4py/doc/third-party-programs.txt +424 -0
  5. daal4py/mb/__init__.py +19 -0
  6. daal4py/mb/model_builders.py +377 -0
  7. daal4py/mpi_transceiver.cpython-39-x86_64-linux-gnu.so +0 -0
  8. daal4py/sklearn/__init__.py +40 -0
  9. daal4py/sklearn/_n_jobs_support.py +248 -0
  10. daal4py/sklearn/_utils.py +245 -0
  11. daal4py/sklearn/cluster/__init__.py +20 -0
  12. daal4py/sklearn/cluster/dbscan.py +165 -0
  13. daal4py/sklearn/cluster/k_means.py +597 -0
  14. daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
  15. daal4py/sklearn/decomposition/__init__.py +19 -0
  16. daal4py/sklearn/decomposition/_pca.py +524 -0
  17. daal4py/sklearn/ensemble/AdaBoostClassifier.py +196 -0
  18. daal4py/sklearn/ensemble/GBTDAAL.py +337 -0
  19. daal4py/sklearn/ensemble/__init__.py +27 -0
  20. daal4py/sklearn/ensemble/_forest.py +1397 -0
  21. daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
  22. daal4py/sklearn/linear_model/__init__.py +29 -0
  23. daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
  24. daal4py/sklearn/linear_model/_linear.py +272 -0
  25. daal4py/sklearn/linear_model/_ridge.py +325 -0
  26. daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
  27. daal4py/sklearn/linear_model/linear.py +17 -0
  28. daal4py/sklearn/linear_model/logistic_loss.py +195 -0
  29. daal4py/sklearn/linear_model/logistic_path.py +1026 -0
  30. daal4py/sklearn/linear_model/ridge.py +17 -0
  31. daal4py/sklearn/linear_model/tests/test_linear.py +208 -0
  32. daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
  33. daal4py/sklearn/manifold/__init__.py +19 -0
  34. daal4py/sklearn/manifold/_t_sne.py +405 -0
  35. daal4py/sklearn/metrics/__init__.py +20 -0
  36. daal4py/sklearn/metrics/_pairwise.py +236 -0
  37. daal4py/sklearn/metrics/_ranking.py +210 -0
  38. daal4py/sklearn/model_selection/__init__.py +19 -0
  39. daal4py/sklearn/model_selection/_split.py +309 -0
  40. daal4py/sklearn/model_selection/tests/test_split.py +56 -0
  41. daal4py/sklearn/monkeypatch/__init__.py +0 -0
  42. daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
  43. daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
  44. daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
  45. daal4py/sklearn/monkeypatch/tests/test_patching.py +90 -0
  46. daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +117 -0
  47. daal4py/sklearn/neighbors/__init__.py +21 -0
  48. daal4py/sklearn/neighbors/_base.py +503 -0
  49. daal4py/sklearn/neighbors/_classification.py +139 -0
  50. daal4py/sklearn/neighbors/_regression.py +74 -0
  51. daal4py/sklearn/neighbors/_unsupervised.py +55 -0
  52. daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
  53. daal4py/sklearn/svm/__init__.py +19 -0
  54. daal4py/sklearn/svm/svm.py +734 -0
  55. daal4py/sklearn/utils/__init__.py +21 -0
  56. daal4py/sklearn/utils/base.py +75 -0
  57. daal4py/sklearn/utils/tests/test_utils.py +51 -0
  58. daal4py/sklearn/utils/validation.py +693 -0
  59. onedal/__init__.py +83 -0
  60. onedal/_config.py +54 -0
  61. onedal/_device_offload.py +222 -0
  62. onedal/_onedal_py_dpc.cpython-39-x86_64-linux-gnu.so +0 -0
  63. onedal/_onedal_py_host.cpython-39-x86_64-linux-gnu.so +0 -0
  64. onedal/_onedal_py_spmd_dpc.cpython-39-x86_64-linux-gnu.so +0 -0
  65. onedal/basic_statistics/__init__.py +20 -0
  66. onedal/basic_statistics/basic_statistics.py +107 -0
  67. onedal/basic_statistics/incremental_basic_statistics.py +160 -0
  68. onedal/basic_statistics/tests/test_basic_statistics.py +298 -0
  69. onedal/basic_statistics/tests/test_incremental_basic_statistics.py +196 -0
  70. onedal/cluster/__init__.py +27 -0
  71. onedal/cluster/dbscan.py +110 -0
  72. onedal/cluster/kmeans.py +564 -0
  73. onedal/cluster/kmeans_init.py +115 -0
  74. onedal/cluster/tests/test_dbscan.py +125 -0
  75. onedal/cluster/tests/test_kmeans.py +88 -0
  76. onedal/cluster/tests/test_kmeans_init.py +93 -0
  77. onedal/common/_base.py +38 -0
  78. onedal/common/_estimator_checks.py +47 -0
  79. onedal/common/_mixin.py +62 -0
  80. onedal/common/_policy.py +59 -0
  81. onedal/common/_spmd_policy.py +30 -0
  82. onedal/common/hyperparameters.py +125 -0
  83. onedal/common/tests/test_policy.py +76 -0
  84. onedal/covariance/__init__.py +20 -0
  85. onedal/covariance/covariance.py +125 -0
  86. onedal/covariance/incremental_covariance.py +146 -0
  87. onedal/covariance/tests/test_covariance.py +50 -0
  88. onedal/covariance/tests/test_incremental_covariance.py +122 -0
  89. onedal/datatypes/__init__.py +19 -0
  90. onedal/datatypes/_data_conversion.py +154 -0
  91. onedal/datatypes/tests/common.py +126 -0
  92. onedal/datatypes/tests/test_data.py +414 -0
  93. onedal/decomposition/__init__.py +20 -0
  94. onedal/decomposition/incremental_pca.py +204 -0
  95. onedal/decomposition/pca.py +186 -0
  96. onedal/decomposition/tests/test_incremental_pca.py +198 -0
  97. onedal/ensemble/__init__.py +29 -0
  98. onedal/ensemble/forest.py +727 -0
  99. onedal/ensemble/tests/test_random_forest.py +97 -0
  100. onedal/linear_model/__init__.py +27 -0
  101. onedal/linear_model/incremental_linear_model.py +258 -0
  102. onedal/linear_model/linear_model.py +329 -0
  103. onedal/linear_model/logistic_regression.py +249 -0
  104. onedal/linear_model/tests/test_incremental_linear_regression.py +168 -0
  105. onedal/linear_model/tests/test_incremental_ridge_regression.py +107 -0
  106. onedal/linear_model/tests/test_linear_regression.py +250 -0
  107. onedal/linear_model/tests/test_logistic_regression.py +95 -0
  108. onedal/linear_model/tests/test_ridge.py +95 -0
  109. onedal/neighbors/__init__.py +19 -0
  110. onedal/neighbors/neighbors.py +767 -0
  111. onedal/neighbors/tests/test_knn_classification.py +49 -0
  112. onedal/primitives/__init__.py +27 -0
  113. onedal/primitives/get_tree.py +25 -0
  114. onedal/primitives/kernel_functions.py +153 -0
  115. onedal/primitives/tests/test_kernel_functions.py +159 -0
  116. onedal/spmd/__init__.py +25 -0
  117. onedal/spmd/_base.py +30 -0
  118. onedal/spmd/basic_statistics/__init__.py +20 -0
  119. onedal/spmd/basic_statistics/basic_statistics.py +30 -0
  120. onedal/spmd/basic_statistics/incremental_basic_statistics.py +69 -0
  121. onedal/spmd/cluster/__init__.py +28 -0
  122. onedal/spmd/cluster/dbscan.py +23 -0
  123. onedal/spmd/cluster/kmeans.py +56 -0
  124. onedal/spmd/covariance/__init__.py +20 -0
  125. onedal/spmd/covariance/covariance.py +26 -0
  126. onedal/spmd/covariance/incremental_covariance.py +82 -0
  127. onedal/spmd/decomposition/__init__.py +20 -0
  128. onedal/spmd/decomposition/incremental_pca.py +117 -0
  129. onedal/spmd/decomposition/pca.py +26 -0
  130. onedal/spmd/ensemble/__init__.py +19 -0
  131. onedal/spmd/ensemble/forest.py +28 -0
  132. onedal/spmd/linear_model/__init__.py +21 -0
  133. onedal/spmd/linear_model/incremental_linear_model.py +97 -0
  134. onedal/spmd/linear_model/linear_model.py +30 -0
  135. onedal/spmd/linear_model/logistic_regression.py +38 -0
  136. onedal/spmd/neighbors/__init__.py +19 -0
  137. onedal/spmd/neighbors/neighbors.py +75 -0
  138. onedal/svm/__init__.py +19 -0
  139. onedal/svm/svm.py +556 -0
  140. onedal/svm/tests/test_csr_svm.py +351 -0
  141. onedal/svm/tests/test_nusvc.py +204 -0
  142. onedal/svm/tests/test_nusvr.py +210 -0
  143. onedal/svm/tests/test_svc.py +176 -0
  144. onedal/svm/tests/test_svr.py +243 -0
  145. onedal/tests/test_common.py +57 -0
  146. onedal/tests/utils/_dataframes_support.py +162 -0
  147. onedal/tests/utils/_device_selection.py +102 -0
  148. onedal/utils/__init__.py +49 -0
  149. onedal/utils/_array_api.py +81 -0
  150. onedal/utils/_dpep_helpers.py +56 -0
  151. onedal/utils/validation.py +440 -0
  152. scikit_learn_intelex-2025.1.0.dist-info/LICENSE.txt +202 -0
  153. scikit_learn_intelex-2025.1.0.dist-info/METADATA +231 -0
  154. scikit_learn_intelex-2025.1.0.dist-info/RECORD +280 -0
  155. scikit_learn_intelex-2025.1.0.dist-info/WHEEL +5 -0
  156. scikit_learn_intelex-2025.1.0.dist-info/top_level.txt +3 -0
  157. sklearnex/__init__.py +66 -0
  158. sklearnex/__main__.py +58 -0
  159. sklearnex/_config.py +116 -0
  160. sklearnex/_device_offload.py +126 -0
  161. sklearnex/_utils.py +132 -0
  162. sklearnex/basic_statistics/__init__.py +20 -0
  163. sklearnex/basic_statistics/basic_statistics.py +230 -0
  164. sklearnex/basic_statistics/incremental_basic_statistics.py +345 -0
  165. sklearnex/basic_statistics/tests/test_basic_statistics.py +270 -0
  166. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +404 -0
  167. sklearnex/cluster/__init__.py +20 -0
  168. sklearnex/cluster/dbscan.py +197 -0
  169. sklearnex/cluster/k_means.py +395 -0
  170. sklearnex/cluster/tests/test_dbscan.py +38 -0
  171. sklearnex/cluster/tests/test_kmeans.py +159 -0
  172. sklearnex/conftest.py +82 -0
  173. sklearnex/covariance/__init__.py +19 -0
  174. sklearnex/covariance/incremental_covariance.py +398 -0
  175. sklearnex/covariance/tests/test_incremental_covariance.py +237 -0
  176. sklearnex/decomposition/__init__.py +19 -0
  177. sklearnex/decomposition/pca.py +425 -0
  178. sklearnex/decomposition/tests/test_pca.py +58 -0
  179. sklearnex/dispatcher.py +543 -0
  180. sklearnex/doc/third-party-programs.txt +424 -0
  181. sklearnex/ensemble/__init__.py +29 -0
  182. sklearnex/ensemble/_forest.py +2029 -0
  183. sklearnex/ensemble/tests/test_forest.py +135 -0
  184. sklearnex/glob/__main__.py +72 -0
  185. sklearnex/glob/dispatcher.py +101 -0
  186. sklearnex/linear_model/__init__.py +32 -0
  187. sklearnex/linear_model/coordinate_descent.py +30 -0
  188. sklearnex/linear_model/incremental_linear.py +482 -0
  189. sklearnex/linear_model/incremental_ridge.py +425 -0
  190. sklearnex/linear_model/linear.py +341 -0
  191. sklearnex/linear_model/logistic_regression.py +413 -0
  192. sklearnex/linear_model/ridge.py +24 -0
  193. sklearnex/linear_model/tests/test_incremental_linear.py +207 -0
  194. sklearnex/linear_model/tests/test_incremental_ridge.py +153 -0
  195. sklearnex/linear_model/tests/test_linear.py +167 -0
  196. sklearnex/linear_model/tests/test_logreg.py +134 -0
  197. sklearnex/manifold/__init__.py +19 -0
  198. sklearnex/manifold/t_sne.py +21 -0
  199. sklearnex/manifold/tests/test_tsne.py +26 -0
  200. sklearnex/metrics/__init__.py +23 -0
  201. sklearnex/metrics/pairwise.py +22 -0
  202. sklearnex/metrics/ranking.py +20 -0
  203. sklearnex/metrics/tests/test_metrics.py +39 -0
  204. sklearnex/model_selection/__init__.py +21 -0
  205. sklearnex/model_selection/split.py +22 -0
  206. sklearnex/model_selection/tests/test_model_selection.py +34 -0
  207. sklearnex/neighbors/__init__.py +27 -0
  208. sklearnex/neighbors/_lof.py +236 -0
  209. sklearnex/neighbors/common.py +310 -0
  210. sklearnex/neighbors/knn_classification.py +231 -0
  211. sklearnex/neighbors/knn_regression.py +207 -0
  212. sklearnex/neighbors/knn_unsupervised.py +178 -0
  213. sklearnex/neighbors/tests/test_neighbors.py +82 -0
  214. sklearnex/preview/__init__.py +17 -0
  215. sklearnex/preview/covariance/__init__.py +19 -0
  216. sklearnex/preview/covariance/covariance.py +138 -0
  217. sklearnex/preview/covariance/tests/test_covariance.py +66 -0
  218. sklearnex/preview/decomposition/__init__.py +19 -0
  219. sklearnex/preview/decomposition/incremental_pca.py +233 -0
  220. sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
  221. sklearnex/preview/linear_model/__init__.py +19 -0
  222. sklearnex/preview/linear_model/ridge.py +424 -0
  223. sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
  224. sklearnex/spmd/__init__.py +25 -0
  225. sklearnex/spmd/basic_statistics/__init__.py +20 -0
  226. sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
  227. sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
  228. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  229. sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +307 -0
  230. sklearnex/spmd/cluster/__init__.py +30 -0
  231. sklearnex/spmd/cluster/dbscan.py +50 -0
  232. sklearnex/spmd/cluster/kmeans.py +21 -0
  233. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  234. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
  235. sklearnex/spmd/covariance/__init__.py +20 -0
  236. sklearnex/spmd/covariance/covariance.py +21 -0
  237. sklearnex/spmd/covariance/incremental_covariance.py +37 -0
  238. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  239. sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
  240. sklearnex/spmd/decomposition/__init__.py +20 -0
  241. sklearnex/spmd/decomposition/incremental_pca.py +30 -0
  242. sklearnex/spmd/decomposition/pca.py +21 -0
  243. sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
  244. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  245. sklearnex/spmd/ensemble/__init__.py +19 -0
  246. sklearnex/spmd/ensemble/forest.py +71 -0
  247. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  248. sklearnex/spmd/linear_model/__init__.py +21 -0
  249. sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
  250. sklearnex/spmd/linear_model/linear_model.py +21 -0
  251. sklearnex/spmd/linear_model/logistic_regression.py +21 -0
  252. sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +329 -0
  253. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  254. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +162 -0
  255. sklearnex/spmd/neighbors/__init__.py +19 -0
  256. sklearnex/spmd/neighbors/neighbors.py +25 -0
  257. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  258. sklearnex/svm/__init__.py +29 -0
  259. sklearnex/svm/_common.py +339 -0
  260. sklearnex/svm/nusvc.py +371 -0
  261. sklearnex/svm/nusvr.py +170 -0
  262. sklearnex/svm/svc.py +399 -0
  263. sklearnex/svm/svr.py +167 -0
  264. sklearnex/svm/tests/test_svm.py +93 -0
  265. sklearnex/tests/test_common.py +390 -0
  266. sklearnex/tests/test_config.py +123 -0
  267. sklearnex/tests/test_memory_usage.py +379 -0
  268. sklearnex/tests/test_monkeypatch.py +276 -0
  269. sklearnex/tests/test_n_jobs_support.py +108 -0
  270. sklearnex/tests/test_parallel.py +48 -0
  271. sklearnex/tests/test_patching.py +385 -0
  272. sklearnex/tests/test_run_to_run_stability.py +321 -0
  273. sklearnex/tests/utils/__init__.py +44 -0
  274. sklearnex/tests/utils/base.py +371 -0
  275. sklearnex/tests/utils/spmd.py +198 -0
  276. sklearnex/utils/__init__.py +19 -0
  277. sklearnex/utils/_array_api.py +82 -0
  278. sklearnex/utils/parallel.py +59 -0
  279. sklearnex/utils/tests/test_finite.py +89 -0
  280. sklearnex/utils/validation.py +17 -0
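The listing spans three top-level packages: daal4py (the oneDAL bindings plus the older sklearn patches), onedal (the lower-level estimator backends), and sklearnex (the user-facing accelerated estimators and the dispatcher). For orientation only, a minimal sketch of how this package is typically consumed through its documented patching entry point; the snippet below is illustrative and not part of the diff:

from sklearnex import patch_sklearn, unpatch_sklearn

patch_sklearn()  # swap Intel-optimized implementations into scikit-learn

from sklearn.cluster import DBSCAN  # now dispatched through sklearnex/oneDAL
# ... fit and predict exactly as with stock scikit-learn ...

unpatch_sklearn()  # restore the stock scikit-learn estimators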
sklearnex/covariance/incremental_covariance.py
@@ -0,0 +1,398 @@
+ # ===============================================================================
+ # Copyright 2024 Intel Corporation
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ===============================================================================
+
+ import numbers
+ import warnings
+
+ import numpy as np
+ from scipy import linalg
+ from sklearn.base import BaseEstimator, clone
+ from sklearn.covariance import EmpiricalCovariance as _sklearn_EmpiricalCovariance
+ from sklearn.covariance import log_likelihood
+ from sklearn.utils import check_array, gen_batches
+ from sklearn.utils.validation import _num_features, check_is_fitted
+
+ from daal4py.sklearn._n_jobs_support import control_n_jobs
+ from daal4py.sklearn._utils import daal_check_version, sklearn_check_version
+ from onedal.covariance import (
+     IncrementalEmpiricalCovariance as onedal_IncrementalEmpiricalCovariance,
+ )
+ from sklearnex import config_context
+
+ from .._device_offload import dispatch, wrap_output_data
+ from .._utils import IntelEstimator, PatchingConditionsChain, register_hyperparameters
+ from ..metrics import pairwise_distances
+ from ..utils._array_api import get_namespace
+
+ if sklearn_check_version("1.2"):
+     from sklearn.utils._param_validation import Interval
+
+ if sklearn_check_version("1.6"):
+     from sklearn.utils.validation import validate_data
+ else:
+     validate_data = BaseEstimator._validate_data
+
+
+ @control_n_jobs(decorated_methods=["partial_fit", "fit", "_onedal_finalize_fit"])
+ class IncrementalEmpiricalCovariance(IntelEstimator, BaseEstimator):
+     """
+     Maximum likelihood covariance estimator that allows for the estimation when the data are split into
+     batches. The user can use the ``partial_fit`` method to provide a single batch of data or use the ``fit`` method to provide
+     the entire dataset.
+
+     Parameters
+     ----------
+     store_precision : bool, default=False
+         Specifies if the estimated precision is stored.
+
+     assume_centered : bool, default=False
+         If True, data are not centered before computation.
+         Useful when working with data whose mean is almost, but not exactly
+         zero.
+         If False (default), data are centered before computation.
+
+     batch_size : int, default=None
+         The number of samples to use for each batch. Only used when calling
+         ``fit``. If ``batch_size`` is ``None``, then ``batch_size``
+         is inferred from the data and set to ``5 * n_features``, to provide a
+         balance between approximation accuracy and memory consumption.
+
+     copy : bool, default=True
+         If False, X will be overwritten. ``copy=False`` can be used to
+         save memory but is unsafe for general use.
+
+     Attributes
+     ----------
+     location_ : ndarray of shape (n_features,)
+         Estimated location, i.e. the estimated mean.
+
+     covariance_ : ndarray of shape (n_features, n_features)
+         Estimated covariance matrix
+
+     n_samples_seen_ : int
+         The number of samples processed by the estimator. Will be reset on
+         new calls to ``fit``, but increments across ``partial_fit`` calls.
+
+     batch_size_ : int
+         Inferred batch size from ``batch_size``.
+
+     n_features_in_ : int
+         Number of features seen during ``fit`` or ``partial_fit``.
+
+     Examples
+     --------
+     >>> import numpy as np
+     >>> from sklearnex.covariance import IncrementalEmpiricalCovariance
+     >>> inccov = IncrementalEmpiricalCovariance(batch_size=1)
+     >>> X = np.array([[1, 2], [3, 4]])
+     >>> inccov.partial_fit(X[:1])
+     >>> inccov.partial_fit(X[1:])
+     >>> inccov.covariance_
+     np.array([[1., 1.],[1., 1.]])
+     >>> inccov.location_
+     np.array([2., 3.])
+     >>> inccov.fit(X)
+     >>> inccov.covariance_
+     np.array([[1., 1.],[1., 1.]])
+     >>> inccov.location_
+     np.array([2., 3.])
+     """
+
+     _onedal_incremental_covariance = staticmethod(onedal_IncrementalEmpiricalCovariance)
+
+     if sklearn_check_version("1.2"):
+         _parameter_constraints: dict = {
+             "store_precision": ["boolean"],
+             "assume_centered": ["boolean"],
+             "batch_size": [Interval(numbers.Integral, 1, None, closed="left"), None],
+             "copy": ["boolean"],
+         }
+
+     get_precision = _sklearn_EmpiricalCovariance.get_precision
+     error_norm = wrap_output_data(_sklearn_EmpiricalCovariance.error_norm)
+
+     def __init__(
+         self, *, store_precision=False, assume_centered=False, batch_size=None, copy=True
+     ):
+         self.assume_centered = assume_centered
+         self.store_precision = store_precision
+         self.batch_size = batch_size
+         self.copy = copy
+
+     def _onedal_supported(self, method_name, *data):
+         patching_status = PatchingConditionsChain(
+             f"sklearn.covariance.{self.__class__.__name__}.{method_name}"
+         )
+         return patching_status
+
+     def _onedal_finalize_fit(self, queue=None):
+         assert hasattr(self, "_onedal_estimator")
+         self._onedal_estimator.finalize_fit(queue=queue)
+         self._need_to_finalize = False
+
+         if not daal_check_version((2024, "P", 400)) and self.assume_centered:
+             location = self._onedal_estimator.location_[None, :]
+             self._onedal_estimator.covariance_ += np.dot(location.T, location)
+             self._onedal_estimator.location_ = np.zeros_like(np.squeeze(location))
+         if self.store_precision:
+             self.precision_ = linalg.pinvh(
+                 self._onedal_estimator.covariance_, check_finite=False
+             )
+         else:
+             self.precision_ = None
+
+     @property
+     def covariance_(self):
+         if hasattr(self, "_onedal_estimator"):
+             if self._need_to_finalize:
+                 self._onedal_finalize_fit()
+             return self._onedal_estimator.covariance_
+         else:
+             raise AttributeError(
+                 f"'{self.__class__.__name__}' object has no attribute 'covariance_'"
+             )
+
+     @property
+     def location_(self):
+         if hasattr(self, "_onedal_estimator"):
+             if self._need_to_finalize:
+                 self._onedal_finalize_fit()
+             return self._onedal_estimator.location_
+         else:
+             raise AttributeError(
+                 f"'{self.__class__.__name__}' object has no attribute 'location_'"
+             )
+
+     def _onedal_partial_fit(self, X, queue=None, check_input=True):
+
+         first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0
+
+         # finite check occurs on onedal side
+         if check_input:
+             if sklearn_check_version("1.2"):
+                 self._validate_params()
+
+             if sklearn_check_version("1.0"):
+                 X = validate_data(
+                     self,
+                     X,
+                     dtype=[np.float64, np.float32],
+                     reset=first_pass,
+                     copy=self.copy,
+                     force_all_finite=False,
+                 )
+             else:
+                 X = check_array(
+                     X,
+                     dtype=[np.float64, np.float32],
+                     copy=self.copy,
+                     force_all_finite=False,
+                 )
+
+         onedal_params = {
+             "method": "dense",
+             "bias": True,
+             "assume_centered": self.assume_centered,
+         }
+         if not hasattr(self, "_onedal_estimator"):
+             self._onedal_estimator = self._onedal_incremental_covariance(**onedal_params)
+         try:
+             if first_pass:
+                 self.n_samples_seen_ = X.shape[0]
+                 self.n_features_in_ = X.shape[1]
+             else:
+                 self.n_samples_seen_ += X.shape[0]
+
+             self._onedal_estimator.partial_fit(X, queue=queue)
+         finally:
+             self._need_to_finalize = True
+
+         return self
+
+     @wrap_output_data
+     def score(self, X_test, y=None):
+         xp, _ = get_namespace(X_test)
+
+         check_is_fitted(self)
+         location = self.location_
+         if sklearn_check_version("1.0"):
+             X = validate_data(
+                 self,
+                 X_test,
+                 dtype=[np.float64, np.float32],
+                 reset=False,
+             )
+         else:
+             X = check_array(
+                 X_test,
+                 dtype=[np.float64, np.float32],
+             )
+
+         if "numpy" not in xp.__name__:
+             location = xp.asarray(location, device=X_test.device)
+             # depending on the sklearn version, check_array
+             # and validate_data will return only numpy arrays
+             # which will break dpnp/dpctl support. If the
+             # array namespace isn't from numpy and the data
+             # is now a numpy array, it has been validated and
+             # the original can be used.
+             if isinstance(X, np.ndarray):
+                 X = X_test
+
+         est = clone(self)
+         est.set_params(**{"assume_centered": True})
+
+         # test_cov is a numpy array, but calculated on device
+         test_cov = est.fit(X - location).covariance_
+         res = log_likelihood(test_cov, self.get_precision())
+
+         return res
+
+     def partial_fit(self, X, y=None, check_input=True):
+         """
+         Incremental fit with X. All of X is processed as a single batch.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Training data, where `n_samples` is the number of samples and
+             `n_features` is the number of features.
+
+         y : Ignored
+             Not used, present for API consistency by convention.
+
+         check_input : bool, default=True
+             Run check_array on X.
+
+         Returns
+         -------
+         self : object
+             Returns the instance itself.
+         """
+         return dispatch(
+             self,
+             "partial_fit",
+             {
+                 "onedal": self.__class__._onedal_partial_fit,
+                 "sklearn": None,
+             },
+             X,
+             check_input=check_input,
+         )
+
+     def fit(self, X, y=None):
+         """
+         Fit the model with X, using minibatches of size batch_size.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Training data, where `n_samples` is the number of samples and
+             `n_features` is the number of features.
+
+         y : Ignored
+             Not used, present for API consistency by convention.
+
+         Returns
+         -------
+         self : object
+             Returns the instance itself.
+         """
+
+         return dispatch(
+             self,
+             "fit",
+             {
+                 "onedal": self.__class__._onedal_fit,
+                 "sklearn": None,
+             },
+             X,
+         )
+
+     def _onedal_fit(self, X, queue=None):
+         self.n_samples_seen_ = 0
+         if hasattr(self, "_onedal_estimator"):
+             self._onedal_estimator._reset()
+
+         if sklearn_check_version("1.2"):
+             self._validate_params()
+
+         # finite check occurs on onedal side
+         if sklearn_check_version("1.0"):
+             X = validate_data(
+                 self,
+                 X,
+                 dtype=[np.float64, np.float32],
+                 copy=self.copy,
+                 force_all_finite=False,
+             )
+         else:
+             X = check_array(
+                 X, dtype=[np.float64, np.float32], copy=self.copy, force_all_finite=False
+             )
+         self.n_features_in_ = X.shape[1]
+
+         self.batch_size_ = self.batch_size if self.batch_size else 5 * self.n_features_in_
+
+         if X.shape[0] == 1:
+             warnings.warn(
+                 "Only one sample available. You may want to reshape your data array"
+             )
+
+         for batch in gen_batches(X.shape[0], self.batch_size_):
+             X_batch = X[batch]
+             self._onedal_partial_fit(X_batch, queue=queue, check_input=False)
+
+         self._onedal_finalize_fit(queue=queue)
+
+         return self
+
+     # expose sklearnex pairwise_distances if mahalanobis distance eventually supported
+     def mahalanobis(self, X):
+         if sklearn_check_version("1.0"):
+             self._check_feature_names(X, reset=False)
+
+         xp, _ = get_namespace(X)
+         precision = self.get_precision()
+         # compute mahalanobis distances
+         # pairwise_distances will check n_features (via n_feature matching with
+         # self.location_) , and will check for finiteness via check array
+         # check_feature_names will match _validate_data functionally
+         location = self.location_[np.newaxis, :]
+         if "numpy" not in xp.__name__:
+             # Guarantee that inputs to pairwise_distances match in type and location
+             location = xp.asarray(location, device=X.device)
+
+         try:
+             dist = pairwise_distances(X, location, metric="mahalanobis", VI=precision)
+         except ValueError as e:
+             # Throw the expected sklearn error in an n_feature length violation
+             if "Incompatible dimension for X and Y matrices: X.shape[1] ==" in str(e):
+                 raise ValueError(
+                     f"X has {_num_features(X)} features, but {self.__class__.__name__} "
+                     f"is expecting {self.n_features_in_} features as input."
+                 )
+             else:
+                 raise e
+
+         return (xp.reshape(dist, (-1,))) ** 2
+
+     _onedal_cpu_supported = _onedal_supported
+     _onedal_gpu_supported = _onedal_supported
+
+     mahalanobis.__doc__ = _sklearn_EmpiricalCovariance.mahalanobis.__doc__
+     error_norm.__doc__ = _sklearn_EmpiricalCovariance.error_norm.__doc__
+     score.__doc__ = _sklearn_EmpiricalCovariance.score.__doc__
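For readers skimming the hunk above: a minimal usage sketch of the new IncrementalEmpiricalCovariance estimator (illustrative, not part of the diff; it assumes scikit-learn-intelex 2025.1.0 is installed and follows the class docstring and the fit/partial_fit code paths shown above):

import numpy as np
from sklearnex.covariance import IncrementalEmpiricalCovariance

X = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]])

# Stream the data in two batches; results are finalized lazily the first time
# covariance_ or location_ is accessed.
est = IncrementalEmpiricalCovariance()
est.partial_fit(X[:2])
est.partial_fit(X[2:])
print(est.location_)    # per-feature mean over all batches seen
print(est.covariance_)  # biased (MLE) covariance estimate

# Equivalent single call: fit() splits X into batches of batch_size
# (default 5 * n_features) and runs the same partial_fit loop internally.
est2 = IncrementalEmpiricalCovariance(batch_size=2).fit(X)
print(np.allclose(est.covariance_, est2.covariance_))  # True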
sklearnex/covariance/tests/test_incremental_covariance.py
@@ -0,0 +1,237 @@
+ # ===============================================================================
+ # Copyright 2024 Intel Corporation
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ===============================================================================
+
+ from os import environ
+
+ from daal4py.sklearn._utils import sklearn_check_version
+
+ # sklearn requires manual enabling of Scipy array API support
+ # if `array-api-compat` package is present in environment
+ # TODO: create generic approach to handle this for all tests
+ if sklearn_check_version("1.6"):
+     environ["SCIPY_ARRAY_API"] = "1"
+
+
+ import numpy as np
+ import pytest
+ from numpy.linalg import slogdet
+ from numpy.testing import assert_allclose
+ from scipy.linalg import pinvh
+ from sklearn.covariance.tests.test_covariance import (
+     test_covariance,
+     test_EmpiricalCovariance_validates_mahalanobis,
+ )
+ from sklearn.datasets import load_diabetes
+ from sklearn.decomposition import PCA
+
+ from daal4py.sklearn._utils import daal_check_version
+ from onedal.tests.utils._dataframes_support import (
+     _as_numpy,
+     _convert_to_dataframe,
+     get_dataframes_and_queues,
+ )
+
+
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ @pytest.mark.parametrize("assume_centered", [True, False])
+ def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype, assume_centered):
+     is_gpu = queue is not None and queue.sycl_device.is_gpu
+     if assume_centered and is_gpu and not daal_check_version((2025, "P", 0)):
+         pytest.skip(
+             "Due to a bug on oneDAL side, means are not set to zero when assume_centered=True"
+         )
+     from sklearnex.covariance import IncrementalEmpiricalCovariance
+
+     X = np.array([[0, 1], [0, 1]])
+     X = X.astype(dtype)
+     X_split = np.array_split(X, 2)
+     inccov = IncrementalEmpiricalCovariance(assume_centered=assume_centered)
+
+     for i in range(2):
+         X_split_df = _convert_to_dataframe(
+             X_split[i], sycl_queue=queue, target_df=dataframe
+         )
+         result = inccov.partial_fit(X_split_df)
+
+     if assume_centered:
+         expected_covariance = np.array([[0, 0], [0, 1]])
+         expected_means = np.array([0, 0])
+     else:
+         expected_covariance = np.array([[0, 0], [0, 0]])
+         expected_means = np.array([0, 1])
+
+     assert_allclose(expected_covariance, result.covariance_)
+     assert_allclose(expected_means, result.location_)
+
+     X = np.array([[1, 2], [3, 6]])
+     X = X.astype(dtype)
+     X_split = np.array_split(X, 2)
+     inccov = IncrementalEmpiricalCovariance(assume_centered=assume_centered)
+
+     for i in range(2):
+         X_split_df = _convert_to_dataframe(
+             X_split[i], sycl_queue=queue, target_df=dataframe
+         )
+         result = inccov.partial_fit(X_split_df)
+
+     if assume_centered:
+         expected_covariance = np.array([[5, 10], [10, 20]])
+         expected_means = np.array([0, 0])
+     else:
+         expected_covariance = np.array([[1, 2], [2, 4]])
+         expected_means = np.array([2, 4])
+
+     assert_allclose(expected_covariance, result.covariance_)
+     assert_allclose(expected_means, result.location_)
+
+
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+ @pytest.mark.parametrize("batch_size", [2, 4])
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ def test_sklearnex_fit_on_gold_data(dataframe, queue, batch_size, dtype):
+     from sklearnex.covariance import IncrementalEmpiricalCovariance
+
+     X = np.array([[0, 1, 2, 3], [0, -1, -2, -3], [0, 1, 2, 3], [0, 1, 2, 3]])
+     X = X.astype(dtype)
+     X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+     inccov = IncrementalEmpiricalCovariance(batch_size=batch_size)
+
+     result = inccov.fit(X_df)
+
+     expected_covariance = np.array(
+         [[0, 0, 0, 0], [0, 0.75, 1.5, 2.25], [0, 1.5, 3, 4.5], [0, 2.25, 4.5, 6.75]]
+     )
+     expected_means = np.array([0, 0.5, 1, 1.5])
+
+     assert_allclose(expected_covariance, result.covariance_)
+     assert_allclose(expected_means, result.location_)
+
+
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+ @pytest.mark.parametrize("num_batches", [2, 10])
+ @pytest.mark.parametrize("row_count", [100, 1000])
+ @pytest.mark.parametrize("column_count", [10, 100])
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ def test_sklearnex_partial_fit_on_random_data(
+     dataframe, queue, num_batches, row_count, column_count, dtype
+ ):
+     from sklearnex.covariance import IncrementalEmpiricalCovariance
+
+     seed = 77
+     gen = np.random.default_rng(seed)
+     X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+     X = X.astype(dtype)
+     X_split = np.array_split(X, num_batches)
+     inccov = IncrementalEmpiricalCovariance()
+
+     for i in range(num_batches):
+         X_split_df = _convert_to_dataframe(
+             X_split[i], sycl_queue=queue, target_df=dataframe
+         )
+         result = inccov.partial_fit(X_split_df)
+
+     expected_covariance = np.cov(X.T, bias=1)
+     expected_means = np.mean(X, axis=0)
+
+     assert_allclose(expected_covariance, result.covariance_, atol=1e-6)
+     assert_allclose(expected_means, result.location_, atol=1e-6)
+
+
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+ @pytest.mark.parametrize("num_batches", [2, 10])
+ @pytest.mark.parametrize("row_count", [100, 1000])
+ @pytest.mark.parametrize("column_count", [10, 100])
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ @pytest.mark.parametrize("assume_centered", [True, False])
+ def test_sklearnex_fit_on_random_data(
+     dataframe, queue, num_batches, row_count, column_count, dtype, assume_centered
+ ):
+     is_gpu = queue is not None and queue.sycl_device.is_gpu
+     if assume_centered and is_gpu and not daal_check_version((2025, "P", 0)):
+         pytest.skip(
+             "Due to a bug on oneDAL side, means are not set to zero when assume_centered=True"
+         )
+     from sklearnex.covariance import IncrementalEmpiricalCovariance
+
+     seed = 77
+     gen = np.random.default_rng(seed)
+     X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+     X = X.astype(dtype)
+     X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+     batch_size = row_count // num_batches
+     inccov = IncrementalEmpiricalCovariance(
+         batch_size=batch_size, assume_centered=assume_centered
+     )
+
+     result = inccov.fit(X_df)
+
+     if assume_centered:
+         expected_covariance = np.dot(X.T, X) / X.shape[0]
+         expected_means = np.zeros_like(X[0])
+     else:
+         expected_covariance = np.cov(X.T, bias=1)
+         expected_means = np.mean(X, axis=0)
+
+     assert_allclose(expected_covariance, result.covariance_, atol=1e-6)
+     assert_allclose(expected_means, result.location_, atol=1e-6)
+
+
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+ def test_whitened_toy_score(dataframe, queue):
+     from sklearnex.covariance import IncrementalEmpiricalCovariance
+
+     # Load a sklearn toy dataset with sufficient data
+     X, _ = load_diabetes(return_X_y=True)
+     n = X.shape[1]
+
+     # Transform the data into uncorrelated, unity variance components
+     X = PCA(whiten=True).fit_transform(X)
+
+     # change dataframe
+     X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+
+     # fit data
+     est = IncrementalEmpiricalCovariance()
+     est.fit(X_df)
+     # location_ attribute approximately zero (10,), covariance_ identity (10,10)
+
+     # The log-likelihood can be calculated simply due to covariance_
+     # use of scipy.linalg.pinvh, np.linalg.sloget and np.cov for estimator
+     # independence
+     expected_result = (
+         -(n - slogdet(pinvh(np.cov(X.T, bias=1)))[1] + n * np.log(2 * np.pi)) / 2
+     )
+     # expected_result = -14.1780602988
+     result = _as_numpy(est.score(X_df))
+     assert_allclose(expected_result, result, atol=1e-6)
+
+
+ # Monkeypatch IncrementalEmpiricalCovariance into relevant sklearn.covariance tests
+ @pytest.mark.allow_sklearn_fallback
+ @pytest.mark.parametrize(
+     "sklearn_test",
+     [
+         test_covariance,
+         test_EmpiricalCovariance_validates_mahalanobis,
+     ],
+ )
+ def test_IncrementalEmpiricalCovariance_against_sklearn(monkeypatch, sklearn_test):
+     from sklearnex.covariance import IncrementalEmpiricalCovariance
+
+     class_name = ".".join([sklearn_test.__module__, "EmpiricalCovariance"])
+     monkeypatch.setattr(class_name, IncrementalEmpiricalCovariance)
+     sklearn_test()
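A note on test_whitened_toy_score above: sklearn's log_likelihood(emp_cov, precision) evaluates -(trace(emp_cov @ precision) - logdet(precision) + n*log(2*pi)) / 2, and the trace term collapses to n_features whenever the precision is the (pseudo-)inverse of the same full-rank covariance, which is why the test can write a plain n in expected_result. A standalone check of that reduction in plain NumPy/SciPy (illustrative, not part of the diff; the data here are arbitrary):

import numpy as np
from numpy.linalg import slogdet
from scipy.linalg import pinvh

rng = np.random.default_rng(0)
X = rng.standard_normal((500, 10))  # any full-rank data works for the identity
n = X.shape[1]

S = np.cov(X.T, bias=1)  # biased (MLE) covariance, as in the test
P = pinvh(S)             # precision matrix of the fitted model

full = -(np.trace(S @ P) - slogdet(P)[1] + n * np.log(2 * np.pi)) / 2
reduced = -(n - slogdet(P)[1] + n * np.log(2 * np.pi)) / 2

assert np.isclose(full, reduced)  # trace(S @ P) == n for full-rank S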
sklearnex/decomposition/__init__.py
@@ -0,0 +1,19 @@
+ # ===============================================================================
+ # Copyright 2021 Intel Corporation
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ===============================================================================
+
+ from .pca import PCA
+
+ __all__ = ["PCA"]