scikit-learn-intelex 2025.0.0__py310-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (278) hide show
  1. daal4py/__init__.py +73 -0
  2. daal4py/__main__.py +58 -0
  3. daal4py/_daal4py.cpython-310-x86_64-linux-gnu.so +0 -0
  4. daal4py/doc/third-party-programs.txt +424 -0
  5. daal4py/mb/__init__.py +19 -0
  6. daal4py/mb/model_builders.py +377 -0
  7. daal4py/mpi_transceiver.cpython-310-x86_64-linux-gnu.so +0 -0
  8. daal4py/sklearn/__init__.py +40 -0
  9. daal4py/sklearn/_n_jobs_support.py +242 -0
  10. daal4py/sklearn/_utils.py +241 -0
  11. daal4py/sklearn/cluster/__init__.py +20 -0
  12. daal4py/sklearn/cluster/dbscan.py +165 -0
  13. daal4py/sklearn/cluster/k_means.py +597 -0
  14. daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
  15. daal4py/sklearn/decomposition/__init__.py +19 -0
  16. daal4py/sklearn/decomposition/_pca.py +524 -0
  17. daal4py/sklearn/ensemble/AdaBoostClassifier.py +192 -0
  18. daal4py/sklearn/ensemble/GBTDAAL.py +318 -0
  19. daal4py/sklearn/ensemble/__init__.py +27 -0
  20. daal4py/sklearn/ensemble/_forest.py +1397 -0
  21. daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
  22. daal4py/sklearn/linear_model/__init__.py +29 -0
  23. daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
  24. daal4py/sklearn/linear_model/_linear.py +272 -0
  25. daal4py/sklearn/linear_model/_ridge.py +325 -0
  26. daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
  27. daal4py/sklearn/linear_model/linear.py +17 -0
  28. daal4py/sklearn/linear_model/logistic_loss.py +195 -0
  29. daal4py/sklearn/linear_model/logistic_path.py +1026 -0
  30. daal4py/sklearn/linear_model/ridge.py +17 -0
  31. daal4py/sklearn/linear_model/tests/test_linear.py +196 -0
  32. daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
  33. daal4py/sklearn/manifold/__init__.py +19 -0
  34. daal4py/sklearn/manifold/_t_sne.py +405 -0
  35. daal4py/sklearn/metrics/__init__.py +20 -0
  36. daal4py/sklearn/metrics/_pairwise.py +155 -0
  37. daal4py/sklearn/metrics/_ranking.py +210 -0
  38. daal4py/sklearn/model_selection/__init__.py +19 -0
  39. daal4py/sklearn/model_selection/_split.py +309 -0
  40. daal4py/sklearn/model_selection/tests/test_split.py +56 -0
  41. daal4py/sklearn/monkeypatch/__init__.py +0 -0
  42. daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
  43. daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
  44. daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
  45. daal4py/sklearn/monkeypatch/tests/test_patching.py +87 -0
  46. daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +118 -0
  47. daal4py/sklearn/neighbors/__init__.py +21 -0
  48. daal4py/sklearn/neighbors/_base.py +503 -0
  49. daal4py/sklearn/neighbors/_classification.py +139 -0
  50. daal4py/sklearn/neighbors/_regression.py +74 -0
  51. daal4py/sklearn/neighbors/_unsupervised.py +55 -0
  52. daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
  53. daal4py/sklearn/svm/__init__.py +19 -0
  54. daal4py/sklearn/svm/svm.py +734 -0
  55. daal4py/sklearn/utils/__init__.py +21 -0
  56. daal4py/sklearn/utils/base.py +75 -0
  57. daal4py/sklearn/utils/tests/test_utils.py +51 -0
  58. daal4py/sklearn/utils/validation.py +693 -0
  59. onedal/__init__.py +83 -0
  60. onedal/_config.py +53 -0
  61. onedal/_device_offload.py +229 -0
  62. onedal/_onedal_py_dpc.cpython-310-x86_64-linux-gnu.so +0 -0
  63. onedal/_onedal_py_host.cpython-310-x86_64-linux-gnu.so +0 -0
  64. onedal/_onedal_py_spmd_dpc.cpython-310-x86_64-linux-gnu.so +0 -0
  65. onedal/basic_statistics/__init__.py +20 -0
  66. onedal/basic_statistics/basic_statistics.py +107 -0
  67. onedal/basic_statistics/incremental_basic_statistics.py +160 -0
  68. onedal/basic_statistics/tests/test_basic_statistics.py +298 -0
  69. onedal/basic_statistics/tests/test_incremental_basic_statistics.py +196 -0
  70. onedal/cluster/__init__.py +27 -0
  71. onedal/cluster/dbscan.py +110 -0
  72. onedal/cluster/kmeans.py +560 -0
  73. onedal/cluster/kmeans_init.py +115 -0
  74. onedal/cluster/tests/test_dbscan.py +125 -0
  75. onedal/cluster/tests/test_kmeans.py +88 -0
  76. onedal/cluster/tests/test_kmeans_init.py +93 -0
  77. onedal/common/_base.py +38 -0
  78. onedal/common/_estimator_checks.py +47 -0
  79. onedal/common/_mixin.py +62 -0
  80. onedal/common/_policy.py +59 -0
  81. onedal/common/_spmd_policy.py +30 -0
  82. onedal/common/hyperparameters.py +116 -0
  83. onedal/common/tests/test_policy.py +75 -0
  84. onedal/covariance/__init__.py +20 -0
  85. onedal/covariance/covariance.py +125 -0
  86. onedal/covariance/incremental_covariance.py +146 -0
  87. onedal/covariance/tests/test_covariance.py +50 -0
  88. onedal/covariance/tests/test_incremental_covariance.py +122 -0
  89. onedal/datatypes/__init__.py +19 -0
  90. onedal/datatypes/_data_conversion.py +95 -0
  91. onedal/datatypes/tests/test_data.py +235 -0
  92. onedal/decomposition/__init__.py +20 -0
  93. onedal/decomposition/incremental_pca.py +204 -0
  94. onedal/decomposition/pca.py +186 -0
  95. onedal/decomposition/tests/test_incremental_pca.py +198 -0
  96. onedal/ensemble/__init__.py +29 -0
  97. onedal/ensemble/forest.py +720 -0
  98. onedal/ensemble/tests/test_random_forest.py +97 -0
  99. onedal/linear_model/__init__.py +27 -0
  100. onedal/linear_model/incremental_linear_model.py +258 -0
  101. onedal/linear_model/linear_model.py +329 -0
  102. onedal/linear_model/logistic_regression.py +249 -0
  103. onedal/linear_model/tests/test_incremental_linear_regression.py +168 -0
  104. onedal/linear_model/tests/test_incremental_ridge_regression.py +107 -0
  105. onedal/linear_model/tests/test_linear_regression.py +149 -0
  106. onedal/linear_model/tests/test_logistic_regression.py +95 -0
  107. onedal/linear_model/tests/test_ridge.py +95 -0
  108. onedal/neighbors/__init__.py +19 -0
  109. onedal/neighbors/neighbors.py +778 -0
  110. onedal/neighbors/tests/test_knn_classification.py +49 -0
  111. onedal/primitives/__init__.py +27 -0
  112. onedal/primitives/get_tree.py +25 -0
  113. onedal/primitives/kernel_functions.py +153 -0
  114. onedal/primitives/tests/test_kernel_functions.py +159 -0
  115. onedal/spmd/__init__.py +25 -0
  116. onedal/spmd/_base.py +30 -0
  117. onedal/spmd/basic_statistics/__init__.py +20 -0
  118. onedal/spmd/basic_statistics/basic_statistics.py +30 -0
  119. onedal/spmd/basic_statistics/incremental_basic_statistics.py +69 -0
  120. onedal/spmd/cluster/__init__.py +28 -0
  121. onedal/spmd/cluster/dbscan.py +23 -0
  122. onedal/spmd/cluster/kmeans.py +56 -0
  123. onedal/spmd/covariance/__init__.py +20 -0
  124. onedal/spmd/covariance/covariance.py +26 -0
  125. onedal/spmd/covariance/incremental_covariance.py +82 -0
  126. onedal/spmd/decomposition/__init__.py +20 -0
  127. onedal/spmd/decomposition/incremental_pca.py +117 -0
  128. onedal/spmd/decomposition/pca.py +26 -0
  129. onedal/spmd/ensemble/__init__.py +19 -0
  130. onedal/spmd/ensemble/forest.py +28 -0
  131. onedal/spmd/linear_model/__init__.py +21 -0
  132. onedal/spmd/linear_model/incremental_linear_model.py +97 -0
  133. onedal/spmd/linear_model/linear_model.py +30 -0
  134. onedal/spmd/linear_model/logistic_regression.py +38 -0
  135. onedal/spmd/neighbors/__init__.py +19 -0
  136. onedal/spmd/neighbors/neighbors.py +75 -0
  137. onedal/svm/__init__.py +19 -0
  138. onedal/svm/svm.py +556 -0
  139. onedal/svm/tests/test_csr_svm.py +351 -0
  140. onedal/svm/tests/test_nusvc.py +204 -0
  141. onedal/svm/tests/test_nusvr.py +210 -0
  142. onedal/svm/tests/test_svc.py +168 -0
  143. onedal/svm/tests/test_svr.py +243 -0
  144. onedal/tests/test_common.py +41 -0
  145. onedal/tests/utils/_dataframes_support.py +168 -0
  146. onedal/tests/utils/_device_selection.py +107 -0
  147. onedal/utils/__init__.py +49 -0
  148. onedal/utils/_array_api.py +91 -0
  149. onedal/utils/validation.py +432 -0
  150. scikit_learn_intelex-2025.0.0.dist-info/LICENSE.txt +202 -0
  151. scikit_learn_intelex-2025.0.0.dist-info/METADATA +231 -0
  152. scikit_learn_intelex-2025.0.0.dist-info/RECORD +278 -0
  153. scikit_learn_intelex-2025.0.0.dist-info/WHEEL +5 -0
  154. scikit_learn_intelex-2025.0.0.dist-info/top_level.txt +3 -0
  155. sklearnex/__init__.py +65 -0
  156. sklearnex/__main__.py +58 -0
  157. sklearnex/_config.py +98 -0
  158. sklearnex/_device_offload.py +121 -0
  159. sklearnex/_utils.py +109 -0
  160. sklearnex/basic_statistics/__init__.py +20 -0
  161. sklearnex/basic_statistics/basic_statistics.py +140 -0
  162. sklearnex/basic_statistics/incremental_basic_statistics.py +288 -0
  163. sklearnex/basic_statistics/tests/test_basic_statistics.py +251 -0
  164. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +384 -0
  165. sklearnex/cluster/__init__.py +20 -0
  166. sklearnex/cluster/dbscan.py +192 -0
  167. sklearnex/cluster/k_means.py +383 -0
  168. sklearnex/cluster/tests/test_dbscan.py +38 -0
  169. sklearnex/cluster/tests/test_kmeans.py +153 -0
  170. sklearnex/conftest.py +73 -0
  171. sklearnex/covariance/__init__.py +19 -0
  172. sklearnex/covariance/incremental_covariance.py +368 -0
  173. sklearnex/covariance/tests/test_incremental_covariance.py +226 -0
  174. sklearnex/decomposition/__init__.py +19 -0
  175. sklearnex/decomposition/pca.py +414 -0
  176. sklearnex/decomposition/tests/test_pca.py +58 -0
  177. sklearnex/dispatcher.py +543 -0
  178. sklearnex/doc/third-party-programs.txt +424 -0
  179. sklearnex/ensemble/__init__.py +29 -0
  180. sklearnex/ensemble/_forest.py +2016 -0
  181. sklearnex/ensemble/tests/test_forest.py +120 -0
  182. sklearnex/glob/__main__.py +72 -0
  183. sklearnex/glob/dispatcher.py +101 -0
  184. sklearnex/linear_model/__init__.py +32 -0
  185. sklearnex/linear_model/coordinate_descent.py +30 -0
  186. sklearnex/linear_model/incremental_linear.py +463 -0
  187. sklearnex/linear_model/incremental_ridge.py +418 -0
  188. sklearnex/linear_model/linear.py +302 -0
  189. sklearnex/linear_model/logistic_path.py +17 -0
  190. sklearnex/linear_model/logistic_regression.py +403 -0
  191. sklearnex/linear_model/ridge.py +24 -0
  192. sklearnex/linear_model/tests/test_incremental_linear.py +203 -0
  193. sklearnex/linear_model/tests/test_incremental_ridge.py +153 -0
  194. sklearnex/linear_model/tests/test_linear.py +142 -0
  195. sklearnex/linear_model/tests/test_logreg.py +134 -0
  196. sklearnex/manifold/__init__.py +19 -0
  197. sklearnex/manifold/t_sne.py +21 -0
  198. sklearnex/manifold/tests/test_tsne.py +26 -0
  199. sklearnex/metrics/__init__.py +23 -0
  200. sklearnex/metrics/pairwise.py +22 -0
  201. sklearnex/metrics/ranking.py +20 -0
  202. sklearnex/metrics/tests/test_metrics.py +39 -0
  203. sklearnex/model_selection/__init__.py +21 -0
  204. sklearnex/model_selection/split.py +22 -0
  205. sklearnex/model_selection/tests/test_model_selection.py +34 -0
  206. sklearnex/neighbors/__init__.py +27 -0
  207. sklearnex/neighbors/_lof.py +231 -0
  208. sklearnex/neighbors/common.py +310 -0
  209. sklearnex/neighbors/knn_classification.py +226 -0
  210. sklearnex/neighbors/knn_regression.py +203 -0
  211. sklearnex/neighbors/knn_unsupervised.py +170 -0
  212. sklearnex/neighbors/tests/test_neighbors.py +80 -0
  213. sklearnex/preview/__init__.py +17 -0
  214. sklearnex/preview/covariance/__init__.py +19 -0
  215. sklearnex/preview/covariance/covariance.py +133 -0
  216. sklearnex/preview/covariance/tests/test_covariance.py +66 -0
  217. sklearnex/preview/decomposition/__init__.py +19 -0
  218. sklearnex/preview/decomposition/incremental_pca.py +228 -0
  219. sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
  220. sklearnex/preview/linear_model/__init__.py +19 -0
  221. sklearnex/preview/linear_model/ridge.py +419 -0
  222. sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
  223. sklearnex/spmd/__init__.py +25 -0
  224. sklearnex/spmd/basic_statistics/__init__.py +20 -0
  225. sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
  226. sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
  227. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  228. sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +307 -0
  229. sklearnex/spmd/cluster/__init__.py +30 -0
  230. sklearnex/spmd/cluster/dbscan.py +50 -0
  231. sklearnex/spmd/cluster/kmeans.py +21 -0
  232. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  233. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
  234. sklearnex/spmd/covariance/__init__.py +20 -0
  235. sklearnex/spmd/covariance/covariance.py +21 -0
  236. sklearnex/spmd/covariance/incremental_covariance.py +37 -0
  237. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  238. sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
  239. sklearnex/spmd/decomposition/__init__.py +20 -0
  240. sklearnex/spmd/decomposition/incremental_pca.py +30 -0
  241. sklearnex/spmd/decomposition/pca.py +21 -0
  242. sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
  243. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  244. sklearnex/spmd/ensemble/__init__.py +19 -0
  245. sklearnex/spmd/ensemble/forest.py +71 -0
  246. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  247. sklearnex/spmd/linear_model/__init__.py +21 -0
  248. sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
  249. sklearnex/spmd/linear_model/linear_model.py +21 -0
  250. sklearnex/spmd/linear_model/logistic_regression.py +21 -0
  251. sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +329 -0
  252. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  253. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +166 -0
  254. sklearnex/spmd/neighbors/__init__.py +19 -0
  255. sklearnex/spmd/neighbors/neighbors.py +25 -0
  256. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  257. sklearnex/svm/__init__.py +29 -0
  258. sklearnex/svm/_common.py +328 -0
  259. sklearnex/svm/nusvc.py +332 -0
  260. sklearnex/svm/nusvr.py +148 -0
  261. sklearnex/svm/svc.py +360 -0
  262. sklearnex/svm/svr.py +149 -0
  263. sklearnex/svm/tests/test_svm.py +93 -0
  264. sklearnex/tests/_utils.py +328 -0
  265. sklearnex/tests/_utils_spmd.py +198 -0
  266. sklearnex/tests/test_common.py +54 -0
  267. sklearnex/tests/test_config.py +43 -0
  268. sklearnex/tests/test_memory_usage.py +291 -0
  269. sklearnex/tests/test_monkeypatch.py +276 -0
  270. sklearnex/tests/test_n_jobs_support.py +103 -0
  271. sklearnex/tests/test_parallel.py +48 -0
  272. sklearnex/tests/test_patching.py +385 -0
  273. sklearnex/tests/test_run_to_run_stability.py +296 -0
  274. sklearnex/utils/__init__.py +19 -0
  275. sklearnex/utils/_array_api.py +82 -0
  276. sklearnex/utils/parallel.py +59 -0
  277. sklearnex/utils/tests/test_finite.py +89 -0
  278. sklearnex/utils/validation.py +17 -0
@@ -0,0 +1,368 @@
1
+ # ===============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ===============================================================================
16
+
17
+ import numbers
18
+ import warnings
19
+
20
+ import numpy as np
21
+ from scipy import linalg
22
+ from sklearn.base import BaseEstimator, clone
23
+ from sklearn.covariance import EmpiricalCovariance as sklearn_EmpiricalCovariance
24
+ from sklearn.covariance import log_likelihood
25
+ from sklearn.utils import check_array, gen_batches
26
+ from sklearn.utils.validation import _num_features
27
+
28
+ from daal4py.sklearn._n_jobs_support import control_n_jobs
29
+ from daal4py.sklearn._utils import daal_check_version, sklearn_check_version
30
+ from onedal.covariance import (
31
+ IncrementalEmpiricalCovariance as onedal_IncrementalEmpiricalCovariance,
32
+ )
33
+ from sklearnex import config_context
34
+
35
+ from .._device_offload import dispatch, wrap_output_data
36
+ from .._utils import PatchingConditionsChain, register_hyperparameters
37
+ from ..metrics import pairwise_distances
38
+ from ..utils._array_api import get_namespace
39
+
40
+ if sklearn_check_version("1.2"):
41
+ from sklearn.utils._param_validation import Interval
42
+
43
+
44
@control_n_jobs(decorated_methods=["partial_fit", "fit", "_onedal_finalize_fit"])
class IncrementalEmpiricalCovariance(BaseEstimator):
    """
    Incremental estimator for covariance.

    Computes the maximum likelihood empirical covariance when the data
    are split into batches.

    Parameters
    ----------
    store_precision : bool, default=False
        Specifies if the estimated precision is stored.

    assume_centered : bool, default=False
        If True, data are not centered before computation.
        Useful when working with data whose mean is almost, but not exactly
        zero.
        If False (default), data are centered before computation.

    batch_size : int, default=None
        The number of samples to use for each batch. Only used when calling
        ``fit``. If ``batch_size`` is ``None``, then ``batch_size``
        is inferred from the data and set to ``5 * n_features``, to provide a
        balance between approximation accuracy and memory consumption.

    copy : bool, default=True
        If False, X will be overwritten. ``copy=False`` can be used to
        save memory but is unsafe for general use.

    Attributes
    ----------
    location_ : ndarray of shape (n_features,)
        Estimated location, i.e. the estimated mean.

    covariance_ : ndarray of shape (n_features, n_features)
        Estimated covariance matrix.

    precision_ : ndarray of shape (n_features, n_features) or None
        Pseudo-inverse of ``covariance_``, computed when the fit is
        finalized. ``None`` when ``store_precision`` is False.

    n_samples_seen_ : int
        The number of samples processed by the estimator. Will be reset on
        new calls to fit, but increments across ``partial_fit`` calls.

    batch_size_ : int
        Inferred batch size from ``batch_size``.

    n_features_in_ : int
        Number of features seen during :term:`fit` or ``partial_fit``.
    """

    # staticmethod keeps the backend class from being bound as a method when it
    # is looked up through an instance.
    _onedal_incremental_covariance = staticmethod(onedal_IncrementalEmpiricalCovariance)

    if sklearn_check_version("1.2"):
        _parameter_constraints: dict = {
            "store_precision": ["boolean"],
            "assume_centered": ["boolean"],
            "batch_size": [Interval(numbers.Integral, 1, None, closed="left"), None],
            "copy": ["boolean"],
        }

    # Reuse the stock scikit-learn implementation; it only reads ``covariance_``.
    error_norm = wrap_output_data(sklearn_EmpiricalCovariance.error_norm)

    def __init__(
        self, *, store_precision=False, assume_centered=False, batch_size=None, copy=True
    ):
        self.assume_centered = assume_centered
        self.store_precision = store_precision
        self.batch_size = batch_size
        self.copy = copy

    def get_precision(self):
        """
        Getter for the precision matrix.

        Pending partial results are finalized first, because ``precision_`` is
        only written by ``_onedal_finalize_fit``. Without this step the stored
        precision would be missing or stale after ``partial_fit`` when
        ``store_precision=True``.

        Returns
        -------
        precision_ : array-like of shape (n_features, n_features)
            The precision matrix associated to the current covariance object.
        """
        if getattr(self, "_need_to_finalize", False):
            self._onedal_finalize_fit()
        return sklearn_EmpiricalCovariance.get_precision(self)

    def _onedal_supported(self, method_name, *data):
        """
        Build the patching-conditions chain for ``method_name``.

        No conditions are added to the chain and ``data`` is not inspected.
        """
        patching_status = PatchingConditionsChain(
            f"sklearn.covariance.{self.__class__.__name__}.{method_name}"
        )
        return patching_status

    def _onedal_finalize_fit(self, queue=None):
        """
        Finalize the accumulated partial results and refresh ``precision_``.

        Parameters
        ----------
        queue : object, default=None
            Forwarded unchanged to the oneDAL estimator's ``finalize_fit``.
        """
        # Internal invariant: callers only get here after a partial fit has
        # created the backend estimator.
        assert hasattr(self, "_onedal_estimator")
        self._onedal_estimator.finalize_fit(queue=queue)
        self._need_to_finalize = False

        if not daal_check_version((2024, "P", 400)) and self.assume_centered:
            # For oneDAL builds failing this version check, the centered result
            # is converted here: add the outer product of the location back to
            # the covariance and report a zero location.
            location = self._onedal_estimator.location_[None, :]
            self._onedal_estimator.covariance_ += np.dot(location.T, location)
            self._onedal_estimator.location_ = np.zeros_like(np.squeeze(location))
        if self.store_precision:
            self.precision_ = linalg.pinvh(
                self._onedal_estimator.covariance_, check_finite=False
            )
        else:
            self.precision_ = None

    @property
    def covariance_(self):
        """Covariance held by the oneDAL estimator, finalized on demand."""
        if hasattr(self, "_onedal_estimator"):
            if self._need_to_finalize:
                self._onedal_finalize_fit()
            return self._onedal_estimator.covariance_
        else:
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute 'covariance_'"
            )

    @property
    def location_(self):
        """Location held by the oneDAL estimator, finalized on demand."""
        if hasattr(self, "_onedal_estimator"):
            if self._need_to_finalize:
                self._onedal_finalize_fit()
            return self._onedal_estimator.location_
        else:
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute 'location_'"
            )

    def _onedal_partial_fit(self, X, queue=None, check_input=True):
        """
        Feed one batch to the oneDAL estimator and update the sample counters.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Batch to accumulate.

        queue : object, default=None
            Forwarded unchanged to the oneDAL estimator's ``partial_fit``.

        check_input : bool, default=True
            Validate the estimator parameters and ``X`` before use.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0

        # finite check occurs on onedal side
        if check_input:
            if sklearn_check_version("1.2"):
                self._validate_params()

            if sklearn_check_version("1.0"):
                X = self._validate_data(
                    X,
                    dtype=[np.float64, np.float32],
                    reset=first_pass,
                    copy=self.copy,
                    force_all_finite=False,
                )
            else:
                X = check_array(
                    X,
                    dtype=[np.float64, np.float32],
                    copy=self.copy,
                    force_all_finite=False,
                )

        onedal_params = {
            "method": "dense",
            "bias": True,
            "assume_centered": self.assume_centered,
        }
        if not hasattr(self, "_onedal_estimator"):
            self._onedal_estimator = self._onedal_incremental_covariance(**onedal_params)
        try:
            # Read the batch shape up front so a malformed X still fails before
            # any data reaches oneDAL.
            n_batch_samples = X.shape[0]
            n_batch_features = X.shape[1] if first_pass else None

            self._onedal_estimator.partial_fit(X, queue=queue)

            # Update the bookkeeping only after the backend accepted the batch,
            # so a rejected batch is never counted in ``n_samples_seen_``.
            if first_pass:
                self.n_samples_seen_ = n_batch_samples
                self.n_features_in_ = n_batch_features
            else:
                self.n_samples_seen_ += n_batch_samples
        finally:
            self._need_to_finalize = True

        return self

    # NOTE: no docstring here; ``score.__doc__`` is assigned from scikit-learn at
    # the end of the class body.
    @wrap_output_data
    def score(self, X_test, y=None):
        xp, _ = get_namespace(X_test)

        # Reading location_ also finalizes any pending partial results.
        location = self.location_
        if sklearn_check_version("1.0"):
            X = self._validate_data(
                X_test,
                dtype=[np.float64, np.float32],
                reset=False,
            )
        else:
            X = check_array(
                X_test,
                dtype=[np.float64, np.float32],
            )

        if "numpy" not in xp.__name__:
            location = xp.asarray(location, device=X_test.device)
            # depending on the sklearn version, check_array
            # and validate_data will return only numpy arrays
            # which will break dpnp/dpctl support. If the
            # array namespace isn't from numpy and the data
            # is now a numpy array, it has been validated and
            # the original can be used.
            if isinstance(X, np.ndarray):
                X = X_test

        # Fit an unfitted copy on the data centered by this model's location.
        est = clone(self)
        est.set_params(**{"assume_centered": True})

        # test_cov is a numpy array, but calculated on device
        test_cov = est.fit(X - location).covariance_
        res = log_likelihood(test_cov, self.get_precision())

        return res

    def partial_fit(self, X, y=None, check_input=True):
        """
        Incremental fit with X. All of X is processed as a single batch.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        check_input : bool, default=True
            Run check_array on X.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        # No scikit-learn implementation is registered for this method.
        return dispatch(
            self,
            "partial_fit",
            {
                "onedal": self.__class__._onedal_partial_fit,
                "sklearn": None,
            },
            X,
            check_input=check_input,
        )

    def fit(self, X, y=None):
        """
        Fit the model with X, using minibatches of size batch_size.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        # No scikit-learn implementation is registered for this method.
        return dispatch(
            self,
            "fit",
            {
                "onedal": self.__class__._onedal_fit,
                "sklearn": None,
            },
            X,
        )

    def _onedal_fit(self, X, queue=None):
        """
        Reset the estimator, then accumulate ``X`` batch by batch and finalize.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        queue : object, default=None
            Forwarded unchanged to the partial-fit and finalize steps.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        # Setting the counter to zero makes the first batch a "first pass".
        self.n_samples_seen_ = 0
        if hasattr(self, "_onedal_estimator"):
            self._onedal_estimator._reset()

        if sklearn_check_version("1.2"):
            self._validate_params()

        # finite check occurs on onedal side
        if sklearn_check_version("1.0"):
            X = self._validate_data(
                X, dtype=[np.float64, np.float32], copy=self.copy, force_all_finite=False
            )
        else:
            X = check_array(
                X, dtype=[np.float64, np.float32], copy=self.copy, force_all_finite=False
            )
            self.n_features_in_ = X.shape[1]

        self.batch_size_ = self.batch_size if self.batch_size else 5 * self.n_features_in_

        if X.shape[0] == 1:
            warnings.warn(
                "Only one sample available. You may want to reshape your data array"
            )

        # X was validated above, so the per-batch validation is skipped.
        for batch in gen_batches(X.shape[0], self.batch_size_):
            X_batch = X[batch]
            self._onedal_partial_fit(X_batch, queue=queue, check_input=False)

        self._onedal_finalize_fit(queue=queue)

        return self

    # expose sklearnex pairwise_distances if mahalanobis distance eventually supported
    # NOTE: no docstring here; ``mahalanobis.__doc__`` is assigned from
    # scikit-learn at the end of the class body.
    def mahalanobis(self, X):
        if sklearn_check_version("1.0"):
            self._check_feature_names(X, reset=False)

        xp, _ = get_namespace(X)
        precision = self.get_precision()
        # compute mahalanobis distances
        # pairwise_distances will check n_features (via n_feature matching with
        # self.location_) , and will check for finiteness via check array
        # check_feature_names will match _validate_data functionally
        location = self.location_[np.newaxis, :]
        if "numpy" not in xp.__name__:
            # Guarantee that inputs to pairwise_distances match in type and location
            location = xp.asarray(location, device=X.device)

        try:
            dist = pairwise_distances(X, location, metric="mahalanobis", VI=precision)
        except ValueError as e:
            # Throw the expected sklearn error in an n_feature length violation
            if "Incompatible dimension for X and Y matrices: X.shape[1] ==" in str(e):
                raise ValueError(
                    f"X has {_num_features(X)} features, but {self.__class__.__name__} "
                    f"is expecting {self.n_features_in_} features as input."
                ) from e
            # Any other ValueError propagates unchanged.
            raise

        return (xp.reshape(dist, (-1,))) ** 2

    # The same condition chain is used for both CPU and GPU dispatch.
    _onedal_cpu_supported = _onedal_supported
    _onedal_gpu_supported = _onedal_supported

    mahalanobis.__doc__ = sklearn_EmpiricalCovariance.mahalanobis.__doc__
    error_norm.__doc__ = sklearn_EmpiricalCovariance.error_norm.__doc__
    score.__doc__ = sklearn_EmpiricalCovariance.score.__doc__
@@ -0,0 +1,226 @@
1
+ # ===============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ===============================================================================
16
+
17
+ import numpy as np
18
+ import pytest
19
+ from numpy.linalg import slogdet
20
+ from numpy.testing import assert_allclose
21
+ from scipy.linalg import pinvh
22
+ from sklearn.covariance.tests.test_covariance import (
23
+ test_covariance,
24
+ test_EmpiricalCovariance_validates_mahalanobis,
25
+ )
26
+ from sklearn.datasets import load_diabetes
27
+ from sklearn.decomposition import PCA
28
+
29
+ from daal4py.sklearn._utils import daal_check_version
30
+ from onedal.tests.utils._dataframes_support import (
31
+ _as_numpy,
32
+ _convert_to_dataframe,
33
+ get_dataframes_and_queues,
34
+ )
35
+
36
+
37
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("assume_centered", [True, False])
def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype, assume_centered):
    """Check partial_fit against hand-computed covariance and location values."""
    on_gpu = queue is not None and queue.sycl_device.is_gpu
    if assume_centered and on_gpu and not daal_check_version((2025, "P", 0)):
        pytest.skip(
            "Due to a bug on oneDAL side, means are not set to zero when assume_centered=True"
        )
    from sklearnex.covariance import IncrementalEmpiricalCovariance

    # Each entry: (rows, covariance when centered, covariance otherwise,
    # column means). When assume_centered=True the expected location is zero.
    gold_cases = (
        ([[0, 1], [0, 1]], [[0, 0], [0, 1]], [[0, 0], [0, 0]], [0, 1]),
        ([[1, 2], [3, 6]], [[5, 10], [10, 20]], [[1, 2], [2, 4]], [2, 4]),
    )

    for rows, centered_cov, plain_cov, plain_means in gold_cases:
        data = np.array(rows).astype(dtype)
        chunks = np.array_split(data, 2)
        estimator = IncrementalEmpiricalCovariance(assume_centered=assume_centered)

        # Feed the data one chunk at a time; the last call's return value
        # is what gets checked.
        for chunk in chunks:
            chunk_df = _convert_to_dataframe(
                chunk, sycl_queue=queue, target_df=dataframe
            )
            fitted = estimator.partial_fit(chunk_df)

        if assume_centered:
            want_cov = np.array(centered_cov)
            want_means = np.array([0, 0])
        else:
            want_cov = np.array(plain_cov)
            want_means = np.array(plain_means)

        assert_allclose(want_cov, fitted.covariance_)
        assert_allclose(want_means, fitted.location_)
89
+
90
+
91
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("batch_size", [2, 4])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_sklearnex_fit_on_gold_data(dataframe, queue, batch_size, dtype):
    """Check fit() with an explicit batch_size against hand-computed values."""
    from sklearnex.covariance import IncrementalEmpiricalCovariance

    data = np.array(
        [
            [0, 1, 2, 3],
            [0, -1, -2, -3],
            [0, 1, 2, 3],
            [0, 1, 2, 3],
        ]
    ).astype(dtype)
    data_df = _convert_to_dataframe(data, sycl_queue=queue, target_df=dataframe)

    estimator = IncrementalEmpiricalCovariance(batch_size=batch_size)
    fitted = estimator.fit(data_df)

    want_means = np.array([0, 0.5, 1, 1.5])
    want_cov = np.array(
        [
            [0, 0, 0, 0],
            [0, 0.75, 1.5, 2.25],
            [0, 1.5, 3, 4.5],
            [0, 2.25, 4.5, 6.75],
        ]
    )

    assert_allclose(want_cov, fitted.covariance_)
    assert_allclose(want_means, fitted.location_)
111
+
112
+
113
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("num_batches", [2, 10])
@pytest.mark.parametrize("row_count", [100, 1000])
@pytest.mark.parametrize("column_count", [10, 100])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_sklearnex_partial_fit_on_random_data(
    dataframe, queue, num_batches, row_count, column_count, dtype
):
    """Compare chunked partial_fit on seeded uniform data with NumPy statistics."""
    from sklearnex.covariance import IncrementalEmpiricalCovariance

    # Fixed seed keeps the generated data reproducible across runs.
    rng = np.random.default_rng(77)
    data = rng.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
    data = data.astype(dtype)
    chunks = np.array_split(data, num_batches)
    estimator = IncrementalEmpiricalCovariance()

    for chunk in chunks:
        chunk_df = _convert_to_dataframe(chunk, sycl_queue=queue, target_df=dataframe)
        fitted = estimator.partial_fit(chunk_df)

    # Reference values: biased (divide-by-N) covariance and column means.
    want_cov = np.cov(data.T, bias=1)
    want_means = np.mean(data, axis=0)

    assert_allclose(want_cov, fitted.covariance_, atol=1e-6)
    assert_allclose(want_means, fitted.location_, atol=1e-6)
141
+
142
+
143
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("num_batches", [2, 10])
@pytest.mark.parametrize("row_count", [100, 1000])
@pytest.mark.parametrize("column_count", [10, 100])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("assume_centered", [True, False])
def test_sklearnex_fit_on_random_data(
    dataframe, queue, num_batches, row_count, column_count, dtype, assume_centered
):
    """Compare fit() on seeded uniform data with NumPy reference statistics."""
    on_gpu = queue is not None and queue.sycl_device.is_gpu
    if assume_centered and on_gpu and not daal_check_version((2025, "P", 0)):
        pytest.skip(
            "Due to a bug on oneDAL side, means are not set to zero when assume_centered=True"
        )
    from sklearnex.covariance import IncrementalEmpiricalCovariance

    # Fixed seed keeps the generated data reproducible across runs.
    rng = np.random.default_rng(77)
    data = rng.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
    data = data.astype(dtype)
    data_df = _convert_to_dataframe(data, sycl_queue=queue, target_df=dataframe)

    estimator = IncrementalEmpiricalCovariance(
        batch_size=row_count // num_batches, assume_centered=assume_centered
    )
    fitted = estimator.fit(data_df)

    if not assume_centered:
        want_cov = np.cov(data.T, bias=1)
        want_means = np.mean(data, axis=0)
    else:
        # No mean is subtracted here: the reference is X^T X / n_samples
        # with an all-zero location.
        want_cov = np.dot(data.T, data) / data.shape[0]
        want_means = np.zeros_like(data[0])

    assert_allclose(want_cov, fitted.covariance_, atol=1e-6)
    assert_allclose(want_means, fitted.location_, atol=1e-6)
180
+
181
+
182
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
def test_whitened_toy_score(dataframe, queue):
    """Check score() on PCA-whitened diabetes data against a NumPy/SciPy reference."""
    from sklearnex.covariance import IncrementalEmpiricalCovariance

    # Load a sklearn toy dataset with sufficient data
    raw, _ = load_diabetes(return_X_y=True)
    n_features = raw.shape[1]

    # Transform the data into uncorrelated, unity variance components
    whitened = PCA(whiten=True).fit_transform(raw)

    # Convert to the dataframe type under test and fit the estimator
    whitened_df = _convert_to_dataframe(
        whitened, sycl_queue=queue, target_df=dataframe
    )
    estimator = IncrementalEmpiricalCovariance()
    estimator.fit(whitened_df)
    # location_ attribute approximately zero (10,), covariance_ identity (10,10)

    # The reference log-likelihood is built only from scipy.linalg.pinvh,
    # np.linalg.slogdet and np.cov so that it stays independent of the
    # estimator being tested.
    reference_precision = pinvh(np.cov(whitened.T, bias=1))
    _, log_det = slogdet(reference_precision)
    # expected value noted in the original test: -14.1780602988
    want_score = -(n_features - log_det + n_features * np.log(2 * np.pi)) / 2

    got_score = _as_numpy(estimator.score(whitened_df))
    assert_allclose(want_score, got_score, atol=1e-6)
210
+
211
+
212
+ # Monkeypatch IncrementalEmpiricalCovariance into relevant sklearn.covariance tests
213
@pytest.mark.allow_sklearn_fallback
@pytest.mark.parametrize(
    "sklearn_test",
    [test_covariance, test_EmpiricalCovariance_validates_mahalanobis],
)
def test_IncrementalEmpiricalCovariance_against_sklearn(monkeypatch, sklearn_test):
    """Run an imported sklearn covariance test with the estimator swapped in."""
    from sklearnex.covariance import IncrementalEmpiricalCovariance

    # Replace the EmpiricalCovariance name in the module that defines the
    # sklearn test, so the test body exercises IncrementalEmpiricalCovariance.
    patch_target = f"{sklearn_test.__module__}.EmpiricalCovariance"
    monkeypatch.setattr(patch_target, IncrementalEmpiricalCovariance)
    sklearn_test()
@@ -0,0 +1,19 @@
1
+ # ===============================================================================
2
+ # Copyright 2021 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ===============================================================================
16
+
17
+ from .pca import PCA
18
+
19
+ __all__ = ["PCA"]