scikit-learn-intelex 2025.0.0__py39-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (278) hide show
  1. daal4py/__init__.py +73 -0
  2. daal4py/__main__.py +58 -0
  3. daal4py/_daal4py.cpython-39-x86_64-linux-gnu.so +0 -0
  4. daal4py/doc/third-party-programs.txt +424 -0
  5. daal4py/mb/__init__.py +19 -0
  6. daal4py/mb/model_builders.py +377 -0
  7. daal4py/mpi_transceiver.cpython-39-x86_64-linux-gnu.so +0 -0
  8. daal4py/sklearn/__init__.py +40 -0
  9. daal4py/sklearn/_n_jobs_support.py +242 -0
  10. daal4py/sklearn/_utils.py +241 -0
  11. daal4py/sklearn/cluster/__init__.py +20 -0
  12. daal4py/sklearn/cluster/dbscan.py +165 -0
  13. daal4py/sklearn/cluster/k_means.py +597 -0
  14. daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
  15. daal4py/sklearn/decomposition/__init__.py +19 -0
  16. daal4py/sklearn/decomposition/_pca.py +524 -0
  17. daal4py/sklearn/ensemble/AdaBoostClassifier.py +192 -0
  18. daal4py/sklearn/ensemble/GBTDAAL.py +318 -0
  19. daal4py/sklearn/ensemble/__init__.py +27 -0
  20. daal4py/sklearn/ensemble/_forest.py +1397 -0
  21. daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
  22. daal4py/sklearn/linear_model/__init__.py +29 -0
  23. daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
  24. daal4py/sklearn/linear_model/_linear.py +272 -0
  25. daal4py/sklearn/linear_model/_ridge.py +325 -0
  26. daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
  27. daal4py/sklearn/linear_model/linear.py +17 -0
  28. daal4py/sklearn/linear_model/logistic_loss.py +195 -0
  29. daal4py/sklearn/linear_model/logistic_path.py +1026 -0
  30. daal4py/sklearn/linear_model/ridge.py +17 -0
  31. daal4py/sklearn/linear_model/tests/test_linear.py +196 -0
  32. daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
  33. daal4py/sklearn/manifold/__init__.py +19 -0
  34. daal4py/sklearn/manifold/_t_sne.py +405 -0
  35. daal4py/sklearn/metrics/__init__.py +20 -0
  36. daal4py/sklearn/metrics/_pairwise.py +155 -0
  37. daal4py/sklearn/metrics/_ranking.py +210 -0
  38. daal4py/sklearn/model_selection/__init__.py +19 -0
  39. daal4py/sklearn/model_selection/_split.py +309 -0
  40. daal4py/sklearn/model_selection/tests/test_split.py +56 -0
  41. daal4py/sklearn/monkeypatch/__init__.py +0 -0
  42. daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
  43. daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
  44. daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
  45. daal4py/sklearn/monkeypatch/tests/test_patching.py +87 -0
  46. daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +118 -0
  47. daal4py/sklearn/neighbors/__init__.py +21 -0
  48. daal4py/sklearn/neighbors/_base.py +503 -0
  49. daal4py/sklearn/neighbors/_classification.py +139 -0
  50. daal4py/sklearn/neighbors/_regression.py +74 -0
  51. daal4py/sklearn/neighbors/_unsupervised.py +55 -0
  52. daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
  53. daal4py/sklearn/svm/__init__.py +19 -0
  54. daal4py/sklearn/svm/svm.py +734 -0
  55. daal4py/sklearn/utils/__init__.py +21 -0
  56. daal4py/sklearn/utils/base.py +75 -0
  57. daal4py/sklearn/utils/tests/test_utils.py +51 -0
  58. daal4py/sklearn/utils/validation.py +693 -0
  59. onedal/__init__.py +83 -0
  60. onedal/_config.py +53 -0
  61. onedal/_device_offload.py +229 -0
  62. onedal/_onedal_py_dpc.cpython-39-x86_64-linux-gnu.so +0 -0
  63. onedal/_onedal_py_host.cpython-39-x86_64-linux-gnu.so +0 -0
  64. onedal/_onedal_py_spmd_dpc.cpython-39-x86_64-linux-gnu.so +0 -0
  65. onedal/basic_statistics/__init__.py +20 -0
  66. onedal/basic_statistics/basic_statistics.py +107 -0
  67. onedal/basic_statistics/incremental_basic_statistics.py +160 -0
  68. onedal/basic_statistics/tests/test_basic_statistics.py +298 -0
  69. onedal/basic_statistics/tests/test_incremental_basic_statistics.py +196 -0
  70. onedal/cluster/__init__.py +27 -0
  71. onedal/cluster/dbscan.py +110 -0
  72. onedal/cluster/kmeans.py +560 -0
  73. onedal/cluster/kmeans_init.py +115 -0
  74. onedal/cluster/tests/test_dbscan.py +125 -0
  75. onedal/cluster/tests/test_kmeans.py +88 -0
  76. onedal/cluster/tests/test_kmeans_init.py +93 -0
  77. onedal/common/_base.py +38 -0
  78. onedal/common/_estimator_checks.py +47 -0
  79. onedal/common/_mixin.py +62 -0
  80. onedal/common/_policy.py +59 -0
  81. onedal/common/_spmd_policy.py +30 -0
  82. onedal/common/hyperparameters.py +116 -0
  83. onedal/common/tests/test_policy.py +75 -0
  84. onedal/covariance/__init__.py +20 -0
  85. onedal/covariance/covariance.py +125 -0
  86. onedal/covariance/incremental_covariance.py +146 -0
  87. onedal/covariance/tests/test_covariance.py +50 -0
  88. onedal/covariance/tests/test_incremental_covariance.py +122 -0
  89. onedal/datatypes/__init__.py +19 -0
  90. onedal/datatypes/_data_conversion.py +95 -0
  91. onedal/datatypes/tests/test_data.py +235 -0
  92. onedal/decomposition/__init__.py +20 -0
  93. onedal/decomposition/incremental_pca.py +204 -0
  94. onedal/decomposition/pca.py +186 -0
  95. onedal/decomposition/tests/test_incremental_pca.py +198 -0
  96. onedal/ensemble/__init__.py +29 -0
  97. onedal/ensemble/forest.py +720 -0
  98. onedal/ensemble/tests/test_random_forest.py +97 -0
  99. onedal/linear_model/__init__.py +27 -0
  100. onedal/linear_model/incremental_linear_model.py +258 -0
  101. onedal/linear_model/linear_model.py +329 -0
  102. onedal/linear_model/logistic_regression.py +249 -0
  103. onedal/linear_model/tests/test_incremental_linear_regression.py +168 -0
  104. onedal/linear_model/tests/test_incremental_ridge_regression.py +107 -0
  105. onedal/linear_model/tests/test_linear_regression.py +149 -0
  106. onedal/linear_model/tests/test_logistic_regression.py +95 -0
  107. onedal/linear_model/tests/test_ridge.py +95 -0
  108. onedal/neighbors/__init__.py +19 -0
  109. onedal/neighbors/neighbors.py +778 -0
  110. onedal/neighbors/tests/test_knn_classification.py +49 -0
  111. onedal/primitives/__init__.py +27 -0
  112. onedal/primitives/get_tree.py +25 -0
  113. onedal/primitives/kernel_functions.py +153 -0
  114. onedal/primitives/tests/test_kernel_functions.py +159 -0
  115. onedal/spmd/__init__.py +25 -0
  116. onedal/spmd/_base.py +30 -0
  117. onedal/spmd/basic_statistics/__init__.py +20 -0
  118. onedal/spmd/basic_statistics/basic_statistics.py +30 -0
  119. onedal/spmd/basic_statistics/incremental_basic_statistics.py +69 -0
  120. onedal/spmd/cluster/__init__.py +28 -0
  121. onedal/spmd/cluster/dbscan.py +23 -0
  122. onedal/spmd/cluster/kmeans.py +56 -0
  123. onedal/spmd/covariance/__init__.py +20 -0
  124. onedal/spmd/covariance/covariance.py +26 -0
  125. onedal/spmd/covariance/incremental_covariance.py +82 -0
  126. onedal/spmd/decomposition/__init__.py +20 -0
  127. onedal/spmd/decomposition/incremental_pca.py +117 -0
  128. onedal/spmd/decomposition/pca.py +26 -0
  129. onedal/spmd/ensemble/__init__.py +19 -0
  130. onedal/spmd/ensemble/forest.py +28 -0
  131. onedal/spmd/linear_model/__init__.py +21 -0
  132. onedal/spmd/linear_model/incremental_linear_model.py +97 -0
  133. onedal/spmd/linear_model/linear_model.py +30 -0
  134. onedal/spmd/linear_model/logistic_regression.py +38 -0
  135. onedal/spmd/neighbors/__init__.py +19 -0
  136. onedal/spmd/neighbors/neighbors.py +75 -0
  137. onedal/svm/__init__.py +19 -0
  138. onedal/svm/svm.py +556 -0
  139. onedal/svm/tests/test_csr_svm.py +351 -0
  140. onedal/svm/tests/test_nusvc.py +204 -0
  141. onedal/svm/tests/test_nusvr.py +210 -0
  142. onedal/svm/tests/test_svc.py +168 -0
  143. onedal/svm/tests/test_svr.py +243 -0
  144. onedal/tests/test_common.py +41 -0
  145. onedal/tests/utils/_dataframes_support.py +168 -0
  146. onedal/tests/utils/_device_selection.py +107 -0
  147. onedal/utils/__init__.py +49 -0
  148. onedal/utils/_array_api.py +91 -0
  149. onedal/utils/validation.py +432 -0
  150. scikit_learn_intelex-2025.0.0.dist-info/LICENSE.txt +202 -0
  151. scikit_learn_intelex-2025.0.0.dist-info/METADATA +231 -0
  152. scikit_learn_intelex-2025.0.0.dist-info/RECORD +278 -0
  153. scikit_learn_intelex-2025.0.0.dist-info/WHEEL +5 -0
  154. scikit_learn_intelex-2025.0.0.dist-info/top_level.txt +3 -0
  155. sklearnex/__init__.py +65 -0
  156. sklearnex/__main__.py +58 -0
  157. sklearnex/_config.py +98 -0
  158. sklearnex/_device_offload.py +121 -0
  159. sklearnex/_utils.py +109 -0
  160. sklearnex/basic_statistics/__init__.py +20 -0
  161. sklearnex/basic_statistics/basic_statistics.py +140 -0
  162. sklearnex/basic_statistics/incremental_basic_statistics.py +288 -0
  163. sklearnex/basic_statistics/tests/test_basic_statistics.py +251 -0
  164. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +384 -0
  165. sklearnex/cluster/__init__.py +20 -0
  166. sklearnex/cluster/dbscan.py +192 -0
  167. sklearnex/cluster/k_means.py +383 -0
  168. sklearnex/cluster/tests/test_dbscan.py +38 -0
  169. sklearnex/cluster/tests/test_kmeans.py +153 -0
  170. sklearnex/conftest.py +73 -0
  171. sklearnex/covariance/__init__.py +19 -0
  172. sklearnex/covariance/incremental_covariance.py +368 -0
  173. sklearnex/covariance/tests/test_incremental_covariance.py +226 -0
  174. sklearnex/decomposition/__init__.py +19 -0
  175. sklearnex/decomposition/pca.py +414 -0
  176. sklearnex/decomposition/tests/test_pca.py +58 -0
  177. sklearnex/dispatcher.py +543 -0
  178. sklearnex/doc/third-party-programs.txt +424 -0
  179. sklearnex/ensemble/__init__.py +29 -0
  180. sklearnex/ensemble/_forest.py +2016 -0
  181. sklearnex/ensemble/tests/test_forest.py +120 -0
  182. sklearnex/glob/__main__.py +72 -0
  183. sklearnex/glob/dispatcher.py +101 -0
  184. sklearnex/linear_model/__init__.py +32 -0
  185. sklearnex/linear_model/coordinate_descent.py +30 -0
  186. sklearnex/linear_model/incremental_linear.py +463 -0
  187. sklearnex/linear_model/incremental_ridge.py +418 -0
  188. sklearnex/linear_model/linear.py +302 -0
  189. sklearnex/linear_model/logistic_path.py +17 -0
  190. sklearnex/linear_model/logistic_regression.py +403 -0
  191. sklearnex/linear_model/ridge.py +24 -0
  192. sklearnex/linear_model/tests/test_incremental_linear.py +203 -0
  193. sklearnex/linear_model/tests/test_incremental_ridge.py +153 -0
  194. sklearnex/linear_model/tests/test_linear.py +142 -0
  195. sklearnex/linear_model/tests/test_logreg.py +134 -0
  196. sklearnex/manifold/__init__.py +19 -0
  197. sklearnex/manifold/t_sne.py +21 -0
  198. sklearnex/manifold/tests/test_tsne.py +26 -0
  199. sklearnex/metrics/__init__.py +23 -0
  200. sklearnex/metrics/pairwise.py +22 -0
  201. sklearnex/metrics/ranking.py +20 -0
  202. sklearnex/metrics/tests/test_metrics.py +39 -0
  203. sklearnex/model_selection/__init__.py +21 -0
  204. sklearnex/model_selection/split.py +22 -0
  205. sklearnex/model_selection/tests/test_model_selection.py +34 -0
  206. sklearnex/neighbors/__init__.py +27 -0
  207. sklearnex/neighbors/_lof.py +231 -0
  208. sklearnex/neighbors/common.py +310 -0
  209. sklearnex/neighbors/knn_classification.py +226 -0
  210. sklearnex/neighbors/knn_regression.py +203 -0
  211. sklearnex/neighbors/knn_unsupervised.py +170 -0
  212. sklearnex/neighbors/tests/test_neighbors.py +80 -0
  213. sklearnex/preview/__init__.py +17 -0
  214. sklearnex/preview/covariance/__init__.py +19 -0
  215. sklearnex/preview/covariance/covariance.py +133 -0
  216. sklearnex/preview/covariance/tests/test_covariance.py +66 -0
  217. sklearnex/preview/decomposition/__init__.py +19 -0
  218. sklearnex/preview/decomposition/incremental_pca.py +228 -0
  219. sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
  220. sklearnex/preview/linear_model/__init__.py +19 -0
  221. sklearnex/preview/linear_model/ridge.py +419 -0
  222. sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
  223. sklearnex/spmd/__init__.py +25 -0
  224. sklearnex/spmd/basic_statistics/__init__.py +20 -0
  225. sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
  226. sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
  227. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  228. sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +307 -0
  229. sklearnex/spmd/cluster/__init__.py +30 -0
  230. sklearnex/spmd/cluster/dbscan.py +50 -0
  231. sklearnex/spmd/cluster/kmeans.py +21 -0
  232. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  233. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
  234. sklearnex/spmd/covariance/__init__.py +20 -0
  235. sklearnex/spmd/covariance/covariance.py +21 -0
  236. sklearnex/spmd/covariance/incremental_covariance.py +37 -0
  237. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  238. sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
  239. sklearnex/spmd/decomposition/__init__.py +20 -0
  240. sklearnex/spmd/decomposition/incremental_pca.py +30 -0
  241. sklearnex/spmd/decomposition/pca.py +21 -0
  242. sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
  243. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  244. sklearnex/spmd/ensemble/__init__.py +19 -0
  245. sklearnex/spmd/ensemble/forest.py +71 -0
  246. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  247. sklearnex/spmd/linear_model/__init__.py +21 -0
  248. sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
  249. sklearnex/spmd/linear_model/linear_model.py +21 -0
  250. sklearnex/spmd/linear_model/logistic_regression.py +21 -0
  251. sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +329 -0
  252. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  253. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +166 -0
  254. sklearnex/spmd/neighbors/__init__.py +19 -0
  255. sklearnex/spmd/neighbors/neighbors.py +25 -0
  256. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  257. sklearnex/svm/__init__.py +29 -0
  258. sklearnex/svm/_common.py +328 -0
  259. sklearnex/svm/nusvc.py +332 -0
  260. sklearnex/svm/nusvr.py +148 -0
  261. sklearnex/svm/svc.py +360 -0
  262. sklearnex/svm/svr.py +149 -0
  263. sklearnex/svm/tests/test_svm.py +93 -0
  264. sklearnex/tests/_utils.py +328 -0
  265. sklearnex/tests/_utils_spmd.py +198 -0
  266. sklearnex/tests/test_common.py +54 -0
  267. sklearnex/tests/test_config.py +43 -0
  268. sklearnex/tests/test_memory_usage.py +291 -0
  269. sklearnex/tests/test_monkeypatch.py +276 -0
  270. sklearnex/tests/test_n_jobs_support.py +103 -0
  271. sklearnex/tests/test_parallel.py +48 -0
  272. sklearnex/tests/test_patching.py +385 -0
  273. sklearnex/tests/test_run_to_run_stability.py +296 -0
  274. sklearnex/utils/__init__.py +19 -0
  275. sklearnex/utils/_array_api.py +82 -0
  276. sklearnex/utils/parallel.py +59 -0
  277. sklearnex/utils/tests/test_finite.py +89 -0
  278. sklearnex/utils/validation.py +17 -0
@@ -0,0 +1,34 @@
1
+ # ===============================================================================
2
+ # Copyright 2021 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ===============================================================================
16
+
17
+ import numpy as np
18
+ from numpy.testing import assert_allclose
19
+
20
+
21
+ # TODO:
22
+ # add pytest params for checking different dataframe inputs/outputs.
23
def test_sklearnex_import_train_test_split():
    """Smoke-test the patched ``train_test_split`` exported by sklearnex.

    Row ``i`` of the feature matrix starts with ``10 * i`` and the target of
    row ``i`` is ``i``, so after any consistent split the first feature
    column must equal ten times the matching targets.
    """
    from sklearnex.model_selection import train_test_split

    features = np.arange(100).reshape((10, 10))
    targets = np.arange(10)

    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=None, train_size=0.5
    )

    # train_size=0.5 on ten samples must give two equally sized halves.
    assert len(y_test) == len(y_train)

    # Rows and targets have to stay aligned after shuffling.
    assert_allclose(X_train[:, 0], y_train * 10)
    assert_allclose(X_test[:, 0], y_test * 10)
@@ -0,0 +1,27 @@
1
+ # ===============================================================================
2
+ # Copyright 2021 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ===============================================================================
16
+
17
+ from ._lof import LocalOutlierFactor
18
+ from .knn_classification import KNeighborsClassifier
19
+ from .knn_regression import KNeighborsRegressor
20
+ from .knn_unsupervised import NearestNeighbors
21
+
22
+ __all__ = [
23
+ "KNeighborsClassifier",
24
+ "KNeighborsRegressor",
25
+ "LocalOutlierFactor",
26
+ "NearestNeighbors",
27
+ ]
@@ -0,0 +1,231 @@
1
+ # ===============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ===============================================================================
16
+
17
+ import warnings
18
+
19
+ import numpy as np
20
+ from sklearn.neighbors import LocalOutlierFactor as sklearn_LocalOutlierFactor
21
+ from sklearn.utils.metaestimators import available_if
22
+ from sklearn.utils.validation import check_is_fitted
23
+
24
+ from daal4py.sklearn._n_jobs_support import control_n_jobs
25
+ from daal4py.sklearn._utils import sklearn_check_version
26
+ from sklearnex._device_offload import dispatch, wrap_output_data
27
+ from sklearnex.neighbors.common import KNeighborsDispatchingBase
28
+ from sklearnex.neighbors.knn_unsupervised import NearestNeighbors
29
+
30
+ from ..utils._array_api import get_namespace
31
+
32
+
33
@control_n_jobs(decorated_methods=["fit", "_kneighbors"])
class LocalOutlierFactor(KNeighborsDispatchingBase, sklearn_LocalOutlierFactor):
    __doc__ = (
        sklearn_LocalOutlierFactor.__doc__
        + "\n NOTE: When X=None, methods kneighbors, kneighbors_graph, and predict will"
        + "\n only output numpy arrays. In that case, the only way to offload to gpu"
        + "\n is to use a global queue (e.g. using config_context)"
    )
    if sklearn_check_version("1.2"):
        _parameter_constraints: dict = {
            **sklearn_LocalOutlierFactor._parameter_constraints
        }

    # Borrow only the pieces of the kNN implementation that are needed here.
    # Subclassing NearestNeighbors instead would complicate the inheritance
    # structure and break the sklearn inheritance path.
    _save_attributes = NearestNeighbors._save_attributes
    _onedal_knn_fit = NearestNeighbors._onedal_fit
    _onedal_kneighbors = NearestNeighbors._onedal_kneighbors

    def _onedal_fit(self, X, y, queue=None):
        """Fit the neighbors backend, then derive the LOF training scores.

        After the borrowed kNN fit this sets ``n_neighbors_``,
        ``_distances_fit_X_``, ``_lrd``, ``negative_outlier_factor_`` and
        ``offset_`` and returns ``self``.

        Raises
        ------
        ValueError
            If ``contamination`` is neither ``"auto"`` nor in ``(0, 0.5]``.
        """
        if sklearn_check_version("1.2"):
            self._validate_params()

        self._onedal_knn_fit(X, y, queue)

        use_auto_offset = self.contamination == "auto"
        if not use_auto_offset and not (0.0 < self.contamination <= 0.5):
            raise ValueError(
                "contamination must be in (0, 0.5], " "got: %f" % self.contamination
            )

        n_samples = self.n_samples_fit_

        if self.n_neighbors > n_samples:
            warnings.warn(
                "n_neighbors (%s) is greater than the "
                "total number of samples (%s). n_neighbors "
                "will be set to (n_samples - 1) for estimation."
                % (self.n_neighbors, n_samples)
            )
        # Clamp to [1, n_samples - 1].
        self.n_neighbors_ = max(1, min(self.n_neighbors, n_samples - 1))

        # Neighbors of the training points themselves (no query data given).
        self._distances_fit_X_, neighbor_idx = self._onedal_kneighbors(
            n_neighbors=self.n_neighbors_, queue=queue
        )

        # Sklearn includes a check for float32 at this point which may not be
        # necessary for onedal

        self._lrd = self._local_reachability_density(
            self._distances_fit_X_, neighbor_idx
        )

        # LOF score over the training samples; it is needed to define offset_.
        lrd_ratios = self._lrd[neighbor_idx] / self._lrd[:, np.newaxis]
        self.negative_outlier_factor_ = -np.mean(lrd_ratios, axis=1)

        if use_auto_offset:
            # inliers score around -1 (the higher, the less abnormal).
            self.offset_ = -1.5
        else:
            self.offset_ = np.percentile(
                self.negative_outlier_factor_, 100.0 * self.contamination
            )

        # adoption of warning for data with duplicated samples from
        # https://github.com/scikit-learn/scikit-learn/pull/28773
        if (
            sklearn_check_version("1.6")
            and np.min(self.negative_outlier_factor_) < -1e7
            and not self.novelty
        ):
            warnings.warn(
                "Duplicate values are leading to incorrect results. "
                "Increase the number of neighbors for more accurate results."
            )

        return self

    # The docstring of ``fit`` is copied from scikit-learn at the end of the
    # class body. ``y`` is not forwarded: ``None`` is always dispatched.
    def fit(self, X, y=None):
        return dispatch(
            self,
            "fit",
            {
                "onedal": self.__class__._onedal_fit,
                "sklearn": sklearn_LocalOutlierFactor.fit,
            },
            X,
            None,
        )

    def _predict(self, X=None):
        """Return 1 for inliers and -1 for outliers.

        Without ``X`` the training scores in ``negative_outlier_factor_`` are
        compared with ``offset_`` and a numpy array is returned; with ``X``
        the sign of ``decision_function(X)`` decides and the result is built
        in the array namespace of ``X``.
        """
        check_is_fitted(self)

        if X is None:
            labels = np.ones(self.n_samples_fit_, dtype=int)
            labels[self.negative_outlier_factor_ < self.offset_] = -1
            return labels

        xp, _ = get_namespace(X)
        outlier_mask = self.decision_function(X) < 0
        labels = xp.ones_like(outlier_mask, dtype=int)
        labels[outlier_mask] = -1
        return labels

    # This had to be done because predict loses the queue when no
    # argument is given and it is a dpctl tensor or dpnp array.
    # This would cause issues in fit_predict. Also, available_if
    # is hard to unwrap, and this is the most straightforward way.
    @available_if(sklearn_LocalOutlierFactor._check_novelty_fit_predict)
    @wrap_output_data
    def fit_predict(self, X, y=None):
        """Fit the model to the training set X and return the labels.

        **Not available for novelty detection (when novelty is set to True).**
        Label is 1 for an inlier and -1 for an outlier according to the LOF
        score and the contamination parameter.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features), default=None
            The query sample or samples to compute the Local Outlier Factor
            w.r.t. the training samples.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        is_inlier : ndarray of shape (n_samples,)
            Returns -1 for anomalies/outliers and 1 for inliers.
        """
        return self.fit(X)._predict()

    # Undecorated neighbor search; the public ``kneighbors`` below is this
    # function wrapped with ``wrap_output_data``.
    def _kneighbors(self, X=None, n_neighbors=None, return_distance=True):
        check_is_fitted(self)
        if sklearn_check_version("1.0") and X is not None:
            self._check_feature_names(X, reset=False)
        backends = {
            "onedal": self.__class__._onedal_kneighbors,
            "sklearn": sklearn_LocalOutlierFactor.kneighbors,
        }
        return dispatch(
            self,
            "kneighbors",
            backends,
            X,
            n_neighbors=n_neighbors,
            return_distance=return_distance,
        )

    kneighbors = wrap_output_data(_kneighbors)

    @available_if(sklearn_LocalOutlierFactor._check_novelty_score_samples)
    @wrap_output_data
    def score_samples(self, X):
        """Opposite of the Local Outlier Factor of X.

        It is the opposite as bigger is better, i.e. large values correspond
        to inliers.

        **Only available for novelty detection (when novelty is set to True).**
        The argument X is supposed to contain *new data*: if X contains a
        point from training, it considers the later in its own neighborhood.
        Also, the samples in X are not considered in the neighborhood of any
        point. Because of this, the scores obtained via ``score_samples`` may
        differ from the standard LOF scores.
        The standard LOF scores for the training data is available via the
        ``negative_outlier_factor_`` attribute.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The query sample or samples to compute the Local Outlier Factor
            w.r.t. the training samples.

        Returns
        -------
        opposite_lof_scores : ndarray of shape (n_samples,)
            The opposite of the Local Outlier Factor of each input samples.
            The lower, the more abnormal.
        """
        check_is_fitted(self)

        # Use the undecorated search so the outputs are not converted by
        # ``wrap_output_data`` before the density computation.
        query_distances, query_neighbors = self._kneighbors(
            X, n_neighbors=self.n_neighbors_
        )
        query_lrd = self._local_reachability_density(query_distances, query_neighbors)

        lrd_ratios = self._lrd[query_neighbors] / query_lrd[:, np.newaxis]
        return -np.mean(lrd_ratios, axis=1)

    fit.__doc__ = sklearn_LocalOutlierFactor.fit.__doc__
    kneighbors.__doc__ = sklearn_LocalOutlierFactor.kneighbors.__doc__
@@ -0,0 +1,310 @@
1
+ # ==============================================================================
2
+ # Copyright 2023 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import warnings
18
+
19
+ import numpy as np
20
+ from scipy import sparse as sp
21
+ from sklearn.neighbors._ball_tree import BallTree
22
+ from sklearn.neighbors._base import VALID_METRICS, KNeighborsMixin
23
+ from sklearn.neighbors._base import NeighborsBase as sklearn_NeighborsBase
24
+ from sklearn.neighbors._kd_tree import KDTree
25
+ from sklearn.utils.validation import check_is_fitted
26
+
27
+ from daal4py.sklearn._utils import sklearn_check_version
28
+ from onedal.utils import _check_array, _num_features, _num_samples
29
+
30
+ from .._utils import PatchingConditionsChain
31
+ from ..utils._array_api import get_namespace
32
+
33
+
34
+ class KNeighborsDispatchingBase:
35
def _fit_validation(self, X, y=None):
    """Validate parameters and cache the fit-time state used for dispatching.

    Sets ``effective_metric_params_`` and ``effective_metric_`` and, depending
    on the type of ``X``, ``_fit_X``, ``_fit_method``, ``n_samples_fit_``,
    ``n_features_in_`` and ``_tree``. Any stale ``_onedal_estimator``
    attribute is removed.

    Parameters
    ----------
    X : array-like, sparse matrix, BallTree, KDTree or fitted neighbors estimator
        Training data, or a pre-built structure whose data is reused.
    y : object, default=None
        Not used by this method.

    Raises
    ------
    ValueError
        If ``X`` is a neighbors estimator carrying ``_onedal_estimator`` whose
        ``_fit_method`` is not ``"ball_tree"``, ``"kd_tree"`` or ``"brute"``.
    """
    if sklearn_check_version("1.2"):
        self._validate_params()
    if sklearn_check_version("1.0"):
        self._check_feature_names(X, reset=True)

    # Always start from a copy of the user's metric_params. Previously the
    # dict was reset to {} whenever it lacked a "p" entry, which silently
    # dropped every other metric parameter before the dict was forwarded to
    # BallTree/KDTree below.
    if self.metric_params is None:
        self.effective_metric_params_ = {}
    else:
        self.effective_metric_params_ = self.metric_params.copy()

    if "p" in self.effective_metric_params_:
        # metric_params takes precedence over the constructor argument.
        if self.p is not None:
            warnings.warn(
                "Parameter p is found in metric_params. "
                "The corresponding parameter from __init__ "
                "is ignored.",
                SyntaxWarning,
                stacklevel=2,
            )
    else:
        self.effective_metric_params_["p"] = self.p

    self.effective_metric_ = self.metric
    # For minkowski distance, use more efficient methods where available
    if self.metric == "minkowski":
        p = self.effective_metric_params_["p"]
        if p == 1:
            self.effective_metric_ = "manhattan"
        elif p == 2:
            self.effective_metric_ = "euclidean"
        elif p == np.inf:
            self.effective_metric_ = "chebyshev"

    if not isinstance(X, (KDTree, BallTree, sklearn_NeighborsBase)):
        self._fit_X = _check_array(
            X, dtype=[np.float64, np.float32], accept_sparse=True
        )
        self.n_samples_fit_ = _num_samples(self._fit_X)
        self.n_features_in_ = _num_features(self._fit_X)

        if self.algorithm == "auto":
            # A tree approach is better for small number of neighbors or small
            # number of features, with KDTree generally faster when available
            is_n_neighbors_valid_for_brute = (
                self.n_neighbors is not None
                and self.n_neighbors >= self._fit_X.shape[0] // 2
            )
            if self._fit_X.shape[1] > 15 or is_n_neighbors_valid_for_brute:
                self._fit_method = "brute"
            elif self.effective_metric_ in VALID_METRICS["kd_tree"]:
                self._fit_method = "kd_tree"
            elif (
                callable(self.effective_metric_)
                or self.effective_metric_ in VALID_METRICS["ball_tree"]
            ):
                self._fit_method = "ball_tree"
            else:
                self._fit_method = "brute"
        else:
            self._fit_method = self.algorithm

    # Drop the backend estimator left over from any previous fit.
    if hasattr(self, "_onedal_estimator"):
        delattr(self, "_onedal_estimator")

    # To cover test case when we pass patched
    # estimator as an input for other estimator
    if isinstance(X, sklearn_NeighborsBase):
        self._fit_X = X._fit_X
        self._tree = X._tree
        self._fit_method = X._fit_method
        self.n_samples_fit_ = X.n_samples_fit_
        self.n_features_in_ = X.n_features_in_
        if hasattr(X, "_onedal_estimator"):
            # NOTE(review): "p" is removed before the remaining parameters are
            # passed to the tree constructors, and the new tree is stored on
            # the input estimator X rather than on self; both kept as in the
            # original, confirm this is intended.
            self.effective_metric_params_.pop("p")
            if self._fit_method == "ball_tree":
                X._tree = BallTree(
                    X._fit_X,
                    self.leaf_size,
                    metric=self.effective_metric_,
                    **self.effective_metric_params_,
                )
            elif self._fit_method == "kd_tree":
                X._tree = KDTree(
                    X._fit_X,
                    self.leaf_size,
                    metric=self.effective_metric_,
                    **self.effective_metric_params_,
                )
            elif self._fit_method == "brute":
                X._tree = None
            else:
                raise ValueError("algorithm = '%s' not recognized" % self.algorithm)

    elif isinstance(X, BallTree):
        self._fit_X = X.data
        self._tree = X
        self._fit_method = "ball_tree"
        self.n_samples_fit_ = X.data.shape[0]
        self.n_features_in_ = X.data.shape[1]

    elif isinstance(X, KDTree):
        self._fit_X = X.data
        self._tree = X
        self._fit_method = "kd_tree"
        self.n_samples_fit_ = X.data.shape[0]
        self.n_features_in_ = X.data.shape[1]
140
+
141
def _onedal_supported(self, device, method_name, *data):
    """Collect the conditions under which oneDAL can serve ``method_name``.

    Parameters
    ----------
    device : str
        Target device; only compared against ``"gpu"`` (kd_tree is rejected
        there).
    method_name : str
        Name of the estimator method being dispatched.
    *data
        Positional inputs of that method. ``data[0]`` is the sample input;
        ``data[1]``, when present, is treated as the targets.

    Returns
    -------
    PatchingConditionsChain
        The chain of evaluated conditions. It is returned early as soon as
        ``and_condition`` / ``and_conditions`` yields a falsy value.

    Raises
    ------
    RuntimeError
        When every check passed but ``method_name`` is none of ``fit``,
        ``predict``, ``predict_proba``, ``kneighbors`` or ``score``.

    Notes
    -----
    Side effects on ``self``: ``_fit_validation`` is invoked for ``fit``,
    metric aliases in ``effective_metric_`` are rewritten to their canonical
    name, and ``effective_metric_params_["p"]`` is set for the manhattan and
    euclidean metrics.
    """
    if method_name == "fit":
        self._fit_validation(data[0], data[1])

    class_name = self.__class__.__name__
    is_classifier = "Classifier" in class_name
    is_regressor = "Regressor" in class_name
    is_supervised = is_classifier or is_regressor
    patching_status = PatchingConditionsChain(
        f"sklearn.neighbors.{class_name}.{method_name}"
    )

    is_not_radius_method = "radius" not in method_name
    if not patching_status.and_condition(
        is_not_radius_method, "RadiusNeighbors not implemented in sklearnex"
    ):
        return patching_status

    # Pre-built trees and already fitted neighbors estimators are rejected.
    is_prebuilt_input = isinstance(data[0], (KDTree, BallTree, sklearn_NeighborsBase))
    if not patching_status.and_condition(
        not is_prebuilt_input,
        f"Input type {type(data[0])} is not supported.",
    ):
        return patching_status

    # Choose the method to request. This must happen before the metric
    # aliases are rewritten further down, because it reads effective_metric_.
    if self._fit_method in ("auto", "ball_tree"):
        too_many_neighbors = (
            self.n_neighbors is not None
            and self.n_neighbors >= self.n_samples_fit_ // 2
        )
        if self.n_features_in_ > 15 or too_many_neighbors:
            result_method = "brute"
        elif self.effective_metric_ in ("euclidean",):
            result_method = "kd_tree"
        else:
            result_method = "brute"
    else:
        result_method = self._fit_method

    metric_params = self.effective_metric_params_
    p_less_than_one = "p" in metric_params and metric_params["p"] < 1
    if not patching_status.and_condition(
        not p_less_than_one, '"p" metric parameter is less than 1'
    ):
        return patching_status

    is_dense_input = not sp.issparse(data[0])
    if not patching_status.and_condition(
        is_dense_input, "Sparse input is not supported."
    ):
        return patching_status

    if is_supervised:
        is_valid_weights = self.weights in ("uniform", "distance")
        if is_classifier:
            class_count = 1
        is_single_output = False
        y = None
        # To check multioutput, might be overhead
        if len(data) > 1:
            y = np.asarray(data[1])
            if is_classifier:
                class_count = len(np.unique(y))
        # Targets stored on an existing oneDAL estimator take precedence.
        if hasattr(self, "_onedal_estimator"):
            y = self._onedal_estimator._y
        if y is not None and hasattr(y, "ndim") and hasattr(y, "shape"):
            is_single_output = y.ndim == 1 or (y.ndim == 2 and y.shape[1] == 1)

    # TODO: add native support for these metric names
    metrics_map = {"manhattan": ["l1", "cityblock"], "euclidean": ["l2"]}
    canonical_metric = next(
        (
            origin
            for origin, aliases in metrics_map.items()
            if self.effective_metric_ in aliases
        ),
        None,
    )
    if canonical_metric is not None:
        self.effective_metric_ = canonical_metric

    if self.effective_metric_ == "manhattan":
        self.effective_metric_params_["p"] = 1
    elif self.effective_metric_ == "euclidean":
        self.effective_metric_params_["p"] = 2

    onedal_brute_metrics = (
        "manhattan",
        "minkowski",
        "euclidean",
        "chebyshev",
        "cosine",
    )
    onedal_kdtree_metrics = ("euclidean",)
    is_valid_for_brute = (
        result_method == "brute" and self.effective_metric_ in onedal_brute_metrics
    )
    is_valid_for_kd_tree = (
        result_method == "kd_tree" and self.effective_metric_ in onedal_kdtree_metrics
    )

    if result_method == "kd_tree" and not patching_status.and_condition(
        device != "gpu", '"kd_tree" method is not supported on GPU.'
    ):
        return patching_status

    if not patching_status.and_condition(
        is_valid_for_kd_tree or is_valid_for_brute,
        f"{result_method} with {self.effective_metric_} metric is not supported.",
    ):
        return patching_status

    if is_supervised:
        supervised_checks = [
            (is_single_output, "Only single output is supported."),
            (
                is_valid_weights,
                f'"{type(self.weights)}" weights type is not supported.',
            ),
        ]
        if not patching_status.and_conditions(supervised_checks):
            return patching_status

    if method_name == "fit":
        if is_classifier:
            patching_status.and_condition(
                class_count >= 2, "One-class case is not supported."
            )
        return patching_status

    if method_name in ("predict", "predict_proba", "kneighbors", "score"):
        patching_status.and_condition(
            hasattr(self, "_onedal_estimator"), "oneDAL model was not trained."
        )
        return patching_status

    raise RuntimeError(f"Unknown method {method_name} in {class_name}")
267
+
268
+ def _onedal_gpu_supported(self, method_name, *data):
269
+ return self._onedal_supported("gpu", method_name, *data)
270
+
271
+ def _onedal_cpu_supported(self, method_name, *data):
272
+ return self._onedal_supported("cpu", method_name, *data)
273
+
274
def kneighbors_graph(self, X=None, n_neighbors=None, mode="connectivity"):
    # Builds a CSR matrix of shape (n_queries, n_samples_fit_) from the
    # result of self.kneighbors. In "connectivity" mode every stored entry
    # is 1; in "distance" mode the entries are the returned distances.
    # No docstring is written here because __doc__ is assigned right after
    # the definition.
    check_is_fitted(self)
    if n_neighbors is None:
        n_neighbors = self.n_neighbors

    # Input checking is delegated to self.kneighbors.
    if mode == "connectivity":
        A_ind = self.kneighbors(X, n_neighbors, return_distance=False)
        xp, _ = get_namespace(A_ind)
        A_data = xp.ones(A_ind.shape[0] * n_neighbors)
    elif mode == "distance":
        A_data, A_ind = self.kneighbors(X, n_neighbors, return_distance=True)
        xp, _ = get_namespace(A_ind)
        A_data = xp.reshape(A_data, (-1,))
    else:
        raise ValueError(
            'Unsupported mode, must be one of "connectivity", '
            f'or "distance" but got "{mode}" instead'
        )

    n_queries = A_ind.shape[0]
    n_samples_fit = self.n_samples_fit_
    # Each query row holds exactly n_neighbors entries, so the row pointer
    # advances in steps of n_neighbors.
    row_pointers = xp.arange(0, n_queries * n_neighbors + 1, n_neighbors)
    column_indices = xp.reshape(A_ind, (-1,))

    return sp.csr_matrix(
        (A_data, column_indices, row_pointers),
        shape=(n_queries, n_samples_fit),
    )


kneighbors_graph.__doc__ = KNeighborsMixin.kneighbors_graph.__doc__