scikit-learn-intelex 2025.4.0__py313-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (282)
  1. daal4py/__init__.py +73 -0
  2. daal4py/__main__.py +58 -0
  3. daal4py/_daal4py.cpython-313-x86_64-linux-gnu.so +0 -0
  4. daal4py/doc/third-party-programs.txt +424 -0
  5. daal4py/mb/__init__.py +19 -0
  6. daal4py/mb/model_builders.py +377 -0
  7. daal4py/mpi_transceiver.cpython-313-x86_64-linux-gnu.so +0 -0
  8. daal4py/sklearn/__init__.py +40 -0
  9. daal4py/sklearn/_n_jobs_support.py +248 -0
  10. daal4py/sklearn/_utils.py +245 -0
  11. daal4py/sklearn/cluster/__init__.py +20 -0
  12. daal4py/sklearn/cluster/dbscan.py +165 -0
  13. daal4py/sklearn/cluster/k_means.py +597 -0
  14. daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
  15. daal4py/sklearn/decomposition/__init__.py +19 -0
  16. daal4py/sklearn/decomposition/_pca.py +524 -0
  17. daal4py/sklearn/ensemble/AdaBoostClassifier.py +196 -0
  18. daal4py/sklearn/ensemble/GBTDAAL.py +337 -0
  19. daal4py/sklearn/ensemble/__init__.py +27 -0
  20. daal4py/sklearn/ensemble/_forest.py +1397 -0
  21. daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
  22. daal4py/sklearn/linear_model/__init__.py +29 -0
  23. daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
  24. daal4py/sklearn/linear_model/_linear.py +272 -0
  25. daal4py/sklearn/linear_model/_ridge.py +325 -0
  26. daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
  27. daal4py/sklearn/linear_model/linear.py +17 -0
  28. daal4py/sklearn/linear_model/logistic_loss.py +195 -0
  29. daal4py/sklearn/linear_model/logistic_path.py +1026 -0
  30. daal4py/sklearn/linear_model/ridge.py +17 -0
  31. daal4py/sklearn/linear_model/tests/test_linear.py +208 -0
  32. daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
  33. daal4py/sklearn/manifold/__init__.py +19 -0
  34. daal4py/sklearn/manifold/_t_sne.py +405 -0
  35. daal4py/sklearn/metrics/__init__.py +20 -0
  36. daal4py/sklearn/metrics/_pairwise.py +236 -0
  37. daal4py/sklearn/metrics/_ranking.py +210 -0
  38. daal4py/sklearn/model_selection/__init__.py +19 -0
  39. daal4py/sklearn/model_selection/_split.py +309 -0
  40. daal4py/sklearn/model_selection/tests/test_split.py +56 -0
  41. daal4py/sklearn/monkeypatch/__init__.py +0 -0
  42. daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
  43. daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
  44. daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
  45. daal4py/sklearn/monkeypatch/tests/test_patching.py +90 -0
  46. daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +117 -0
  47. daal4py/sklearn/neighbors/__init__.py +21 -0
  48. daal4py/sklearn/neighbors/_base.py +503 -0
  49. daal4py/sklearn/neighbors/_classification.py +139 -0
  50. daal4py/sklearn/neighbors/_regression.py +74 -0
  51. daal4py/sklearn/neighbors/_unsupervised.py +55 -0
  52. daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
  53. daal4py/sklearn/svm/__init__.py +19 -0
  54. daal4py/sklearn/svm/svm.py +734 -0
  55. daal4py/sklearn/utils/__init__.py +21 -0
  56. daal4py/sklearn/utils/base.py +75 -0
  57. daal4py/sklearn/utils/tests/test_utils.py +51 -0
  58. daal4py/sklearn/utils/validation.py +696 -0
  59. onedal/__init__.py +83 -0
  60. onedal/_config.py +54 -0
  61. onedal/_device_offload.py +204 -0
  62. onedal/_onedal_py_dpc.cpython-313-x86_64-linux-gnu.so +0 -0
  63. onedal/_onedal_py_host.cpython-313-x86_64-linux-gnu.so +0 -0
  64. onedal/_onedal_py_spmd_dpc.cpython-313-x86_64-linux-gnu.so +0 -0
  65. onedal/basic_statistics/__init__.py +20 -0
  66. onedal/basic_statistics/basic_statistics.py +107 -0
  67. onedal/basic_statistics/incremental_basic_statistics.py +175 -0
  68. onedal/basic_statistics/tests/test_basic_statistics.py +242 -0
  69. onedal/basic_statistics/tests/test_incremental_basic_statistics.py +279 -0
  70. onedal/basic_statistics/tests/utils.py +50 -0
  71. onedal/cluster/__init__.py +27 -0
  72. onedal/cluster/dbscan.py +105 -0
  73. onedal/cluster/kmeans.py +557 -0
  74. onedal/cluster/kmeans_init.py +112 -0
  75. onedal/cluster/tests/test_dbscan.py +125 -0
  76. onedal/cluster/tests/test_kmeans.py +88 -0
  77. onedal/cluster/tests/test_kmeans_init.py +93 -0
  78. onedal/common/_base.py +38 -0
  79. onedal/common/_estimator_checks.py +47 -0
  80. onedal/common/_mixin.py +62 -0
  81. onedal/common/_policy.py +55 -0
  82. onedal/common/_spmd_policy.py +30 -0
  83. onedal/common/hyperparameters.py +125 -0
  84. onedal/common/tests/test_policy.py +76 -0
  85. onedal/common/tests/test_sycl.py +128 -0
  86. onedal/covariance/__init__.py +20 -0
  87. onedal/covariance/covariance.py +122 -0
  88. onedal/covariance/incremental_covariance.py +161 -0
  89. onedal/covariance/tests/test_covariance.py +50 -0
  90. onedal/covariance/tests/test_incremental_covariance.py +190 -0
  91. onedal/datatypes/__init__.py +19 -0
  92. onedal/datatypes/_data_conversion.py +121 -0
  93. onedal/datatypes/tests/common.py +126 -0
  94. onedal/datatypes/tests/test_data.py +475 -0
  95. onedal/decomposition/__init__.py +20 -0
  96. onedal/decomposition/incremental_pca.py +214 -0
  97. onedal/decomposition/pca.py +186 -0
  98. onedal/decomposition/tests/test_incremental_pca.py +285 -0
  99. onedal/ensemble/__init__.py +29 -0
  100. onedal/ensemble/forest.py +736 -0
  101. onedal/ensemble/tests/test_random_forest.py +97 -0
  102. onedal/linear_model/__init__.py +27 -0
  103. onedal/linear_model/incremental_linear_model.py +292 -0
  104. onedal/linear_model/linear_model.py +325 -0
  105. onedal/linear_model/logistic_regression.py +247 -0
  106. onedal/linear_model/tests/test_incremental_linear_regression.py +213 -0
  107. onedal/linear_model/tests/test_incremental_ridge_regression.py +171 -0
  108. onedal/linear_model/tests/test_linear_regression.py +259 -0
  109. onedal/linear_model/tests/test_logistic_regression.py +95 -0
  110. onedal/linear_model/tests/test_ridge.py +95 -0
  111. onedal/neighbors/__init__.py +19 -0
  112. onedal/neighbors/neighbors.py +763 -0
  113. onedal/neighbors/tests/test_knn_classification.py +49 -0
  114. onedal/primitives/__init__.py +27 -0
  115. onedal/primitives/get_tree.py +25 -0
  116. onedal/primitives/kernel_functions.py +152 -0
  117. onedal/primitives/tests/test_kernel_functions.py +159 -0
  118. onedal/spmd/__init__.py +25 -0
  119. onedal/spmd/_base.py +30 -0
  120. onedal/spmd/basic_statistics/__init__.py +20 -0
  121. onedal/spmd/basic_statistics/basic_statistics.py +30 -0
  122. onedal/spmd/basic_statistics/incremental_basic_statistics.py +71 -0
  123. onedal/spmd/cluster/__init__.py +28 -0
  124. onedal/spmd/cluster/dbscan.py +23 -0
  125. onedal/spmd/cluster/kmeans.py +56 -0
  126. onedal/spmd/covariance/__init__.py +20 -0
  127. onedal/spmd/covariance/covariance.py +26 -0
  128. onedal/spmd/covariance/incremental_covariance.py +83 -0
  129. onedal/spmd/decomposition/__init__.py +20 -0
  130. onedal/spmd/decomposition/incremental_pca.py +124 -0
  131. onedal/spmd/decomposition/pca.py +26 -0
  132. onedal/spmd/ensemble/__init__.py +19 -0
  133. onedal/spmd/ensemble/forest.py +28 -0
  134. onedal/spmd/linear_model/__init__.py +21 -0
  135. onedal/spmd/linear_model/incremental_linear_model.py +101 -0
  136. onedal/spmd/linear_model/linear_model.py +30 -0
  137. onedal/spmd/linear_model/logistic_regression.py +38 -0
  138. onedal/spmd/neighbors/__init__.py +19 -0
  139. onedal/spmd/neighbors/neighbors.py +75 -0
  140. onedal/svm/__init__.py +19 -0
  141. onedal/svm/svm.py +556 -0
  142. onedal/svm/tests/test_csr_svm.py +351 -0
  143. onedal/svm/tests/test_nusvc.py +204 -0
  144. onedal/svm/tests/test_nusvr.py +210 -0
  145. onedal/svm/tests/test_svc.py +176 -0
  146. onedal/svm/tests/test_svr.py +243 -0
  147. onedal/tests/test_common.py +57 -0
  148. onedal/tests/utils/_dataframes_support.py +162 -0
  149. onedal/tests/utils/_device_selection.py +102 -0
  150. onedal/utils/__init__.py +49 -0
  151. onedal/utils/_array_api.py +81 -0
  152. onedal/utils/_dpep_helpers.py +56 -0
  153. onedal/utils/tests/test_validation.py +142 -0
  154. onedal/utils/validation.py +464 -0
  155. scikit_learn_intelex-2025.4.0.dist-info/LICENSE.txt +202 -0
  156. scikit_learn_intelex-2025.4.0.dist-info/METADATA +190 -0
  157. scikit_learn_intelex-2025.4.0.dist-info/RECORD +282 -0
  158. scikit_learn_intelex-2025.4.0.dist-info/WHEEL +5 -0
  159. scikit_learn_intelex-2025.4.0.dist-info/top_level.txt +3 -0
  160. sklearnex/__init__.py +66 -0
  161. sklearnex/__main__.py +58 -0
  162. sklearnex/_config.py +116 -0
  163. sklearnex/_device_offload.py +126 -0
  164. sklearnex/_utils.py +177 -0
  165. sklearnex/basic_statistics/__init__.py +20 -0
  166. sklearnex/basic_statistics/basic_statistics.py +261 -0
  167. sklearnex/basic_statistics/incremental_basic_statistics.py +352 -0
  168. sklearnex/basic_statistics/tests/test_basic_statistics.py +405 -0
  169. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +455 -0
  170. sklearnex/cluster/__init__.py +20 -0
  171. sklearnex/cluster/dbscan.py +197 -0
  172. sklearnex/cluster/k_means.py +397 -0
  173. sklearnex/cluster/tests/test_dbscan.py +38 -0
  174. sklearnex/cluster/tests/test_kmeans.py +157 -0
  175. sklearnex/conftest.py +82 -0
  176. sklearnex/covariance/__init__.py +19 -0
  177. sklearnex/covariance/incremental_covariance.py +405 -0
  178. sklearnex/covariance/tests/test_incremental_covariance.py +287 -0
  179. sklearnex/decomposition/__init__.py +19 -0
  180. sklearnex/decomposition/pca.py +427 -0
  181. sklearnex/decomposition/tests/test_pca.py +58 -0
  182. sklearnex/dispatcher.py +534 -0
  183. sklearnex/doc/third-party-programs.txt +424 -0
  184. sklearnex/ensemble/__init__.py +29 -0
  185. sklearnex/ensemble/_forest.py +2029 -0
  186. sklearnex/ensemble/tests/test_forest.py +140 -0
  187. sklearnex/glob/__main__.py +72 -0
  188. sklearnex/glob/dispatcher.py +101 -0
  189. sklearnex/linear_model/__init__.py +32 -0
  190. sklearnex/linear_model/coordinate_descent.py +30 -0
  191. sklearnex/linear_model/incremental_linear.py +495 -0
  192. sklearnex/linear_model/incremental_ridge.py +432 -0
  193. sklearnex/linear_model/linear.py +346 -0
  194. sklearnex/linear_model/logistic_regression.py +415 -0
  195. sklearnex/linear_model/ridge.py +390 -0
  196. sklearnex/linear_model/tests/test_incremental_linear.py +267 -0
  197. sklearnex/linear_model/tests/test_incremental_ridge.py +214 -0
  198. sklearnex/linear_model/tests/test_linear.py +142 -0
  199. sklearnex/linear_model/tests/test_logreg.py +134 -0
  200. sklearnex/linear_model/tests/test_ridge.py +256 -0
  201. sklearnex/manifold/__init__.py +19 -0
  202. sklearnex/manifold/t_sne.py +26 -0
  203. sklearnex/manifold/tests/test_tsne.py +250 -0
  204. sklearnex/metrics/__init__.py +23 -0
  205. sklearnex/metrics/pairwise.py +22 -0
  206. sklearnex/metrics/ranking.py +20 -0
  207. sklearnex/metrics/tests/test_metrics.py +39 -0
  208. sklearnex/model_selection/__init__.py +21 -0
  209. sklearnex/model_selection/split.py +22 -0
  210. sklearnex/model_selection/tests/test_model_selection.py +34 -0
  211. sklearnex/neighbors/__init__.py +27 -0
  212. sklearnex/neighbors/_lof.py +236 -0
  213. sklearnex/neighbors/common.py +310 -0
  214. sklearnex/neighbors/knn_classification.py +231 -0
  215. sklearnex/neighbors/knn_regression.py +207 -0
  216. sklearnex/neighbors/knn_unsupervised.py +178 -0
  217. sklearnex/neighbors/tests/test_neighbors.py +82 -0
  218. sklearnex/preview/__init__.py +17 -0
  219. sklearnex/preview/covariance/__init__.py +19 -0
  220. sklearnex/preview/covariance/covariance.py +142 -0
  221. sklearnex/preview/covariance/tests/test_covariance.py +66 -0
  222. sklearnex/preview/decomposition/__init__.py +19 -0
  223. sklearnex/preview/decomposition/incremental_pca.py +244 -0
  224. sklearnex/preview/decomposition/tests/test_incremental_pca.py +336 -0
  225. sklearnex/spmd/__init__.py +25 -0
  226. sklearnex/spmd/basic_statistics/__init__.py +20 -0
  227. sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
  228. sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
  229. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  230. sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +306 -0
  231. sklearnex/spmd/cluster/__init__.py +30 -0
  232. sklearnex/spmd/cluster/dbscan.py +50 -0
  233. sklearnex/spmd/cluster/kmeans.py +21 -0
  234. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  235. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +173 -0
  236. sklearnex/spmd/covariance/__init__.py +20 -0
  237. sklearnex/spmd/covariance/covariance.py +21 -0
  238. sklearnex/spmd/covariance/incremental_covariance.py +37 -0
  239. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  240. sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
  241. sklearnex/spmd/decomposition/__init__.py +20 -0
  242. sklearnex/spmd/decomposition/incremental_pca.py +30 -0
  243. sklearnex/spmd/decomposition/pca.py +21 -0
  244. sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
  245. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  246. sklearnex/spmd/ensemble/__init__.py +19 -0
  247. sklearnex/spmd/ensemble/forest.py +71 -0
  248. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  249. sklearnex/spmd/linear_model/__init__.py +21 -0
  250. sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
  251. sklearnex/spmd/linear_model/linear_model.py +21 -0
  252. sklearnex/spmd/linear_model/logistic_regression.py +21 -0
  253. sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +331 -0
  254. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  255. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +162 -0
  256. sklearnex/spmd/neighbors/__init__.py +19 -0
  257. sklearnex/spmd/neighbors/neighbors.py +25 -0
  258. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  259. sklearnex/svm/__init__.py +29 -0
  260. sklearnex/svm/_common.py +339 -0
  261. sklearnex/svm/nusvc.py +371 -0
  262. sklearnex/svm/nusvr.py +170 -0
  263. sklearnex/svm/svc.py +399 -0
  264. sklearnex/svm/svr.py +167 -0
  265. sklearnex/svm/tests/test_svm.py +93 -0
  266. sklearnex/tests/test_common.py +491 -0
  267. sklearnex/tests/test_config.py +123 -0
  268. sklearnex/tests/test_hyperparameters.py +43 -0
  269. sklearnex/tests/test_memory_usage.py +347 -0
  270. sklearnex/tests/test_monkeypatch.py +269 -0
  271. sklearnex/tests/test_n_jobs_support.py +108 -0
  272. sklearnex/tests/test_parallel.py +48 -0
  273. sklearnex/tests/test_patching.py +377 -0
  274. sklearnex/tests/test_run_to_run_stability.py +326 -0
  275. sklearnex/tests/utils/__init__.py +48 -0
  276. sklearnex/tests/utils/base.py +436 -0
  277. sklearnex/tests/utils/spmd.py +198 -0
  278. sklearnex/utils/__init__.py +19 -0
  279. sklearnex/utils/_array_api.py +82 -0
  280. sklearnex/utils/parallel.py +59 -0
  281. sklearnex/utils/tests/test_validation.py +238 -0
  282. sklearnex/utils/validation.py +208 -0
@@ -0,0 +1,763 @@
1
+ # ==============================================================================
2
+ # Copyright 2022 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ from abc import ABCMeta
18
+ from numbers import Integral
19
+
20
+ import numpy as np
21
+
22
+ from daal4py import (
23
+ bf_knn_classification_model,
24
+ bf_knn_classification_prediction,
25
+ bf_knn_classification_training,
26
+ kdtree_knn_classification_model,
27
+ kdtree_knn_classification_prediction,
28
+ kdtree_knn_classification_training,
29
+ )
30
+
31
+ from ..common._base import BaseEstimator
32
+ from ..common._estimator_checks import _check_is_fitted, _is_classifier, _is_regressor
33
+ from ..common._mixin import ClassifierMixin, RegressorMixin
34
+ from ..datatypes import from_table, to_table
35
+ from ..utils import (
36
+ _check_array,
37
+ _check_classification_targets,
38
+ _check_n_features,
39
+ _check_X_y,
40
+ _column_or_1d,
41
+ _num_samples,
42
+ )
43
+
44
+
45
class NeighborsCommonBase(BaseEstimator, metaclass=ABCMeta):
    """Shared helpers for the nearest-neighbors estimators in this module.

    Covers search-method selection, input validation, neighbor weighting and
    construction of the parameter dictionaries used by the two backends
    (the ``_get_onedal_params`` and ``_get_daal_params`` builders).
    """

    def _parse_auto_method(self, method, n_samples, n_features):
        """Resolve ``"auto"``/``"ball_tree"`` into a concrete search method.

        Parameters
        ----------
        method : str
            Requested method. Any value other than ``"auto"`` or
            ``"ball_tree"`` is returned unchanged.
        n_samples : int
            Number of fitted samples.
        n_features : int
            Number of features.

        Returns
        -------
        str
            ``"brute"`` when the metric is ``"precomputed"``, when there are
            more than 15 features, or when ``n_neighbors`` is at least half of
            ``n_samples``; otherwise ``"kd_tree"`` for the ``"euclidean"``
            metric and ``"brute"`` for every other metric.
        """
        result_method = method

        if method in ["auto", "ball_tree"]:
            condition = (
                self.n_neighbors is not None and self.n_neighbors >= n_samples // 2
            )
            if self.metric == "precomputed" or n_features > 15 or condition:
                result_method = "brute"
            elif self.metric == "euclidean":
                result_method = "kd_tree"
            else:
                result_method = "brute"

        return result_method

    def _validate_data(
        self, X, y=None, reset=True, validate_separately=False, **check_params
    ):
        """Validate ``X`` (and ``y`` when given) and check the feature count.

        Parameters
        ----------
        X : array-like
            Input data, validated with ``_check_array`` or ``_check_X_y``.
        y : array-like or None
            Targets. ``None`` is rejected when ``self.requires_y`` is truthy.
        reset : bool
            Forwarded to ``_check_n_features``.
        validate_separately : False or tuple of two dicts
            When truthy it is unpacked as ``(check_X_params, check_y_params)``
            and ``X`` and ``y`` are validated independently.
        **check_params
            Keyword arguments forwarded to the validation helper. The
            feature-count check is skipped when ``ensure_2d`` is falsy.

        Returns
        -------
        tuple
            ``(X, y)`` after validation (``y`` stays ``None`` if not given).

        Raises
        ------
        ValueError
            If ``y`` is ``None`` but the estimator requires it.
        """
        if y is None:
            if self.requires_y:
                raise ValueError(
                    f"This {self.__class__.__name__} estimator "
                    f"requires y to be passed, but the target y is None."
                )
            X = _check_array(X, **check_params)
        elif validate_separately:
            # Some estimators validate X and y separately, and calling
            # _check_array() on each is not equivalent to _check_X_y().
            check_X_params, check_y_params = validate_separately
            X = _check_array(X, **check_X_params)
            y = _check_array(y, **check_y_params)
        else:
            X, y = _check_X_y(X, y, **check_params)
        out = X, y

        if check_params.get("ensure_2d", True):
            _check_n_features(self, X, reset=reset)

        return out

    def _get_weights(self, dist, weights):
        """Turn neighbor distances into vote weights.

        Parameters
        ----------
        dist : numpy.ndarray
            Neighbor distances. An object-dtype array is updated in place,
            element by element.
        weights : None, "uniform", "distance" or callable
            Weighting scheme.

        Returns
        -------
        numpy.ndarray or None
            ``None`` for uniform weighting, inverse distances for
            ``"distance"``, or whatever the callable returns.

        Raises
        ------
        ValueError
            If ``weights`` is not one of the recognised options.
        """
        if weights in (None, "uniform"):
            return None
        if weights == "distance":
            # If a query point is at zero distance from one or more training
            # points, those training points get weight 1.0 and all others 0.0.
            if dist.dtype == np.dtype(object):
                for point_dist_i, point_dist in enumerate(dist):
                    # point_dist may be a scalar rather than an iterable
                    # (a single value can stand in for an 'outlier' entry).
                    if hasattr(point_dist, "__contains__") and 0.0 in point_dist:
                        dist[point_dist_i] = point_dist == 0.0
                    else:
                        dist[point_dist_i] = 1.0 / point_dist
            else:
                with np.errstate(divide="ignore"):
                    dist = 1.0 / dist
                inf_mask = np.isinf(dist)
                inf_row = np.any(inf_mask, axis=1)
                # Rows containing an infinite weight become a 0/1 indicator.
                dist[inf_row] = inf_mask[inf_row]
            return dist
        if callable(weights):
            return weights(dist)
        raise ValueError(
            "weights not recognized: should be 'uniform', "
            "'distance', or a callable function"
        )

    def _get_onedal_params(self, X, y=None, n_neighbors=None):
        """Build the parameter dictionary for the oneDAL backend.

        Parameters
        ----------
        X : object exposing ``dtype``
            Data whose dtype is reported as ``fptype``.
        y : object or None
            When ``None`` the result option is ``"indices|distances"``,
            otherwise ``"responses"``.
        n_neighbors : int or None
            Overrides ``self.n_neighbors`` when given.

        Returns
        -------
        dict
            Backend parameters.
        """
        class_count = 0 if self.classes_ is None else len(self.classes_)
        weights = getattr(self, "weights", "uniform")
        if self.effective_metric_ == "manhattan":
            p = 1.0
        elif self.effective_metric_ == "euclidean":
            p = 2.0
        else:
            p = self.p
        # None means uniform voting, matching _get_weights().
        uniform_vote = weights in (None, "uniform")
        return {
            "fptype": X.dtype,
            "vote_weights": "uniform" if uniform_vote else "distance",
            "method": self._fit_method,
            "radius": self.radius,
            "class_count": class_count,
            "neighbor_count": self.n_neighbors if n_neighbors is None else n_neighbors,
            "metric": self.effective_metric_,
            "p": p,
            "metric_params": self.effective_metric_params_,
            "result_option": "indices|distances" if y is None else "responses",
        }

    def _get_daal_params(self, data, n_neighbors=None):
        """Build the keyword arguments for the daal4py kNN algorithms.

        Parameters
        ----------
        data : object exposing ``dtype``
            ``float32`` data selects ``fptype="float"``; anything else
            selects ``"double"``.
        n_neighbors : int or None
            Overrides ``self.n_neighbors`` when given.

        Returns
        -------
        dict
            Algorithm parameters. ``nClasses`` is only included when classes
            are known.
        """
        class_count = 0 if self.classes_ is None else len(self.classes_)
        weights = getattr(self, "weights", "uniform")
        # None means uniform voting, matching _get_weights().
        uniform_vote = weights in (None, "uniform")
        params = {
            "fptype": "float" if data.dtype == np.float32 else "double",
            "method": "defaultDense",
            "k": self.n_neighbors if n_neighbors is None else n_neighbors,
            "voteWeights": "voteUniform" if uniform_vote else "voteDistance",
            "resultsToCompute": "computeIndicesOfNeighbors|computeDistances",
            "resultsToEvaluate": (
                "none"
                if getattr(self, "_y", None) is None or _is_regressor(self)
                else "computeClassLabels"
            ),
        }
        if class_count != 0:
            params["nClasses"] = class_count
        return params
163
+
164
+
165
class NeighborsBase(NeighborsCommonBase, metaclass=ABCMeta):
    """Base class implementing fitting and k-neighbors queries.

    Subclasses supply ``_onedal_fit`` and ``_onedal_predict``, which this
    class calls from ``_fit`` and ``_kneighbors``.
    """

    def __init__(
        self,
        n_neighbors=None,
        radius=None,
        algorithm="auto",
        metric="minkowski",
        p=2,
        metric_params=None,
    ):
        """Store the hyperparameters unchanged; validation happens at fit time."""
        self.n_neighbors = n_neighbors
        self.radius = radius
        self.algorithm = algorithm
        self.metric = metric
        self.p = p
        self.metric_params = metric_params

    def _validate_targets(self, y, dtype):
        """Flatten ``y`` and cast it to ``dtype`` when the cast is possible.

        Returns the flattened array uncast if ``astype`` raises ``ValueError``.
        """
        arr = _column_or_1d(y, warn=True)

        try:
            return arr.astype(dtype, copy=False)
        except ValueError:
            return arr

    def _validate_n_classes(self):
        """Raise ``ValueError`` when fewer than two classes are known."""
        if len(self.classes_) < 2:
            raise ValueError(
                "The number of classes has to be greater than one; got %d"
                " class" % len(self.classes_)
            )

    def _fit(self, X, y, queue):
        """Validate the training data and train the backend model.

        Parameters
        ----------
        X : array-like
            Training data.
        y : array-like or None
            Targets; required when ``self.requires_y`` is truthy.
        queue : object or None
            When not ``None``, ``queue.sycl_device.is_gpu`` decides whether
            regressor targets are passed to the backend.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``n_neighbors`` is not positive.
        TypeError
            If ``n_neighbors`` is not an integer.
        """
        self._onedal_model = None
        self._tree = None
        self._shape = None
        self.classes_ = None
        # Keep values that were already set on the instance before fitting.
        self.effective_metric_ = getattr(self, "effective_metric_", self.metric)
        self.effective_metric_params_ = getattr(
            self, "effective_metric_params_", self.metric_params
        )

        if y is not None or self.requires_y:
            # Remember the caller's target shape before validation changes it.
            shape = getattr(y, "shape", None)
            X, y = super()._validate_data(
                X, y, dtype=[np.float64, np.float32], accept_sparse="csr"
            )
            self._shape = shape if shape is not None else y.shape

            if _is_classifier(self):
                if y.ndim == 1 or y.ndim == 2 and y.shape[1] == 1:
                    self.outputs_2d_ = False
                    y = y.reshape((-1, 1))
                else:
                    self.outputs_2d_ = True

                _check_classification_targets(y)
                self.classes_ = []
                # Encode every output column as indices into its class list.
                self._y = np.empty(y.shape, dtype=int)
                for k in range(self._y.shape[1]):
                    classes, self._y[:, k] = np.unique(y[:, k], return_inverse=True)
                    self.classes_.append(classes)

                if not self.outputs_2d_:
                    self.classes_ = self.classes_[0]
                    self._y = self._y.ravel()

                self._validate_n_classes()
            else:
                self._y = y
        else:
            X, _ = super()._validate_data(X, dtype=[np.float64, np.float32])

        self.n_samples_fit_ = X.shape[0]
        self.n_features_in_ = X.shape[1]
        self._fit_X = X

        if self.n_neighbors is not None:
            if self.n_neighbors <= 0:
                raise ValueError("Expected n_neighbors > 0. Got %d" % self.n_neighbors)
            if not isinstance(self.n_neighbors, Integral):
                raise TypeError(
                    "n_neighbors does not take %s value, "
                    "enter integer value" % type(self.n_neighbors)
                )

        self._fit_method = super()._parse_auto_method(
            self.algorithm, self.n_samples_fit_, self.n_features_in_
        )

        _fit_y = None
        gpu_device = queue is not None and queue.sycl_device.is_gpu

        # Targets go to the backend for classifiers, and for regressors only
        # when running on a GPU queue.
        if _is_classifier(self) or (_is_regressor(self) and gpu_device):
            _fit_y = self._validate_targets(self._y, X.dtype).reshape((-1, 1))
        model = self._onedal_fit(X, _fit_y, queue)

        if y is not None and _is_regressor(self):
            self._y = y if self._shape is None else y.reshape(self._shape)

        self._onedal_model = model
        return self

    def _kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None):
        """Find the nearest neighbors of each query point.

        Parameters
        ----------
        X : array-like or None
            Query points. ``None`` queries the training data itself, in which
            case each sample is excluded from its own neighbor list.
        n_neighbors : int or None
            Number of neighbors; defaults to ``self.n_neighbors``.
        return_distance : bool
            Whether distances are returned together with the indices.
        queue : object or None
            Forwarded to ``_onedal_predict``.

        Returns
        -------
        tuple of numpy.ndarray or numpy.ndarray
            ``(distances, indices)`` when ``return_distance`` is true,
            otherwise only ``indices``.

        Raises
        ------
        ValueError
            On a feature-count mismatch, a non-positive ``n_neighbors``, or
            when more neighbors are requested than fitted samples allow.
        TypeError
            If ``n_neighbors`` is not an integer.
        """
        n_features = getattr(self, "n_features_in_", None)
        shape = getattr(X, "shape", None)
        if n_features and shape and len(shape) > 1 and shape[1] != n_features:
            raise ValueError(
                (
                    f"X has {X.shape[1]} features, "
                    f"but kneighbors is expecting "
                    f"{n_features} features as input"
                )
            )

        _check_is_fitted(self)

        if n_neighbors is None:
            n_neighbors = self.n_neighbors
        elif n_neighbors <= 0:
            raise ValueError("Expected n_neighbors > 0. Got %d" % n_neighbors)
        else:
            if not isinstance(n_neighbors, Integral):
                raise TypeError(
                    "n_neighbors does not take %s value, "
                    "enter integer value" % type(n_neighbors)
                )

        if X is not None:
            query_is_train = False
            X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32])
        else:
            query_is_train = True
            X = self._fit_X
            # Include an extra neighbor to account for the sample itself being
            # returned, which is removed later
            n_neighbors += 1

        n_samples_fit = self.n_samples_fit_
        if n_neighbors > n_samples_fit:
            if query_is_train:
                n_neighbors -= 1  # ok to modify inplace because an error is raised
                inequality_str = "n_neighbors < n_samples_fit"
            else:
                inequality_str = "n_neighbors <= n_samples_fit"
            raise ValueError(
                f"Expected {inequality_str}, but "
                f"n_neighbors = {n_neighbors}, n_samples_fit = {n_samples_fit}, "
                f"n_samples = {X.shape[0]}"  # include n_samples for common tests
            )

        method = super()._parse_auto_method(
            self._fit_method, self.n_samples_fit_, n_features
        )

        if (
            type(self._onedal_model) is kdtree_knn_classification_model
            or type(self._onedal_model) is bf_knn_classification_model
        ):
            # daal4py model: results are read directly from the result object.
            params = super()._get_daal_params(X, n_neighbors=n_neighbors)
            prediction_results = self._onedal_predict(
                self._onedal_model, X, params, queue=queue
            )
            distances = prediction_results.distances
            indices = prediction_results.indices
        else:
            # oneDAL model: results are tables and are converted on the way out.
            params = super()._get_onedal_params(X, n_neighbors=n_neighbors)
            prediction_results = self._onedal_predict(
                self._onedal_model, X, params, queue=queue
            )
            distances = from_table(prediction_results.distances)
            indices = from_table(prediction_results.indices)

        if method == "kd_tree":
            # Reorder each row by ascending distance.
            for i in range(distances.shape[0]):
                seq = distances[i].argsort()
                indices[i] = indices[i][seq]
                distances[i] = distances[i][seq]

        if return_distance:
            results = distances, indices
        else:
            results = indices

        if not query_is_train:
            return results

        # If the query data is the same as the indexed data, we would like
        # to ignore the first nearest neighbor of every sample, i.e
        # the sample itself.
        if return_distance:
            neigh_dist, neigh_ind = results
        else:
            neigh_ind = results

        n_queries, _ = X.shape
        sample_range = np.arange(n_queries)[:, None]
        sample_mask = neigh_ind != sample_range

        # Corner case: When the number of duplicates are more
        # than the number of neighbors, the first NN will not
        # be the sample, but a duplicate.
        # In that case mask the first duplicate.
        dup_gr_nbrs = np.all(sample_mask, axis=1)
        sample_mask[:, 0][dup_gr_nbrs] = False

        neigh_ind = np.reshape(neigh_ind[sample_mask], (n_queries, n_neighbors - 1))

        if return_distance:
            neigh_dist = np.reshape(neigh_dist[sample_mask], (n_queries, n_neighbors - 1))
            return neigh_dist, neigh_ind
        return neigh_ind
387
+
388
+
389
+ class KNeighborsClassifier(NeighborsBase, ClassifierMixin):
390
+ def __init__(
391
+ self,
392
+ n_neighbors=5,
393
+ *,
394
+ weights="uniform",
395
+ algorithm="auto",
396
+ p=2,
397
+ metric="minkowski",
398
+ metric_params=None,
399
+ **kwargs,
400
+ ):
401
+ super().__init__(
402
+ n_neighbors=n_neighbors,
403
+ algorithm=algorithm,
404
+ metric=metric,
405
+ p=p,
406
+ metric_params=metric_params,
407
+ **kwargs,
408
+ )
409
+ self.weights = weights
410
+
411
+ def _get_daal_params(self, data):
412
+ params = super()._get_daal_params(data)
413
+ params["resultsToEvaluate"] = "computeClassLabels"
414
+ params["resultsToCompute"] = ""
415
+ return params
416
+
417
+ def _onedal_fit(self, X, y, queue):
418
+ gpu_device = queue is not None and queue.sycl_device.is_gpu
419
+ if self.effective_metric_ == "euclidean" and not gpu_device:
420
+ params = self._get_daal_params(X)
421
+ if self._fit_method == "brute":
422
+ train_alg = bf_knn_classification_training
423
+
424
+ else:
425
+ train_alg = kdtree_knn_classification_training
426
+
427
+ return train_alg(**params).compute(X, y).model
428
+
429
+ policy = self._get_policy(queue, X, y)
430
+ X_table, y_table = to_table(X, y, queue=queue)
431
+ params = self._get_onedal_params(X_table, y)
432
+ train_alg = self._get_backend(
433
+ "neighbors", "classification", "train", policy, params, X_table, y_table
434
+ )
435
+
436
+ return train_alg.model
437
+
438
+ def _onedal_predict(self, model, X, params, queue):
439
+ if type(self._onedal_model) is kdtree_knn_classification_model:
440
+ return kdtree_knn_classification_prediction(**params).compute(X, model)
441
+ elif type(self._onedal_model) is bf_knn_classification_model:
442
+ return bf_knn_classification_prediction(**params).compute(X, model)
443
+
444
+ policy = self._get_policy(queue, X)
445
+ X = to_table(X, queue=queue)
446
+ if hasattr(self, "_onedal_model"):
447
+ model = self._onedal_model
448
+ else:
449
+ model = self._create_model(
450
+ self._get_backend("neighbors", "classification", None)
451
+ )
452
+ if "responses" not in params["result_option"]:
453
+ params["result_option"] += "|responses"
454
+ params["fptype"] = X.dtype
455
+ result = self._get_backend(
456
+ "neighbors", "classification", "infer", policy, params, model, X
457
+ )
458
+
459
+ return result
460
+
461
+ def fit(self, X, y, queue=None):
462
+ return super()._fit(X, y, queue=queue)
463
+
464
def predict(self, X, queue=None):
    """Predict class labels for the query samples in ``X``.

    ``X`` is validated with ``_check_array`` (CSR sparse input accepted,
    float64/float32 dtypes), its feature count is compared with
    ``n_features_in_`` when both are known, and the prediction is run
    through ``_onedal_predict``. The backend responses are used as
    positions into ``self.classes_``.

    Raises
    ------
    ValueError
        If ``X`` has more than one dimension and its second dimension
        differs from ``n_features_in_``.
    """
    X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32])

    fitted_model = getattr(self, "_onedal_model", None)
    expected_features = getattr(self, "n_features_in_", None)
    fitted_samples = getattr(self, "n_samples_fit_", None)
    x_shape = getattr(X, "shape", None)

    if (
        expected_features
        and x_shape
        and len(x_shape) > 1
        and x_shape[1] != expected_features
    ):
        message = (
            f"X has {X.shape[1]} features, "
            f"but KNNClassifier is expecting "
            f"{expected_features} features as input"
        )
        raise ValueError(message)

    _check_is_fitted(self)

    self._fit_method = super()._parse_auto_method(
        self.algorithm, fitted_samples, expected_features
    )

    self._validate_n_classes()

    # The kd-tree / brute-force model classes take their own parameter set
    # and expose labels as ``.prediction``; any other model goes through the
    # table-based path and exposes them as a ``responses`` table.
    model_type = type(fitted_model)
    uses_daal_model = (
        model_type is kdtree_knn_classification_model
        or model_type is bf_knn_classification_model
    )

    if uses_daal_model:
        params = self._get_daal_params(X)
    else:
        params = self._get_onedal_params(X)

    prediction_result = self._onedal_predict(fitted_model, X, params, queue=queue)

    if uses_daal_model:
        responses = prediction_result.prediction
    else:
        responses = from_table(prediction_result.responses)

    label_positions = np.asarray(responses.ravel(), dtype=np.intp)
    return self.classes_.take(label_positions)
501
+
502
def predict_proba(self, X, queue=None):
    """Estimate class probabilities for the query samples in ``X``.

    Each query's neighbors vote for their labels, weighted by
    ``self._get_weights`` (or equally when that returns ``None``), and the
    votes are normalized per row. Returns one ``(n_queries, n_classes)``
    array when ``self.outputs_2d_`` is falsy, otherwise a list with one
    such array per output.
    """
    neigh_dist, neigh_ind = self.kneighbors(X, queue=queue)

    # Treat the single-output case as a one-column multi-output problem.
    if self.outputs_2d_:
        targets = self._y
        per_output_classes = self.classes_
    else:
        targets = self._y.reshape((-1, 1))
        per_output_classes = [self.classes_]

    n_queries = _num_samples(X)

    vote_weights = self._get_weights(neigh_dist, self.weights)
    if vote_weights is None:
        vote_weights = np.ones_like(neigh_ind)

    row_index = np.arange(n_queries)
    probabilities = []
    for output, output_classes in enumerate(per_output_classes):
        neighbor_labels = targets[:, output][neigh_ind]
        votes = np.zeros((n_queries, output_classes.size))

        # One pass per neighbor rank: within a pass every row is touched
        # exactly once, so the fancy-indexed += accumulates correctly.
        for rank, label_column in enumerate(neighbor_labels.T):
            votes[row_index, label_column] += vote_weights[:, rank]

        # Turn vote totals into probabilities; rows with no votes are
        # divided by 1 instead of 0.
        totals = votes.sum(axis=1)[:, np.newaxis]
        totals[totals == 0.0] = 1.0
        votes /= totals

        probabilities.append(votes)

    if not self.outputs_2d_:
        return probabilities[0]
    return probabilities
538
+
539
def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None):
    """Find the nearest neighbors of the query samples.

    Thin wrapper that forwards every argument to the parent class's
    ``_kneighbors`` and returns its result unchanged.
    """
    neighbors_result = super()._kneighbors(
        X, n_neighbors, return_distance, queue=queue
    )
    return neighbors_result
541
+
542
+
543
class KNeighborsRegressor(NeighborsBase, RegressorMixin):
    """k-nearest-neighbors regressor.

    Training and neighbor search are delegated to backend objects; the
    regression value itself is either taken from the backend's
    ``responses`` (GPU queue with uniform weights) or computed with NumPy
    from the neighbors' targets.
    """

    def __init__(
        self,
        n_neighbors=5,
        *,
        weights="uniform",
        algorithm="auto",
        p=2,
        metric="minkowski",
        metric_params=None,
        **kwargs,
    ):
        """Store ``weights`` and forward the remaining options to the base class."""
        super().__init__(
            n_neighbors=n_neighbors,
            algorithm=algorithm,
            metric=metric,
            p=p,
            metric_params=metric_params,
            **kwargs,
        )
        self.weights = weights

    def _get_daal_params(self, data):
        """Return the base parameters, requesting neighbor indices and distances only."""
        params = super()._get_daal_params(data)
        params["resultsToCompute"] = "computeIndicesOfNeighbors|computeDistances"
        params["resultsToEvaluate"] = "none"
        return params

    def _onedal_fit(self, X, y, queue):
        """Train a backend model and return it.

        With the euclidean metric and no GPU queue, the brute-force or
        kd-tree training class is used (chosen by ``self._fit_method``).
        Otherwise the table-based backend is used: the regression backend
        on a GPU queue, the search backend elsewhere.
        """
        gpu_device = queue is not None and queue.sycl_device.is_gpu
        if self.effective_metric_ == "euclidean" and not gpu_device:
            params = self._get_daal_params(X)
            if self._fit_method == "brute":
                train_alg = bf_knn_classification_training
            else:
                train_alg = kdtree_knn_classification_training

            return train_alg(**params).compute(X, y).model

        policy = self._get_policy(queue, X, y)
        X_table, y_table = to_table(X, y, queue=queue)
        params = self._get_onedal_params(X_table, y)

        # Only look up the backend that is actually used.
        if gpu_device:
            backend = self._get_backend("neighbors", "regression", None)
            return backend.train(policy, params, X_table, y_table).model
        backend = self._get_backend("neighbors", "search", None)
        return backend.train(policy, params, X_table).model

    def _onedal_predict(self, model, X, params, queue):
        """Run inference with the backend that matches ``model``.

        On the table-based path ``params`` is updated in place and ``model``
        is replaced by ``self._onedal_model`` when that attribute exists.
        Returns the raw backend result.
        """
        if type(model) is kdtree_knn_classification_model:
            return kdtree_knn_classification_prediction(**params).compute(X, model)
        elif type(model) is bf_knn_classification_model:
            return bf_knn_classification_prediction(**params).compute(X, model)

        gpu_device = queue is not None and queue.sycl_device.is_gpu
        policy = self._get_policy(queue, X)
        X = to_table(X, queue=queue)
        backend = (
            self._get_backend("neighbors", "regression", None)
            if gpu_device
            else self._get_backend("neighbors", "search", None)
        )

        if hasattr(self, "_onedal_model"):
            model = self._onedal_model
        else:
            model = self._create_model(backend)
        # Responses are only requested on the GPU (regression backend) path.
        if "responses" not in params["result_option"] and gpu_device:
            params["result_option"] += "|responses"
        params["fptype"] = X.dtype
        result = backend.infer(policy, params, model, X)

        return result

    def fit(self, X, y, queue=None):
        """Fit the estimator by delegating to the base class's ``_fit``."""
        return super()._fit(X, y, queue=queue)

    def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None):
        """Find nearest neighbors by delegating to the base class's ``_kneighbors``."""
        return super()._kneighbors(X, n_neighbors, return_distance, queue=queue)

    def _predict_gpu(self, X, queue=None):
        """Predict using the ``responses`` computed by the backend.

        Raises
        ------
        ValueError
            If ``X`` has more than one dimension and its second dimension
            differs from ``n_features_in_``.
        """
        X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32])
        onedal_model = getattr(self, "_onedal_model", None)
        n_features = getattr(self, "n_features_in_", None)
        n_samples_fit_ = getattr(self, "n_samples_fit_", None)
        shape = getattr(X, "shape", None)
        if n_features and shape and len(shape) > 1 and shape[1] != n_features:
            # The message names the regressor; it previously said
            # "KNNClassifier", copied from the classifier.
            raise ValueError(
                (
                    f"X has {X.shape[1]} features, "
                    f"but KNNRegressor is expecting "
                    f"{n_features} features as input"
                )
            )

        _check_is_fitted(self)

        self._fit_method = super()._parse_auto_method(
            self.algorithm, n_samples_fit_, n_features
        )

        params = self._get_onedal_params(X)

        prediction_result = self._onedal_predict(onedal_model, X, params, queue=queue)
        responses = from_table(prediction_result.responses)
        result = responses.ravel()

        return result

    def _predict_skl(self, X, queue=None):
        """Predict as the (weighted) mean of the neighbors' targets.

        Neighbors come from ``self.kneighbors``; weights come from
        ``self._get_weights``. When the weights are ``None`` a plain mean is
        used. The result is flattened when the stored targets are 1-D.
        """
        neigh_dist, neigh_ind = self.kneighbors(X, queue=queue)

        weights = self._get_weights(neigh_dist, self.weights)

        _y = self._y
        if _y.ndim == 1:
            _y = _y.reshape((-1, 1))

        if weights is None:
            y_pred = np.mean(_y[neigh_ind], axis=1)
        else:
            # Size the output from the neighbor result rather than from X:
            # X may be a plain list or None, neither of which has ``.shape``.
            n_queries = np.shape(neigh_ind)[0]
            y_pred = np.empty((n_queries, _y.shape[1]), dtype=np.float64)
            denom = np.sum(weights, axis=1)

            for j in range(_y.shape[1]):
                num = np.sum(_y[neigh_ind, j] * weights, axis=1)
                y_pred[:, j] = num / denom

        if self._y.ndim == 1:
            y_pred = y_pred.ravel()

        return y_pred

    def predict(self, X, queue=None):
        """Predict targets for ``X``.

        Uses ``_predict_gpu`` when ``queue`` is a GPU queue and the weights
        are ``"uniform"``; otherwise uses ``_predict_skl``.
        """
        gpu_device = queue is not None and queue.sycl_device.is_gpu
        is_uniform_weights = getattr(self, "weights", "uniform") == "uniform"
        if gpu_device and is_uniform_weights:
            return self._predict_gpu(X, queue=queue)
        return self._predict_skl(X, queue=queue)
686
+
687
+
688
class NearestNeighbors(NeighborsBase):
    """Unsupervised nearest-neighbors search.

    Training and neighbor search are delegated to backend objects; this
    class only selects the backend and prepares its parameters.
    """

    def __init__(
        self,
        n_neighbors=5,
        *,
        weights="uniform",
        algorithm="auto",
        p=2,
        metric="minkowski",
        metric_params=None,
        **kwargs,
    ):
        """Store ``weights`` and forward the remaining options to the base class."""
        super().__init__(
            n_neighbors=n_neighbors,
            algorithm=algorithm,
            metric=metric,
            p=p,
            metric_params=metric_params,
            **kwargs,
        )
        self.weights = weights

    def _get_daal_params(self, data):
        """Return the base parameters, requesting neighbor indices and distances.

        Class labels are additionally requested only when the estimator
        holds a non-``None`` ``_y`` attribute.
        """
        params = super()._get_daal_params(data)
        params["resultsToCompute"] = "computeIndicesOfNeighbors|computeDistances"
        params["resultsToEvaluate"] = (
            "none" if getattr(self, "_y", None) is None else "computeClassLabels"
        )
        return params

    def _onedal_fit(self, X, y, queue):
        """Train a backend model and return it.

        With the euclidean metric and no GPU queue, the brute-force or
        kd-tree training class is used (chosen by ``self._fit_method``).
        Otherwise the table-based search backend is trained on ``X`` only.
        """
        gpu_device = queue is not None and queue.sycl_device.is_gpu
        if self.effective_metric_ == "euclidean" and not gpu_device:
            params = self._get_daal_params(X)
            if self._fit_method == "brute":
                train_alg = bf_knn_classification_training
            else:
                train_alg = kdtree_knn_classification_training

            return train_alg(**params).compute(X, y).model

        policy = self._get_policy(queue, X, y)
        X_table = to_table(X, queue=queue)
        params = self._get_onedal_params(X_table, y)
        train_alg = self._get_backend(
            "neighbors", "search", "train", policy, params, X_table
        )

        return train_alg.model

    def _onedal_predict(self, model, X, params, queue):
        """Run the neighbor search with the backend that matches the fitted model.

        On the table-based path ``params["fptype"]`` is set in place and
        ``model`` is replaced by ``self._onedal_model`` when that attribute
        exists, otherwise by a model built with ``self._create_model``.
        Returns the raw backend result.
        """
        # Read the fitted model defensively. The fallback further down is
        # meant for estimators that have no ``_onedal_model`` attribute; a
        # bare attribute access here would raise AttributeError first.
        fitted_type = type(getattr(self, "_onedal_model", None))
        if fitted_type is kdtree_knn_classification_model:
            return kdtree_knn_classification_prediction(**params).compute(X, model)
        if fitted_type is bf_knn_classification_model:
            return bf_knn_classification_prediction(**params).compute(X, model)

        policy = self._get_policy(queue, X)
        X = to_table(X, queue=queue)
        if hasattr(self, "_onedal_model"):
            model = self._onedal_model
        else:
            model = self._create_model(self._get_backend("neighbors", "search", None))

        params["fptype"] = X.dtype
        result = self._get_backend(
            "neighbors", "search", "infer", policy, params, model, X
        )

        return result

    def fit(self, X, y=None, queue=None):
        """Fit the estimator by delegating to the base class's ``_fit``.

        ``y`` is optional because neighbor search is unsupervised; it is
        forwarded unchanged when given.
        """
        return super()._fit(X, y, queue=queue)

    def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None):
        """Find nearest neighbors by delegating to the base class's ``_kneighbors``."""
        return super()._kneighbors(X, n_neighbors, return_distance, queue=queue)