scikit-learn-intelex 2025.4.0__py313-none-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-learn-intelex might be problematic. Click here for more details.
- daal4py/__init__.py +73 -0
- daal4py/__main__.py +58 -0
- daal4py/_daal4py.cpython-313-x86_64-linux-gnu.so +0 -0
- daal4py/doc/third-party-programs.txt +424 -0
- daal4py/mb/__init__.py +19 -0
- daal4py/mb/model_builders.py +377 -0
- daal4py/mpi_transceiver.cpython-313-x86_64-linux-gnu.so +0 -0
- daal4py/sklearn/__init__.py +40 -0
- daal4py/sklearn/_n_jobs_support.py +248 -0
- daal4py/sklearn/_utils.py +245 -0
- daal4py/sklearn/cluster/__init__.py +20 -0
- daal4py/sklearn/cluster/dbscan.py +165 -0
- daal4py/sklearn/cluster/k_means.py +597 -0
- daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
- daal4py/sklearn/decomposition/__init__.py +19 -0
- daal4py/sklearn/decomposition/_pca.py +524 -0
- daal4py/sklearn/ensemble/AdaBoostClassifier.py +196 -0
- daal4py/sklearn/ensemble/GBTDAAL.py +337 -0
- daal4py/sklearn/ensemble/__init__.py +27 -0
- daal4py/sklearn/ensemble/_forest.py +1397 -0
- daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
- daal4py/sklearn/linear_model/__init__.py +29 -0
- daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
- daal4py/sklearn/linear_model/_linear.py +272 -0
- daal4py/sklearn/linear_model/_ridge.py +325 -0
- daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
- daal4py/sklearn/linear_model/linear.py +17 -0
- daal4py/sklearn/linear_model/logistic_loss.py +195 -0
- daal4py/sklearn/linear_model/logistic_path.py +1026 -0
- daal4py/sklearn/linear_model/ridge.py +17 -0
- daal4py/sklearn/linear_model/tests/test_linear.py +208 -0
- daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
- daal4py/sklearn/manifold/__init__.py +19 -0
- daal4py/sklearn/manifold/_t_sne.py +405 -0
- daal4py/sklearn/metrics/__init__.py +20 -0
- daal4py/sklearn/metrics/_pairwise.py +236 -0
- daal4py/sklearn/metrics/_ranking.py +210 -0
- daal4py/sklearn/model_selection/__init__.py +19 -0
- daal4py/sklearn/model_selection/_split.py +309 -0
- daal4py/sklearn/model_selection/tests/test_split.py +56 -0
- daal4py/sklearn/monkeypatch/__init__.py +0 -0
- daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
- daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
- daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
- daal4py/sklearn/monkeypatch/tests/test_patching.py +90 -0
- daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +117 -0
- daal4py/sklearn/neighbors/__init__.py +21 -0
- daal4py/sklearn/neighbors/_base.py +503 -0
- daal4py/sklearn/neighbors/_classification.py +139 -0
- daal4py/sklearn/neighbors/_regression.py +74 -0
- daal4py/sklearn/neighbors/_unsupervised.py +55 -0
- daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
- daal4py/sklearn/svm/__init__.py +19 -0
- daal4py/sklearn/svm/svm.py +734 -0
- daal4py/sklearn/utils/__init__.py +21 -0
- daal4py/sklearn/utils/base.py +75 -0
- daal4py/sklearn/utils/tests/test_utils.py +51 -0
- daal4py/sklearn/utils/validation.py +696 -0
- onedal/__init__.py +83 -0
- onedal/_config.py +54 -0
- onedal/_device_offload.py +204 -0
- onedal/_onedal_py_dpc.cpython-313-x86_64-linux-gnu.so +0 -0
- onedal/_onedal_py_host.cpython-313-x86_64-linux-gnu.so +0 -0
- onedal/_onedal_py_spmd_dpc.cpython-313-x86_64-linux-gnu.so +0 -0
- onedal/basic_statistics/__init__.py +20 -0
- onedal/basic_statistics/basic_statistics.py +107 -0
- onedal/basic_statistics/incremental_basic_statistics.py +175 -0
- onedal/basic_statistics/tests/test_basic_statistics.py +242 -0
- onedal/basic_statistics/tests/test_incremental_basic_statistics.py +279 -0
- onedal/basic_statistics/tests/utils.py +50 -0
- onedal/cluster/__init__.py +27 -0
- onedal/cluster/dbscan.py +105 -0
- onedal/cluster/kmeans.py +557 -0
- onedal/cluster/kmeans_init.py +112 -0
- onedal/cluster/tests/test_dbscan.py +125 -0
- onedal/cluster/tests/test_kmeans.py +88 -0
- onedal/cluster/tests/test_kmeans_init.py +93 -0
- onedal/common/_base.py +38 -0
- onedal/common/_estimator_checks.py +47 -0
- onedal/common/_mixin.py +62 -0
- onedal/common/_policy.py +55 -0
- onedal/common/_spmd_policy.py +30 -0
- onedal/common/hyperparameters.py +125 -0
- onedal/common/tests/test_policy.py +76 -0
- onedal/common/tests/test_sycl.py +128 -0
- onedal/covariance/__init__.py +20 -0
- onedal/covariance/covariance.py +122 -0
- onedal/covariance/incremental_covariance.py +161 -0
- onedal/covariance/tests/test_covariance.py +50 -0
- onedal/covariance/tests/test_incremental_covariance.py +190 -0
- onedal/datatypes/__init__.py +19 -0
- onedal/datatypes/_data_conversion.py +121 -0
- onedal/datatypes/tests/common.py +126 -0
- onedal/datatypes/tests/test_data.py +475 -0
- onedal/decomposition/__init__.py +20 -0
- onedal/decomposition/incremental_pca.py +214 -0
- onedal/decomposition/pca.py +186 -0
- onedal/decomposition/tests/test_incremental_pca.py +285 -0
- onedal/ensemble/__init__.py +29 -0
- onedal/ensemble/forest.py +736 -0
- onedal/ensemble/tests/test_random_forest.py +97 -0
- onedal/linear_model/__init__.py +27 -0
- onedal/linear_model/incremental_linear_model.py +292 -0
- onedal/linear_model/linear_model.py +325 -0
- onedal/linear_model/logistic_regression.py +247 -0
- onedal/linear_model/tests/test_incremental_linear_regression.py +213 -0
- onedal/linear_model/tests/test_incremental_ridge_regression.py +171 -0
- onedal/linear_model/tests/test_linear_regression.py +259 -0
- onedal/linear_model/tests/test_logistic_regression.py +95 -0
- onedal/linear_model/tests/test_ridge.py +95 -0
- onedal/neighbors/__init__.py +19 -0
- onedal/neighbors/neighbors.py +763 -0
- onedal/neighbors/tests/test_knn_classification.py +49 -0
- onedal/primitives/__init__.py +27 -0
- onedal/primitives/get_tree.py +25 -0
- onedal/primitives/kernel_functions.py +152 -0
- onedal/primitives/tests/test_kernel_functions.py +159 -0
- onedal/spmd/__init__.py +25 -0
- onedal/spmd/_base.py +30 -0
- onedal/spmd/basic_statistics/__init__.py +20 -0
- onedal/spmd/basic_statistics/basic_statistics.py +30 -0
- onedal/spmd/basic_statistics/incremental_basic_statistics.py +71 -0
- onedal/spmd/cluster/__init__.py +28 -0
- onedal/spmd/cluster/dbscan.py +23 -0
- onedal/spmd/cluster/kmeans.py +56 -0
- onedal/spmd/covariance/__init__.py +20 -0
- onedal/spmd/covariance/covariance.py +26 -0
- onedal/spmd/covariance/incremental_covariance.py +83 -0
- onedal/spmd/decomposition/__init__.py +20 -0
- onedal/spmd/decomposition/incremental_pca.py +124 -0
- onedal/spmd/decomposition/pca.py +26 -0
- onedal/spmd/ensemble/__init__.py +19 -0
- onedal/spmd/ensemble/forest.py +28 -0
- onedal/spmd/linear_model/__init__.py +21 -0
- onedal/spmd/linear_model/incremental_linear_model.py +101 -0
- onedal/spmd/linear_model/linear_model.py +30 -0
- onedal/spmd/linear_model/logistic_regression.py +38 -0
- onedal/spmd/neighbors/__init__.py +19 -0
- onedal/spmd/neighbors/neighbors.py +75 -0
- onedal/svm/__init__.py +19 -0
- onedal/svm/svm.py +556 -0
- onedal/svm/tests/test_csr_svm.py +351 -0
- onedal/svm/tests/test_nusvc.py +204 -0
- onedal/svm/tests/test_nusvr.py +210 -0
- onedal/svm/tests/test_svc.py +176 -0
- onedal/svm/tests/test_svr.py +243 -0
- onedal/tests/test_common.py +57 -0
- onedal/tests/utils/_dataframes_support.py +162 -0
- onedal/tests/utils/_device_selection.py +102 -0
- onedal/utils/__init__.py +49 -0
- onedal/utils/_array_api.py +81 -0
- onedal/utils/_dpep_helpers.py +56 -0
- onedal/utils/tests/test_validation.py +142 -0
- onedal/utils/validation.py +464 -0
- scikit_learn_intelex-2025.4.0.dist-info/LICENSE.txt +202 -0
- scikit_learn_intelex-2025.4.0.dist-info/METADATA +190 -0
- scikit_learn_intelex-2025.4.0.dist-info/RECORD +282 -0
- scikit_learn_intelex-2025.4.0.dist-info/WHEEL +5 -0
- scikit_learn_intelex-2025.4.0.dist-info/top_level.txt +3 -0
- sklearnex/__init__.py +66 -0
- sklearnex/__main__.py +58 -0
- sklearnex/_config.py +116 -0
- sklearnex/_device_offload.py +126 -0
- sklearnex/_utils.py +177 -0
- sklearnex/basic_statistics/__init__.py +20 -0
- sklearnex/basic_statistics/basic_statistics.py +261 -0
- sklearnex/basic_statistics/incremental_basic_statistics.py +352 -0
- sklearnex/basic_statistics/tests/test_basic_statistics.py +405 -0
- sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +455 -0
- sklearnex/cluster/__init__.py +20 -0
- sklearnex/cluster/dbscan.py +197 -0
- sklearnex/cluster/k_means.py +397 -0
- sklearnex/cluster/tests/test_dbscan.py +38 -0
- sklearnex/cluster/tests/test_kmeans.py +157 -0
- sklearnex/conftest.py +82 -0
- sklearnex/covariance/__init__.py +19 -0
- sklearnex/covariance/incremental_covariance.py +405 -0
- sklearnex/covariance/tests/test_incremental_covariance.py +287 -0
- sklearnex/decomposition/__init__.py +19 -0
- sklearnex/decomposition/pca.py +427 -0
- sklearnex/decomposition/tests/test_pca.py +58 -0
- sklearnex/dispatcher.py +534 -0
- sklearnex/doc/third-party-programs.txt +424 -0
- sklearnex/ensemble/__init__.py +29 -0
- sklearnex/ensemble/_forest.py +2029 -0
- sklearnex/ensemble/tests/test_forest.py +140 -0
- sklearnex/glob/__main__.py +72 -0
- sklearnex/glob/dispatcher.py +101 -0
- sklearnex/linear_model/__init__.py +32 -0
- sklearnex/linear_model/coordinate_descent.py +30 -0
- sklearnex/linear_model/incremental_linear.py +495 -0
- sklearnex/linear_model/incremental_ridge.py +432 -0
- sklearnex/linear_model/linear.py +346 -0
- sklearnex/linear_model/logistic_regression.py +415 -0
- sklearnex/linear_model/ridge.py +390 -0
- sklearnex/linear_model/tests/test_incremental_linear.py +267 -0
- sklearnex/linear_model/tests/test_incremental_ridge.py +214 -0
- sklearnex/linear_model/tests/test_linear.py +142 -0
- sklearnex/linear_model/tests/test_logreg.py +134 -0
- sklearnex/linear_model/tests/test_ridge.py +256 -0
- sklearnex/manifold/__init__.py +19 -0
- sklearnex/manifold/t_sne.py +26 -0
- sklearnex/manifold/tests/test_tsne.py +250 -0
- sklearnex/metrics/__init__.py +23 -0
- sklearnex/metrics/pairwise.py +22 -0
- sklearnex/metrics/ranking.py +20 -0
- sklearnex/metrics/tests/test_metrics.py +39 -0
- sklearnex/model_selection/__init__.py +21 -0
- sklearnex/model_selection/split.py +22 -0
- sklearnex/model_selection/tests/test_model_selection.py +34 -0
- sklearnex/neighbors/__init__.py +27 -0
- sklearnex/neighbors/_lof.py +236 -0
- sklearnex/neighbors/common.py +310 -0
- sklearnex/neighbors/knn_classification.py +231 -0
- sklearnex/neighbors/knn_regression.py +207 -0
- sklearnex/neighbors/knn_unsupervised.py +178 -0
- sklearnex/neighbors/tests/test_neighbors.py +82 -0
- sklearnex/preview/__init__.py +17 -0
- sklearnex/preview/covariance/__init__.py +19 -0
- sklearnex/preview/covariance/covariance.py +142 -0
- sklearnex/preview/covariance/tests/test_covariance.py +66 -0
- sklearnex/preview/decomposition/__init__.py +19 -0
- sklearnex/preview/decomposition/incremental_pca.py +244 -0
- sklearnex/preview/decomposition/tests/test_incremental_pca.py +336 -0
- sklearnex/spmd/__init__.py +25 -0
- sklearnex/spmd/basic_statistics/__init__.py +20 -0
- sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
- sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
- sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
- sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +306 -0
- sklearnex/spmd/cluster/__init__.py +30 -0
- sklearnex/spmd/cluster/dbscan.py +50 -0
- sklearnex/spmd/cluster/kmeans.py +21 -0
- sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
- sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +173 -0
- sklearnex/spmd/covariance/__init__.py +20 -0
- sklearnex/spmd/covariance/covariance.py +21 -0
- sklearnex/spmd/covariance/incremental_covariance.py +37 -0
- sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
- sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
- sklearnex/spmd/decomposition/__init__.py +20 -0
- sklearnex/spmd/decomposition/incremental_pca.py +30 -0
- sklearnex/spmd/decomposition/pca.py +21 -0
- sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
- sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
- sklearnex/spmd/ensemble/__init__.py +19 -0
- sklearnex/spmd/ensemble/forest.py +71 -0
- sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
- sklearnex/spmd/linear_model/__init__.py +21 -0
- sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
- sklearnex/spmd/linear_model/linear_model.py +21 -0
- sklearnex/spmd/linear_model/logistic_regression.py +21 -0
- sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +331 -0
- sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
- sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +162 -0
- sklearnex/spmd/neighbors/__init__.py +19 -0
- sklearnex/spmd/neighbors/neighbors.py +25 -0
- sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
- sklearnex/svm/__init__.py +29 -0
- sklearnex/svm/_common.py +339 -0
- sklearnex/svm/nusvc.py +371 -0
- sklearnex/svm/nusvr.py +170 -0
- sklearnex/svm/svc.py +399 -0
- sklearnex/svm/svr.py +167 -0
- sklearnex/svm/tests/test_svm.py +93 -0
- sklearnex/tests/test_common.py +491 -0
- sklearnex/tests/test_config.py +123 -0
- sklearnex/tests/test_hyperparameters.py +43 -0
- sklearnex/tests/test_memory_usage.py +347 -0
- sklearnex/tests/test_monkeypatch.py +269 -0
- sklearnex/tests/test_n_jobs_support.py +108 -0
- sklearnex/tests/test_parallel.py +48 -0
- sklearnex/tests/test_patching.py +377 -0
- sklearnex/tests/test_run_to_run_stability.py +326 -0
- sklearnex/tests/utils/__init__.py +48 -0
- sklearnex/tests/utils/base.py +436 -0
- sklearnex/tests/utils/spmd.py +198 -0
- sklearnex/utils/__init__.py +19 -0
- sklearnex/utils/_array_api.py +82 -0
- sklearnex/utils/parallel.py +59 -0
- sklearnex/utils/tests/test_validation.py +238 -0
- sklearnex/utils/validation.py +208 -0
|
@@ -0,0 +1,763 @@
|
|
|
1
|
+
# ==============================================================================
|
|
2
|
+
# Copyright 2022 Intel Corporation
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# ==============================================================================
|
|
16
|
+
|
|
17
|
+
from abc import ABCMeta
|
|
18
|
+
from numbers import Integral
|
|
19
|
+
|
|
20
|
+
import numpy as np
|
|
21
|
+
|
|
22
|
+
from daal4py import (
|
|
23
|
+
bf_knn_classification_model,
|
|
24
|
+
bf_knn_classification_prediction,
|
|
25
|
+
bf_knn_classification_training,
|
|
26
|
+
kdtree_knn_classification_model,
|
|
27
|
+
kdtree_knn_classification_prediction,
|
|
28
|
+
kdtree_knn_classification_training,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
from ..common._base import BaseEstimator
|
|
32
|
+
from ..common._estimator_checks import _check_is_fitted, _is_classifier, _is_regressor
|
|
33
|
+
from ..common._mixin import ClassifierMixin, RegressorMixin
|
|
34
|
+
from ..datatypes import from_table, to_table
|
|
35
|
+
from ..utils import (
|
|
36
|
+
_check_array,
|
|
37
|
+
_check_classification_targets,
|
|
38
|
+
_check_n_features,
|
|
39
|
+
_check_X_y,
|
|
40
|
+
_column_or_1d,
|
|
41
|
+
_num_samples,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class NeighborsCommonBase(BaseEstimator, metaclass=ABCMeta):
    """Shared helpers for the neighbors estimators in this module.

    Covers search-method selection, input validation, neighbor weighting and
    construction of the parameter dictionaries for the daal4py-style and
    oneDAL-style code paths.
    """

    def _parse_auto_method(self, method, n_samples, n_features):
        """Resolve ``"auto"``/``"ball_tree"`` to a concrete search method.

        Any other ``method`` value is returned unchanged. Otherwise the
        result is ``"kd_tree"`` only for the euclidean metric with at most 15
        features and ``n_neighbors`` below half the sample count; every other
        combination resolves to ``"brute"``.
        """
        if method not in ("auto", "ball_tree"):
            return method

        too_many_neighbors = (
            self.n_neighbors is not None and self.n_neighbors >= n_samples // 2
        )
        if self.metric == "precomputed" or n_features > 15 or too_many_neighbors:
            return "brute"
        return "kd_tree" if self.metric == "euclidean" else "brute"

    def _validate_data(
        self, X, y=None, reset=True, validate_separately=False, **check_params
    ):
        """Validate ``X`` (and ``y`` when given) and check the feature count.

        Parameters
        ----------
        X, y : input data and optional targets.
        reset : forwarded to ``_check_n_features``.
        validate_separately : falsy, or a pair ``(check_X_params,
            check_y_params)`` of keyword dicts used to validate ``X`` and
            ``y`` independently with ``_check_array``.
        **check_params : keywords forwarded to ``_check_array`` /
            ``_check_X_y``; ``ensure_2d`` (default ``True``) also controls
            whether the feature count is checked.

        Returns
        -------
        tuple ``(X, y)`` of validated inputs (``y`` stays ``None`` if absent).

        Raises
        ------
        ValueError
            If ``y`` is ``None`` while ``self.requires_y`` is truthy.
        """
        if y is None:
            if self.requires_y:
                estimator_name = self.__class__.__name__
                raise ValueError(
                    f"This {estimator_name} estimator "
                    "requires y to be passed, but the target y is None."
                )
            X = _check_array(X, **check_params)
        elif validate_separately:
            # Validating X and y on their own is not equivalent to a single
            # _check_X_y() call, so some estimators request it explicitly.
            x_kwargs, y_kwargs = validate_separately
            X = _check_array(X, **x_kwargs)
            y = _check_array(y, **y_kwargs)
        else:
            X, y = _check_X_y(X, y, **check_params)

        if check_params.get("ensure_2d", True):
            _check_n_features(self, X, reset=reset)

        return X, y

    def _get_weights(self, dist, weights):
        """Turn neighbor distances into weights according to ``weights``.

        Returns ``None`` for uniform weighting, inverse distances for
        ``"distance"``, or ``weights(dist)`` for a callable.

        Raises
        ------
        ValueError
            If ``weights`` is none of the supported options.
        """
        if weights in (None, "uniform"):
            return None

        if weights == "distance":
            # A query point at zero distance from one or more training points
            # weights those points as 1.0 and all other points as 0.0.
            if dist.dtype is np.dtype(object):
                for row_idx, row in enumerate(dist):
                    # Rows may be non-iterable scalars (e.g. an outlier
                    # marker), hence the __contains__ probe.
                    has_exact_match = hasattr(row, "__contains__") and 0.0 in row
                    dist[row_idx] = (row == 0.0) if has_exact_match else 1.0 / row
                return dist

            with np.errstate(divide="ignore"):
                inverse = 1.0 / dist
            infinite = np.isinf(inverse)
            rows_with_match = np.any(infinite, axis=1)
            inverse[rows_with_match] = infinite[rows_with_match]
            return inverse

        if callable(weights):
            return weights(dist)

        raise ValueError(
            "weights not recognized: should be 'uniform', "
            "'distance', or a callable function"
        )

    def _get_onedal_params(self, X, y=None, n_neighbors=None):
        """Build the parameter dict for the oneDAL-style code path.

        ``n_neighbors`` overrides ``self.n_neighbors`` when given. Only
        indices and distances are requested when ``y`` is ``None``; otherwise
        responses are requested.
        """
        n_classes = 0 if self.classes_ is None else len(self.classes_)
        vote = getattr(self, "weights", "uniform")
        metric = self.effective_metric_
        # Manhattan and euclidean pin the Minkowski power; anything else
        # uses the configured ``p``.
        power = 1.0 if metric == "manhattan" else 2.0 if metric == "euclidean" else self.p

        onedal_params = {}
        onedal_params["fptype"] = X.dtype
        onedal_params["vote_weights"] = "uniform" if vote == "uniform" else "distance"
        onedal_params["method"] = self._fit_method
        onedal_params["radius"] = self.radius
        onedal_params["class_count"] = n_classes
        onedal_params["neighbor_count"] = (
            self.n_neighbors if n_neighbors is None else n_neighbors
        )
        onedal_params["metric"] = self.effective_metric_
        onedal_params["p"] = power
        onedal_params["metric_params"] = self.effective_metric_params_
        onedal_params["result_option"] = (
            "indices|distances" if y is None else "responses"
        )
        return onedal_params

    def _get_daal_params(self, data, n_neighbors=None):
        """Build the parameter dict for the daal4py-style code path.

        ``n_neighbors`` overrides ``self.n_neighbors`` when given. Class
        labels are requested only when ``self._y`` is set and the estimator
        is not a regressor; ``nClasses`` is added only when classes are known.
        """
        n_classes = 0 if self.classes_ is None else len(self.classes_)
        vote = getattr(self, "weights", "uniform")
        wants_labels = getattr(self, "_y", None) is not None and not _is_regressor(
            self
        )

        daal_params = {
            "fptype": "float" if data.dtype == np.float32 else "double",
            "method": "defaultDense",
            "k": self.n_neighbors if n_neighbors is None else n_neighbors,
            "voteWeights": "voteUniform" if vote == "uniform" else "voteDistance",
            "resultsToCompute": "computeIndicesOfNeighbors|computeDistances",
            "resultsToEvaluate": "computeClassLabels" if wants_labels else "none",
        }
        if n_classes != 0:
            daal_params["nClasses"] = n_classes
        return daal_params
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class NeighborsBase(NeighborsCommonBase, metaclass=ABCMeta):
    """Base class holding the fit and k-neighbors query logic.

    Subclasses are expected to provide ``_onedal_fit`` and
    ``_onedal_predict``, which this class calls to train and query the
    backend model.
    """

    def __init__(
        self,
        n_neighbors=None,
        radius=None,
        algorithm="auto",
        metric="minkowski",
        p=2,
        metric_params=None,
    ):
        """Store the hyper-parameters unchanged; no validation happens here."""
        self.n_neighbors = n_neighbors
        self.radius = radius
        self.algorithm = algorithm
        self.metric = metric
        self.p = p
        self.metric_params = metric_params

    def _validate_targets(self, y, dtype):
        """Flatten ``y`` and cast it to ``dtype`` when the cast is possible.

        If ``astype`` raises ``ValueError`` the flattened array is returned
        with its original dtype instead.
        """
        arr = _column_or_1d(y, warn=True)

        try:
            return arr.astype(dtype, copy=False)
        except ValueError:
            return arr

    def _validate_n_classes(self):
        """Raise ``ValueError`` when ``len(self.classes_)`` is below two."""
        n_classes = len(self.classes_)
        if n_classes < 2:
            raise ValueError(
                "The number of classes has to be greater than one; got %d class"
                % n_classes
            )

    def _fit(self, X, y, queue):
        """Validate the training data and train the backend model.

        Parameters
        ----------
        X : training data.
        y : targets, or ``None`` when the estimator does not need them.
        queue : object exposing ``sycl_device.is_gpu``, or ``None``.

        Returns
        -------
        self, with ``_onedal_model``, ``_fit_X``, ``_fit_method``,
        ``n_samples_fit_``, ``n_features_in_`` (and, for classifiers,
        ``classes_``, ``_y``, ``outputs_2d_``) set.

        Raises
        ------
        ValueError
            If ``n_neighbors`` is not positive, or validation fails.
        TypeError
            If ``n_neighbors`` is not an integral value.
        """
        self._onedal_model = None
        self._tree = None
        self._shape = None
        self.classes_ = None
        # Keep effective metric settings already present on the instance;
        # otherwise fall back to the constructor values.
        self.effective_metric_ = getattr(self, "effective_metric_", self.metric)
        self.effective_metric_params_ = getattr(
            self, "effective_metric_params_", self.metric_params
        )

        if y is not None or self.requires_y:
            # Remember the caller's target shape before validation alters it.
            shape = getattr(y, "shape", None)
            X, y = super()._validate_data(
                X, y, dtype=[np.float64, np.float32], accept_sparse="csr"
            )
            self._shape = shape if shape is not None else y.shape

            if _is_classifier(self):
                if y.ndim == 1 or y.ndim == 2 and y.shape[1] == 1:
                    self.outputs_2d_ = False
                    y = y.reshape((-1, 1))
                else:
                    self.outputs_2d_ = True

                _check_classification_targets(y)
                # Encode each output column as integer class indices.
                self.classes_ = []
                self._y = np.empty(y.shape, dtype=int)
                for k in range(self._y.shape[1]):
                    classes, self._y[:, k] = np.unique(y[:, k], return_inverse=True)
                    self.classes_.append(classes)

                if not self.outputs_2d_:
                    self.classes_ = self.classes_[0]
                    self._y = self._y.ravel()

                self._validate_n_classes()
            else:
                self._y = y
        else:
            X, _ = super()._validate_data(X, dtype=[np.float64, np.float32])

        self.n_samples_fit_ = X.shape[0]
        self.n_features_in_ = X.shape[1]
        self._fit_X = X

        if self.n_neighbors is not None:
            if self.n_neighbors <= 0:
                raise ValueError("Expected n_neighbors > 0. Got %d" % self.n_neighbors)
            if not isinstance(self.n_neighbors, Integral):
                raise TypeError(
                    "n_neighbors does not take %s value, "
                    "enter integer value" % type(self.n_neighbors)
                )

        self._fit_method = super()._parse_auto_method(
            self.algorithm, self.n_samples_fit_, self.n_features_in_
        )

        _fit_y = None
        gpu_device = queue is not None and queue.sycl_device.is_gpu

        # Classifiers always hand targets to the backend; regressors only do
        # so when running on a GPU queue.
        if _is_classifier(self) or (_is_regressor(self) and gpu_device):
            _fit_y = self._validate_targets(self._y, X.dtype).reshape((-1, 1))
        result = self._onedal_fit(X, _fit_y, queue)

        if y is not None and _is_regressor(self):
            self._y = y if self._shape is None else y.reshape(self._shape)

        self._onedal_model = result
        return self

    def _kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None):
        """Find the nearest neighbors of each query point.

        Parameters
        ----------
        X : query points, or ``None`` to query the training data itself (each
            sample is then excluded from its own neighbor list).
        n_neighbors : neighbors per query; defaults to ``self.n_neighbors``.
        return_distance : when true, distances are returned with the indices.
        queue : forwarded to ``self._onedal_predict``.

        Returns
        -------
        ``(distances, indices)`` if ``return_distance`` else ``indices``.

        Raises
        ------
        ValueError
            On a feature-count mismatch, a non-positive ``n_neighbors``, or
            more neighbors requested than fitted samples allow.
        TypeError
            If ``n_neighbors`` is not an integral value.
        """
        n_features = getattr(self, "n_features_in_", None)
        shape = getattr(X, "shape", None)
        if n_features and shape and len(shape) > 1 and shape[1] != n_features:
            raise ValueError(
                (
                    f"X has {X.shape[1]} features, "
                    f"but kneighbors is expecting "
                    f"{n_features} features as input"
                )
            )

        _check_is_fitted(self)

        if n_neighbors is None:
            n_neighbors = self.n_neighbors
        elif n_neighbors <= 0:
            raise ValueError("Expected n_neighbors > 0. Got %d" % n_neighbors)
        elif not isinstance(n_neighbors, Integral):
            raise TypeError(
                "n_neighbors does not take %s value, "
                "enter integer value" % type(n_neighbors)
            )

        query_is_train = X is None
        if query_is_train:
            X = self._fit_X
            # Include an extra neighbor to account for the sample itself being
            # returned, which is removed later
            n_neighbors += 1
        else:
            X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32])

        n_samples_fit = self.n_samples_fit_
        if n_neighbors > n_samples_fit:
            if query_is_train:
                n_neighbors -= 1  # ok to modify inplace because an error is raised
                inequality_str = "n_neighbors < n_samples_fit"
            else:
                inequality_str = "n_neighbors <= n_samples_fit"
            raise ValueError(
                f"Expected {inequality_str}, but "
                f"n_neighbors = {n_neighbors}, n_samples_fit = {n_samples_fit}, "
                f"n_samples = {X.shape[0]}"  # include n_samples for common tests
            )

        method = super()._parse_auto_method(
            self._fit_method, self.n_samples_fit_, n_features
        )

        # super() is used on purpose for the parameter builders: subclasses
        # may override them with fit-specific settings.
        if (
            type(self._onedal_model) is kdtree_knn_classification_model
            or type(self._onedal_model) is bf_knn_classification_model
        ):
            params = super()._get_daal_params(X, n_neighbors=n_neighbors)
            prediction_results = self._onedal_predict(
                self._onedal_model, X, params, queue=queue
            )
            distances = prediction_results.distances
            indices = prediction_results.indices
        else:
            params = super()._get_onedal_params(X, n_neighbors=n_neighbors)
            prediction_results = self._onedal_predict(
                self._onedal_model, X, params, queue=queue
            )
            distances = from_table(prediction_results.distances)
            indices = from_table(prediction_results.indices)

        if method == "kd_tree":
            # Re-sort each row by ascending distance, as done for kd-tree
            # results, in one vectorized pass instead of a Python loop.
            order = np.argsort(distances, axis=1)
            indices = np.take_along_axis(indices, order, axis=1)
            distances = np.take_along_axis(distances, order, axis=1)

        if not query_is_train:
            if return_distance:
                return distances, indices
            return indices

        # If the query data is the same as the indexed data, we would like
        # to ignore the first nearest neighbor of every sample, i.e
        # the sample itself.
        neigh_dist, neigh_ind = distances, indices

        n_queries, _ = X.shape
        sample_range = np.arange(n_queries)[:, None]
        sample_mask = neigh_ind != sample_range

        # Corner case: When the number of duplicates are more
        # than the number of neighbors, the first NN will not
        # be the sample, but a duplicate.
        # In that case mask the first duplicate.
        dup_gr_nbrs = np.all(sample_mask, axis=1)
        sample_mask[:, 0][dup_gr_nbrs] = False

        neigh_ind = np.reshape(neigh_ind[sample_mask], (n_queries, n_neighbors - 1))

        if return_distance:
            neigh_dist = np.reshape(neigh_dist[sample_mask], (n_queries, n_neighbors - 1))
            return neigh_dist, neigh_ind
        return neigh_ind
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
class KNeighborsClassifier(NeighborsBase, ClassifierMixin):
|
|
390
|
+
def __init__(
    self,
    n_neighbors=5,
    *,
    weights="uniform",
    algorithm="auto",
    p=2,
    metric="minkowski",
    metric_params=None,
    **kwargs,
):
    """Store the classifier hyper-parameters.

    Everything except ``weights`` is forwarded to the parent initializer,
    along with any extra keyword arguments; ``weights`` is stored on the
    instance afterwards.
    """
    base_options = {
        "n_neighbors": n_neighbors,
        "algorithm": algorithm,
        "metric": metric,
        "p": p,
        "metric_params": metric_params,
    }
    super().__init__(**base_options, **kwargs)
    self.weights = weights
|
|
410
|
+
|
|
411
|
+
def _get_daal_params(self, data):
    """Return the parent's parameter dict adjusted for classifier training.

    Class labels are always requested and the ``resultsToCompute`` option is
    set to an empty string.
    """
    daal_params = super()._get_daal_params(data)
    daal_params.update(
        {"resultsToEvaluate": "computeClassLabels", "resultsToCompute": ""}
    )
    return daal_params
|
|
416
|
+
|
|
417
|
+
def _onedal_fit(self, X, y, queue):
    """Train the backend model and return it.

    The daal4py brute-force or kd-tree trainer is used when the effective
    metric is euclidean and ``queue`` is not a GPU queue; in every other
    case training goes through ``self._get_backend``.
    """
    on_gpu = queue is not None and queue.sycl_device.is_gpu
    use_daal4py = self.effective_metric_ == "euclidean" and not on_gpu

    if not use_daal4py:
        policy = self._get_policy(queue, X, y)
        X_table, y_table = to_table(X, y, queue=queue)
        onedal_params = self._get_onedal_params(X_table, y)
        trained = self._get_backend(
            "neighbors",
            "classification",
            "train",
            policy,
            onedal_params,
            X_table,
            y_table,
        )
        return trained.model

    daal_params = self._get_daal_params(X)
    trainer_cls = (
        bf_knn_classification_training
        if self._fit_method == "brute"
        else kdtree_knn_classification_training
    )
    training_result = trainer_cls(**daal_params).compute(X, y)
    return training_result.model
|
|
437
|
+
|
|
438
|
+
def _onedal_predict(self, model, X, params, queue):
|
|
439
|
+
if type(self._onedal_model) is kdtree_knn_classification_model:
|
|
440
|
+
return kdtree_knn_classification_prediction(**params).compute(X, model)
|
|
441
|
+
elif type(self._onedal_model) is bf_knn_classification_model:
|
|
442
|
+
return bf_knn_classification_prediction(**params).compute(X, model)
|
|
443
|
+
|
|
444
|
+
policy = self._get_policy(queue, X)
|
|
445
|
+
X = to_table(X, queue=queue)
|
|
446
|
+
if hasattr(self, "_onedal_model"):
|
|
447
|
+
model = self._onedal_model
|
|
448
|
+
else:
|
|
449
|
+
model = self._create_model(
|
|
450
|
+
self._get_backend("neighbors", "classification", None)
|
|
451
|
+
)
|
|
452
|
+
if "responses" not in params["result_option"]:
|
|
453
|
+
params["result_option"] += "|responses"
|
|
454
|
+
params["fptype"] = X.dtype
|
|
455
|
+
result = self._get_backend(
|
|
456
|
+
"neighbors", "classification", "infer", policy, params, model, X
|
|
457
|
+
)
|
|
458
|
+
|
|
459
|
+
return result
|
|
460
|
+
|
|
461
|
+
def fit(self, X, y, queue=None):
|
|
462
|
+
return super()._fit(X, y, queue=queue)
|
|
463
|
+
|
|
464
|
+
def predict(self, X, queue=None):
|
|
465
|
+
X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32])
|
|
466
|
+
onedal_model = getattr(self, "_onedal_model", None)
|
|
467
|
+
n_features = getattr(self, "n_features_in_", None)
|
|
468
|
+
n_samples_fit_ = getattr(self, "n_samples_fit_", None)
|
|
469
|
+
shape = getattr(X, "shape", None)
|
|
470
|
+
if n_features and shape and len(shape) > 1 and shape[1] != n_features:
|
|
471
|
+
raise ValueError(
|
|
472
|
+
(
|
|
473
|
+
f"X has {X.shape[1]} features, "
|
|
474
|
+
f"but KNNClassifier is expecting "
|
|
475
|
+
f"{n_features} features as input"
|
|
476
|
+
)
|
|
477
|
+
)
|
|
478
|
+
|
|
479
|
+
_check_is_fitted(self)
|
|
480
|
+
|
|
481
|
+
self._fit_method = super()._parse_auto_method(
|
|
482
|
+
self.algorithm, n_samples_fit_, n_features
|
|
483
|
+
)
|
|
484
|
+
|
|
485
|
+
self._validate_n_classes()
|
|
486
|
+
|
|
487
|
+
if (
|
|
488
|
+
type(onedal_model) is kdtree_knn_classification_model
|
|
489
|
+
or type(onedal_model) is bf_knn_classification_model
|
|
490
|
+
):
|
|
491
|
+
params = self._get_daal_params(X)
|
|
492
|
+
prediction_result = self._onedal_predict(onedal_model, X, params, queue=queue)
|
|
493
|
+
responses = prediction_result.prediction
|
|
494
|
+
else:
|
|
495
|
+
params = self._get_onedal_params(X)
|
|
496
|
+
prediction_result = self._onedal_predict(onedal_model, X, params, queue=queue)
|
|
497
|
+
responses = from_table(prediction_result.responses)
|
|
498
|
+
|
|
499
|
+
result = self.classes_.take(np.asarray(responses.ravel(), dtype=np.intp))
|
|
500
|
+
return result
|
|
501
|
+
|
|
502
|
+
def predict_proba(self, X, queue=None):
|
|
503
|
+
neigh_dist, neigh_ind = self.kneighbors(X, queue=queue)
|
|
504
|
+
|
|
505
|
+
classes_ = self.classes_
|
|
506
|
+
_y = self._y
|
|
507
|
+
if not self.outputs_2d_:
|
|
508
|
+
_y = self._y.reshape((-1, 1))
|
|
509
|
+
classes_ = [self.classes_]
|
|
510
|
+
|
|
511
|
+
n_queries = _num_samples(X)
|
|
512
|
+
|
|
513
|
+
weights = self._get_weights(neigh_dist, self.weights)
|
|
514
|
+
if weights is None:
|
|
515
|
+
weights = np.ones_like(neigh_ind)
|
|
516
|
+
|
|
517
|
+
all_rows = np.arange(n_queries)
|
|
518
|
+
probabilities = []
|
|
519
|
+
for k, classes_k in enumerate(classes_):
|
|
520
|
+
pred_labels = _y[:, k][neigh_ind]
|
|
521
|
+
proba_k = np.zeros((n_queries, classes_k.size))
|
|
522
|
+
|
|
523
|
+
# a simple ':' index doesn't work right
|
|
524
|
+
for i, idx in enumerate(pred_labels.T): # loop is O(n_neighbors)
|
|
525
|
+
proba_k[all_rows, idx] += weights[:, i]
|
|
526
|
+
|
|
527
|
+
# normalize 'votes' into real [0,1] probabilities
|
|
528
|
+
normalizer = proba_k.sum(axis=1)[:, np.newaxis]
|
|
529
|
+
normalizer[normalizer == 0.0] = 1.0
|
|
530
|
+
proba_k /= normalizer
|
|
531
|
+
|
|
532
|
+
probabilities.append(proba_k)
|
|
533
|
+
|
|
534
|
+
if not self.outputs_2d_:
|
|
535
|
+
probabilities = probabilities[0]
|
|
536
|
+
|
|
537
|
+
return probabilities
|
|
538
|
+
|
|
539
|
+
def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None):
|
|
540
|
+
return super()._kneighbors(X, n_neighbors, return_distance, queue=queue)
|
|
541
|
+
|
|
542
|
+
|
|
543
|
+
class KNeighborsRegressor(NeighborsBase, RegressorMixin):
    """k-nearest-neighbors regressor with daal4py and oneDAL backends.

    Prediction runs fully on the backend only for a GPU queue combined with
    uniform weights (:meth:`_predict_gpu`); otherwise neighbors are looked
    up and the targets are averaged with NumPy (:meth:`_predict_skl`).
    """

    def __init__(
        self,
        n_neighbors=5,
        *,
        weights="uniform",
        algorithm="auto",
        p=2,
        metric="minkowski",
        metric_params=None,
        **kwargs,
    ):
        """Store the hyper-parameters.

        Everything except ``weights`` is forwarded to the base-class
        constructor; ``weights`` is kept on the instance.
        """
        super().__init__(
            n_neighbors=n_neighbors,
            algorithm=algorithm,
            metric=metric,
            p=p,
            metric_params=metric_params,
            **kwargs,
        )
        self.weights = weights

    def _get_daal_params(self, data):
        """Return the base daal4py parameters, requesting indices and distances."""
        params = super()._get_daal_params(data)
        params["resultsToCompute"] = "computeIndicesOfNeighbors|computeDistances"
        params["resultsToEvaluate"] = "none"
        return params

    def _onedal_fit(self, X, y, queue):
        """Train a model on ``X`` / ``y`` and return it."""
        gpu_device = queue is not None and queue.sycl_device.is_gpu
        if self.effective_metric_ == "euclidean" and not gpu_device:
            # The daal4py kNN *classification* trainers are used here as
            # well; with ``resultsToEvaluate`` set to "none" they presumably
            # serve only as a neighbor-search structure -- TODO confirm.
            params = self._get_daal_params(X)
            if self._fit_method == "brute":
                train_alg = bf_knn_classification_training
            else:
                train_alg = kdtree_knn_classification_training

            return train_alg(**params).compute(X, y).model

        policy = self._get_policy(queue, X, y)
        X_table, y_table = to_table(X, y, queue=queue)
        params = self._get_onedal_params(X_table, y)
        train_alg_regr = self._get_backend("neighbors", "regression", None)
        train_alg_srch = self._get_backend("neighbors", "search", None)

        # GPU: train the regression backend with targets.
        # Otherwise: train the search backend on X only.
        if gpu_device:
            return train_alg_regr.train(policy, params, X_table, y_table).model
        return train_alg_srch.train(policy, params, X_table).model

    def _onedal_predict(self, model, X, params, queue):
        """Run inference for ``X`` and return the raw backend result.

        On the oneDAL path ``params`` is updated in place: ``"|responses"``
        is appended to ``params["result_option"]`` for GPU queues when
        missing, and ``params["fptype"]`` is set from the table's dtype.
        """
        if type(model) is kdtree_knn_classification_model:
            return kdtree_knn_classification_prediction(**params).compute(X, model)
        elif type(model) is bf_knn_classification_model:
            return bf_knn_classification_prediction(**params).compute(X, model)

        gpu_device = queue is not None and queue.sycl_device.is_gpu
        policy = self._get_policy(queue, X)
        X = to_table(X, queue=queue)
        # Same backend choice as in ``_onedal_fit``.
        backend = (
            self._get_backend("neighbors", "regression", None)
            if gpu_device
            else self._get_backend("neighbors", "search", None)
        )

        if hasattr(self, "_onedal_model"):
            model = self._onedal_model
        else:
            model = self._create_model(backend)
        if "responses" not in params["result_option"] and gpu_device:
            params["result_option"] += "|responses"
        params["fptype"] = X.dtype
        result = backend.infer(policy, params, model, X)

        return result

    def fit(self, X, y, queue=None):
        """Fit the regressor by delegating to the base-class ``_fit``."""
        return super()._fit(X, y, queue=queue)

    def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None):
        """Find neighbors by delegating to the base-class ``_kneighbors``."""
        return super()._kneighbors(X, n_neighbors, return_distance, queue=queue)

    def _predict_gpu(self, X, queue=None):
        """Predict targets using the backend's own ``responses`` output.

        Raises
        ------
        ValueError
            If ``X`` is at least 2-D and its second dimension differs from
            the stored ``n_features_in_``.
        """
        X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32])
        onedal_model = getattr(self, "_onedal_model", None)
        n_features = getattr(self, "n_features_in_", None)
        n_samples_fit_ = getattr(self, "n_samples_fit_", None)
        shape = getattr(X, "shape", None)
        if n_features and shape and len(shape) > 1 and shape[1] != n_features:
            # The message previously named "KNNClassifier" (copied from the
            # classifier); it now names this estimator.
            raise ValueError(
                (
                    f"X has {X.shape[1]} features, "
                    f"but KNNRegressor is expecting "
                    f"{n_features} features as input"
                )
            )

        _check_is_fitted(self)

        # Re-resolve the concrete algorithm from the fitted data dimensions.
        self._fit_method = super()._parse_auto_method(
            self.algorithm, n_samples_fit_, n_features
        )

        params = self._get_onedal_params(X)

        prediction_result = self._onedal_predict(onedal_model, X, params, queue=queue)
        responses = from_table(prediction_result.responses)
        result = responses.ravel()

        return result

    def _predict_skl(self, X, queue=None):
        """Predict targets by averaging the training targets of the neighbors.

        Uses a plain mean when ``_get_weights`` returns ``None`` and a
        weighted average per output column otherwise.  The result is
        flattened when the stored targets ``_y`` are 1-D.
        """
        neigh_dist, neigh_ind = self.kneighbors(X, queue=queue)

        weights = self._get_weights(neigh_dist, self.weights)

        _y = self._y
        if _y.ndim == 1:
            # Work on a 2-D view so both cases share the same indexing.
            _y = _y.reshape((-1, 1))

        if weights is None:
            y_pred = np.mean(_y[neigh_ind], axis=1)
        else:
            y_pred = np.empty((X.shape[0], _y.shape[1]), dtype=np.float64)
            denom = np.sum(weights, axis=1)

            for j in range(_y.shape[1]):
                num = np.sum(_y[neigh_ind, j] * weights, axis=1)
                y_pred[:, j] = num / denom

        if self._y.ndim == 1:
            y_pred = y_pred.ravel()

        return y_pred

    def predict(self, X, queue=None):
        """Predict targets for ``X``.

        Uses :meth:`_predict_gpu` when ``queue`` targets a GPU device and
        ``weights`` is ``"uniform"``; otherwise :meth:`_predict_skl`.
        """
        gpu_device = queue is not None and queue.sycl_device.is_gpu
        is_uniform_weights = getattr(self, "weights", "uniform") == "uniform"
        return (
            self._predict_gpu(X, queue=queue)
            if gpu_device and is_uniform_weights
            else self._predict_skl(X, queue=queue)
        )
|
|
686
|
+
|
|
687
|
+
|
|
688
|
+
class NearestNeighbors(NeighborsBase):
    """Neighbor-search estimator with daal4py and oneDAL backends.

    The class defines fitting and ``kneighbors`` lookup only; it has no
    prediction method of its own.
    """

    def __init__(
        self,
        n_neighbors=5,
        *,
        weights="uniform",
        algorithm="auto",
        p=2,
        metric="minkowski",
        metric_params=None,
        **kwargs,
    ):
        """Store the hyper-parameters.

        Everything except ``weights`` is forwarded to the base-class
        constructor.  ``weights`` is kept on the instance but is not read
        by any method defined in this class.
        """
        super().__init__(
            n_neighbors=n_neighbors,
            algorithm=algorithm,
            metric=metric,
            p=p,
            metric_params=metric_params,
            **kwargs,
        )
        self.weights = weights

    def _get_daal_params(self, data):
        """Return the base daal4py parameters, requesting indices and distances.

        Class labels are additionally requested only when the instance
        holds a non-``None`` ``_y`` attribute.
        """
        params = super()._get_daal_params(data)
        params["resultsToCompute"] = "computeIndicesOfNeighbors|computeDistances"
        params["resultsToEvaluate"] = (
            "none" if getattr(self, "_y", None) is None else "computeClassLabels"
        )
        return params

    def _onedal_fit(self, X, y, queue):
        """Train a model on ``X`` (and ``y`` on the daal4py path) and return it."""
        gpu_device = queue is not None and queue.sycl_device.is_gpu
        if self.effective_metric_ == "euclidean" and not gpu_device:
            # daal4py path: pick the trainer matching the resolved fit method.
            params = self._get_daal_params(X)
            if self._fit_method == "brute":
                train_alg = bf_knn_classification_training
            else:
                train_alg = kdtree_knn_classification_training

            return train_alg(**params).compute(X, y).model

        # oneDAL path: only X is converted to a table and passed to training.
        policy = self._get_policy(queue, X, y)
        X_table = to_table(X, queue=queue)
        params = self._get_onedal_params(X_table, y)
        train_alg = self._get_backend(
            "neighbors", "search", "train", policy, params, X_table
        )

        return train_alg.model

    def _onedal_predict(self, model, X, params, queue):
        """Run neighbor search for ``X`` and return the raw backend result.

        On the oneDAL path ``params["fptype"]`` is set in place from the
        converted table's dtype.
        """
        # Dispatch on the model that was actually passed in (as
        # KNeighborsRegressor does) rather than on ``self._onedal_model``:
        # the passed model is the one handed to ``compute`` below, and
        # reading the attribute unconditionally made the ``hasattr``
        # fallback further down unreachable.
        if type(model) is kdtree_knn_classification_model:
            return kdtree_knn_classification_prediction(**params).compute(X, model)
        elif type(model) is bf_knn_classification_model:
            return bf_knn_classification_prediction(**params).compute(X, model)

        policy = self._get_policy(queue, X)
        X = to_table(X, queue=queue)
        if hasattr(self, "_onedal_model"):
            model = self._onedal_model
        else:
            model = self._create_model(self._get_backend("neighbors", "search", None))

        params["fptype"] = X.dtype
        result = self._get_backend(
            "neighbors", "search", "infer", policy, params, model, X
        )

        return result

    def fit(self, X, y=None, queue=None):
        """Fit the estimator by delegating to the base-class ``_fit``.

        ``y`` defaults to ``None`` so the estimator can be fitted from
        ``X`` alone; an explicitly passed ``y`` is forwarded unchanged.
        NOTE(review): relies on the base ``_fit`` accepting ``y=None`` --
        confirm against ``NeighborsBase``.
        """
        return super()._fit(X, y, queue=queue)

    def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None):
        """Find neighbors by delegating to the base-class ``_kneighbors``."""
        return super()._kneighbors(X, n_neighbors, return_distance, queue=queue)
|