PyPI - scikit-learn-intelex - Versions diffs - 2025.1.0__py310-none-manylinux_2_28_x86_64.whl - Mend

scikit-learn-intelex 2025.1.0__py310-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (280) hide show

daal4py/__init__.py +73 -0
daal4py/__main__.py +58 -0
daal4py/_daal4py.cpython-310-x86_64-linux-gnu.so +0 -0
daal4py/doc/third-party-programs.txt +424 -0
daal4py/mb/__init__.py +19 -0
daal4py/mb/model_builders.py +377 -0
daal4py/mpi_transceiver.cpython-310-x86_64-linux-gnu.so +0 -0
daal4py/sklearn/__init__.py +40 -0
daal4py/sklearn/_n_jobs_support.py +248 -0
daal4py/sklearn/_utils.py +245 -0
daal4py/sklearn/cluster/__init__.py +20 -0
daal4py/sklearn/cluster/dbscan.py +165 -0
daal4py/sklearn/cluster/k_means.py +597 -0
daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
daal4py/sklearn/decomposition/__init__.py +19 -0
daal4py/sklearn/decomposition/_pca.py +524 -0
daal4py/sklearn/ensemble/AdaBoostClassifier.py +196 -0
daal4py/sklearn/ensemble/GBTDAAL.py +337 -0
daal4py/sklearn/ensemble/__init__.py +27 -0
daal4py/sklearn/ensemble/_forest.py +1397 -0
daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
daal4py/sklearn/linear_model/__init__.py +29 -0
daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
daal4py/sklearn/linear_model/_linear.py +272 -0
daal4py/sklearn/linear_model/_ridge.py +325 -0
daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
daal4py/sklearn/linear_model/linear.py +17 -0
daal4py/sklearn/linear_model/logistic_loss.py +195 -0
daal4py/sklearn/linear_model/logistic_path.py +1026 -0
daal4py/sklearn/linear_model/ridge.py +17 -0
daal4py/sklearn/linear_model/tests/test_linear.py +208 -0
daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
daal4py/sklearn/manifold/__init__.py +19 -0
daal4py/sklearn/manifold/_t_sne.py +405 -0
daal4py/sklearn/metrics/__init__.py +20 -0
daal4py/sklearn/metrics/_pairwise.py +236 -0
daal4py/sklearn/metrics/_ranking.py +210 -0
daal4py/sklearn/model_selection/__init__.py +19 -0
daal4py/sklearn/model_selection/_split.py +309 -0
daal4py/sklearn/model_selection/tests/test_split.py +56 -0
daal4py/sklearn/monkeypatch/__init__.py +0 -0
daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
daal4py/sklearn/monkeypatch/tests/test_patching.py +90 -0
daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +117 -0
daal4py/sklearn/neighbors/__init__.py +21 -0
daal4py/sklearn/neighbors/_base.py +503 -0
daal4py/sklearn/neighbors/_classification.py +139 -0
daal4py/sklearn/neighbors/_regression.py +74 -0
daal4py/sklearn/neighbors/_unsupervised.py +55 -0
daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
daal4py/sklearn/svm/__init__.py +19 -0
daal4py/sklearn/svm/svm.py +734 -0
daal4py/sklearn/utils/__init__.py +21 -0
daal4py/sklearn/utils/base.py +75 -0
daal4py/sklearn/utils/tests/test_utils.py +51 -0
daal4py/sklearn/utils/validation.py +693 -0
onedal/__init__.py +83 -0
onedal/_config.py +54 -0
onedal/_device_offload.py +222 -0
onedal/_onedal_py_dpc.cpython-310-x86_64-linux-gnu.so +0 -0
onedal/_onedal_py_host.cpython-310-x86_64-linux-gnu.so +0 -0
onedal/_onedal_py_spmd_dpc.cpython-310-x86_64-linux-gnu.so +0 -0
onedal/basic_statistics/__init__.py +20 -0
onedal/basic_statistics/basic_statistics.py +107 -0
onedal/basic_statistics/incremental_basic_statistics.py +160 -0
onedal/basic_statistics/tests/test_basic_statistics.py +298 -0
onedal/basic_statistics/tests/test_incremental_basic_statistics.py +196 -0
onedal/cluster/__init__.py +27 -0
onedal/cluster/dbscan.py +110 -0
onedal/cluster/kmeans.py +564 -0
onedal/cluster/kmeans_init.py +115 -0
onedal/cluster/tests/test_dbscan.py +125 -0
onedal/cluster/tests/test_kmeans.py +88 -0
onedal/cluster/tests/test_kmeans_init.py +93 -0
onedal/common/_base.py +38 -0
onedal/common/_estimator_checks.py +47 -0
onedal/common/_mixin.py +62 -0
onedal/common/_policy.py +59 -0
onedal/common/_spmd_policy.py +30 -0
onedal/common/hyperparameters.py +125 -0
onedal/common/tests/test_policy.py +76 -0
onedal/covariance/__init__.py +20 -0
onedal/covariance/covariance.py +125 -0
onedal/covariance/incremental_covariance.py +146 -0
onedal/covariance/tests/test_covariance.py +50 -0
onedal/covariance/tests/test_incremental_covariance.py +122 -0
onedal/datatypes/__init__.py +19 -0
onedal/datatypes/_data_conversion.py +154 -0
onedal/datatypes/tests/common.py +126 -0
onedal/datatypes/tests/test_data.py +414 -0
onedal/decomposition/__init__.py +20 -0
onedal/decomposition/incremental_pca.py +204 -0
onedal/decomposition/pca.py +186 -0
onedal/decomposition/tests/test_incremental_pca.py +198 -0
onedal/ensemble/__init__.py +29 -0
onedal/ensemble/forest.py +727 -0
onedal/ensemble/tests/test_random_forest.py +97 -0
onedal/linear_model/__init__.py +27 -0
onedal/linear_model/incremental_linear_model.py +258 -0
onedal/linear_model/linear_model.py +329 -0
onedal/linear_model/logistic_regression.py +249 -0
onedal/linear_model/tests/test_incremental_linear_regression.py +168 -0
onedal/linear_model/tests/test_incremental_ridge_regression.py +107 -0
onedal/linear_model/tests/test_linear_regression.py +250 -0
onedal/linear_model/tests/test_logistic_regression.py +95 -0
onedal/linear_model/tests/test_ridge.py +95 -0
onedal/neighbors/__init__.py +19 -0
onedal/neighbors/neighbors.py +767 -0
onedal/neighbors/tests/test_knn_classification.py +49 -0
onedal/primitives/__init__.py +27 -0
onedal/primitives/get_tree.py +25 -0
onedal/primitives/kernel_functions.py +153 -0
onedal/primitives/tests/test_kernel_functions.py +159 -0
onedal/spmd/__init__.py +25 -0
onedal/spmd/_base.py +30 -0
onedal/spmd/basic_statistics/__init__.py +20 -0
onedal/spmd/basic_statistics/basic_statistics.py +30 -0
onedal/spmd/basic_statistics/incremental_basic_statistics.py +69 -0
onedal/spmd/cluster/__init__.py +28 -0
onedal/spmd/cluster/dbscan.py +23 -0
onedal/spmd/cluster/kmeans.py +56 -0
onedal/spmd/covariance/__init__.py +20 -0
onedal/spmd/covariance/covariance.py +26 -0
onedal/spmd/covariance/incremental_covariance.py +82 -0
onedal/spmd/decomposition/__init__.py +20 -0
onedal/spmd/decomposition/incremental_pca.py +117 -0
onedal/spmd/decomposition/pca.py +26 -0
onedal/spmd/ensemble/__init__.py +19 -0
onedal/spmd/ensemble/forest.py +28 -0
onedal/spmd/linear_model/__init__.py +21 -0
onedal/spmd/linear_model/incremental_linear_model.py +97 -0
onedal/spmd/linear_model/linear_model.py +30 -0
onedal/spmd/linear_model/logistic_regression.py +38 -0
onedal/spmd/neighbors/__init__.py +19 -0
onedal/spmd/neighbors/neighbors.py +75 -0
onedal/svm/__init__.py +19 -0
onedal/svm/svm.py +556 -0
onedal/svm/tests/test_csr_svm.py +351 -0
onedal/svm/tests/test_nusvc.py +204 -0
onedal/svm/tests/test_nusvr.py +210 -0
onedal/svm/tests/test_svc.py +176 -0
onedal/svm/tests/test_svr.py +243 -0
onedal/tests/test_common.py +57 -0
onedal/tests/utils/_dataframes_support.py +162 -0
onedal/tests/utils/_device_selection.py +102 -0
onedal/utils/__init__.py +49 -0
onedal/utils/_array_api.py +81 -0
onedal/utils/_dpep_helpers.py +56 -0
onedal/utils/validation.py +440 -0
scikit_learn_intelex-2025.1.0.dist-info/LICENSE.txt +202 -0
scikit_learn_intelex-2025.1.0.dist-info/METADATA +231 -0
scikit_learn_intelex-2025.1.0.dist-info/RECORD +280 -0
scikit_learn_intelex-2025.1.0.dist-info/WHEEL +5 -0
scikit_learn_intelex-2025.1.0.dist-info/top_level.txt +3 -0
sklearnex/__init__.py +66 -0
sklearnex/__main__.py +58 -0
sklearnex/_config.py +116 -0
sklearnex/_device_offload.py +126 -0
sklearnex/_utils.py +132 -0
sklearnex/basic_statistics/__init__.py +20 -0
sklearnex/basic_statistics/basic_statistics.py +230 -0
sklearnex/basic_statistics/incremental_basic_statistics.py +345 -0
sklearnex/basic_statistics/tests/test_basic_statistics.py +270 -0
sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +404 -0
sklearnex/cluster/__init__.py +20 -0
sklearnex/cluster/dbscan.py +197 -0
sklearnex/cluster/k_means.py +395 -0
sklearnex/cluster/tests/test_dbscan.py +38 -0
sklearnex/cluster/tests/test_kmeans.py +159 -0
sklearnex/conftest.py +82 -0
sklearnex/covariance/__init__.py +19 -0
sklearnex/covariance/incremental_covariance.py +398 -0
sklearnex/covariance/tests/test_incremental_covariance.py +237 -0
sklearnex/decomposition/__init__.py +19 -0
sklearnex/decomposition/pca.py +425 -0
sklearnex/decomposition/tests/test_pca.py +58 -0
sklearnex/dispatcher.py +543 -0
sklearnex/doc/third-party-programs.txt +424 -0
sklearnex/ensemble/__init__.py +29 -0
sklearnex/ensemble/_forest.py +2029 -0
sklearnex/ensemble/tests/test_forest.py +135 -0
sklearnex/glob/__main__.py +72 -0
sklearnex/glob/dispatcher.py +101 -0
sklearnex/linear_model/__init__.py +32 -0
sklearnex/linear_model/coordinate_descent.py +30 -0
sklearnex/linear_model/incremental_linear.py +482 -0
sklearnex/linear_model/incremental_ridge.py +425 -0
sklearnex/linear_model/linear.py +341 -0
sklearnex/linear_model/logistic_regression.py +413 -0
sklearnex/linear_model/ridge.py +24 -0
sklearnex/linear_model/tests/test_incremental_linear.py +207 -0
sklearnex/linear_model/tests/test_incremental_ridge.py +153 -0
sklearnex/linear_model/tests/test_linear.py +167 -0
sklearnex/linear_model/tests/test_logreg.py +134 -0
sklearnex/manifold/__init__.py +19 -0
sklearnex/manifold/t_sne.py +21 -0
sklearnex/manifold/tests/test_tsne.py +26 -0
sklearnex/metrics/__init__.py +23 -0
sklearnex/metrics/pairwise.py +22 -0
sklearnex/metrics/ranking.py +20 -0
sklearnex/metrics/tests/test_metrics.py +39 -0
sklearnex/model_selection/__init__.py +21 -0
sklearnex/model_selection/split.py +22 -0
sklearnex/model_selection/tests/test_model_selection.py +34 -0
sklearnex/neighbors/__init__.py +27 -0
sklearnex/neighbors/_lof.py +236 -0
sklearnex/neighbors/common.py +310 -0
sklearnex/neighbors/knn_classification.py +231 -0
sklearnex/neighbors/knn_regression.py +207 -0
sklearnex/neighbors/knn_unsupervised.py +178 -0
sklearnex/neighbors/tests/test_neighbors.py +82 -0
sklearnex/preview/__init__.py +17 -0
sklearnex/preview/covariance/__init__.py +19 -0
sklearnex/preview/covariance/covariance.py +138 -0
sklearnex/preview/covariance/tests/test_covariance.py +66 -0
sklearnex/preview/decomposition/__init__.py +19 -0
sklearnex/preview/decomposition/incremental_pca.py +233 -0
sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
sklearnex/preview/linear_model/__init__.py +19 -0
sklearnex/preview/linear_model/ridge.py +424 -0
sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
sklearnex/spmd/__init__.py +25 -0
sklearnex/spmd/basic_statistics/__init__.py +20 -0
sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +307 -0
sklearnex/spmd/cluster/__init__.py +30 -0
sklearnex/spmd/cluster/dbscan.py +50 -0
sklearnex/spmd/cluster/kmeans.py +21 -0
sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
sklearnex/spmd/covariance/__init__.py +20 -0
sklearnex/spmd/covariance/covariance.py +21 -0
sklearnex/spmd/covariance/incremental_covariance.py +37 -0
sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
sklearnex/spmd/decomposition/__init__.py +20 -0
sklearnex/spmd/decomposition/incremental_pca.py +30 -0
sklearnex/spmd/decomposition/pca.py +21 -0
sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
sklearnex/spmd/ensemble/__init__.py +19 -0
sklearnex/spmd/ensemble/forest.py +71 -0
sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
sklearnex/spmd/linear_model/__init__.py +21 -0
sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
sklearnex/spmd/linear_model/linear_model.py +21 -0
sklearnex/spmd/linear_model/logistic_regression.py +21 -0
sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +329 -0
sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +162 -0
sklearnex/spmd/neighbors/__init__.py +19 -0
sklearnex/spmd/neighbors/neighbors.py +25 -0
sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
sklearnex/svm/__init__.py +29 -0
sklearnex/svm/_common.py +339 -0
sklearnex/svm/nusvc.py +371 -0
sklearnex/svm/nusvr.py +170 -0
sklearnex/svm/svc.py +399 -0
sklearnex/svm/svr.py +167 -0
sklearnex/svm/tests/test_svm.py +93 -0
sklearnex/tests/test_common.py +390 -0
sklearnex/tests/test_config.py +123 -0
sklearnex/tests/test_memory_usage.py +379 -0
sklearnex/tests/test_monkeypatch.py +276 -0
sklearnex/tests/test_n_jobs_support.py +108 -0
sklearnex/tests/test_parallel.py +48 -0
sklearnex/tests/test_patching.py +385 -0
sklearnex/tests/test_run_to_run_stability.py +321 -0
sklearnex/tests/utils/__init__.py +44 -0
sklearnex/tests/utils/base.py +371 -0
sklearnex/tests/utils/spmd.py +198 -0
sklearnex/utils/__init__.py +19 -0
sklearnex/utils/_array_api.py +82 -0
sklearnex/utils/parallel.py +59 -0
sklearnex/utils/tests/test_finite.py +89 -0
sklearnex/utils/validation.py +17 -0

sklearnex/svm/svr.py ADDED Viewed

@@ -0,0 +1,167 @@
+# ==============================================================================
+# Copyright 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import numpy as np
+from sklearn.svm import SVR as _sklearn_SVR
+from sklearn.utils.validation import _deprecate_positional_args, check_is_fitted
+from daal4py.sklearn._n_jobs_support import control_n_jobs
+from daal4py.sklearn._utils import sklearn_check_version
+from onedal.svm import SVR as onedal_SVR
+from .._device_offload import dispatch, wrap_output_data
+from ._common import BaseSVR
+# Pick the ``validate_data`` callable once at import time: below sklearn 1.6
+# the method stored on BaseSVR is used, from 1.6 on the free function that
+# sklearn.utils.validation provides.
+if not sklearn_check_version("1.6"):
+    validate_data = BaseSVR._validate_data
+else:
+    from sklearn.utils.validation import validate_data
+@control_n_jobs(decorated_methods=["fit", "predict", "score"])
+class SVR(_sklearn_SVR, BaseSVR):
+    # Subclass of sklearn's SVR whose fit/predict/score hand two candidate
+    # implementations to ``dispatch``: a oneDAL-backed method of this class and
+    # the stock sklearn method. The user-facing docstrings are copied from
+    # sklearn (here and at the end of the class body), so explanatory notes on
+    # the public methods live in comments rather than docstrings.
+    __doc__ = _sklearn_SVR.__doc__
+    if sklearn_check_version("1.2"):
+        # Shallow copy of sklearn's constraint table for this class.
+        _parameter_constraints: dict = {**_sklearn_SVR._parameter_constraints}
+    @_deprecate_positional_args
+    def __init__(
+        self,
+        *,
+        kernel="rbf",
+        degree=3,
+        gamma="scale",
+        coef0=0.0,
+        tol=1e-3,
+        C=1.0,
+        epsilon=0.1,
+        shrinking=True,
+        cache_size=200,
+        verbose=False,
+        max_iter=-1,
+    ):
+        # All hyper-parameters are keyword-only and forwarded unchanged to the
+        # parent constructor.
+        super().__init__(
+            kernel=kernel,
+            degree=degree,
+            gamma=gamma,
+            coef0=coef0,
+            tol=tol,
+            C=C,
+            epsilon=epsilon,
+            shrinking=shrinking,
+            cache_size=cache_size,
+            verbose=verbose,
+            max_iter=max_iter,
+        )
+    def fit(self, X, y, sample_weight=None):
+        # Validates parameters, dispatches the fit and returns ``self``.
+        if sklearn_check_version("1.2"):
+            self._validate_params()
+        elif self.C <= 0:
+            # else if added to correct issues with
+            # sklearn tests:
+            # svm/tests/test_sparse.py::test_error
+            # svm/tests/test_svm.py::test_bad_input
+            # for sklearn versions < 1.2 (i.e. without
+            # validate_params parameter checking)
+            # Without this, a segmentation fault with
+            # Windows fatal exception: access violation
+            # occurs
+            raise ValueError("C <= 0")
+        dispatch(
+            self,
+            "fit",
+            {
+                "onedal": self.__class__._onedal_fit,
+                "sklearn": _sklearn_SVR.fit,
+            },
+            X,
+            y,
+            sample_weight=sample_weight,
+        )
+        return self
+    @wrap_output_data
+    def predict(self, X):
+        # Requires a fitted estimator; returns whatever ``dispatch`` returns.
+        check_is_fitted(self)
+        return dispatch(
+            self,
+            "predict",
+            {
+                "onedal": self.__class__._onedal_predict,
+                "sklearn": _sklearn_SVR.predict,
+            },
+            X,
+        )
+    @wrap_output_data
+    def score(self, X, y, sample_weight=None):
+        # Requires a fitted estimator; returns whatever ``dispatch`` returns.
+        # NOTE(review): ``_onedal_score`` is not defined in this class body --
+        # presumably inherited from BaseSVR; confirm.
+        check_is_fitted(self)
+        return dispatch(
+            self,
+            "score",
+            {
+                "onedal": self.__class__._onedal_score,
+                "sklearn": _sklearn_SVR.score,
+            },
+            X,
+            y,
+            sample_weight=sample_weight,
+        )
+    def _onedal_fit(self, X, y, sample_weight=None, queue=None):
+        """Fit a oneDAL SVR estimator and store it on ``self``.
+
+        ``X`` and ``sample_weight`` are replaced by the values returned from
+        ``self._onedal_fit_checks``; the estimator is then built from this
+        object's hyper-parameters, fitted, and kept as
+        ``self._onedal_estimator`` before ``self._save_attributes()`` runs.
+        """
+        # NOTE(review): the second value returned by the checks is discarded
+        # and the caller's ``y`` is passed to the estimator as given.
+        X, _, sample_weight = self._onedal_fit_checks(X, y, sample_weight)
+        onedal_params = {
+            "C": self.C,
+            "epsilon": self.epsilon,
+            "kernel": self.kernel,
+            "degree": self.degree,
+            "gamma": self._compute_gamma_sigma(X),
+            "coef0": self.coef0,
+            "tol": self.tol,
+            "shrinking": self.shrinking,
+            "cache_size": self.cache_size,
+            "max_iter": self.max_iter,
+        }
+        self._onedal_estimator = onedal_SVR(**onedal_params)
+        self._onedal_estimator.fit(X, y, sample_weight, queue=queue)
+        self._save_attributes()
+    def _onedal_predict(self, X, queue=None):
+        """Validate ``X`` and return the oneDAL estimator's prediction.
+
+        ``X`` is validated as float64/float32 data, with CSR sparse input
+        accepted and the finiteness check switched off, then passed to
+        ``self._onedal_estimator.predict`` together with ``queue``.
+        """
+        if sklearn_check_version("1.0"):
+            X = validate_data(
+                self,
+                X,
+                dtype=[np.float64, np.float32],
+                force_all_finite=False,
+                accept_sparse="csr",
+                reset=False,
+            )
+        else:
+            # ``check_array`` is not imported at module level; import it here
+            # so this legacy branch does not fail with a NameError.
+            from sklearn.utils.validation import check_array
+            X = check_array(
+                X,
+                dtype=[np.float64, np.float32],
+                force_all_finite=False,
+                accept_sparse="csr",
+            )
+        return self._onedal_estimator.predict(X, queue=queue)
+    # Reuse sklearn's public docstrings for the overridden methods.
+    fit.__doc__ = _sklearn_SVR.fit.__doc__
+    predict.__doc__ = _sklearn_SVR.predict.__doc__
+    score.__doc__ = _sklearn_SVR.score.__doc__

sklearnex/svm/tests/test_svm.py ADDED Viewed

@@ -0,0 +1,93 @@
+# ===============================================================================
+# Copyright 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===============================================================================
+import numpy as np
+import pytest
+from numpy.testing import assert_allclose
+from onedal.tests.utils._dataframes_support import (
+    _as_numpy,
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import_svc(dataframe, queue):
+    """Fit a linear SVC on six 2-D points and check its module, dual
+    coefficients and support indices."""
+    runs_on_gpu = queue and queue.sycl_device.is_gpu
+    if runs_on_gpu:
+        pytest.skip("SVC fit for the GPU sycl_queue is buggy.")
+    from sklearnex.svm import SVC
+    points = np.array([[-2, -1], [-1, -1], [-1, -2], [+1, +1], [+1, +2], [+2, +1]])
+    labels = np.array([1, 1, 1, 2, 2, 2])
+    # Convert both inputs to the parametrized dataframe type / queue.
+    convert_kwargs = {"sycl_queue": queue, "target_df": dataframe}
+    points = _convert_to_dataframe(points, **convert_kwargs)
+    labels = _convert_to_dataframe(labels, **convert_kwargs)
+    model = SVC(kernel="linear").fit(points, labels)
+    module_name = model.__module__
+    assert "daal4py" in module_name or "sklearnex" in module_name
+    dual_coef = _as_numpy(model.dual_coef_)
+    assert_allclose(dual_coef, [[-0.25, 0.25]])
+    support = _as_numpy(model.support_)
+    assert_allclose(support, [1, 3])
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import_nusvc(dataframe, queue):
+    """Fit a linear NuSVC on six 2-D points and check its module, dual
+    coefficients and support indices."""
+    runs_on_gpu = queue and queue.sycl_device.is_gpu
+    if runs_on_gpu:
+        pytest.skip("NuSVC fit for the GPU sycl_queue is buggy.")
+    from sklearnex.svm import NuSVC
+    points = np.array([[-2, -1], [-1, -1], [-1, -2], [+1, +1], [+1, +2], [+2, +1]])
+    labels = np.array([1, 1, 1, 2, 2, 2])
+    # Convert both inputs to the parametrized dataframe type / queue.
+    convert_kwargs = {"sycl_queue": queue, "target_df": dataframe}
+    points = _convert_to_dataframe(points, **convert_kwargs)
+    labels = _convert_to_dataframe(labels, **convert_kwargs)
+    model = NuSVC(kernel="linear").fit(points, labels)
+    module_name = model.__module__
+    assert "daal4py" in module_name or "sklearnex" in module_name
+    expected_dual_coef = [[-0.04761905, -0.0952381, 0.0952381, 0.04761905]]
+    assert_allclose(_as_numpy(model.dual_coef_), expected_dual_coef)
+    expected_support = [0, 1, 3, 4]
+    assert_allclose(_as_numpy(model.support_), expected_support)
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import_svr(dataframe, queue):
+    """Fit a linear SVR on six 2-D points and check its module, dual
+    coefficients and support indices."""
+    runs_on_gpu = queue and queue.sycl_device.is_gpu
+    if runs_on_gpu:
+        pytest.skip("SVR fit for the GPU sycl_queue is buggy.")
+    from sklearnex.svm import SVR
+    points = np.array([[-2, -1], [-1, -1], [-1, -2], [+1, +1], [+1, +2], [+2, +1]])
+    targets = np.array([1, 1, 1, 2, 2, 2])
+    # Convert both inputs to the parametrized dataframe type / queue.
+    convert_kwargs = {"sycl_queue": queue, "target_df": dataframe}
+    points = _convert_to_dataframe(points, **convert_kwargs)
+    targets = _convert_to_dataframe(targets, **convert_kwargs)
+    model = SVR(kernel="linear").fit(points, targets)
+    module_name = model.__module__
+    assert "daal4py" in module_name or "sklearnex" in module_name
+    dual_coef = _as_numpy(model.dual_coef_)
+    assert_allclose(dual_coef, [[-0.1, 0.1]])
+    support = _as_numpy(model.support_)
+    assert_allclose(support, [1, 3])
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import_nusvr(dataframe, queue):
+    """Fit a linear NuSVR (nu=0.9) on six 2-D points and check its module,
+    dual coefficients (to rtol=1e-3) and support indices."""
+    runs_on_gpu = queue and queue.sycl_device.is_gpu
+    if runs_on_gpu:
+        pytest.skip("NuSVR fit for the GPU sycl_queue is buggy.")
+    from sklearnex.svm import NuSVR
+    points = np.array([[-2, -1], [-1, -1], [-1, -2], [+1, +1], [+1, +2], [+2, +1]])
+    targets = np.array([1, 1, 1, 2, 2, 2])
+    # Convert both inputs to the parametrized dataframe type / queue.
+    convert_kwargs = {"sycl_queue": queue, "target_df": dataframe}
+    points = _convert_to_dataframe(points, **convert_kwargs)
+    targets = _convert_to_dataframe(targets, **convert_kwargs)
+    model = NuSVR(kernel="linear", nu=0.9).fit(points, targets)
+    module_name = model.__module__
+    assert "daal4py" in module_name or "sklearnex" in module_name
+    expected_dual_coef = [[-1.0, 0.611111, 1.0, -0.611111]]
+    assert_allclose(_as_numpy(model.dual_coef_), expected_dual_coef, rtol=1e-3)
+    expected_support = [1, 2, 3, 5]
+    assert_allclose(_as_numpy(model.support_), expected_support)

sklearnex/tests/test_common.py ADDED Viewed

@@ -0,0 +1,390 @@
+# ==============================================================================
+# Copyright 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import importlib.util
+import os
+import pathlib
+import pkgutil
+import re
+import sys
+import trace
+import pytest
+from sklearn.utils import all_estimators
+from daal4py.sklearn._utils import sklearn_check_version
+from onedal.tests.test_common import _check_primitive_usage_ban
+from sklearnex.tests.utils import (
+    PATCHED_MODELS,
+    SPECIAL_INSTANCES,
+    call_method,
+    gen_dataset,
+    gen_models_info,
+)
+# Locations handed to the target_offload usage-ban check as its
+# ``allowed_locations`` argument.
+_SVM_COMMON_LOCATION = os.sep.join(("svm", "_common.py"))
+TARGET_OFFLOAD_ALLOWED_LOCATIONS = [
+    "_config.py",
+    "_device_offload.py",
+    "test",
+    "svc.py",
+    _SVM_COMMON_LOCATION,
+]
+del _SVM_COMMON_LOCATION  # keep the module namespace as it was
+# Known, accepted exceptions to the design rules. Each key has the form
+# "<estimator (with optional constructor args)>-<method>-<check name>" and
+# each value is a short human-readable reason for the exception.
+# NOTE(review): the code that consumes this table is outside the visible part
+# of the file -- presumably the design-rule tests; confirm before editing keys.
+_DESIGN_RULE_VIOLATIONS = {
+    "PCA-fit_transform-call_validate_data": "calls both 'fit' and 'transform'",
+    "IncrementalEmpiricalCovariance-score-call_validate_data": "must call clone of itself",
+    "SVC(probability=True)-fit-call_validate_data": "SVC fit can use sklearn estimator",
+    "NuSVC(probability=True)-fit-call_validate_data": "NuSVC fit can use sklearn estimator",
+    "LogisticRegression-score-n_jobs_check": "uses daal4py for cpu in sklearnex",
+    "LogisticRegression-fit-n_jobs_check": "uses daal4py for cpu in sklearnex",
+    "LogisticRegression-predict-n_jobs_check": "uses daal4py for cpu in sklearnex",
+    "LogisticRegression-predict_log_proba-n_jobs_check": "uses daal4py for cpu in sklearnex",
+    "LogisticRegression-predict_proba-n_jobs_check": "uses daal4py for cpu in sklearnex",
+    "KNeighborsClassifier-kneighbors-n_jobs_check": "uses daal4py for cpu in onedal",
+    "KNeighborsClassifier-fit-n_jobs_check": "uses daal4py for cpu in onedal",
+    "KNeighborsClassifier-score-n_jobs_check": "uses daal4py for cpu in onedal",
+    "KNeighborsClassifier-predict-n_jobs_check": "uses daal4py for cpu in onedal",
+    "KNeighborsClassifier-predict_proba-n_jobs_check": "uses daal4py for cpu in onedal",
+    "KNeighborsClassifier-kneighbors_graph-n_jobs_check": "uses daal4py for cpu in onedal",
+    "KNeighborsRegressor-kneighbors-n_jobs_check": "uses daal4py for cpu in onedal",
+    "KNeighborsRegressor-fit-n_jobs_check": "uses daal4py for cpu in onedal",
+    "KNeighborsRegressor-score-n_jobs_check": "uses daal4py for cpu in onedal",
+    "KNeighborsRegressor-predict-n_jobs_check": "uses daal4py for cpu in onedal",
+    "KNeighborsRegressor-kneighbors_graph-n_jobs_check": "uses daal4py for cpu in onedal",
+    "NearestNeighbors-kneighbors-n_jobs_check": "uses daal4py for cpu in onedal",
+    "NearestNeighbors-fit-n_jobs_check": "uses daal4py for cpu in onedal",
+    "NearestNeighbors-radius_neighbors-n_jobs_check": "uses daal4py for cpu in onedal",
+    "NearestNeighbors-kneighbors_graph-n_jobs_check": "uses daal4py for cpu in onedal",
+    "NearestNeighbors-radius_neighbors_graph-n_jobs_check": "uses daal4py for cpu in onedal",
+    "LocalOutlierFactor-fit-n_jobs_check": "uses daal4py for cpu in onedal",
+    "LocalOutlierFactor-kneighbors-n_jobs_check": "uses daal4py for cpu in onedal",
+    "LocalOutlierFactor-kneighbors_graph-n_jobs_check": "uses daal4py for cpu in onedal",
+    "KNeighborsClassifier(algorithm='brute')-kneighbors-n_jobs_check": "uses daal4py for cpu in onedal",
+    "KNeighborsClassifier(algorithm='brute')-fit-n_jobs_check": "uses daal4py for cpu in onedal",
+    "KNeighborsClassifier(algorithm='brute')-score-n_jobs_check": "uses daal4py for cpu in onedal",
+    "KNeighborsClassifier(algorithm='brute')-predict-n_jobs_check": "uses daal4py for cpu in onedal",
+    "KNeighborsClassifier(algorithm='brute')-predict_proba-n_jobs_check": "uses daal4py for cpu in onedal",
+    "KNeighborsClassifier(algorithm='brute')-kneighbors_graph-n_jobs_check": "uses daal4py for cpu in onedal",
+    "KNeighborsRegressor(algorithm='brute')-kneighbors-n_jobs_check": "uses daal4py for cpu in onedal",
+    "KNeighborsRegressor(algorithm='brute')-fit-n_jobs_check": "uses daal4py for cpu in onedal",
+    "KNeighborsRegressor(algorithm='brute')-score-n_jobs_check": "uses daal4py for cpu in onedal",
+    "KNeighborsRegressor(algorithm='brute')-predict-n_jobs_check": "uses daal4py for cpu in onedal",
+    "KNeighborsRegressor(algorithm='brute')-kneighbors_graph-n_jobs_check": "uses daal4py for cpu in onedal",
+    "NearestNeighbors(algorithm='brute')-kneighbors-n_jobs_check": "uses daal4py for cpu in onedal",
+    "NearestNeighbors(algorithm='brute')-fit-n_jobs_check": "uses daal4py for cpu in onedal",
+    "NearestNeighbors(algorithm='brute')-radius_neighbors-n_jobs_check": "uses daal4py for cpu in onedal",
+    "NearestNeighbors(algorithm='brute')-kneighbors_graph-n_jobs_check": "uses daal4py for cpu in onedal",
+    "NearestNeighbors(algorithm='brute')-radius_neighbors_graph-n_jobs_check": "uses daal4py for cpu in onedal",
+    "LocalOutlierFactor(novelty=True)-fit-n_jobs_check": "uses daal4py for cpu in onedal",
+    "LocalOutlierFactor(novelty=True)-kneighbors-n_jobs_check": "uses daal4py for cpu in onedal",
+    "LocalOutlierFactor(novelty=True)-kneighbors_graph-n_jobs_check": "uses daal4py for cpu in onedal",
+    "LogisticRegression(solver='newton-cg')-score-n_jobs_check": "uses daal4py for cpu in sklearnex",
+    "LogisticRegression(solver='newton-cg')-fit-n_jobs_check": "uses daal4py for cpu in sklearnex",
+    "LogisticRegression(solver='newton-cg')-predict-n_jobs_check": "uses daal4py for cpu in sklearnex",
+    "LogisticRegression(solver='newton-cg')-predict_log_proba-n_jobs_check": "uses daal4py for cpu in sklearnex",
+    "LogisticRegression(solver='newton-cg')-predict_proba-n_jobs_check": "uses daal4py for cpu in sklearnex",
+}
+def test_target_offload_ban():
+    """Block the use of target_offload inside sklearnex files.
+
+    Offloading computation to devices via target_offload should only occur
+    externally, and not within the architecture of the sklearnex classes.
+    This is for clarity, traceability and maintainability.
+    """
+    offenders = _check_primitive_usage_ban(
+        primitive_name="target_offload",
+        package="sklearnex",
+        allowed_locations=TARGET_OFFLOAD_ALLOWED_LOCATIONS,
+    )
+    # One offending entry per line; an empty report means the check passed.
+    report = "\n".join(offenders)
+    assert report == "", f"target offloading is occuring in: \n{report}"
+def _sklearnex_walk(func):
+    """Wrap a ``pkgutil.walk_packages``-style callable so that it looks
+    through the sklearnex folders specifically.
+
+    The returned generator function rewrites a ``prefix`` keyword equal to
+    ``"sklearn."`` into ``"sklearnex."``, replaces any ``path`` keyword with
+    the directory two levels above this file, and drops every yielded item
+    whose dotted ``name`` contains an ``spmd`` component.
+    """
+    def wrap(*args, **kwargs):
+        if kwargs.get("prefix") == "sklearn.":
+            kwargs["prefix"] = "sklearnex."
+        if "path" in kwargs:
+            # force root to sklearnex
+            sklearnex_root = pathlib.Path(__file__).parent.parent
+            kwargs["path"] = [str(sklearnex_root)]
+        for info in func(*args, **kwargs):
+            # Do not allow spmd to be yielded
+            if "spmd" in info.name.split("."):
+                continue
+            yield info
+    return wrap
+def test_class_trailing_underscore_ban(monkeypatch):
+    """sklearn uses trailing underscores to mark attributes of a fitted
+    estimator instance; sklearnex extends that convention to classes, so no
+    class attribute may end in an underscore unless it also starts with one
+    (properties excepted)."""
+    monkeypatch.setattr(pkgutil, "walk_packages", _sklearnex_walk(pkgutil.walk_packages))
+    for name, obj in all_estimators():  # list of (name, class) tuples
+        module_name = obj.__module__
+        if "preview" in module_name or "daal4py" in module_name:
+            continue
+        # properties also occur in sklearn, especially in deprecations and are expected
+        # to error if queried and the estimator is not fitted
+        attribute_ok = [
+            isinstance(getattr(obj, attr), property)
+            or (attr.startswith("_") or not attr.endswith("_"))
+            for attr in dir(obj)
+        ]
+        assert all(
+            attribute_ok
+        ), f"{name} contains class attributes which have a trailing underscore but no leading one"
+def test_all_estimators_covered(monkeypatch):
+    """Every estimator defined in sklearnex must be reachable through the
+    patch map (PATCHED_MODELS) or through SPECIAL_INSTANCES.
+
+    The estimator must inherit sklearn's BaseEstimator and must not have a
+    leading underscore. The sklearnex.spmd and sklearnex.preview packages are
+    not tested.
+    """
+    monkeypatch.setattr(pkgutil, "walk_packages", _sklearnex_walk(pkgutil.walk_packages))
+    missing = []
+    for name, obj in all_estimators():  # list of (name, class) tuples
+        # do nothing if defined in preview
+        if "preview" in obj.__module__:
+            continue
+        if any([issubclass(est, obj) for est in PATCHED_MODELS.values()]):
+            continue
+        if any([issubclass(est.__class__, obj) for est in SPECIAL_INSTANCES.values()]):
+            continue
+        missing.append(".".join([obj.__module__, name]))
+    assert missing == [], f"{missing} are currently not included"
+def _fullpath(path):
+    """Return ``path`` with a leading "~" expanded and symbolic links resolved."""
+    expanded = os.path.expanduser(path)
+    return os.path.realpath(expanded)
+# canonical installation directory of each package whose calls may be traced,
+# keyed by package name
+_TRACE_ALLOW_DICT = {
+    package: _fullpath(os.path.dirname(importlib.util.find_spec(package).origin))
+    for package in ("sklearn", "sklearnex", "onedal", "daal4py")
+}
+def _whitelist_to_blacklist():
+    """Build the list of directories which the tracer has to ignore.
+    Blocks all standard library, built-in or site packages which are not
+    related to sklearn, daal4py, onedal or sklearnex. Each ``sys.path`` entry
+    is compared with the allowed directories (the values of ``_TRACE_ALLOW_DICT``):
+    - an entry which is an allowed directory or a parent of one is not blocked
+      itself; each of its direct children which is neither inside nor a parent
+      of an allowed directory is blocked instead
+    - an entry which lies inside an allowed directory is left alone
+    - every other entry is blocked
+    Returns
+    -------
+    list of str
+        canonical paths to block, in the order in which they were found
+    """
+    def _commonpath(inp):
+        # ValueError generated by os.path.commonpath when it is on a separate drive
+        try:
+            return os.path.commonpath(inp)
+        except ValueError:
+            return ""
+    allowed = list(_TRACE_ALLOW_DICT.values())
+    blacklist = []
+    for path in sys.path:
+        fpath = _fullpath(path)
+        try:
+            # if candidate path is a parent directory to any directory in the whitelist
+            if any(_commonpath([i, fpath]) == fpath for i in allowed):
+                # find all sub-paths which are not in the whitelist and block them
+                # they should not have a common path that is either the whitelist path
+                # or the sub-path (meaning one is a parent directory of the other)
+                # the with block closes the scandir iterator even if the loop is
+                # left early by an exception
+                with os.scandir(fpath) as entries:
+                    for entry in entries:
+                        temppath = _fullpath(entry.path)
+                        if all(
+                            _commonpath([i, temppath]) not in [i, temppath]
+                            for i in allowed
+                        ):
+                            blacklist.append(temppath)
+            # add path to blacklist if not a sub path of anything in the whitelist
+            elif all(_commonpath([i, fpath]) != i for i in allowed):
+                blacklist.append(fpath)
+        except FileNotFoundError:
+            # a candidate which cannot be found on disk is blocked as a whole
+            blacklist.append(fpath)
+    return blacklist
+# computed once when the module is loaded; handed to trace.Trace as ignoredirs
+_TRACE_BLOCK_LIST = _whitelist_to_blacklist()
+@pytest.fixture
+def estimator_trace(estimator, method, cache, capsys, monkeypatch):
+    """Generate a trace of all function calls in calling estimator.method with cache.
+    The trace is only regenerated when the estimator/method pair differs from
+    the pair stored under "key" in the pytest cache; otherwise the result stored
+    under "text" is returned as it is.
+    Parameters
+    ----------
+    estimator : str
+        name of estimator which is a key from PATCHED_MODELS or
+        SPECIAL_INSTANCES
+    method : str
+        name of estimator method which is to be traced and stored
+    cache: pytest.fixture (standard)
+    capsys: pytest.fixture (standard)
+    monkeypatch: pytest.fixture (standard)
+    Returns
+    -------
+    dict with the keys "funcs", "trace", "modules" and "callingline"
+        Returns the important attributes of the trace.
+        "funcs" is the list of called functions, "trace" is the
+        total text output of the trace as a string, "modules" are the
+        module locations of the called functions (must be from daal4py,
+        onedal, sklearn, or sklearnex), and "callingline" is the line
+        which calls the function in "funcs"
+    """
+    key = "-".join((str(estimator), method))
+    # True when the cached trace belongs to a different estimator/method pair
+    flag = cache.get("key", "") != key
+    if flag:
+        # get estimator
+        # PATCHED_MODELS entries are called to create the estimator,
+        # SPECIAL_INSTANCES entries are used as they are
+        try:
+            est = PATCHED_MODELS[estimator]()
+        except KeyError:
+            est = SPECIAL_INSTANCES[estimator]
+        # get dataset
+        X, y = gen_dataset(est)[0]
+        # fit dataset if method does not contain 'fit'
+        if "fit" not in method:
+            est.fit(X, y)
+        # initialize tracer to have a more verbose module naming
+        # this impacts ignoremods, but it is not used.
+        monkeypatch.setattr(trace, "_modname", _fullpath)
+        tracer = trace.Trace(
+            count=0,
+            trace=1,
+            ignoredirs=_TRACE_BLOCK_LIST,
+        )
+        # call trace on method with dataset
+        tracer.runfunc(call_method, est, method, X, y)
+        # collect trace for analysis
+        text = capsys.readouterr().out
+        # shorten the installation directory of each allowed package to its name
+        for modulename, file in _TRACE_ALLOW_DICT.items():
+            text = text.replace(file, modulename)
+        regex_func = (
+            r"(?<=funcname: )\S*(?=\n)"  # needed due to differences in module structure
+        )
+        regex_mod = r"(?<=--- modulename: )\S*(?=\.py)"  # needed due to differences in module structure
+        regex_callingline = r"(?<=\n)\S.*(?=\n --- modulename: )"
+        cache.set("key", key)
+        # NOTE(review): the leading "" in "callingline" presumably stands in for the
+        # first traced call, which has no preceding line in the text - confirm
+        cache.set(
+            "text",
+            {
+                "funcs": re.findall(regex_func, text),
+                "trace": text,
+                "modules": [i.replace(os.sep, ".") for i in re.findall(regex_mod, text)],
+                "callingline": [""] + re.findall(regex_callingline, text),
+            },
+        )
+    return cache.get("text", None)
+def call_validate_data(text, estimator, method):
+    """Design rule: sklearn's validate_data (``_validate_data`` before sklearn 1.6)
+    and ``_check_feature_names`` must each be called exactly once before the
+    last ``to_table`` call, i.e. before offloading to oneDAL in sklearnex.
+    The check is skipped when ``to_table`` does not occur in the trace."""
+    funcs = text["funcs"]
+    if "to_table" not in funcs:
+        pytest.skip("onedal backend not used in this function")
+    # the last to_table call shows the end of the oneDAL input portion of code
+    last_to_table = max(pos for pos, func in enumerate(funcs) if func == "to_table")
+    validfuncs = funcs[:last_to_table]
+    validate_data = "validate_data" if sklearn_check_version("1.6") else "_validate_data"
+    validate_calls = validfuncs.count(validate_data)
+    assert validate_calls == 1, f"sklearn's {validate_data} should be called"
+    feature_name_checks = validfuncs.count("_check_feature_names")
+    assert (
+        feature_name_checks == 1
+    ), "estimator should check feature names in validate_data"
+def n_jobs_check(text, estimator, method):
+    """Design rule: 'n_jobs_wrapper' must appear in the trace if, and only if,
+    'to_table' or a '_get_backend' from outside of sklearnex is called"""
+    funcs = text["funcs"]
+    # the _get_backend function from sklearnex itself is not considered
+    backend_calls = sum(
+        1
+        for pos, func in enumerate(funcs)
+        if func == "_get_backend" and "sklearnex" not in text["modules"][pos]
+    )
+    count = max(funcs.count("to_table"), backend_calls)
+    n_jobs_count = funcs.count("n_jobs_wrapper")
+    uses_backend = bool(count)
+    sets_n_jobs = bool(n_jobs_count)
+    assert (
+        uses_backend == sets_n_jobs
+    ), f"verify if {method} should be in control_n_jobs' decorated_methods for {estimator}"
+def runtime_property_check(text, estimator, method):
+    """Design rule: the trace text must not contain a 'property(' call, Python's
+    'property' should only be used at class instantiation and not at runtime"""
+    property_call = re.search(r"property\(", text["trace"])
+    assert (
+        property_call is None
+    ), f"{estimator}.{method} should only use 'property' at instantiation"
+def fit_check_before_support_check(text, estimator, method):
+    """Design rule: 'check_is_fitted' must occur in the trace before the last
+    'dispatch' call. The check is skipped for methods whose name contains
+    'fit' and for traces which never call 'dispatch'."""
+    if "fit" in method:
+        pytest.skip(f"fitting occurs in {estimator}.{method}")
+    funcs = text["funcs"]
+    if "dispatch" not in funcs:
+        pytest.skip(f"onedal dispatching not used in {estimator}.{method}")
+    # only the calls made before the last dispatch are considered
+    last_dispatch = max(pos for pos, func in enumerate(funcs) if func == "dispatch")
+    validfuncs = funcs[:last_dispatch]
+    assert (
+        "check_is_fitted" in validfuncs
+    ), "sklearn's check_is_fitted must be called before checking oneDAL support"
+# design rules which test_estimator is parametrized with
+DESIGN_RULES = [n_jobs_check, runtime_property_check, fit_check_before_support_check]
+if sklearn_check_version("1.0"):
+    DESIGN_RULES.append(call_validate_data)
+@pytest.mark.parametrize("design_pattern", DESIGN_RULES)
+@pytest.mark.parametrize(
+    "estimator, method",
+    gen_models_info({**PATCHED_MODELS, **SPECIAL_INSTANCES}, fit=True, daal4py=False),
+)
+def test_estimator(estimator, method, design_pattern, estimator_trace):
+    """Apply one design rule to the trace of estimator.method.
+    A failing rule is reported as an expected failure when its
+    "estimator-method-rule" key is listed in _DESIGN_RULE_VIOLATIONS,
+    otherwise the AssertionError is raised again."""
+    # These tests only apply to sklearnex estimators
+    try:
+        design_pattern(estimator_trace, estimator, method)
+    except AssertionError:
+        violation_key = "-".join((estimator, method, design_pattern.__name__))
+        if violation_key not in _DESIGN_RULE_VIOLATIONS:
+            raise
+        pytest.xfail(_DESIGN_RULE_VIOLATIONS[violation_key])