scikit-learn-intelex 2025.0.0__py311-none-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-learn-intelex might be problematic.
- daal4py/__init__.py +73 -0
- daal4py/__main__.py +58 -0
- daal4py/_daal4py.cpython-311-x86_64-linux-gnu.so +0 -0
- daal4py/doc/third-party-programs.txt +424 -0
- daal4py/mb/__init__.py +19 -0
- daal4py/mb/model_builders.py +377 -0
- daal4py/mpi_transceiver.cpython-311-x86_64-linux-gnu.so +0 -0
- daal4py/sklearn/__init__.py +40 -0
- daal4py/sklearn/_n_jobs_support.py +242 -0
- daal4py/sklearn/_utils.py +241 -0
- daal4py/sklearn/cluster/__init__.py +20 -0
- daal4py/sklearn/cluster/dbscan.py +165 -0
- daal4py/sklearn/cluster/k_means.py +597 -0
- daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
- daal4py/sklearn/decomposition/__init__.py +19 -0
- daal4py/sklearn/decomposition/_pca.py +524 -0
- daal4py/sklearn/ensemble/AdaBoostClassifier.py +192 -0
- daal4py/sklearn/ensemble/GBTDAAL.py +318 -0
- daal4py/sklearn/ensemble/__init__.py +27 -0
- daal4py/sklearn/ensemble/_forest.py +1397 -0
- daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
- daal4py/sklearn/linear_model/__init__.py +29 -0
- daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
- daal4py/sklearn/linear_model/_linear.py +272 -0
- daal4py/sklearn/linear_model/_ridge.py +325 -0
- daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
- daal4py/sklearn/linear_model/linear.py +17 -0
- daal4py/sklearn/linear_model/logistic_loss.py +195 -0
- daal4py/sklearn/linear_model/logistic_path.py +1026 -0
- daal4py/sklearn/linear_model/ridge.py +17 -0
- daal4py/sklearn/linear_model/tests/test_linear.py +196 -0
- daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
- daal4py/sklearn/manifold/__init__.py +19 -0
- daal4py/sklearn/manifold/_t_sne.py +405 -0
- daal4py/sklearn/metrics/__init__.py +20 -0
- daal4py/sklearn/metrics/_pairwise.py +155 -0
- daal4py/sklearn/metrics/_ranking.py +210 -0
- daal4py/sklearn/model_selection/__init__.py +19 -0
- daal4py/sklearn/model_selection/_split.py +309 -0
- daal4py/sklearn/model_selection/tests/test_split.py +56 -0
- daal4py/sklearn/monkeypatch/__init__.py +0 -0
- daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
- daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
- daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
- daal4py/sklearn/monkeypatch/tests/test_patching.py +87 -0
- daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +118 -0
- daal4py/sklearn/neighbors/__init__.py +21 -0
- daal4py/sklearn/neighbors/_base.py +503 -0
- daal4py/sklearn/neighbors/_classification.py +139 -0
- daal4py/sklearn/neighbors/_regression.py +74 -0
- daal4py/sklearn/neighbors/_unsupervised.py +55 -0
- daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
- daal4py/sklearn/svm/__init__.py +19 -0
- daal4py/sklearn/svm/svm.py +734 -0
- daal4py/sklearn/utils/__init__.py +21 -0
- daal4py/sklearn/utils/base.py +75 -0
- daal4py/sklearn/utils/tests/test_utils.py +51 -0
- daal4py/sklearn/utils/validation.py +693 -0
- onedal/__init__.py +83 -0
- onedal/_config.py +53 -0
- onedal/_device_offload.py +229 -0
- onedal/_onedal_py_dpc.cpython-311-x86_64-linux-gnu.so +0 -0
- onedal/_onedal_py_host.cpython-311-x86_64-linux-gnu.so +0 -0
- onedal/_onedal_py_spmd_dpc.cpython-311-x86_64-linux-gnu.so +0 -0
- onedal/basic_statistics/__init__.py +20 -0
- onedal/basic_statistics/basic_statistics.py +107 -0
- onedal/basic_statistics/incremental_basic_statistics.py +160 -0
- onedal/basic_statistics/tests/test_basic_statistics.py +298 -0
- onedal/basic_statistics/tests/test_incremental_basic_statistics.py +196 -0
- onedal/cluster/__init__.py +27 -0
- onedal/cluster/dbscan.py +110 -0
- onedal/cluster/kmeans.py +560 -0
- onedal/cluster/kmeans_init.py +115 -0
- onedal/cluster/tests/test_dbscan.py +125 -0
- onedal/cluster/tests/test_kmeans.py +88 -0
- onedal/cluster/tests/test_kmeans_init.py +93 -0
- onedal/common/_base.py +38 -0
- onedal/common/_estimator_checks.py +47 -0
- onedal/common/_mixin.py +62 -0
- onedal/common/_policy.py +59 -0
- onedal/common/_spmd_policy.py +30 -0
- onedal/common/hyperparameters.py +116 -0
- onedal/common/tests/test_policy.py +75 -0
- onedal/covariance/__init__.py +20 -0
- onedal/covariance/covariance.py +125 -0
- onedal/covariance/incremental_covariance.py +146 -0
- onedal/covariance/tests/test_covariance.py +50 -0
- onedal/covariance/tests/test_incremental_covariance.py +122 -0
- onedal/datatypes/__init__.py +19 -0
- onedal/datatypes/_data_conversion.py +95 -0
- onedal/datatypes/tests/test_data.py +235 -0
- onedal/decomposition/__init__.py +20 -0
- onedal/decomposition/incremental_pca.py +204 -0
- onedal/decomposition/pca.py +186 -0
- onedal/decomposition/tests/test_incremental_pca.py +198 -0
- onedal/ensemble/__init__.py +29 -0
- onedal/ensemble/forest.py +720 -0
- onedal/ensemble/tests/test_random_forest.py +97 -0
- onedal/linear_model/__init__.py +27 -0
- onedal/linear_model/incremental_linear_model.py +258 -0
- onedal/linear_model/linear_model.py +329 -0
- onedal/linear_model/logistic_regression.py +249 -0
- onedal/linear_model/tests/test_incremental_linear_regression.py +168 -0
- onedal/linear_model/tests/test_incremental_ridge_regression.py +107 -0
- onedal/linear_model/tests/test_linear_regression.py +149 -0
- onedal/linear_model/tests/test_logistic_regression.py +95 -0
- onedal/linear_model/tests/test_ridge.py +95 -0
- onedal/neighbors/__init__.py +19 -0
- onedal/neighbors/neighbors.py +778 -0
- onedal/neighbors/tests/test_knn_classification.py +49 -0
- onedal/primitives/__init__.py +27 -0
- onedal/primitives/get_tree.py +25 -0
- onedal/primitives/kernel_functions.py +153 -0
- onedal/primitives/tests/test_kernel_functions.py +159 -0
- onedal/spmd/__init__.py +25 -0
- onedal/spmd/_base.py +30 -0
- onedal/spmd/basic_statistics/__init__.py +20 -0
- onedal/spmd/basic_statistics/basic_statistics.py +30 -0
- onedal/spmd/basic_statistics/incremental_basic_statistics.py +69 -0
- onedal/spmd/cluster/__init__.py +28 -0
- onedal/spmd/cluster/dbscan.py +23 -0
- onedal/spmd/cluster/kmeans.py +56 -0
- onedal/spmd/covariance/__init__.py +20 -0
- onedal/spmd/covariance/covariance.py +26 -0
- onedal/spmd/covariance/incremental_covariance.py +82 -0
- onedal/spmd/decomposition/__init__.py +20 -0
- onedal/spmd/decomposition/incremental_pca.py +117 -0
- onedal/spmd/decomposition/pca.py +26 -0
- onedal/spmd/ensemble/__init__.py +19 -0
- onedal/spmd/ensemble/forest.py +28 -0
- onedal/spmd/linear_model/__init__.py +21 -0
- onedal/spmd/linear_model/incremental_linear_model.py +97 -0
- onedal/spmd/linear_model/linear_model.py +30 -0
- onedal/spmd/linear_model/logistic_regression.py +38 -0
- onedal/spmd/neighbors/__init__.py +19 -0
- onedal/spmd/neighbors/neighbors.py +75 -0
- onedal/svm/__init__.py +19 -0
- onedal/svm/svm.py +556 -0
- onedal/svm/tests/test_csr_svm.py +351 -0
- onedal/svm/tests/test_nusvc.py +204 -0
- onedal/svm/tests/test_nusvr.py +210 -0
- onedal/svm/tests/test_svc.py +168 -0
- onedal/svm/tests/test_svr.py +243 -0
- onedal/tests/test_common.py +41 -0
- onedal/tests/utils/_dataframes_support.py +168 -0
- onedal/tests/utils/_device_selection.py +107 -0
- onedal/utils/__init__.py +49 -0
- onedal/utils/_array_api.py +91 -0
- onedal/utils/validation.py +432 -0
- scikit_learn_intelex-2025.0.0.dist-info/LICENSE.txt +202 -0
- scikit_learn_intelex-2025.0.0.dist-info/METADATA +231 -0
- scikit_learn_intelex-2025.0.0.dist-info/RECORD +278 -0
- scikit_learn_intelex-2025.0.0.dist-info/WHEEL +5 -0
- scikit_learn_intelex-2025.0.0.dist-info/top_level.txt +3 -0
- sklearnex/__init__.py +65 -0
- sklearnex/__main__.py +58 -0
- sklearnex/_config.py +98 -0
- sklearnex/_device_offload.py +121 -0
- sklearnex/_utils.py +109 -0
- sklearnex/basic_statistics/__init__.py +20 -0
- sklearnex/basic_statistics/basic_statistics.py +140 -0
- sklearnex/basic_statistics/incremental_basic_statistics.py +288 -0
- sklearnex/basic_statistics/tests/test_basic_statistics.py +251 -0
- sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +384 -0
- sklearnex/cluster/__init__.py +20 -0
- sklearnex/cluster/dbscan.py +192 -0
- sklearnex/cluster/k_means.py +383 -0
- sklearnex/cluster/tests/test_dbscan.py +38 -0
- sklearnex/cluster/tests/test_kmeans.py +153 -0
- sklearnex/conftest.py +73 -0
- sklearnex/covariance/__init__.py +19 -0
- sklearnex/covariance/incremental_covariance.py +368 -0
- sklearnex/covariance/tests/test_incremental_covariance.py +226 -0
- sklearnex/decomposition/__init__.py +19 -0
- sklearnex/decomposition/pca.py +414 -0
- sklearnex/decomposition/tests/test_pca.py +58 -0
- sklearnex/dispatcher.py +543 -0
- sklearnex/doc/third-party-programs.txt +424 -0
- sklearnex/ensemble/__init__.py +29 -0
- sklearnex/ensemble/_forest.py +2016 -0
- sklearnex/ensemble/tests/test_forest.py +120 -0
- sklearnex/glob/__main__.py +72 -0
- sklearnex/glob/dispatcher.py +101 -0
- sklearnex/linear_model/__init__.py +32 -0
- sklearnex/linear_model/coordinate_descent.py +30 -0
- sklearnex/linear_model/incremental_linear.py +463 -0
- sklearnex/linear_model/incremental_ridge.py +418 -0
- sklearnex/linear_model/linear.py +302 -0
- sklearnex/linear_model/logistic_path.py +17 -0
- sklearnex/linear_model/logistic_regression.py +403 -0
- sklearnex/linear_model/ridge.py +24 -0
- sklearnex/linear_model/tests/test_incremental_linear.py +203 -0
- sklearnex/linear_model/tests/test_incremental_ridge.py +153 -0
- sklearnex/linear_model/tests/test_linear.py +142 -0
- sklearnex/linear_model/tests/test_logreg.py +134 -0
- sklearnex/manifold/__init__.py +19 -0
- sklearnex/manifold/t_sne.py +21 -0
- sklearnex/manifold/tests/test_tsne.py +26 -0
- sklearnex/metrics/__init__.py +23 -0
- sklearnex/metrics/pairwise.py +22 -0
- sklearnex/metrics/ranking.py +20 -0
- sklearnex/metrics/tests/test_metrics.py +39 -0
- sklearnex/model_selection/__init__.py +21 -0
- sklearnex/model_selection/split.py +22 -0
- sklearnex/model_selection/tests/test_model_selection.py +34 -0
- sklearnex/neighbors/__init__.py +27 -0
- sklearnex/neighbors/_lof.py +231 -0
- sklearnex/neighbors/common.py +310 -0
- sklearnex/neighbors/knn_classification.py +226 -0
- sklearnex/neighbors/knn_regression.py +203 -0
- sklearnex/neighbors/knn_unsupervised.py +170 -0
- sklearnex/neighbors/tests/test_neighbors.py +80 -0
- sklearnex/preview/__init__.py +17 -0
- sklearnex/preview/covariance/__init__.py +19 -0
- sklearnex/preview/covariance/covariance.py +133 -0
- sklearnex/preview/covariance/tests/test_covariance.py +66 -0
- sklearnex/preview/decomposition/__init__.py +19 -0
- sklearnex/preview/decomposition/incremental_pca.py +228 -0
- sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
- sklearnex/preview/linear_model/__init__.py +19 -0
- sklearnex/preview/linear_model/ridge.py +419 -0
- sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
- sklearnex/spmd/__init__.py +25 -0
- sklearnex/spmd/basic_statistics/__init__.py +20 -0
- sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
- sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
- sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
- sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +307 -0
- sklearnex/spmd/cluster/__init__.py +30 -0
- sklearnex/spmd/cluster/dbscan.py +50 -0
- sklearnex/spmd/cluster/kmeans.py +21 -0
- sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
- sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
- sklearnex/spmd/covariance/__init__.py +20 -0
- sklearnex/spmd/covariance/covariance.py +21 -0
- sklearnex/spmd/covariance/incremental_covariance.py +37 -0
- sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
- sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
- sklearnex/spmd/decomposition/__init__.py +20 -0
- sklearnex/spmd/decomposition/incremental_pca.py +30 -0
- sklearnex/spmd/decomposition/pca.py +21 -0
- sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
- sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
- sklearnex/spmd/ensemble/__init__.py +19 -0
- sklearnex/spmd/ensemble/forest.py +71 -0
- sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
- sklearnex/spmd/linear_model/__init__.py +21 -0
- sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
- sklearnex/spmd/linear_model/linear_model.py +21 -0
- sklearnex/spmd/linear_model/logistic_regression.py +21 -0
- sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +329 -0
- sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
- sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +166 -0
- sklearnex/spmd/neighbors/__init__.py +19 -0
- sklearnex/spmd/neighbors/neighbors.py +25 -0
- sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
- sklearnex/svm/__init__.py +29 -0
- sklearnex/svm/_common.py +328 -0
- sklearnex/svm/nusvc.py +332 -0
- sklearnex/svm/nusvr.py +148 -0
- sklearnex/svm/svc.py +360 -0
- sklearnex/svm/svr.py +149 -0
- sklearnex/svm/tests/test_svm.py +93 -0
- sklearnex/tests/_utils.py +328 -0
- sklearnex/tests/_utils_spmd.py +198 -0
- sklearnex/tests/test_common.py +54 -0
- sklearnex/tests/test_config.py +43 -0
- sklearnex/tests/test_memory_usage.py +291 -0
- sklearnex/tests/test_monkeypatch.py +276 -0
- sklearnex/tests/test_n_jobs_support.py +103 -0
- sklearnex/tests/test_parallel.py +48 -0
- sklearnex/tests/test_patching.py +385 -0
- sklearnex/tests/test_run_to_run_stability.py +296 -0
- sklearnex/utils/__init__.py +19 -0
- sklearnex/utils/_array_api.py +82 -0
- sklearnex/utils/parallel.py +59 -0
- sklearnex/utils/tests/test_finite.py +89 -0
- sklearnex/utils/validation.py +17 -0
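
Judging by added-line counts and contents, the three diff hunks below are sklearnex/tests/test_memory_usage.py (+291), sklearnex/tests/test_monkeypatch.py (+276), and sklearnex/tests/test_n_jobs_support.py (+103) from the listing above. All three revolve around the package's patching entry points; as a minimal illustrative sketch of that workflow (only patch_sklearn/unpatch_sklearn and the module check are taken from the tests themselves, the estimator choice is arbitrary):

# Illustrative sketch, not part of the wheel contents.
from sklearnex import patch_sklearn, unpatch_sklearn

patch_sklearn()  # must run before importing the scikit-learn estimators to accelerate

from sklearn.svm import SVC  # now resolved to the sklearnex/daal4py implementation

assert SVC.__module__.startswith(("sklearnex", "daal4py"))

unpatch_sklearn()  # restore stock scikit-learn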
@@ -0,0 +1,291 @@
+# ==============================================================================
+# Copyright 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import gc
+import logging
+import os
+import tracemalloc
+import types
+import warnings
+from inspect import isclass
+
+import numpy as np
+import pandas as pd
+import pytest
+from scipy.stats import pearsonr
+from sklearn.base import BaseEstimator, clone
+from sklearn.datasets import make_classification
+from sklearn.model_selection import KFold
+
+from onedal import _is_dpc_backend
+from onedal.tests.utils._dataframes_support import (
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
+from onedal.tests.utils._device_selection import get_queues, is_dpctl_available
+from sklearnex import config_context
+from sklearnex.tests._utils import PATCHED_FUNCTIONS, PATCHED_MODELS, SPECIAL_INSTANCES
+from sklearnex.utils._array_api import get_namespace
+
+if _is_dpc_backend:
+    from onedal import _backend
+
+
+CPU_SKIP_LIST = (
+    "TSNE",  # too slow for using in testing on common data size
+    "config_context",  # does not malloc
+    "get_config",  # does not malloc
+    "set_config",  # does not malloc
+    "SVC(probability=True)",  # memory leak fortran numpy (investigate _fit_proba)
+    "NuSVC(probability=True)",  # memory leak fortran numpy (investigate _fit_proba)
+    "IncrementalEmpiricalCovariance",  # dataframe_f issues
+    "IncrementalLinearRegression",  # TODO fix memory leak issue in private CI for data_shape = (1000, 100), data_transform_function = dataframe_f
+    "IncrementalPCA",  # TODO fix memory leak issue in private CI for data_shape = (1000, 100), data_transform_function = dataframe_f
+    "IncrementalRidge",  # TODO fix memory leak issue in private CI for data_shape = (1000, 100), data_transform_function = dataframe_f
+    "LogisticRegression(solver='newton-cg')",  # memory leak fortran (1000, 100)
+)
+
+GPU_SKIP_LIST = (
+    "TSNE",  # too slow for using in testing on common data size
+    "RandomForestRegressor",  # too slow for using in testing on common data size
+    "KMeans",  # does not support GPU offloading
+    "config_context",  # does not malloc
+    "get_config",  # does not malloc
+    "set_config",  # does not malloc
+    "Ridge",  # does not support GPU offloading (fails silently)
+    "ElasticNet",  # does not support GPU offloading (fails silently)
+    "Lasso",  # does not support GPU offloading (fails silently)
+    "SVR",  # does not support GPU offloading (fails silently)
+    "NuSVR",  # does not support GPU offloading (fails silently)
+    "NuSVC",  # does not support GPU offloading (fails silently)
+    "LogisticRegression",  # default parameters not supported, see solver=newton-cg
+    "NuSVC(probability=True)",  # does not support GPU offloading (fails silently)
+    "IncrementalLinearRegression",  # issue with potrf with the specific dataset
+    "LinearRegression",  # issue with potrf with the specific dataset
+)
+
+
+def gen_functions(functions):
+    func_dict = functions.copy()
+
+    roc_auc_score = func_dict.pop("roc_auc_score")
+    func_dict["roc_auc_score"] = lambda x, y: roc_auc_score(y, y)
+
+    pairwise_distances = func_dict.pop("pairwise_distances")
+    func_dict["pairwise_distances(metric='cosine')"] = lambda x, y: pairwise_distances(
+        x, metric="cosine"
+    )
+    func_dict["pairwise_distances(metric='correlation')"] = (
+        lambda x, y: pairwise_distances(x, metric="correlation")
+    )
+
+    _assert_all_finite = func_dict.pop("_assert_all_finite")
+    func_dict["_assert_all_finite"] = lambda x, y: [
+        _assert_all_finite(x),
+        _assert_all_finite(y),
+    ]
+    return func_dict
+
+
+FUNCTIONS = gen_functions(PATCHED_FUNCTIONS)
+
+CPU_ESTIMATORS = {
+    k: v
+    for k, v in {**PATCHED_MODELS, **SPECIAL_INSTANCES, **FUNCTIONS}.items()
+    if not k in CPU_SKIP_LIST
+}
+
+GPU_ESTIMATORS = {
+    k: v
+    for k, v in {**PATCHED_MODELS, **SPECIAL_INSTANCES}.items()
+    if not k in GPU_SKIP_LIST
+}
+
+data_shapes = [
+    pytest.param((1000, 100), id="(1000, 100)"),
+    pytest.param((2000, 50), id="(2000, 50)"),
+]
+
+EXTRA_MEMORY_THRESHOLD = 0.15
+N_SPLITS = 10
+ORDER_DICT = {"F": np.asfortranarray, "C": np.ascontiguousarray}
+
+
+def gen_clsf_data(n_samples, n_features):
+    data, label = make_classification(
+        n_classes=2, n_samples=n_samples, n_features=n_features, random_state=777
+    )
+    return (
+        data,
+        label,
+        data.size * data.dtype.itemsize + label.size * label.dtype.itemsize,
+    )
+
+
+def get_traced_memory(queue=None):
+    if _is_dpc_backend and queue and queue.sycl_device.is_gpu:
+        return _backend.get_used_memory(queue)
+    else:
+        return tracemalloc.get_traced_memory()[0]
+
+
+def take(x, index, axis=0, queue=None):
+    xp, array_api = get_namespace(x)
+    if array_api:
+        return xp.take(x, xp.asarray(index, device=queue), axis=axis)
+    else:
+        return x.take(index, axis=axis)
+
+
+def split_train_inference(kf, x, y, estimator, queue=None):
+    mem_tracks = []
+    for train_index, test_index in kf.split(x):
+        x_train = take(x, train_index, queue=queue)
+        y_train = take(y, train_index, queue=queue)
+        x_test = take(x, test_index, queue=queue)
+        y_test = take(y, test_index, queue=queue)
+
+        if isclass(estimator) and issubclass(estimator, BaseEstimator):
+            alg = estimator()
+            flag = True
+        elif isinstance(estimator, BaseEstimator):
+            alg = clone(estimator)
+            flag = True
+        else:
+            flag = False
+
+        if flag:
+            alg.fit(x_train, y_train)
+            if hasattr(alg, "predict"):
+                alg.predict(x_test)
+            elif hasattr(alg, "transform"):
+                alg.transform(x_test)
+            elif hasattr(alg, "kneighbors"):
+                alg.kneighbors(x_test)
+            del alg
+        else:
+            estimator(x_train, y_train)
+
+        del x_train, x_test, y_train, y_test, flag
+        mem_tracks.append(get_traced_memory(queue))
+    return mem_tracks
+
+
+def _kfold_function_template(estimator, dataframe, data_shape, queue=None, func=None):
+    tracemalloc.start()
+
+    n_samples, n_features = data_shape
+    X, y, data_memory_size = gen_clsf_data(n_samples, n_features)
+    kf = KFold(n_splits=N_SPLITS)
+    if func:
+        X = func(X)
+
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
+
+    mem_before = get_traced_memory(queue)
+    mem_tracks = split_train_inference(kf, X, y, estimator, queue=queue)
+    mem_iter_diffs = np.array(mem_tracks[1:]) - np.array(mem_tracks[:-1])
+    mem_incr_mean, mem_incr_std = mem_iter_diffs.mean(), mem_iter_diffs.std()
+    mem_incr_mean, mem_incr_std = round(mem_incr_mean), round(mem_incr_std)
+    with warnings.catch_warnings():
+        # In the case that the memory usage is constant, this will raise
+        # a ConstantInputWarning error in pearsonr from scipy, this can
+        # be ignored.
+        warnings.filterwarnings(
+            "ignore",
+            message="An input array is constant; the correlation coefficient is not defined",
+        )
+        mem_iter_corr, _ = pearsonr(mem_tracks, list(range(len(mem_tracks))))
+
+    if mem_iter_corr > 0.95:
+        logging.warning(
+            "Memory usage is steadily increasing with iterations "
+            "(Pearson correlation coefficient between "
+            f"memory tracks and iterations is {mem_iter_corr})\n"
+            "Memory usage increase per iteration: "
+            f"{mem_incr_mean}±{mem_incr_std} bytes"
+        )
+    mem_before_gc = get_traced_memory(queue)
+    mem_diff = mem_before_gc - mem_before
+    if isinstance(estimator, BaseEstimator):
+        name = str(estimator)
+    else:
+        name = estimator.__name__
+
+    message = (
+        "Size of extra allocated memory {} using garbage collector "
+        f"is greater than {EXTRA_MEMORY_THRESHOLD * 100}% of input data"
+        f"\n\tAlgorithm: {name}"
+        f"\n\tInput data size: {data_memory_size} bytes"
+        "\n\tExtra allocated memory size: {} bytes"
+        " / {} %"
+    )
+    if mem_diff >= EXTRA_MEMORY_THRESHOLD * data_memory_size:
+        logging.warning(
+            message.format(
+                "before", mem_diff, round((mem_diff) / data_memory_size * 100, 2)
+            )
+        )
+    gc.collect()
+    mem_after = get_traced_memory(queue)
+    tracemalloc.stop()
+    mem_diff = mem_after - mem_before
+
+    # GPU offloading with SYCL contains a program/kernel cache which should
+    # be controllable via a KernelProgramCache object in the SYCL context.
+    # The programs and kernels are stored on the GPU, but cannot be cleared
+    # as this class is not available for access in all oneDAL DPC++ runtimes.
+    # Therefore, until this is implemented this test must be skipped for gpu
+    # as it looks like a memory leak (at least there is no way to discern a
+    # leak on the first run).
+    if queue is None or queue.sycl_device.is_cpu:
+        assert mem_diff < EXTRA_MEMORY_THRESHOLD * data_memory_size, message.format(
+            "after", mem_diff, round((mem_diff) / data_memory_size * 100, 2)
+        )
+
+
+@pytest.mark.parametrize("order", ["F", "C"])
+@pytest.mark.parametrize(
+    "dataframe,queue", get_dataframes_and_queues("numpy,pandas,dpctl", "cpu")
+)
+@pytest.mark.parametrize("estimator", CPU_ESTIMATORS.keys())
+@pytest.mark.parametrize("data_shape", data_shapes)
+def test_memory_leaks(estimator, dataframe, queue, order, data_shape):
+    func = ORDER_DICT[order]
+    if estimator == "_assert_all_finite" and queue is not None:
+        pytest.skip(f"{estimator} is not designed for device offloading")
+
+    _kfold_function_template(
+        CPU_ESTIMATORS[estimator], dataframe, data_shape, queue, func
+    )
+
+
+@pytest.mark.skipif(
+    os.getenv("ZES_ENABLE_SYSMAN") is None or not is_dpctl_available("gpu"),
+    reason="SYCL device memory leak check requires the level zero sysman",
+)
+@pytest.mark.parametrize("queue", get_queues("gpu"))
+@pytest.mark.parametrize("estimator", GPU_ESTIMATORS.keys())
+@pytest.mark.parametrize("order", ["F", "C"])
+@pytest.mark.parametrize("data_shape", data_shapes)
+def test_gpu_memory_leaks(estimator, queue, order, data_shape):
+    func = ORDER_DICT[order]
+    if "ExtraTrees" in estimator and data_shape == (2000, 50):
+        pytest.skip("Avoid a segmentation fault in Extra Trees algorithms")
+
+    with config_context(target_offload=queue):
+        _kfold_function_template(GPU_ESTIMATORS[estimator], None, data_shape, queue, func)
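
The memory-usage tests above route GPU runs through config_context(target_offload=queue) with a dpctl SyclQueue. Outside the test harness the same mechanism is commonly driven with a device selector string; a minimal sketch (the "gpu" selector and the DBSCAN choice are illustrative, and a dpctl-enabled build with a SYCL GPU is assumed):

import numpy as np

from sklearnex import config_context, patch_sklearn

patch_sklearn()
from sklearn.cluster import DBSCAN

X = np.random.rand(1000, 10)

# Offload the patched estimator's computation to the selected SYCL device.
with config_context(target_offload="gpu"):
    labels = DBSCAN(eps=0.5).fit_predict(X)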
@@ -0,0 +1,276 @@
+# ===============================================================================
+# Copyright 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===============================================================================
+
+import sklearnex
+from daal4py.sklearn._utils import daal_check_version
+
+# General use of patch_sklearn and unpatch_sklearn in pytest is not recommended.
+# It changes global state and can impact the operation of other tests. This file
+# specifically tests patch_sklearn and unpatch_sklearn and is exempt from this.
+# If sklearnex patching is necessary in testing, use the 'with_sklearnex' pytest
+# fixture.
+
+
+def test_monkey_patching():
+    _tokens = sklearnex.get_patch_names()
+    _values = sklearnex.get_patch_map().values()
+    _classes = list()
+
+    for v in _values:
+        for c in v:
+            _classes.append(c[0])
+
+    try:
+        sklearnex.patch_sklearn()
+
+        for i, _ in enumerate(_tokens):
+            t = _tokens[i]
+            p = _classes[i][0]
+            n = _classes[i][1]
+
+            class_module = getattr(p, n).__module__
+            assert class_module.startswith("daal4py") or class_module.startswith(
+                "sklearnex"
+            ), "Patching has completed with error."
+
+        for i, _ in enumerate(_tokens):
+            t = _tokens[i]
+            p = _classes[i][0]
+            n = _classes[i][1]
+
+            sklearnex.unpatch_sklearn(t)
+            sklearn_class = getattr(p, n, None)
+            if sklearn_class is not None:
+                sklearn_class = sklearn_class.__module__
+            assert sklearn_class is None or sklearn_class.startswith(
+                "sklearn"
+            ), "Unpatching has completed with error."
+
+    finally:
+        sklearnex.unpatch_sklearn()
+
+    try:
+        for i, _ in enumerate(_tokens):
+            t = _tokens[i]
+            p = _classes[i][0]
+            n = _classes[i][1]
+
+            sklearn_class = getattr(p, n, None)
+            if sklearn_class is not None:
+                sklearn_class = sklearn_class.__module__
+            assert sklearn_class is None or sklearn_class.startswith(
+                "sklearn"
+            ), "Unpatching has completed with error."
+
+    finally:
+        sklearnex.unpatch_sklearn()
+
+    try:
+        for i, _ in enumerate(_tokens):
+            t = _tokens[i]
+            p = _classes[i][0]
+            n = _classes[i][1]
+
+            sklearnex.patch_sklearn(t)
+
+            class_module = getattr(p, n).__module__
+            assert class_module.startswith("daal4py") or class_module.startswith(
+                "sklearnex"
+            ), "Patching has completed with error."
+    finally:
+        sklearnex.unpatch_sklearn()
+
+
+def test_patch_by_list_simple():
+    try:
+        sklearnex.patch_sklearn(["LogisticRegression"])
+
+        from sklearn.ensemble import RandomForestRegressor
+        from sklearn.linear_model import LogisticRegression
+        from sklearn.neighbors import KNeighborsRegressor
+        from sklearn.svm import SVC
+
+        assert RandomForestRegressor.__module__.startswith("sklearn")
+        assert KNeighborsRegressor.__module__.startswith("sklearn")
+        if daal_check_version((2024, "P", 1)):
+            assert LogisticRegression.__module__.startswith("sklearnex")
+        else:
+            assert LogisticRegression.__module__.startswith("daal4py")
+        assert SVC.__module__.startswith("sklearn")
+    finally:
+        sklearnex.unpatch_sklearn()
+
+
+def test_patch_by_list_many_estimators():
+    try:
+        sklearnex.patch_sklearn(["LogisticRegression", "SVC"])
+
+        from sklearn.ensemble import RandomForestRegressor
+        from sklearn.linear_model import LogisticRegression
+        from sklearn.neighbors import KNeighborsRegressor
+        from sklearn.svm import SVC
+
+        assert RandomForestRegressor.__module__.startswith("sklearn")
+        assert KNeighborsRegressor.__module__.startswith("sklearn")
+        if daal_check_version((2024, "P", 1)):
+            assert LogisticRegression.__module__.startswith("sklearnex")
+        else:
+            assert LogisticRegression.__module__.startswith("daal4py")
+        assert SVC.__module__.startswith("daal4py") or SVC.__module__.startswith(
+            "sklearnex"
+        )
+
+    finally:
+        sklearnex.unpatch_sklearn()
+
+
+def test_unpatch_by_list_many_estimators():
+    try:
+        sklearnex.patch_sklearn()
+
+        from sklearn.ensemble import RandomForestRegressor
+        from sklearn.linear_model import LogisticRegression
+        from sklearn.neighbors import KNeighborsRegressor
+        from sklearn.svm import SVC
+
+        assert RandomForestRegressor.__module__.startswith("sklearnex")
+        assert KNeighborsRegressor.__module__.startswith(
+            "daal4py"
+        ) or KNeighborsRegressor.__module__.startswith("sklearnex")
+        if daal_check_version((2024, "P", 1)):
+            assert LogisticRegression.__module__.startswith("sklearnex")
+        else:
+            assert LogisticRegression.__module__.startswith("daal4py")
+        assert SVC.__module__.startswith("daal4py") or SVC.__module__.startswith(
+            "sklearnex"
+        )
+
+        sklearnex.unpatch_sklearn(["KNeighborsRegressor", "RandomForestRegressor"])
+
+        from sklearn.ensemble import RandomForestRegressor
+        from sklearn.linear_model import LogisticRegression
+        from sklearn.neighbors import KNeighborsRegressor
+        from sklearn.svm import SVC
+
+        assert RandomForestRegressor.__module__.startswith("sklearn")
+        assert KNeighborsRegressor.__module__.startswith("sklearn")
+        if daal_check_version((2024, "P", 1)):
+            assert LogisticRegression.__module__.startswith("sklearnex")
+        else:
+            assert LogisticRegression.__module__.startswith("daal4py")
+
+        assert SVC.__module__.startswith("daal4py") or SVC.__module__.startswith(
+            "sklearnex"
+        )
+    finally:
+        sklearnex.unpatch_sklearn()
+
+
+def test_patching_checker():
+    for name in [None, "SVC", "PCA"]:
+        try:
+            sklearnex.patch_sklearn(name=name)
+            assert sklearnex.sklearn_is_patched(name=name)
+
+        finally:
+            sklearnex.unpatch_sklearn(name=name)
+            assert not sklearnex.sklearn_is_patched(name=name)
+    try:
+        sklearnex.patch_sklearn()
+        patching_status_map = sklearnex.sklearn_is_patched(return_map=True)
+        assert len(patching_status_map) == len(sklearnex.get_patch_names())
+        for status in patching_status_map.values():
+            assert status
+    finally:
+        sklearnex.unpatch_sklearn()
+
+    patching_status_map = sklearnex.sklearn_is_patched(return_map=True)
+    assert len(patching_status_map) == len(sklearnex.get_patch_names())
+    for status in patching_status_map.values():
+        assert not status
+
+
+def test_preview_namespace():
+    def get_estimators():
+        from sklearn.cluster import DBSCAN
+        from sklearn.decomposition import PCA
+        from sklearn.ensemble import RandomForestClassifier
+        from sklearn.linear_model import LinearRegression, Ridge
+        from sklearn.svm import SVC
+
+        return (
+            Ridge(),
+            LinearRegression(),
+            PCA(),
+            DBSCAN(),
+            SVC(),
+            RandomForestClassifier(),
+        )
+
+    from sklearnex.dispatcher import _is_preview_enabled
+
+    try:
+        sklearnex.patch_sklearn(preview=True)
+
+        assert _is_preview_enabled()
+
+        ridge, lr, pca, dbscan, svc, rfc = get_estimators()
+        assert "sklearnex" in rfc.__module__
+
+        if daal_check_version((2024, "P", 600)):
+            assert "sklearnex.preview" in ridge.__module__
+
+        if daal_check_version((2023, "P", 100)):
+            assert "sklearnex" in lr.__module__
+        else:
+            assert "daal4py" in lr.__module__
+
+        assert "sklearnex" in pca.__module__
+        assert "sklearnex" in dbscan.__module__
+        assert "sklearnex" in svc.__module__
+
+    finally:
+        sklearnex.unpatch_sklearn()
+
+    # no patching behavior
+    ridge, lr, pca, dbscan, svc, rfc = get_estimators()
+    assert "sklearn." in ridge.__module__ and "daal4py" not in ridge.__module__
+    assert "sklearn." in lr.__module__ and "daal4py" not in lr.__module__
+    assert "sklearn." in pca.__module__ and "daal4py" not in pca.__module__
+    assert "sklearn." in dbscan.__module__ and "daal4py" not in dbscan.__module__
+    assert "sklearn." in svc.__module__ and "daal4py" not in svc.__module__
+    assert "sklearn." in rfc.__module__ and "daal4py" not in rfc.__module__
+
+    # default patching behavior
+    try:
+        sklearnex.patch_sklearn()
+        assert not _is_preview_enabled()
+
+        ridge, lr, pca, dbscan, svc, rfc = get_estimators()
+
+        assert "daal4py" in ridge.__module__
+
+        if daal_check_version((2023, "P", 100)):
+            assert "sklearnex" in lr.__module__
+        else:
+            assert "daal4py" in lr.__module__
+
+        assert "sklearnex" in pca.__module__
+        assert "sklearnex" in rfc.__module__
+        assert "sklearnex" in dbscan.__module__
+        assert "sklearnex" in svc.__module__
+    finally:
+        sklearnex.unpatch_sklearn()
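
After the monkeypatch tests above, the final hunk, test_n_jobs_support.py, asserts that each patched estimator documents an n_jobs constructor parameter and that its oneDAL-backed methods emit a "setting N threads" DEBUG record on the "sklearnex" logger. In user terms the contract looks roughly like this (the estimator and n_jobs value are illustrative):

import logging

from sklearn.datasets import make_classification

from sklearnex.ensemble import RandomForestClassifier

logging.basicConfig(level=logging.DEBUG)  # the thread messages go to the "sklearnex" logger

X, y = make_classification(n_samples=100, n_features=5, random_state=0)

# n_jobs follows scikit-learn/joblib semantics (negative values count back from the
# CPU count, per the expected_n_jobs formula in the test below).
clf = RandomForestClassifier(n_jobs=2).fit(X, y)
# A DEBUG record containing "fit: setting 2 threads" is what test_n_jobs_support looks for.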
@@ -0,0 +1,103 @@
+# ==============================================================================
+# Copyright 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import inspect
+import logging
+from multiprocessing import cpu_count
+
+import pytest
+from sklearn.base import BaseEstimator
+from sklearn.datasets import make_classification
+
+from sklearnex.decomposition import PCA
+from sklearnex.dispatcher import get_patch_map
+from sklearnex.svm import SVC, NuSVC
+
+ESTIMATORS = set(
+    filter(
+        lambda x: inspect.isclass(x) and issubclass(x, BaseEstimator),
+        [value[0][0][2] for value in get_patch_map().values()],
+    )
+)
+
+X, Y = make_classification(n_samples=40, n_features=4, random_state=42)
+
+
+@pytest.mark.parametrize("estimator_class", ESTIMATORS)
+@pytest.mark.parametrize("n_jobs", [None, -1, 1, 2])
+def test_n_jobs_support(caplog, estimator_class, n_jobs):
+    def check_estimator_doc(estimator):
+        if estimator.__doc__ is not None:
+            assert "n_jobs" in estimator.__doc__
+
+    def check_n_jobs_entry_in_logs(caplog, function_name, n_jobs):
+        for rec in caplog.records:
+            if function_name in rec.message and "threads" in rec.message:
+                expected_n_jobs = n_jobs if n_jobs > 0 else cpu_count() + 1 + n_jobs
+                logging.info(f"{function_name}: setting {expected_n_jobs} threads")
+                if f"{function_name}: setting {expected_n_jobs} threads" in rec.message:
+                    return True
+        # False if n_jobs is set and not found in logs
+        return n_jobs is None
+
+    def check_method(*args, method, caplog):
+        method(*args)
+        assert check_n_jobs_entry_in_logs(caplog, method.__name__, n_jobs)
+
+    def check_methods_decoration(estimator):
+        funcs = {
+            i: getattr(estimator, i)
+            for i in dir(estimator)
+            if hasattr(estimator, i) and callable(getattr(estimator, i))
+        }
+
+        for func_name, func in funcs.items():
+            assert hasattr(func, "__onedal_n_jobs_decorated__") == (
+                func_name in estimator._n_jobs_supported_onedal_methods
+            ), f"{estimator}.{func_name} n_jobs decoration does not match {estimator} n_jobs supported methods"
+
+    caplog.set_level(logging.DEBUG, logger="sklearnex")
+    estimator_kwargs = {"n_jobs": n_jobs}
+    # by default, [Nu]SVC.predict_proba is restricted by @available_if decorator
+    if estimator_class in [SVC, NuSVC]:
+        estimator_kwargs["probability"] = True
+    # explicitly request oneDAL's PCA-Covariance algorithm
+    if estimator_class == PCA:
+        estimator_kwargs["svd_solver"] = "covariance_eigh"
+    estimator_instance = estimator_class(**estimator_kwargs)
+    # check `n_jobs` parameter doc entry
+    check_estimator_doc(estimator_class)
+    check_estimator_doc(estimator_instance)
+    # check `n_jobs` log entry for supported methods
+    # `fit` call is required before other methods
+    check_method(X, Y, method=estimator_instance.fit, caplog=caplog)
+    for method_name in estimator_instance._n_jobs_supported_onedal_methods:
+        if method_name == "fit":
+            continue
+        method = getattr(estimator_instance, method_name)
+        argdict = inspect.signature(method).parameters
+        argnum = len(
+            [i for i in argdict if argdict[i].default == inspect.Parameter.empty]
+        )
+        if argnum == 0:
+            check_method(method=method, caplog=caplog)
+        elif argnum == 1:
+            check_method(X, method=method, caplog=caplog)
+        else:
+            check_method(X, Y, method=method, caplog=caplog)
+    # check if correct methods were decorated
+    check_methods_decoration(estimator_class)
+    check_methods_decoration(estimator_instance)