PyPI - scikit-learn-intelex - Versions diffs - 2024.4.0__py311-none-win_amd64.whl → 2024.6.0__py311-none-win_amd64.whl - Mend

scikit-learn-intelex 2024.4.0__py311-none-win_amd64.whl → 2024.6.0__py311-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of scikit-learn-intelex might be problematic; further details are linked from the original page.

Files changed (113)

{scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.6.0.data}/data/Lib/site-packages/sklearnex/svm/nusvr.py RENAMED

@@ -65,6 +65,17 @@ class NuSVR(sklearn_NuSVR, BaseSVR):
     def fit(self, X, y, sample_weight=None):
         if sklearn_check_version("1.2"):
             self._validate_params()
+        elif self.nu <= 0 or self.nu > 1:
+            # else if added to correct issues with
+            # sklearn tests:
+            # svm/tests/test_sparse.py::test_error
+            # svm/tests/test_svm.py::test_bad_input
+            # for sklearn versions < 1.2 (i.e. without
+            # validate_params parameter checking)
+            # Without this, a segmentation fault with
+            # Windows fatal exception: access violation
+            # occurs
+            raise ValueError("nu <= 0 or nu > 1")
         if sklearn_check_version("1.0"):
             self._check_feature_names(X, reset=True)
         dispatch(
@@ -76,7 +87,7 @@ class NuSVR(sklearn_NuSVR, BaseSVR):
             },
             X,
             y,
-            sample_weight,
+            sample_weight=sample_weight,
         )
         return self
@@ -94,13 +105,30 @@ class NuSVR(sklearn_NuSVR, BaseSVR):
             X,
         )
+    @wrap_output_data
+    def score(self, X, y, sample_weight=None):
+        if sklearn_check_version("1.0"):
+            self._check_feature_names(X, reset=False)
+        return dispatch(
+            self,
+            "score",
+            {
+                "onedal": self.__class__._onedal_score,
+                "sklearn": sklearn_NuSVR.score,
+            },
+            X,
+            y,
+            sample_weight=sample_weight,
+        )
     def _onedal_fit(self, X, y, sample_weight=None, queue=None):
+        X, _, sample_weight = self._onedal_fit_checks(X, y, sample_weight)
         onedal_params = {
             "C": self.C,
             "nu": self.nu,
             "kernel": self.kernel,
             "degree": self.degree,
-            "gamma": self.gamma,
+            "gamma": self._compute_gamma_sigma(X),
             "coef0": self.coef0,
             "tol": self.tol,
             "shrinking": self.shrinking,
@@ -117,3 +145,4 @@ class NuSVR(sklearn_NuSVR, BaseSVR):
     fit.__doc__ = sklearn_NuSVR.fit.__doc__
     predict.__doc__ = sklearn_NuSVR.predict.__doc__
+    score.__doc__ = sklearn_NuSVR.score.__doc__

{scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.6.0.data}/data/Lib/site-packages/sklearnex/svm/svc.py RENAMED

@@ -85,6 +85,17 @@ class SVC(sklearn_SVC, BaseSVC):
     def fit(self, X, y, sample_weight=None):
         if sklearn_check_version("1.2"):
             self._validate_params()
+        elif self.C <= 0:
+            # else if added to correct issues with
+            # sklearn tests:
+            # svm/tests/test_sparse.py::test_error
+            # svm/tests/test_svm.py::test_bad_input
+            # for sklearn versions < 1.2 (i.e. without
+            # validate_params parameter checking)
+            # Without this, a segmentation fault with
+            # Windows fatal exception: access violation
+            # occurs
+            raise ValueError("C <= 0")
         if sklearn_check_version("1.0"):
             self._check_feature_names(X, reset=True)
         dispatch(
@@ -96,8 +107,9 @@ class SVC(sklearn_SVC, BaseSVC):
             },
             X,
             y,
-            sample_weight,
+            sample_weight=sample_weight,
         )
         return self
     @wrap_output_data
@@ -270,12 +282,30 @@ class SVC(sklearn_SVC, BaseSVC):
             return patching_status
         raise RuntimeError(f"Unknown method {method_name} in {class_name}")
+    def _get_sample_weight(self, X, y, sample_weight=None):
+        sample_weight = super()._get_sample_weight(X, y, sample_weight)
+        if sample_weight is None:
+            return sample_weight
+        if np.any(sample_weight <= 0) and len(np.unique(y[sample_weight > 0])) != len(
+            self.classes_
+        ):
+            raise ValueError(
+                "Invalid input - all samples with positive weights "
+                "belong to the same class"
+                if sklearn_check_version("1.2")
+                else "Invalid input - all samples with positive weights "
+                "have the same label."
+            )
+        return sample_weight
     def _onedal_fit(self, X, y, sample_weight=None, queue=None):
+        X, _, weights = self._onedal_fit_checks(X, y, sample_weight)
         onedal_params = {
             "C": self.C,
             "kernel": self.kernel,
             "degree": self.degree,
-            "gamma": self.gamma,
+            "gamma": self._compute_gamma_sigma(X),
             "coef0": self.coef0,
             "tol": self.tol,
             "shrinking": self.shrinking,
@@ -287,10 +317,16 @@ class SVC(sklearn_SVC, BaseSVC):
         }
         self._onedal_estimator = onedal_SVC(**onedal_params)
-        self._onedal_estimator.fit(X, y, sample_weight, queue=queue)
+        self._onedal_estimator.fit(X, y, weights, queue=queue)
         if self.probability:
-            self._fit_proba(X, y, sample_weight, queue=queue)
+            self._fit_proba(
+                X,
+                y,
+                sample_weight=sample_weight,
+                queue=queue,
+            )
         self._save_attributes()
     def _onedal_predict(self, X, queue=None):

{scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.6.0.data}/data/Lib/site-packages/sklearnex/svm/svr.py RENAMED

@@ -65,6 +65,17 @@ class SVR(sklearn_SVR, BaseSVR):
     def fit(self, X, y, sample_weight=None):
         if sklearn_check_version("1.2"):
             self._validate_params()
+        elif self.C <= 0:
+            # else if added to correct issues with
+            # sklearn tests:
+            # svm/tests/test_sparse.py::test_error
+            # svm/tests/test_svm.py::test_bad_input
+            # for sklearn versions < 1.2 (i.e. without
+            # validate_params parameter checking)
+            # Without this, a segmentation fault with
+            # Windows fatal exception: access violation
+            # occurs
+            raise ValueError("C <= 0")
         if sklearn_check_version("1.0"):
             self._check_feature_names(X, reset=True)
         dispatch(
@@ -76,7 +87,7 @@ class SVR(sklearn_SVR, BaseSVR):
             },
             X,
             y,
-            sample_weight,
+            sample_weight=sample_weight,
         )
         return self
@@ -95,13 +106,30 @@ class SVR(sklearn_SVR, BaseSVR):
             X,
         )
+    @wrap_output_data
+    def score(self, X, y, sample_weight=None):
+        if sklearn_check_version("1.0"):
+            self._check_feature_names(X, reset=False)
+        return dispatch(
+            self,
+            "score",
+            {
+                "onedal": self.__class__._onedal_score,
+                "sklearn": sklearn_SVR.score,
+            },
+            X,
+            y,
+            sample_weight=sample_weight,
+        )
     def _onedal_fit(self, X, y, sample_weight=None, queue=None):
+        X, _, sample_weight = self._onedal_fit_checks(X, y, sample_weight)
         onedal_params = {
             "C": self.C,
             "epsilon": self.epsilon,
             "kernel": self.kernel,
             "degree": self.degree,
-            "gamma": self.gamma,
+            "gamma": self._compute_gamma_sigma(X),
             "coef0": self.coef0,
             "tol": self.tol,
             "shrinking": self.shrinking,
@@ -118,3 +146,4 @@ class SVR(sklearn_SVR, BaseSVR):
     fit.__doc__ = sklearn_SVR.fit.__doc__
     predict.__doc__ = sklearn_SVR.predict.__doc__
+    score.__doc__ = sklearn_SVR.score.__doc__

{scikit_learn_intelex-2024.4.0.data → scikit_learn_intelex-2024.6.0.data}/data/Lib/site-packages/sklearnex/tests/_utils.py RENAMED

@@ -14,9 +14,12 @@
 # limitations under the License.
 # ==============================================================================
+from functools import partial
 from inspect import isclass
 import numpy as np
+from scipy import sparse as sp
+from sklearn import clone
 from sklearn.base import (
     BaseEstimator,
     ClassifierMixin,
@@ -87,18 +90,26 @@ mixin_map = [
 ]
-SPECIAL_INSTANCES = {
-    str(i): i
-    for i in [
-        LocalOutlierFactor(novelty=True),
-        SVC(probability=True),
-        NuSVC(probability=True),
-        KNeighborsClassifier(algorithm="brute"),
-        KNeighborsRegressor(algorithm="brute"),
-        NearestNeighbors(algorithm="brute"),
-        LogisticRegression(solver="newton-cg"),
-    ]
-}
+class _sklearn_clone_dict(dict):
+    def __getitem__(self, key):
+        return clone(super().__getitem__(key))
+SPECIAL_INSTANCES = _sklearn_clone_dict(
+    {
+        str(i): i
+        for i in [
+            LocalOutlierFactor(novelty=True),
+            SVC(probability=True),
+            NuSVC(probability=True),
+            KNeighborsClassifier(algorithm="brute"),
+            KNeighborsRegressor(algorithm="brute"),
+            NearestNeighbors(algorithm="brute"),
+            LogisticRegression(solver="newton-cg"),
+        ]
+    }
+)
 def gen_models_info(algorithms):
@@ -107,8 +118,8 @@ def gen_models_info(algorithms):
         if i in PATCHED_MODELS:
             est = PATCHED_MODELS[i]
-        elif i in SPECIAL_INSTANCES:
-            est = SPECIAL_INSTANCES[i].__class__
+        elif isinstance(algorithms[i], BaseEstimator):
+            est = algorithms[i].__class__
         else:
             raise KeyError(f"Unrecognized sklearnex estimator: {i}")
@@ -129,24 +140,54 @@ def gen_models_info(algorithms):
     return output
-def gen_dataset(estimator, queue=None, target_df=None, dtype=np.float64):
-    dataset = None
-    name = estimator.__class__.__name__
-    est = PATCHED_MODELS[name]
+def gen_dataset_type(est):
+    # est should be an estimator or estimator class
+    # dataset initialized to classification, but will be swapped
+    # for other types as necessary
+    dataset = "classification"
+    estimator = est.__class__ if isinstance(est, BaseEstimator) else est
     for mixin, _, data in mixin_map:
-        if issubclass(est, mixin) and data is not None:
+        if issubclass(estimator, mixin) and data is not None:
             dataset = data
+    return dataset
+_dataset_dict = {
+    "classification": [partial(load_iris, return_X_y=True)],
+    "regression": [partial(load_diabetes, return_X_y=True)],
+}
+def gen_dataset(
+    est,
+    datasets=_dataset_dict,
+    sparse=False,
+    queue=None,
+    target_df=None,
+    dtype=None,
+):
+    dataset_type = gen_dataset_type(est)
+    output = []
     # load data
-    if dataset == "classification" or dataset is None:
-        X, y = load_iris(return_X_y=True)
-    elif dataset == "regression":
-        X, y = load_diabetes(return_X_y=True)
-    else:
-        raise ValueError("Unknown dataset type")
-    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=target_df, dtype=dtype)
-    y = _convert_to_dataframe(y, sycl_queue=queue, target_df=target_df, dtype=dtype)
-    return X, y
+    flag = dtype is None
+    for func in datasets[dataset_type]:
+        X, y = func()
+        if flag:
+            dtype = X.dtype if hasattr(X, "dtype") else np.float64
+        if sparse:
+            X = sp.csr_matrix(X)
+        else:
+            X = _convert_to_dataframe(
+                X, sycl_queue=queue, target_df=target_df, dtype=dtype
+            )
+            y = _convert_to_dataframe(
+                y, sycl_queue=queue, target_df=target_df, dtype=dtype
+            )
+        output += [[X, y]]
+    return output
 DTYPES = [

scikit_learn_intelex-2024.6.0.data/data/Lib/site-packages/sklearnex/tests/test_common.py ADDED

@@ -0,0 +1,54 @@
+# ==============================================================================
+# Copyright 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import os
+from glob import glob
+import pytest
+ALLOWED_LOCATIONS = [
+    "_config.py",
+    "_device_offload.py",
+    "test",
+    "svc.py",
+    "svm" + os.sep + "_common.py",
+]
+def test_target_offload_ban():
+    """This test blocks the use of target_offload in
+    in sklearnex files. Offloading computation to devices
+    via target_offload should only occur externally, and not
+    within the architecture of the sklearnex classes. This
+    is for clarity, traceability and maintainability.
+    """
+    from sklearnex import __file__ as loc
+    path = loc.replace("__init__.py", "")
+    files = [y for x in os.walk(path) for y in glob(os.path.join(x[0], "*.py"))]
+    output = []
+    for f in files:
+        if open(f, "r").read().find("target_offload") != -1:
+            output += [f.replace(path, "sklearnex" + os.sep)]
+    # remove this file from the list
+    for allowed in ALLOWED_LOCATIONS:
+        output = [i for i in output if allowed not in i]
+    output = "\n".join(output)
+    assert output == "", f"sklearn versioning is occuring in: \n{output}"

scikit_learn_intelex-2024.6.0.data/data/Lib/site-packages/sklearnex/tests/test_memory_usage.py ADDED

@@ -0,0 +1,290 @@
+# ==============================================================================
+# Copyright 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import gc
+import logging
+import os
+import tracemalloc
+import types
+import warnings
+from inspect import isclass
+import numpy as np
+import pandas as pd
+import pytest
+from scipy.stats import pearsonr
+from sklearn.base import BaseEstimator, clone
+from sklearn.datasets import make_classification
+from sklearn.model_selection import KFold
+from onedal import _is_dpc_backend
+from onedal.tests.utils._dataframes_support import (
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
+from onedal.tests.utils._device_selection import get_queues, is_dpctl_available
+from sklearnex import config_context
+from sklearnex.tests._utils import PATCHED_FUNCTIONS, PATCHED_MODELS, SPECIAL_INSTANCES
+from sklearnex.utils import get_namespace
+if _is_dpc_backend:
+    from onedal import _backend
+CPU_SKIP_LIST = (
+    "TSNE",  # too slow for using in testing on common data size
+    "config_context",  # does not malloc
+    "get_config",  # does not malloc
+    "set_config",  # does not malloc
+    "SVC(probability=True)",  # memory leak fortran numpy (investigate _fit_proba)
+    "NuSVC(probability=True)",  # memory leak fortran numpy (investigate _fit_proba)
+    "IncrementalEmpiricalCovariance",  # dataframe_f issues
+    "IncrementalLinearRegression",  # TODO fix memory leak issue in private CI for data_shape = (1000, 100), data_transform_function = dataframe_f
+    "IncrementalPCA",  # TODO fix memory leak issue in private CI for data_shape = (1000, 100), data_transform_function = dataframe_f
+    "LogisticRegression(solver='newton-cg')",  # memory leak fortran (1000, 100)
+)
+GPU_SKIP_LIST = (
+    "TSNE",  # too slow for using in testing on common data size
+    "RandomForestRegressor",  # too slow for using in testing on common data size
+    "KMeans",  # does not support GPU offloading
+    "config_context",  # does not malloc
+    "get_config",  # does not malloc
+    "set_config",  # does not malloc
+    "Ridge",  # does not support GPU offloading (fails silently)
+    "ElasticNet",  # does not support GPU offloading (fails silently)
+    "Lasso",  # does not support GPU offloading (fails silently)
+    "SVR",  # does not support GPU offloading (fails silently)
+    "NuSVR",  # does not support GPU offloading (fails silently)
+    "NuSVC",  # does not support GPU offloading (fails silently)
+    "LogisticRegression",  # default parameters not supported, see solver=newton-cg
+    "NuSVC(probability=True)",  # does not support GPU offloading (fails silently)
+    "IncrementalLinearRegression",  # issue with potrf with the specific dataset
+    "LinearRegression",  # issue with potrf with the specific dataset
+)
+def gen_functions(functions):
+    func_dict = functions.copy()
+    roc_auc_score = func_dict.pop("roc_auc_score")
+    func_dict["roc_auc_score"] = lambda x, y: roc_auc_score(y, y)
+    pairwise_distances = func_dict.pop("pairwise_distances")
+    func_dict["pairwise_distances(metric='cosine')"] = lambda x, y: pairwise_distances(
+        x, metric="cosine"
+    )
+    func_dict["pairwise_distances(metric='correlation')"] = (
+        lambda x, y: pairwise_distances(x, metric="correlation")
+    )
+    _assert_all_finite = func_dict.pop("_assert_all_finite")
+    func_dict["_assert_all_finite"] = lambda x, y: [
+        _assert_all_finite(x),
+        _assert_all_finite(y),
+    ]
+    return func_dict
+FUNCTIONS = gen_functions(PATCHED_FUNCTIONS)
+CPU_ESTIMATORS = {
+    k: v
+    for k, v in {**PATCHED_MODELS, **SPECIAL_INSTANCES, **FUNCTIONS}.items()
+    if not k in CPU_SKIP_LIST
+}
+GPU_ESTIMATORS = {
+    k: v
+    for k, v in {**PATCHED_MODELS, **SPECIAL_INSTANCES}.items()
+    if not k in GPU_SKIP_LIST
+}
+data_shapes = [
+    pytest.param((1000, 100), id="(1000, 100)"),
+    pytest.param((2000, 50), id="(2000, 50)"),
+]
+EXTRA_MEMORY_THRESHOLD = 0.15
+N_SPLITS = 10
+ORDER_DICT = {"F": np.asfortranarray, "C": np.ascontiguousarray}
+def gen_clsf_data(n_samples, n_features):
+    data, label = make_classification(
+        n_classes=2, n_samples=n_samples, n_features=n_features, random_state=777
+    )
+    return (
+        data,
+        label,
+        data.size * data.dtype.itemsize + label.size * label.dtype.itemsize,
+    )
+def get_traced_memory(queue=None):
+    if _is_dpc_backend and queue and queue.sycl_device.is_gpu:
+        return _backend.get_used_memory(queue)
+    else:
+        return tracemalloc.get_traced_memory()[0]
+def take(x, index, axis=0, queue=None):
+    xp, array_api = get_namespace(x)
+    if array_api:
+        return xp.take(x, xp.asarray(index, device=queue), axis=axis)
+    else:
+        return x.take(index, axis=axis)
+def split_train_inference(kf, x, y, estimator, queue=None):
+    mem_tracks = []
+    for train_index, test_index in kf.split(x):
+        x_train = take(x, train_index, queue=queue)
+        y_train = take(y, train_index, queue=queue)
+        x_test = take(x, test_index, queue=queue)
+        y_test = take(y, test_index, queue=queue)
+        if isclass(estimator) and issubclass(estimator, BaseEstimator):
+            alg = estimator()
+            flag = True
+        elif isinstance(estimator, BaseEstimator):
+            alg = clone(estimator)
+            flag = True
+        else:
+            flag = False
+        if flag:
+            alg.fit(x_train, y_train)
+            if hasattr(alg, "predict"):
+                alg.predict(x_test)
+            elif hasattr(alg, "transform"):
+                alg.transform(x_test)
+            elif hasattr(alg, "kneighbors"):
+                alg.kneighbors(x_test)
+            del alg
+        else:
+            estimator(x_train, y_train)
+        del x_train, x_test, y_train, y_test, flag
+        mem_tracks.append(get_traced_memory(queue))
+    return mem_tracks
+def _kfold_function_template(estimator, dataframe, data_shape, queue=None, func=None):
+    tracemalloc.start()
+    n_samples, n_features = data_shape
+    X, y, data_memory_size = gen_clsf_data(n_samples, n_features)
+    kf = KFold(n_splits=N_SPLITS)
+    if func:
+        X = func(X)
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
+    mem_before = get_traced_memory(queue)
+    mem_tracks = split_train_inference(kf, X, y, estimator, queue=queue)
+    mem_iter_diffs = np.array(mem_tracks[1:]) - np.array(mem_tracks[:-1])
+    mem_incr_mean, mem_incr_std = mem_iter_diffs.mean(), mem_iter_diffs.std()
+    mem_incr_mean, mem_incr_std = round(mem_incr_mean), round(mem_incr_std)
+    with warnings.catch_warnings():
+        # In the case that the memory usage is constant, this will raise
+        # a ConstantInputWarning error in pearsonr from scipy, this can
+        # be ignored.
+        warnings.filterwarnings(
+            "ignore",
+            message="An input array is constant; the correlation coefficient is not defined",
+        )
+        mem_iter_corr, _ = pearsonr(mem_tracks, list(range(len(mem_tracks))))
+    if mem_iter_corr > 0.95:
+        logging.warning(
+            "Memory usage is steadily increasing with iterations "
+            "(Pearson correlation coefficient between "
+            f"memory tracks and iterations is {mem_iter_corr})\n"
+            "Memory usage increase per iteration: "
+            f"{mem_incr_mean}±{mem_incr_std} bytes"
+        )
+    mem_before_gc = get_traced_memory(queue)
+    mem_diff = mem_before_gc - mem_before
+    if isinstance(estimator, BaseEstimator):
+        name = str(estimator)
+    else:
+        name = estimator.__name__
+    message = (
+        "Size of extra allocated memory {} using garbage collector "
+        f"is greater than {EXTRA_MEMORY_THRESHOLD * 100}% of input data"
+        f"\n\tAlgorithm: {name}"
+        f"\n\tInput data size: {data_memory_size} bytes"
+        "\n\tExtra allocated memory size: {} bytes"
+        " / {} %"
+    )
+    if mem_diff >= EXTRA_MEMORY_THRESHOLD * data_memory_size:
+        logging.warning(
+            message.format(
+                "before", mem_diff, round((mem_diff) / data_memory_size * 100, 2)
+            )
+        )
+    gc.collect()
+    mem_after = get_traced_memory(queue)
+    tracemalloc.stop()
+    mem_diff = mem_after - mem_before
+    # GPU offloading with SYCL contains a program/kernel cache which should
+    # be controllable via a KernelProgramCache object in the SYCL context.
+    # The programs and kernels are stored on the GPU, but cannot be cleared
+    # as this class is not available for access in all oneDAL DPC++ runtimes.
+    # Therefore, until this is implemented this test must be skipped for gpu
+    # as it looks like a memory leak (at least there is no way to discern a
+    # leak on the first run).
+    if queue is None or queue.sycl_device.is_cpu:
+        assert mem_diff < EXTRA_MEMORY_THRESHOLD * data_memory_size, message.format(
+            "after", mem_diff, round((mem_diff) / data_memory_size * 100, 2)
+        )
+@pytest.mark.parametrize("order", ["F", "C"])
+@pytest.mark.parametrize(
+    "dataframe,queue", get_dataframes_and_queues("numpy,pandas,dpctl", "cpu")
+)
+@pytest.mark.parametrize("estimator", CPU_ESTIMATORS.keys())
+@pytest.mark.parametrize("data_shape", data_shapes)
+def test_memory_leaks(estimator, dataframe, queue, order, data_shape):
+    func = ORDER_DICT[order]
+    if estimator == "_assert_all_finite" and queue is not None:
+        pytest.skip(f"{estimator} is not designed for device offloading")
+    _kfold_function_template(
+        CPU_ESTIMATORS[estimator], dataframe, data_shape, queue, func
+    )
+@pytest.mark.skipif(
+    os.getenv("ZES_ENABLE_SYSMAN") is None or not is_dpctl_available("gpu"),
+    reason="SYCL device memory leak check requires the level zero sysman",
+)
+@pytest.mark.parametrize("queue", get_queues("gpu"))
+@pytest.mark.parametrize("estimator", GPU_ESTIMATORS.keys())
+@pytest.mark.parametrize("order", ["F", "C"])
+@pytest.mark.parametrize("data_shape", data_shapes)
+def test_gpu_memory_leaks(estimator, queue, order, data_shape):
+    func = ORDER_DICT[order]
+    if "ExtraTrees" in estimator and data_shape == (2000, 50):
+        pytest.skip("Avoid a segmentation fault in Extra Trees algorithms")
+    with config_context(target_offload=queue):
+        _kfold_function_template(GPU_ESTIMATORS[estimator], None, data_shape, queue, func)