PyPI - scikit-learn-intelex - Versions diffs - 2024.2.0__py310-none-manylinux1_x86_64.whl → 2024.4.0__py310-none-manylinux1_x86_64.whl - Mend

scikit-learn-intelex 2024.2.0__py310-none-manylinux1_x86_64.whl → 2024.4.0__py310-none-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (50)

{scikit_learn_intelex-2024.2.0.dist-info → scikit_learn_intelex-2024.4.0.dist-info}/METADATA +2 -2
{scikit_learn_intelex-2024.2.0.dist-info → scikit_learn_intelex-2024.4.0.dist-info}/RECORD +45 -45
sklearnex/__init__.py +9 -7
sklearnex/_device_offload.py +31 -4
sklearnex/basic_statistics/__init__.py +2 -1
sklearnex/basic_statistics/incremental_basic_statistics.py +288 -0
sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +386 -0
sklearnex/cluster/dbscan.py +3 -1
sklearnex/conftest.py +63 -0
sklearnex/decomposition/pca.py +319 -1
sklearnex/decomposition/tests/test_pca.py +34 -5
sklearnex/dispatcher.py +74 -43
sklearnex/ensemble/_forest.py +78 -89
sklearnex/ensemble/tests/test_forest.py +15 -19
sklearnex/linear_model/linear.py +275 -340
sklearnex/linear_model/logistic_regression.py +63 -11
sklearnex/linear_model/tests/test_linear.py +40 -5
sklearnex/linear_model/tests/test_logreg.py +0 -2
sklearnex/neighbors/_lof.py +74 -20
sklearnex/neighbors/common.py +4 -1
sklearnex/neighbors/knn_classification.py +44 -131
sklearnex/neighbors/knn_regression.py +16 -126
sklearnex/neighbors/knn_unsupervised.py +11 -86
sklearnex/neighbors/tests/test_neighbors.py +0 -5
sklearnex/preview/__init__.py +1 -1
sklearnex/preview/cluster/k_means.py +5 -73
sklearnex/preview/covariance/covariance.py +6 -5
sklearnex/preview/covariance/tests/test_covariance.py +18 -5
sklearnex/spmd/ensemble/forest.py +4 -12
sklearnex/svm/_common.py +4 -7
sklearnex/svm/nusvc.py +70 -50
sklearnex/svm/nusvr.py +6 -52
sklearnex/svm/svc.py +70 -51
sklearnex/svm/svr.py +3 -49
sklearnex/tests/_utils.py +164 -0
sklearnex/tests/test_memory_usage.py +8 -3
sklearnex/tests/test_monkeypatch.py +177 -149
sklearnex/tests/test_n_jobs_support.py +8 -2
sklearnex/tests/test_parallel.py +6 -8
sklearnex/tests/test_patching.py +322 -87
sklearnex/utils/__init__.py +2 -1
sklearnex/utils/_namespace.py +97 -0
sklearnex/preview/decomposition/__init__.py +0 -19
sklearnex/preview/decomposition/pca.py +0 -374
sklearnex/preview/decomposition/tests/test_preview_pca.py +0 -42
sklearnex/tests/_models_info.py +0 -170
sklearnex/tests/utils/_launch_algorithms.py +0 -118
{scikit_learn_intelex-2024.2.0.dist-info → scikit_learn_intelex-2024.4.0.dist-info}/LICENSE.txt +0 -0
{scikit_learn_intelex-2024.2.0.dist-info → scikit_learn_intelex-2024.4.0.dist-info}/WHEEL +0 -0
{scikit_learn_intelex-2024.2.0.dist-info → scikit_learn_intelex-2024.4.0.dist-info}/top_level.txt +0 -0

sklearnex/tests/test_patching.py CHANGED Viewed

@@ -14,107 +14,306 @@
 # limitations under the License.
 # ==============================================================================
+import importlib
 import inspect
+import logging
 import os
-import pathlib
 import re
-import subprocess
 import sys
-from inspect import isclass
+from inspect import signature
+import numpy as np
+import numpy.random as nprnd
 import pytest
-from _models_info import TO_SKIP
 from sklearn.base import BaseEstimator
-from sklearnex import get_patch_map, is_patched_instance, patch_sklearn, unpatch_sklearn
-def get_branch(s):
-    if len(s) == 0:
-        return "NO INFO"
-    for i in s:
-        if "failed to run accelerated version, fallback to original Scikit-learn" in i:
-            return "was in OPT, but go in Scikit"
-    for i in s:
-        if "running accelerated version" in i:
-            return "OPT"
-    return "Scikit"
-def run_parse(mas, result):
-    name, dtype = mas[0].split()
-    temp = []
-    INFO_POS = 16
-    for i in range(1, len(mas)):
-        mas[i] = mas[i][INFO_POS:]  # remove 'SKLEARNEX INFO: '
-        if not mas[i].startswith("sklearn"):
-            ind = name + " " + dtype + " " + mas[i]
-            result[ind] = get_branch(temp)
-            temp.clear()
-        else:
-            temp.append(mas[i])
-def get_result_log():
-    os.environ["SKLEARNEX_VERBOSE"] = "INFO"
-    absolute_path = str(pathlib.Path(__file__).parent.absolute())
-    try:
-        process = subprocess.check_output(
-            [sys.executable, absolute_path + "/utils/_launch_algorithms.py"]
+from daal4py.sklearn._utils import sklearn_check_version
+from onedal.tests.utils._dataframes_support import (
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
+from sklearnex import is_patched_instance
+from sklearnex.dispatcher import _is_preview_enabled
+from sklearnex.metrics import pairwise_distances, roc_auc_score
+from sklearnex.tests._utils import (
+    DTYPES,
+    PATCHED_FUNCTIONS,
+    PATCHED_MODELS,
+    SPECIAL_INSTANCES,
+    UNPATCHED_FUNCTIONS,
+    UNPATCHED_MODELS,
+    gen_dataset,
+    gen_models_info,
+)
+@pytest.mark.parametrize("dtype", DTYPES)
+@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("metric", ["cosine", "correlation"])
+def test_pairwise_distances_patching(caplog, dataframe, queue, dtype, metric):
+    with caplog.at_level(logging.WARNING, logger="sklearnex"):
+        if dtype == np.float16 and queue and not queue.sycl_device.has_aspect_fp16:
+            pytest.skip("Hardware does not support fp16 SYCL testing")
+        elif dtype == np.float64 and queue and not queue.sycl_device.has_aspect_fp64:
+            pytest.skip("Hardware does not support fp64 SYCL testing")
+        elif queue and queue.sycl_device.is_gpu:
+            pytest.skip("pairwise_distances does not support GPU queues")
+        rng = nprnd.default_rng()
+        X = _convert_to_dataframe(
+            rng.random(size=1000).reshape(1, -1),
+            sycl_queue=queue,
+            target_df=dataframe,
+            dtype=dtype,
+        )
+        _ = pairwise_distances(X, metric=metric)
+    assert all(
+        [
+            "running accelerated version" in i.message
+            or "fallback to original Scikit-learn" in i.message
+            for i in caplog.records
+        ]
+    ), f"sklearnex patching issue in pairwise_distances with log: \n{caplog.text}"
+@pytest.mark.parametrize(
+    "dtype", [i for i in DTYPES if "32" in i.__name__ or "64" in i.__name__]
+)
+@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues())
+def test_roc_auc_score_patching(caplog, dataframe, queue, dtype):
+    if dtype in [np.uint32, np.uint64] and sys.platform == "win32":
+        pytest.skip("Windows issue with unsigned ints")
+    elif dtype == np.float64 and queue and not queue.sycl_device.has_aspect_fp64:
+        pytest.skip("Hardware does not support fp64 SYCL testing")
+    with caplog.at_level(logging.WARNING, logger="sklearnex"):
+        rng = nprnd.default_rng()
+        X = _convert_to_dataframe(
+            rng.integers(2, size=1000),
+            sycl_queue=queue,
+            target_df=dataframe,
+            dtype=dtype,
+        )
+        y = _convert_to_dataframe(
+            rng.integers(2, size=1000),
+            sycl_queue=queue,
+            target_df=dataframe,
+            dtype=dtype,
         )
-    except subprocess.CalledProcessError as e:
-        print(e)
-        exit(1)
-    mas = []
-    result = {}
-    for i in process.decode().split("\n"):
-        if i.startswith("SKLEARNEX WARNING"):
-            continue
-        if not i.startswith("SKLEARNEX INFO") and len(mas) != 0:
-            run_parse(mas, result)
-            mas.clear()
-            mas.append(i.strip())
-        else:
-            mas.append(i.strip())
-    del os.environ["SKLEARNEX_VERBOSE"]
-    return result
-result_log = get_result_log()
-@pytest.mark.parametrize("configuration", result_log)
-def test_patching(configuration):
-    if "OPT" in result_log[configuration]:
-        return
-    for skip in TO_SKIP:
-        if re.search(skip, configuration) is not None:
-            pytest.skip("SKIPPED", allow_module_level=False)
-    raise ValueError("Test patching failed: " + configuration)
-def _load_all_models(patched):
-    if patched:
-        patch_sklearn()
-    models = {}
-    for patch_infos in get_patch_map().values():
-        maybe_class = getattr(patch_infos[0][0][0], patch_infos[0][0][1], None)
-        if (
-            maybe_class is not None
-            and isclass(maybe_class)
-            and issubclass(maybe_class, BaseEstimator)
+        _ = roc_auc_score(X, y)
+    assert all(
+        [
+            "running accelerated version" in i.message
+            or "fallback to original Scikit-learn" in i.message
+            for i in caplog.records
+        ]
+    ), f"sklearnex patching issue in roc_auc_score with log: \n{caplog.text}"
+@pytest.mark.parametrize("dtype", DTYPES)
+@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("estimator, method", gen_models_info(PATCHED_MODELS))
+def test_standard_estimator_patching(caplog, dataframe, queue, dtype, estimator, method):
+    with caplog.at_level(logging.WARNING, logger="sklearnex"):
+        est = PATCHED_MODELS[estimator]()
+        if queue:
+            if dtype == np.float16 and not queue.sycl_device.has_aspect_fp16:
+                pytest.skip("Hardware does not support fp16 SYCL testing")
+            elif dtype == np.float64 and not queue.sycl_device.has_aspect_fp64:
+                pytest.skip("Hardware does not support fp64 SYCL testing")
+            elif queue.sycl_device.is_gpu and estimator in [
+                "KMeans",
+                "ElasticNet",
+                "Lasso",
+                "Ridge",
+            ]:
+                pytest.skip(f"{estimator} does not support GPU queues")
+        if estimator == "TSNE" and method == "fit_transform":
+            pytest.skip("TSNE.fit_transform is too slow for common testing")
+        elif (
+            estimator == "Ridge"
+            and method in ["predict", "score"]
+            and sys.platform == "win32"
+            and dtype in [np.uint32, np.uint64]
         ):
-            models[patch_infos[0][0][1]] = maybe_class
+            pytest.skip("Windows segmentation fault for Ridge.predict for unsigned ints")
+        elif method and not hasattr(est, method):
+            pytest.skip(f"sklearn available_if prevents testing {estimator}.{method}")
+        X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)
+        est.fit(X, y)
+        if method:
+            if method != "score":
+                getattr(est, method)(X)
+            else:
+                est.score(X, y)
+    assert all(
+        [
+            "running accelerated version" in i.message
+            or "fallback to original Scikit-learn" in i.message
+            for i in caplog.records
+        ]
+    ), f"sklearnex patching issue in {estimator}.{method} with log: \n{caplog.text}"
+@pytest.mark.parametrize("dtype", DTYPES)
+@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("estimator, method", gen_models_info(SPECIAL_INSTANCES))
+def test_special_estimator_patching(caplog, dataframe, queue, dtype, estimator, method):
+    # prepare logging
+    with caplog.at_level(logging.WARNING, logger="sklearnex"):
+        est = SPECIAL_INSTANCES[estimator]
+        # It's not possible to get the dpnp/dpctl arrays to be in the proper dtype
+        if dtype == np.float16 and queue and not queue.sycl_device.has_aspect_fp16:
+            pytest.skip("Hardware does not support fp16 SYCL testing")
+        elif dtype == np.float64 and queue and not queue.sycl_device.has_aspect_fp64:
+            pytest.skip("Hardware does not support fp64 SYCL testing")
+        X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)
+        est.fit(X, y)
+        if method and not hasattr(est, method):
+            pytest.skip(f"sklearn available_if prevents testing {estimator}.{method}")
+        if method:
+            if method != "score":
+                getattr(est, method)(X)
+            else:
+                est.score(X, y)
+    assert all(
+        [
+            "running accelerated version" in i.message
+            or "fallback to original Scikit-learn" in i.message
+            for i in caplog.records
+        ]
+    ), f"sklearnex patching issue in {estimator}.{method} with log: \n{caplog.text}"
+@pytest.mark.parametrize("estimator", UNPATCHED_MODELS.keys())
+def test_standard_estimator_signatures(estimator):
+    est = PATCHED_MODELS[estimator]()
+    unpatched_est = UNPATCHED_MODELS[estimator]()
+    # all public sklearn methods should have signature matches in sklearnex
+    unpatched_est_methods = [
+        i
+        for i in dir(unpatched_est)
+        if not i.startswith("_") and not i.endswith("_") and hasattr(unpatched_est, i)
+    ]
+    for method in unpatched_est_methods:
+        est_method = getattr(est, method)
+        unpatched_est_method = getattr(unpatched_est, method)
+        if callable(unpatched_est_method):
+            regex = rf"(?:sklearn|daal4py)\S*{estimator}"  # needed due to differences in module structure
+            patched_sig = re.sub(regex, estimator, str(signature(est_method)))
+            unpatched_sig = re.sub(regex, estimator, str(signature(unpatched_est_method)))
+            assert (
+                patched_sig == unpatched_sig
+            ), f"Signature of {estimator}.{method} does not match sklearn"
+@pytest.mark.parametrize("estimator", UNPATCHED_MODELS.keys())
+def test_standard_estimator_init_signatures(estimator):
+    # Several estimators have additional parameters that are user-accessible
+    # which are sklearnex-specific. They will fail and are removed from tests.
+    # remove n_jobs due to estimator patching for sklearnex (known deviation)
+    patched_sig = str(signature(PATCHED_MODELS[estimator].__init__))
+    unpatched_sig = str(signature(UNPATCHED_MODELS[estimator].__init__))
+    # Sklearnex allows for positional kwargs and n_jobs, when sklearn doesn't
+    for kwarg in ["n_jobs=None", "*"]:
+        patched_sig = patched_sig.replace(", " + kwarg, "")
+        unpatched_sig = unpatched_sig.replace(", " + kwarg, "")
+    # Special sklearnex-specific kwargs are removed from signatures here
+    if estimator in [
+        "RandomForestRegressor",
+        "RandomForestClassifier",
+        "ExtraTreesRegressor",
+        "ExtraTreesClassifier",
+    ]:
+        for kwarg in ["min_bin_size=1", "max_bins=256"]:
+            patched_sig = patched_sig.replace(", " + kwarg, "")
+    assert (
+        patched_sig == unpatched_sig
+    ), f"Signature of {estimator}.__init__ does not match sklearn"
+@pytest.mark.parametrize(
+    "function",
+    [
+        i
+        for i in UNPATCHED_FUNCTIONS.keys()
+        if i not in ["train_test_split", "set_config", "config_context"]
+    ],
+)
+def test_patched_function_signatures(function):
+    # certain functions are dropped from the test
+    # as they add functionality to the underlying sklearn function
+    if not sklearn_check_version("1.1") and function == "_assert_all_finite":
+        pytest.skip("Sklearn versioning not added to _assert_all_finite")
+    func = PATCHED_FUNCTIONS[function]
+    unpatched_func = UNPATCHED_FUNCTIONS[function]
+    if callable(unpatched_func):
+        assert str(signature(func)) == str(
+            signature(unpatched_func)
+        ), f"Signature of {func} does not match sklearn"
+def test_patch_map_match():
+    # This rule applies to functions and classes which are out of preview.
+    # Items listed in a matching submodule's __all__ attribute should be
+    # in get_patch_map. There should not be any missing or additional elements.
-    if patched:
-        unpatch_sklearn()
+    def list_all_attr(string):
+        try:
+            modules = set(importlib.import_module(string).__all__)
+        except ModuleNotFoundError:
+            modules = set([None])
+        return modules
-    return models
+    if _is_preview_enabled():
+        pytest.skip("preview sklearnex has been activated")
+    patched = {**PATCHED_MODELS, **PATCHED_FUNCTIONS}
+    sklearnex__all__ = list_all_attr("sklearnex")
+    sklearn__all__ = list_all_attr("sklearn")
-PATCHED_MODELS = _load_all_models(patched=True)
-UNPATCHED_MODELS = _load_all_models(patched=False)
+    module_map = {i: i for i in sklearnex__all__.intersection(sklearn__all__)}
+    # _assert_all_finite patches an internal sklearn function which isn't
+    # exposed via __all__ in sklearn. It is a special case where this rule
+    # is not applied (e.g. it is grandfathered in).
+    del patched["_assert_all_finite"]
+    # remove all scikit-learn-intelex-only estimators
+    for i in patched.copy():
+        if i not in UNPATCHED_MODELS and i not in UNPATCHED_FUNCTIONS:
+            del patched[i]
+    for module in module_map:
+        sklearn_module__all__ = list_all_attr("sklearn." + module_map[module])
+        sklearnex_module__all__ = list_all_attr("sklearnex." + module)
+        intersect = sklearnex_module__all__.intersection(sklearn_module__all__)
+        for i in intersect:
+            if i:
+                del patched[i]
+            else:
+                del patched[module]
+    assert patched == {}, f"{patched.keys()} were not properly patched"
 @pytest.mark.parametrize("estimator", UNPATCHED_MODELS.keys())
@@ -125,6 +324,42 @@ def test_is_patched_instance(estimator):
     assert not is_patched_instance(unpatched), f"{unpatched} is an unpatched instance"
+@pytest.mark.parametrize("estimator", PATCHED_MODELS.keys())
+def test_if_estimator_inherits_sklearn(estimator):
+    est = PATCHED_MODELS[estimator]
+    if estimator in UNPATCHED_MODELS:
+        assert issubclass(
+            est, UNPATCHED_MODELS[estimator]
+        ), f"{estimator} does not inherit from the patched sklearn estimator"
+    else:
+        assert issubclass(est, BaseEstimator)
+@pytest.mark.parametrize("estimator", UNPATCHED_MODELS.keys())
+def test_docstring_patching_match(estimator):
+    patched = PATCHED_MODELS[estimator]
+    unpatched = UNPATCHED_MODELS[estimator]
+    patched_docstrings = {
+        i: getattr(patched, i).__doc__
+        for i in dir(patched)
+        if not i.startswith("_") and not i.endswith("_") and hasattr(patched, i)
+    }
+    unpatched_docstrings = {
+        i: getattr(unpatched, i).__doc__
+        for i in dir(unpatched)
+        if not i.startswith("_") and not i.endswith("_") and hasattr(unpatched, i)
+    }
+    # check class docstring match if a docstring is available
+    assert (patched.__doc__ is None) == (unpatched.__doc__ is None)
+    # check class attribute docstrings
+    for i in unpatched_docstrings:
+        assert (patched_docstrings[i] is None) == (unpatched_docstrings[i] is None)
 @pytest.mark.parametrize("member", ["_onedal_cpu_supported", "_onedal_gpu_supported"])
 @pytest.mark.parametrize(
     "name",

sklearnex/utils/__init__.py CHANGED Viewed

@@ -14,6 +14,7 @@
 # limitations under the License.
 # ===============================================================================
+from ._namespace import get_namespace
 from .validation import _assert_all_finite
-__all__ = ["_assert_all_finite"]
+__all__ = ["get_namespace", "_assert_all_finite"]

sklearnex/utils/_namespace.py ADDED Viewed

@@ -0,0 +1,97 @@
+# ==============================================================================
+# Copyright 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import numpy as np
+from daal4py.sklearn._utils import sklearn_check_version
+from .._device_offload import dpnp_available
+if sklearn_check_version("1.2"):
+    from sklearn.utils._array_api import get_namespace as sklearn_get_namespace
+if dpnp_available:
+    import dpnp
+def get_namespace(*arrays):
+    """Get namespace of arrays.
+    Introspect `arrays` arguments and return their common Array API
+    compatible namespace object, if any. NumPy 1.22 and later can
+    construct such containers using the `numpy.array_api` namespace
+    for instance.
+    This function will return the namespace of SYCL-related arrays
+    which define the __sycl_usm_array_interface__ attribute
+    regardless of array_api support, the configuration of
+    array_api_dispatch, or scikit-learn version.
+    See: https://numpy.org/neps/nep-0047-array-api-standard.html
+    If `arrays` are regular numpy arrays, an instance of the
+    `_NumPyApiWrapper` compatibility wrapper is returned instead.
+    Namespace support is not enabled by default. To enable it
+    call:
+      sklearn.set_config(array_api_dispatch=True)
+    or:
+      with sklearn.config_context(array_api_dispatch=True):
+          # your code here
+    Otherwise an instance of the `_NumPyApiWrapper`
+    compatibility wrapper is always returned irrespective of
+    the fact that arrays implement the `__array_namespace__`
+    protocol or not.
+    Parameters
+    ----------
+    *arrays : array objects
+        Array objects.
+    Returns
+    -------
+    namespace : module
+        Namespace shared by array objects.
+    is_array_api : bool
+        True if the arrays are containers that implement the Array API spec.
+    """
+    # sycl support designed to work regardless of array_api_dispatch sklearn global value
+    sycl_type = {type(x): x for x in arrays if hasattr(x, "__sycl_usm_array_interface__")}
+    if len(sycl_type) > 1:
+        raise ValueError(f"Multiple SYCL types for array inputs: {sycl_type}")
+    if sycl_type:
+        (X,) = sycl_type.values()
+        if hasattr(X, "__array_namespace__"):
+            return X.__array_namespace__(), True
+        elif dpnp_available and isinstance(X, dpnp.ndarray):
+            return dpnp, False
+        else:
+            raise ValueError(f"SYCL type not recognized: {sycl_type}")
+    elif sklearn_check_version("1.2"):
+        return sklearn_get_namespace(*arrays)
+    else:
+        return np, True

sklearnex/preview/decomposition/__init__.py DELETED Viewed

@@ -1,19 +0,0 @@
-# ===============================================================================
-# Copyright 2023 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ===============================================================================
-from .pca import PCA
-__all__ = ["PCA"]