PyPI - scikit-learn-intelex - Versions diffs - 2024.3.0__py310-none-manylinux1_x86_64.whl → 2024.5.0__py310-none-manylinux1_x86_64.whl - Mend

scikit-learn-intelex 2024.3.0__py310-none-manylinux1_x86_64.whl → 2024.5.0__py310-none-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (43)

{scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/METADATA +2 -2
{scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/RECORD +43 -37
sklearnex/_device_offload.py +39 -5
sklearnex/basic_statistics/__init__.py +2 -1
sklearnex/basic_statistics/incremental_basic_statistics.py +288 -0
sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +384 -0
sklearnex/covariance/incremental_covariance.py +217 -30
sklearnex/covariance/tests/test_incremental_covariance.py +54 -17
sklearnex/decomposition/pca.py +71 -19
sklearnex/decomposition/tests/test_pca.py +2 -2
sklearnex/dispatcher.py +33 -2
sklearnex/ensemble/_forest.py +73 -79
sklearnex/linear_model/__init__.py +5 -3
sklearnex/linear_model/incremental_linear.py +387 -0
sklearnex/linear_model/linear.py +275 -340
sklearnex/linear_model/logistic_regression.py +50 -9
sklearnex/linear_model/tests/test_incremental_linear.py +200 -0
sklearnex/linear_model/tests/test_linear.py +40 -5
sklearnex/neighbors/_lof.py +53 -36
sklearnex/neighbors/common.py +4 -1
sklearnex/neighbors/knn_classification.py +37 -122
sklearnex/neighbors/knn_regression.py +10 -117
sklearnex/neighbors/knn_unsupervised.py +6 -78
sklearnex/neighbors/tests/test_neighbors.py +2 -2
sklearnex/preview/cluster/k_means.py +5 -73
sklearnex/preview/covariance/covariance.py +6 -5
sklearnex/preview/covariance/tests/test_covariance.py +18 -5
sklearnex/svm/_common.py +4 -7
sklearnex/svm/nusvc.py +66 -50
sklearnex/svm/nusvr.py +3 -49
sklearnex/svm/svc.py +66 -51
sklearnex/svm/svr.py +3 -49
sklearnex/tests/_utils.py +34 -16
sklearnex/tests/test_memory_usage.py +5 -1
sklearnex/tests/test_n_jobs_support.py +12 -2
sklearnex/tests/test_patching.py +87 -58
sklearnex/tests/test_run_to_run_stability_tests.py +1 -1
sklearnex/utils/__init__.py +2 -1
sklearnex/utils/_namespace.py +97 -0
sklearnex/utils/tests/test_finite.py +89 -0
{scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/LICENSE.txt +0 -0
{scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/WHEEL +0 -0
{scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/top_level.txt +0 -0

sklearnex/tests/test_patching.py CHANGED

@@ -26,24 +26,7 @@ from inspect import signature
 import numpy as np
 import numpy.random as nprnd
 import pytest
-from _utils import (
-    DTYPES,
-    PATCHED_FUNCTIONS,
-    PATCHED_MODELS,
-    SPECIAL_INSTANCES,
-    UNPATCHED_FUNCTIONS,
-    UNPATCHED_MODELS,
-    gen_dataset,
-    gen_models_info,
-)
-from sklearn.base import (
-    BaseEstimator,
-    ClassifierMixin,
-    ClusterMixin,
-    OutlierMixin,
-    RegressorMixin,
-    TransformerMixin,
-)
+from sklearn.base import BaseEstimator
 from daal4py.sklearn._utils import sklearn_check_version
 from onedal.tests.utils._dataframes_support import (
@@ -53,21 +36,42 @@ from onedal.tests.utils._dataframes_support import (
 from sklearnex import is_patched_instance
 from sklearnex.dispatcher import _is_preview_enabled
 from sklearnex.metrics import pairwise_distances, roc_auc_score
+from sklearnex.tests._utils import (
+    DTYPES,
+    PATCHED_FUNCTIONS,
+    PATCHED_MODELS,
+    SPECIAL_INSTANCES,
+    UNPATCHED_FUNCTIONS,
+    UNPATCHED_MODELS,
+    gen_dataset,
+    gen_models_info,
+)
 @pytest.mark.parametrize("dtype", DTYPES)
-@pytest.mark.parametrize(
-    "dataframe, queue", get_dataframes_and_queues(dataframe_filter_="numpy")
-)
+@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues())
 @pytest.mark.parametrize("metric", ["cosine", "correlation"])
 def test_pairwise_distances_patching(caplog, dataframe, queue, dtype, metric):
     with caplog.at_level(logging.WARNING, logger="sklearnex"):
+        if dtype == np.float16 and queue and not queue.sycl_device.has_aspect_fp16:
+            pytest.skip("Hardware does not support fp16 SYCL testing")
+        elif dtype == np.float64 and queue and not queue.sycl_device.has_aspect_fp64:
+            pytest.skip("Hardware does not support fp64 SYCL testing")
+        elif queue and queue.sycl_device.is_gpu:
+            pytest.skip("pairwise_distances does not support GPU queues")
         rng = nprnd.default_rng()
-        X = _convert_to_dataframe(
-            rng.random(size=1000), sycl_queue=queue, target_df=dataframe, dtype=dtype
-        )
+        if dataframe == "pandas":
+            X = _convert_to_dataframe(
+                rng.random(size=1000).astype(dtype).reshape(1, -1),
+                target_df=dataframe,
+            )
+        else:
+            X = _convert_to_dataframe(
+                rng.random(size=1000), sycl_queue=queue, target_df=dataframe, dtype=dtype
+            )[None, :]
-        _ = pairwise_distances(X.reshape(1, -1), metric=metric)
+        _ = pairwise_distances(X, metric=metric)
     assert all(
         [
             "running accelerated version" in i.message
@@ -80,22 +84,26 @@ def test_pairwise_distances_patching(caplog, dataframe, queue, dtype, metric):
 @pytest.mark.parametrize(
     "dtype", [i for i in DTYPES if "32" in i.__name__ or "64" in i.__name__]
 )
-@pytest.mark.parametrize(
-    "dataframe, queue", get_dataframes_and_queues(dataframe_filter_="numpy")
-)
+@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues())
 def test_roc_auc_score_patching(caplog, dataframe, queue, dtype):
     if dtype in [np.uint32, np.uint64] and sys.platform == "win32":
         pytest.skip("Windows issue with unsigned ints")
+    elif dtype == np.float64 and queue and not queue.sycl_device.has_aspect_fp64:
+        pytest.skip("Hardware does not support fp64 SYCL testing")
     with caplog.at_level(logging.WARNING, logger="sklearnex"):
         rng = nprnd.default_rng()
+        X = rng.integers(2, size=1000)
+        y = rng.integers(2, size=1000)
         X = _convert_to_dataframe(
-            rng.integers(2, size=1000),
+            X,
             sycl_queue=queue,
             target_df=dataframe,
             dtype=dtype,
         )
         y = _convert_to_dataframe(
-            rng.integers(2, size=1000),
+            y,
             sycl_queue=queue,
             target_df=dataframe,
             dtype=dtype,
@@ -112,14 +120,25 @@ def test_roc_auc_score_patching(caplog, dataframe, queue, dtype):
 @pytest.mark.parametrize("dtype", DTYPES)
-@pytest.mark.parametrize(
-    "dataframe, queue", get_dataframes_and_queues(dataframe_filter_="numpy")
-)
+@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues())
 @pytest.mark.parametrize("estimator, method", gen_models_info(PATCHED_MODELS))
 def test_standard_estimator_patching(caplog, dataframe, queue, dtype, estimator, method):
     with caplog.at_level(logging.WARNING, logger="sklearnex"):
         est = PATCHED_MODELS[estimator]()
+        if queue:
+            if dtype == np.float16 and not queue.sycl_device.has_aspect_fp16:
+                pytest.skip("Hardware does not support fp16 SYCL testing")
+            elif dtype == np.float64 and not queue.sycl_device.has_aspect_fp64:
+                pytest.skip("Hardware does not support fp64 SYCL testing")
+            elif queue.sycl_device.is_gpu and estimator in [
+                "KMeans",
+                "ElasticNet",
+                "Lasso",
+                "Ridge",
+            ]:
+                pytest.skip(f"{estimator} does not support GPU queues")
         if estimator == "TSNE" and method == "fit_transform":
             pytest.skip("TSNE.fit_transform is too slow for common testing")
         elif (
@@ -129,15 +148,30 @@ def test_standard_estimator_patching(caplog, dataframe, queue, dtype, estimator,
             and dtype in [np.uint32, np.uint64]
         ):
             pytest.skip("Windows segmentation fault for Ridge.predict for unsigned ints")
-        elif not hasattr(est, method):
+        elif estimator == "IncrementalLinearRegression" and dtype in [
+            np.int8,
+            np.int16,
+            np.int32,
+            np.int64,
+            np.uint8,
+            np.uint16,
+            np.uint32,
+            np.uint64,
+        ]:
+            pytest.skip(
+                "IncrementalLinearRegression fails on oneDAL side with int types because dataset is filled by zeroes"
+            )
+        elif method and not hasattr(est, method):
             pytest.skip(f"sklearn available_if prevents testing {estimator}.{method}")
         X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)
         est.fit(X, y)
-        if method != "score":
-            getattr(est, method)(X)
-        else:
-            est.score(X, y)
+        if method:
+            if method != "score":
+                getattr(est, method)(X)
+            else:
+                est.score(X, y)
     assert all(
         [
             "running accelerated version" in i.message
@@ -148,9 +182,7 @@ def test_standard_estimator_patching(caplog, dataframe, queue, dtype, estimator,
 @pytest.mark.parametrize("dtype", DTYPES)
-@pytest.mark.parametrize(
-    "dataframe, queue", get_dataframes_and_queues(dataframe_filter_="numpy")
-)
+@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues())
 @pytest.mark.parametrize("estimator, method", gen_models_info(SPECIAL_INSTANCES))
 def test_special_estimator_patching(caplog, dataframe, queue, dtype, estimator, method):
     # prepare logging
@@ -158,15 +190,24 @@ def test_special_estimator_patching(caplog, dataframe, queue, dtype, estimator,
     with caplog.at_level(logging.WARNING, logger="sklearnex"):
         est = SPECIAL_INSTANCES[estimator]
+        # Its not possible to get the dpnp/dpctl arrays to be in the proper dtype
+        if dtype == np.float16 and queue and not queue.sycl_device.has_aspect_fp16:
+            pytest.skip("Hardware does not support fp16 SYCL testing")
+        elif dtype == np.float64 and queue and not queue.sycl_device.has_aspect_fp64:
+            pytest.skip("Hardware does not support fp64 SYCL testing")
         X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)
         est.fit(X, y)
-        if not hasattr(est, method):
+        if method and not hasattr(est, method):
             pytest.skip(f"sklearn available_if prevents testing {estimator}.{method}")
-        if method != "score":
-            getattr(est, method)(X)
-        else:
-            est.score(X, y)
+        if method:
+            if method != "score":
+                getattr(est, method)(X)
+            else:
+                est.score(X, y)
     assert all(
         [
             "running accelerated version" in i.message
@@ -311,18 +352,6 @@ def test_if_estimator_inherits_sklearn(estimator):
         ), f"{estimator} does not inherit from the patched sklearn estimator"
     else:
         assert issubclass(est, BaseEstimator)
-        assert any(
-            [
-                issubclass(est, i)
-                for i in [
-                    ClassifierMixin,
-                    ClusterMixin,
-                    OutlierMixin,
-                    RegressorMixin,
-                    TransformerMixin,
-                ]
-            ]
-        ), f"{estimator} does not inherit a sklearn Mixin"
 @pytest.mark.parametrize("estimator", UNPATCHED_MODELS.keys())

sklearnex/tests/test_run_to_run_stability_tests.py CHANGED

@@ -294,7 +294,7 @@ MODELS_INFO = [
         "dataset": "regression",
     },
     {
-        "model": PCA(n_components=0.5, svd_solver="full", random_state=0),
+        "model": PCA(n_components=0.5, svd_solver="covariance_eigh", random_state=0),
         "methods": ["transform", "get_covariance", "get_precision", "score_samples"],
         "dataset": "classifier",
     },

sklearnex/utils/__init__.py CHANGED

@@ -14,6 +14,7 @@
 # limitations under the License.
 # ===============================================================================
+from ._namespace import get_namespace
 from .validation import _assert_all_finite
-__all__ = ["_assert_all_finite"]
+__all__ = ["get_namespace", "_assert_all_finite"]

sklearnex/utils/_namespace.py ADDED

@@ -0,0 +1,97 @@
+# ==============================================================================
+# Copyright 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import numpy as np
+from daal4py.sklearn._utils import sklearn_check_version
+from .._device_offload import dpnp_available
+if sklearn_check_version("1.2"):
+    from sklearn.utils._array_api import get_namespace as sklearn_get_namespace
+if dpnp_available:
+    import dpnp
+def get_namespace(*arrays):
+    """Get namespace of arrays.
+    Introspect `arrays` arguments and return their common Array API
+    compatible namespace object, if any. NumPy 1.22 and later can
+    construct such containers using the `numpy.array_api` namespace
+    for instance.
+    This function will return the namespace of SYCL-related arrays
+    which define the __sycl_usm_array_interface__ attribute
+    regardless of array_api support, the configuration of
+    array_api_dispatch, or scikit-learn version.
+    See: https://numpy.org/neps/nep-0047-array-api-standard.html
+    If `arrays` are regular numpy arrays, an instance of the
+    `_NumPyApiWrapper` compatibility wrapper is returned instead.
+    Namespace support is not enabled by default. To enabled it
+    call:
+      sklearn.set_config(array_api_dispatch=True)
+    or:
+      with sklearn.config_context(array_api_dispatch=True):
+          # your code here
+    Otherwise an instance of the `_NumPyApiWrapper`
+    compatibility wrapper is always returned irrespective of
+    the fact that arrays implement the `__array_namespace__`
+    protocol or not.
+    Parameters
+    ----------
+    *arrays : array objects
+        Array objects.
+    Returns
+    -------
+    namespace : module
+        Namespace shared by array objects.
+    is_array_api : bool
+        True of the arrays are containers that implement the Array API spec.
+    """
+    # sycl support designed to work regardless of array_api_dispatch sklearn global value
+    sycl_type = {type(x): x for x in arrays if hasattr(x, "__sycl_usm_array_interface__")}
+    if len(sycl_type) > 1:
+        raise ValueError(f"Multiple SYCL types for array inputs: {sycl_type}")
+    if sycl_type:
+        (X,) = sycl_type.values()
+        if hasattr(X, "__array_namespace__"):
+            return X.__array_namespace__(), True
+        elif dpnp_available and isinstance(X, dpnp.ndarray):
+            return dpnp, False
+        else:
+            raise ValueError(f"SYCL type not recognized: {sycl_type}")
+    elif sklearn_check_version("1.2"):
+        return sklearn_get_namespace(*arrays)
+    else:
+        return np, True

sklearnex/utils/tests/test_finite.py ADDED

@@ -0,0 +1,89 @@
+# ==============================================================================
+# Copyright 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import time
+import numpy as np
+import numpy.random as rand
+import pytest
+from numpy.testing import assert_raises
+from sklearnex.utils import _assert_all_finite
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+@pytest.mark.parametrize(
+    "shape",
+    [
+        [16, 2048],
+        [
+            2**16 + 3,
+        ],
+        [1000, 1000],
+    ],
+)
+@pytest.mark.parametrize("allow_nan", [False, True])
+def test_sum_infinite_actually_finite(dtype, shape, allow_nan):
+    X = np.array(shape, dtype=dtype)
+    X.fill(np.finfo(dtype).max)
+    _assert_all_finite(X, allow_nan=allow_nan)
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+@pytest.mark.parametrize(
+    "shape",
+    [
+        [16, 2048],
+        [
+            2**16 + 3,
+        ],
+        [1000, 1000],
+    ],
+)
+@pytest.mark.parametrize("allow_nan", [False, True])
+@pytest.mark.parametrize("check", ["inf", "NaN", None])
+@pytest.mark.parametrize("seed", [0, int(time.time())])
+def test_assert_finite_random_location(dtype, shape, allow_nan, check, seed):
+    rand.seed(seed)
+    X = rand.uniform(high=np.finfo(dtype).max, size=shape).astype(dtype)
+    if check:
+        loc = rand.randint(0, X.size - 1)
+        X.reshape((-1,))[loc] = float(check)
+    if check is None or (allow_nan and check == "NaN"):
+        _assert_all_finite(X, allow_nan=allow_nan)
+    else:
+        assert_raises(ValueError, _assert_all_finite, X, allow_nan=allow_nan)
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+@pytest.mark.parametrize("allow_nan", [False, True])
+@pytest.mark.parametrize("check", ["inf", "NaN", None])
+@pytest.mark.parametrize("seed", [0, int(time.time())])
+def test_assert_finite_random_shape_and_location(dtype, allow_nan, check, seed):
+    lb, ub = 32768, 1048576  # lb is a patching condition, ub 2^20
+    rand.seed(seed)
+    X = rand.uniform(high=np.finfo(dtype).max, size=rand.randint(lb, ub)).astype(dtype)
+    if check:
+        loc = rand.randint(0, X.size - 1)
+        X[loc] = float(check)
+    if check is None or (allow_nan and check == "NaN"):
+        _assert_all_finite(X, allow_nan=allow_nan)
+    else:
+        assert_raises(ValueError, _assert_all_finite, X, allow_nan=allow_nan)

{scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/LICENSE.txt RENAMED

File without changes

{scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/WHEEL RENAMED

File without changes

{scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/top_level.txt RENAMED

File without changes