PyPI - scikit-learn-intelex - Versions diffs - 2024.3.0__py38-none-win_amd64.whl → 2024.5.0__py38-none-win_amd64.whl - Mend

scikit-learn-intelex 2024.3.0__py38-none-win_amd64.whl → 2024.5.0__py38-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (107) hide show

scikit_learn_intelex-2024.5.0.data/data/Lib/site-packages/sklearnex/linear_model/linear.py ADDED Viewed

@@ -0,0 +1,316 @@
+# ===============================================================================
+# Copyright 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===============================================================================
+import logging
+from abc import ABC
+import numpy as np
+from sklearn.exceptions import NotFittedError
+from sklearn.linear_model import LinearRegression as sklearn_LinearRegression
+from daal4py.sklearn._n_jobs_support import control_n_jobs
+from daal4py.sklearn._utils import sklearn_check_version
+from .._device_offload import dispatch, wrap_output_data
+from .._utils import PatchingConditionsChain, get_patch_message, register_hyperparameters
+from ..utils.validation import _assert_all_finite
+if sklearn_check_version("1.0") and not sklearn_check_version("1.2"):
+    from sklearn.linear_model._base import _deprecate_normalize
+from scipy.sparse import issparse
+from sklearn.utils.validation import check_X_y
+from onedal.common.hyperparameters import get_hyperparameters
+from onedal.linear_model import LinearRegression as onedal_LinearRegression
+from onedal.utils import _num_features, _num_samples
+@register_hyperparameters({"fit": get_hyperparameters("linear_regression", "train")})
+@control_n_jobs(decorated_methods=["fit", "predict"])
+class LinearRegression(sklearn_LinearRegression):
+    # scikit-learn LinearRegression subclass whose fit/predict go through
+    # `dispatch`, which is handed both a oneDAL-backed and a stock scikit-learn
+    # implementation of each method. The public docstring is copied from
+    # scikit-learn on the next line, so this note is a comment, not a docstring.
+    __doc__ = sklearn_LinearRegression.__doc__
+    # The constructor signature follows the installed scikit-learn: from 1.2
+    # on there is no `normalize` parameter, before that it is forwarded.
+    if sklearn_check_version("1.2"):
+        _parameter_constraints: dict = {**sklearn_LinearRegression._parameter_constraints}
+        def __init__(
+            self,
+            fit_intercept=True,
+            copy_X=True,
+            n_jobs=None,
+            positive=False,
+        ):
+            super().__init__(
+                fit_intercept=fit_intercept,
+                copy_X=copy_X,
+                n_jobs=n_jobs,
+                positive=positive,
+            )
+    else:
+        def __init__(
+            self,
+            fit_intercept=True,
+            normalize="deprecated" if sklearn_check_version("1.0") else False,
+            copy_X=True,
+            n_jobs=None,
+            positive=False,
+        ):
+            super().__init__(
+                fit_intercept=fit_intercept,
+                normalize=normalize,
+                copy_X=copy_X,
+                n_jobs=n_jobs,
+                positive=positive,
+            )
+    def fit(self, X, y, sample_weight=None):
+        # Docstring is assigned from scikit-learn at the end of the class body.
+        if sklearn_check_version("1.0"):
+            self._check_feature_names(X, reset=True)
+        if sklearn_check_version("1.2"):
+            self._validate_params()
+        # It is necessary to properly update coefs for predict if we
+        # fallback to sklearn in dispatch
+        if hasattr(self, "_onedal_estimator"):
+            del self._onedal_estimator
+        dispatch(
+            self,
+            "fit",
+            {
+                "onedal": self.__class__._onedal_fit,
+                "sklearn": sklearn_LinearRegression.fit,
+            },
+            X,
+            y,
+            sample_weight,
+        )
+        return self
+    @wrap_output_data
+    def predict(self, X):
+        # Docstring is assigned from scikit-learn at the end of the class body.
+        if not hasattr(self, "coef_"):
+            msg = (
+                "This %(name)s instance is not fitted yet. Call 'fit' with "
+                "appropriate arguments before using this estimator."
+            )
+            raise NotFittedError(msg % {"name": self.__class__.__name__})
+        return dispatch(
+            self,
+            "predict",
+            {
+                "onedal": self.__class__._onedal_predict,
+                "sklearn": sklearn_LinearRegression.predict,
+            },
+            X,
+        )
+    def _test_type_and_finiteness(self, X_in):
+        """Return True when X_in is non-complex and passes _assert_all_finite."""
+        X = X_in if isinstance(X_in, np.ndarray) else np.asarray(X_in)
+        # Test the dtype itself rather than looking for the lowercase text
+        # "complex" in str(type(dtype)): that text depends on how NumPy names
+        # its dtype classes and is not a stable way to detect complex input.
+        if np.issubdtype(X.dtype, np.complexfloating):
+            return False
+        try:
+            _assert_all_finite(X)
+        except Exception:
+            # Any validation failure means "not supported here". Catching
+            # Exception (not BaseException) lets KeyboardInterrupt and
+            # SystemExit propagate instead of being reported as bad input.
+            return False
+        return True
+    def _onedal_fit_supported(self, method_name, *data):
+        """Build the PatchingConditionsChain describing whether fit can use oneDAL.
+        `data` must be the (X, y, sample_weight) triple passed to fit.
+        """
+        assert method_name == "fit"
+        assert len(data) == 3
+        X, y, sample_weight = data
+        class_name = self.__class__.__name__
+        patching_status = PatchingConditionsChain(
+            f"sklearn.linear_model.{class_name}.fit"
+        )
+        # "deprecated" is the placeholder default used by __init__ above and
+        # therefore does not count as the user asking for normalization.
+        normalize_is_set = (
+            hasattr(self, "normalize")
+            and self.normalize
+            and self.normalize != "deprecated"
+        )
+        positive_is_set = hasattr(self, "positive") and self.positive
+        n_samples = _num_samples(X)
+        n_features = _num_features(X, fallback_1d=True)
+        # Check if equations are well defined
+        is_underdetermined = n_samples < (n_features + int(self.fit_intercept))
+        dal_ready = patching_status.and_conditions(
+            [
+                (sample_weight is None, "Sample weight is not supported."),
+                (
+                    not issparse(X) and not issparse(y),
+                    "Sparse input is not supported.",
+                ),
+                (not normalize_is_set, "Normalization is not supported."),
+                (
+                    not positive_is_set,
+                    "Forced positive coefficients are not supported.",
+                ),
+                (
+                    not is_underdetermined,
+                    # Trailing space added: the two literals are concatenated
+                    # and previously ran together as "requirements:Number".
+                    "The shape of X (fitting) does not satisfy oneDAL requirements: "
+                    "Number of features + 1 >= number of samples.",
+                ),
+            ]
+        )
+        if not dal_ready:
+            return patching_status
+        # The dtype/finiteness checks run only after the checks above passed,
+        # and y is examined only when X passed.
+        if not patching_status.and_condition(
+            self._test_type_and_finiteness(X), "Input X is not supported."
+        ):
+            return patching_status
+        patching_status.and_condition(
+            self._test_type_and_finiteness(y), "Input y is not supported."
+        )
+        return patching_status
+    def _onedal_predict_supported(self, method_name, *data):
+        """Build the PatchingConditionsChain describing whether predict can use oneDAL.
+        `data` must hold exactly one element, the X passed to predict.
+        """
+        assert method_name == "predict"
+        assert len(data) == 1
+        class_name = self.__class__.__name__
+        patching_status = PatchingConditionsChain(
+            f"sklearn.linear_model.{class_name}.predict"
+        )
+        n_samples = _num_samples(*data)
+        model_is_sparse = issparse(self.coef_) or (
+            self.fit_intercept and issparse(self.intercept_)
+        )
+        dal_ready = patching_status.and_conditions(
+            [
+                (n_samples > 0, "Number of samples is less than 1."),
+                (not issparse(*data), "Sparse input is not supported."),
+                (not model_is_sparse, "Sparse coefficients are not supported."),
+            ]
+        )
+        if not dal_ready:
+            return patching_status
+        patching_status.and_condition(
+            self._test_type_and_finiteness(*data), "Input X is not supported."
+        )
+        return patching_status
+    def _onedal_supported(self, method_name, *data):
+        """Route a support query to the fit or predict checker.
+        Raises RuntimeError for any other method name.
+        """
+        if method_name == "fit":
+            return self._onedal_fit_supported(method_name, *data)
+        if method_name == "predict":
+            return self._onedal_predict_supported(method_name, *data)
+        raise RuntimeError(f"Unknown method {method_name} in {self.__class__.__name__}")
+    # The same conditions are used for both the GPU and the CPU queries.
+    _onedal_gpu_supported = _onedal_supported
+    _onedal_cpu_supported = _onedal_supported
+    def _initialize_onedal_estimator(self):
+        """Create a fresh oneDAL estimator from fit_intercept and copy_X."""
+        onedal_params = {"fit_intercept": self.fit_intercept, "copy_X": self.copy_X}
+        self._onedal_estimator = onedal_LinearRegression(**onedal_params)
+    def _onedal_fit(self, X, y, sample_weight, queue=None):
+        """oneDAL branch of fit; falls back to scikit-learn's fit on RuntimeError."""
+        assert sample_weight is None
+        check_params = {
+            "X": X,
+            "y": y,
+            "dtype": [np.float64, np.float32],
+            "accept_sparse": ["csr", "csc", "coo"],
+            "y_numeric": True,
+            "multi_output": True,
+            "force_all_finite": False,
+        }
+        if sklearn_check_version("1.2"):
+            X, y = self._validate_data(**check_params)
+        else:
+            X, y = check_X_y(**check_params)
+        if sklearn_check_version("1.0") and not sklearn_check_version("1.2"):
+            self._normalize = _deprecate_normalize(
+                self.normalize,
+                default=False,
+                estimator_name=self.__class__.__name__,
+            )
+        self._initialize_onedal_estimator()
+        try:
+            self._onedal_estimator.fit(X, y, queue=queue)
+            self._save_attributes()
+        except RuntimeError:
+            logging.getLogger("sklearnex").info(
+                f"{self.__class__.__name__}.fit "
+                + get_patch_message("sklearn_after_onedal")
+            )
+            # Drop the oneDAL estimator so predict rebuilds one from the
+            # coefficients produced by the scikit-learn fit below.
+            del self._onedal_estimator
+            super().fit(X, y)
+    def _onedal_predict(self, X, queue=None):
+        """oneDAL branch of predict."""
+        if sklearn_check_version("1.0"):
+            self._check_feature_names(X, reset=False)
+        X = self._validate_data(X, accept_sparse=False, reset=False)
+        if not hasattr(self, "_onedal_estimator"):
+            # No oneDAL estimator is attached (fit removes it before
+            # dispatching and on fallback): build one from coef_/intercept_.
+            self._initialize_onedal_estimator()
+            self._onedal_estimator.coef_ = self.coef_
+            self._onedal_estimator.intercept_ = self.intercept_
+        res = self._onedal_estimator.predict(X, queue=queue)
+        return res
+    def get_coef_(self):
+        """Return coef_."""
+        return self.coef_
+    def set_coef_(self, value):
+        """Store coef_ and mirror it onto the oneDAL estimator, if one exists."""
+        self.__dict__["coef_"] = value
+        if hasattr(self, "_onedal_estimator"):
+            self._onedal_estimator.coef_ = value
+            # Guarded: set_intercept_ deletes the same attribute, so an
+            # unconditional `del` raised AttributeError on the second call.
+            if hasattr(self._onedal_estimator, "_onedal_model"):
+                del self._onedal_estimator._onedal_model
+    def get_intercept_(self):
+        """Return intercept_."""
+        return self.intercept_
+    def set_intercept_(self, value):
+        """Store intercept_ and mirror it onto the oneDAL estimator, if one exists."""
+        self.__dict__["intercept_"] = value
+        if hasattr(self, "_onedal_estimator"):
+            self._onedal_estimator.intercept_ = value
+            # Guarded for the same reason as in set_coef_.
+            if hasattr(self._onedal_estimator, "_onedal_model"):
+                del self._onedal_estimator._onedal_model
+    def _save_attributes(self):
+        """Copy the fitted state from the oneDAL estimator onto this instance."""
+        self.n_features_in_ = self._onedal_estimator.n_features_in_
+        self._sparse = False
+        # Stored as plain instance attributes. A property object placed on an
+        # instance is not invoked as a descriptor, so none is installed here.
+        # NOTE(review): set_coef_/set_intercept_ therefore run only when called
+        # explicitly, not on `est.coef_ = value` - confirm that is intended.
+        self.__dict__["coef_"] = self._onedal_estimator.coef_
+        self.__dict__["intercept_"] = self._onedal_estimator.intercept_
+    fit.__doc__ = sklearn_LinearRegression.fit.__doc__
+    predict.__doc__ = sklearn_LinearRegression.predict.__doc__

{scikit_learn_intelex-2024.3.0.data → scikit_learn_intelex-2024.5.0.data}/data/Lib/site-packages/sklearnex/linear_model/logistic_regression.py RENAMED Viewed

@@ -38,19 +38,27 @@ if daal_check_version((2024, "P", 1)):
     import numpy as np
     from scipy.sparse import issparse
     from sklearn.linear_model import LogisticRegression as sklearn_LogisticRegression
+    from sklearn.metrics import accuracy_score
+    from sklearn.utils.multiclass import type_of_target
     from sklearn.utils.validation import check_X_y
     from daal4py.sklearn._n_jobs_support import control_n_jobs
     from daal4py.sklearn._utils import sklearn_check_version
     from onedal.linear_model import LogisticRegression as onedal_LogisticRegression
-    from onedal.utils import _num_features, _num_samples
+    from onedal.utils import _num_samples
     from .._device_offload import dispatch, wrap_output_data
     from .._utils import PatchingConditionsChain, get_patch_message
     from ..utils.validation import _assert_all_finite
     @control_n_jobs(
-        decorated_methods=["fit", "predict", "predict_proba", "predict_log_proba"]
+        decorated_methods=[
+            "fit",
+            "predict",
+            "predict_proba",
+            "predict_log_proba",
+            "score",
+        ]
     )
     class LogisticRegression(sklearn_LogisticRegression, BaseLogisticRegression):
         __doc__ = sklearn_LogisticRegression.__doc__
@@ -72,9 +80,9 @@ if daal_check_version((2024, "P", 1)):
             intercept_scaling=1,
             class_weight=None,
             random_state=None,
-            solver="lbfgs" if sklearn_check_version("0.22") else "liblinear",
+            solver="lbfgs",
             max_iter=100,
-            multi_class="auto" if sklearn_check_version("0.22") else "ovr",
+            multi_class="auto",
             verbose=0,
             warm_start=False,
             n_jobs=None,
@@ -160,6 +168,27 @@ if daal_check_version((2024, "P", 1)):
                 X,
             )
+        @wrap_output_data
+        def score(self, X, y, sample_weight=None):
+            # Hands dispatch both a oneDAL and a scikit-learn implementation
+            # of score; the docstring is assigned from scikit-learn elsewhere.
+            if sklearn_check_version("1.0"):
+                self._check_feature_names(X, reset=False)
+            branches = {
+                "onedal": self.__class__._onedal_score,
+                "sklearn": sklearn_LogisticRegression.score,
+            }
+            return dispatch(self, "score", branches, X, y, sample_weight=sample_weight)
+        def _onedal_score(self, X, y, sample_weight=None, queue=None):
+            # oneDAL branch of score: accuracy of the oneDAL predictions vs y.
+            predicted = self._onedal_predict(X, queue=queue)
+            return accuracy_score(y, predicted, sample_weight=sample_weight)
         def _test_type_and_finiteness(self, X_in):
             X = np.asarray(X_in)
@@ -198,6 +227,10 @@ if daal_check_version((2024, "P", 1)):
                     (self.warm_start == False, "Warm start is not supported."),
                     (self.l1_ratio is None, "l1 ratio is not supported."),
                     (sample_weight is None, "Sample weight is not supported."),
+                    (
+                        type_of_target(y) == "binary",
+                        "Only binary classification is supported",
+                    ),
                 ]
             )
@@ -216,22 +249,29 @@ if daal_check_version((2024, "P", 1)):
             return patching_status
         def _onedal_gpu_predict_supported(self, method_name, *data):
-            assert method_name in ["predict", "predict_proba", "predict_log_proba"]
-            assert len(data) == 1
+            assert method_name in [
+                "predict",
+                "predict_proba",
+                "predict_log_proba",
+                "score",
+            ]
             class_name = self.__class__.__name__
             patching_status = PatchingConditionsChain(
                 f"sklearn.linear_model.{class_name}.{method_name}"
             )
-            n_samples = _num_samples(*data)
+            n_samples = _num_samples(data[0])
             model_is_sparse = issparse(self.coef_) or (
                 self.fit_intercept and issparse(self.intercept_)
             )
             dal_ready = patching_status.and_conditions(
                 [
                     (n_samples > 0, "Number of samples is less than 1."),
-                    (not issparse(*data), "Sparse input is not supported."),
+                    (
+                        not any([issparse(i) for i in data]),
+                        "Sparse input is not supported.",
+                    ),
                     (not model_is_sparse, "Sparse coefficients are not supported."),
                     (
                         hasattr(self, "_onedal_estimator"),
@@ -251,7 +291,7 @@ if daal_check_version((2024, "P", 1)):
         def _onedal_gpu_supported(self, method_name, *data):
             if method_name == "fit":
                 return self._onedal_gpu_fit_supported(method_name, *data)
-            if method_name in ["predict", "predict_proba", "predict_log_proba"]:
+            if method_name in ["predict", "predict_proba", "predict_log_proba", "score"]:
                 return self._onedal_gpu_predict_supported(method_name, *data)
             raise RuntimeError(
                 f"Unknown method {method_name} in {self.__class__.__name__}"
@@ -334,6 +374,7 @@ if daal_check_version((2024, "P", 1)):
         predict.__doc__ = sklearn_LogisticRegression.predict.__doc__
         predict_proba.__doc__ = sklearn_LogisticRegression.predict_proba.__doc__
         predict_log_proba.__doc__ = sklearn_LogisticRegression.predict_log_proba.__doc__
+        score.__doc__ = sklearn_LogisticRegression.score.__doc__
 else:
     LogisticRegression = LogisticRegression_daal4py

scikit_learn_intelex-2024.5.0.data/data/Lib/site-packages/sklearnex/linear_model/tests/test_incremental_linear.py ADDED Viewed

@@ -0,0 +1,200 @@
+# ===============================================================================
+# Copyright 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===============================================================================
+import numpy as np
+import pytest
+from numpy.testing import assert_allclose
+from onedal.tests.utils._dataframes_support import (
+    _as_numpy,
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
+from sklearnex.linear_model import IncrementalLinearRegression
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("fit_intercept", [True, False])
+@pytest.mark.parametrize("macro_block", [None, 1024])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_sklearnex_fit_on_gold_data(dataframe, queue, fit_intercept, macro_block, dtype):
+    """Fit on two points of y = x and check coef_, intercept_ and predictions."""
+    features = np.array([[1], [2]]).astype(dtype=dtype)
+    targets = np.array([1, 2]).astype(dtype=dtype)
+    features_df = _convert_to_dataframe(features, sycl_queue=queue, target_df=dataframe)
+    targets_df = _convert_to_dataframe(targets, sycl_queue=queue, target_df=dataframe)
+    model = IncrementalLinearRegression(fit_intercept=fit_intercept)
+    if macro_block is not None:
+        # Override the macro-block size hyperparameters for the fit.
+        fit_hparams = model.get_hyperparameters("fit")
+        fit_hparams.cpu_macro_block = macro_block
+        fit_hparams.gpu_macro_block = macro_block
+    model.fit(features_df, targets_df)
+    predictions = model.predict(features_df)
+    # float32 runs get a looser absolute tolerance than float64 runs.
+    tol = 1e-7 if dtype != np.float32 else 2e-6
+    assert_allclose(model.coef_, [1], atol=tol)
+    if fit_intercept:
+        assert_allclose(model.intercept_, [0], atol=tol)
+    assert_allclose(_as_numpy(predictions), targets, atol=tol)
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("fit_intercept", [True, False])
+@pytest.mark.parametrize("macro_block", [None, 1024])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_sklearnex_partial_fit_on_gold_data(
+    dataframe, queue, fit_intercept, macro_block, dtype
+):
+    """Feed y = x (+ 3 when an intercept is fitted) in two batches and check the fit."""
+    X = np.array([[1], [2], [3], [4]])
+    X = X.astype(dtype=dtype)
+    # Add the offset only when the model may learn one; a no-intercept fit
+    # could not reproduce shifted data exactly.
+    true_intercept = 3 if fit_intercept else 0
+    y = X + true_intercept
+    y = y.astype(dtype=dtype)
+    X_split = np.array_split(X, 2)
+    y_split = np.array_split(y, 2)
+    # fit_intercept is forwarded so the False cases really run without an
+    # intercept; previously the estimator was always built with its default.
+    inclin = IncrementalLinearRegression(fit_intercept=fit_intercept)
+    if macro_block is not None:
+        hparams = inclin.get_hyperparameters("fit")
+        hparams.cpu_macro_block = macro_block
+        hparams.gpu_macro_block = macro_block
+    for X_batch, y_batch in zip(X_split, y_split):
+        X_split_df = _convert_to_dataframe(
+            X_batch, sycl_queue=queue, target_df=dataframe
+        )
+        y_split_df = _convert_to_dataframe(
+            y_batch, sycl_queue=queue, target_df=dataframe
+        )
+        inclin.partial_fit(X_split_df, y_split_df)
+    assert inclin.n_features_in_ == 1
+    tol = 2e-6 if dtype == np.float32 else 1e-7
+    assert_allclose(inclin.coef_, [[1]], atol=tol)
+    if fit_intercept:
+        assert_allclose(inclin.intercept_, true_intercept, atol=tol)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y_pred = inclin.predict(X_df)
+    assert_allclose(_as_numpy(y_pred), y, atol=tol)
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("fit_intercept", [True, False])
+@pytest.mark.parametrize("macro_block", [None, 1024])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_sklearnex_partial_fit_multitarget_on_gold_data(
+    dataframe, queue, fit_intercept, macro_block, dtype
+):
+    """Feed y = X @ [1, 2] (+ 3 when an intercept is fitted) in two batches."""
+    X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
+    X = X.astype(dtype=dtype)
+    # Add the offset only when the model may learn one; a no-intercept fit
+    # could not reproduce shifted data exactly.
+    true_intercept = 3 if fit_intercept else 0
+    y = np.dot(X, [1, 2]) + true_intercept
+    y = y.astype(dtype=dtype)
+    X_split = np.array_split(X, 2)
+    y_split = np.array_split(y, 2)
+    # fit_intercept is forwarded so the False cases really run without an
+    # intercept; previously the estimator was always built with its default.
+    inclin = IncrementalLinearRegression(fit_intercept=fit_intercept)
+    if macro_block is not None:
+        hparams = inclin.get_hyperparameters("fit")
+        hparams.cpu_macro_block = macro_block
+        hparams.gpu_macro_block = macro_block
+    for X_batch, y_batch in zip(X_split, y_split):
+        X_split_df = _convert_to_dataframe(
+            X_batch, sycl_queue=queue, target_df=dataframe
+        )
+        y_split_df = _convert_to_dataframe(
+            y_batch, sycl_queue=queue, target_df=dataframe
+        )
+        inclin.partial_fit(X_split_df, y_split_df)
+    assert inclin.n_features_in_ == 2
+    tol = 7e-6 if dtype == np.float32 else 1e-7
+    assert_allclose(inclin.coef_, [1.0, 2.0], atol=tol)
+    if fit_intercept:
+        assert_allclose(inclin.intercept_, float(true_intercept), atol=tol)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y_pred = inclin.predict(X_df)
+    assert_allclose(_as_numpy(y_pred), y, atol=tol)
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("fit_intercept", [True, False])
+@pytest.mark.parametrize("num_samples", [100, 1000])
+@pytest.mark.parametrize("num_features", [5, 10])
+@pytest.mark.parametrize("num_targets", [1, 2])
+@pytest.mark.parametrize("num_blocks", [1, 10])
+@pytest.mark.parametrize("macro_block", [None, 1024])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_sklearnex_partial_fit_on_random_data(
+    dataframe,
+    queue,
+    fit_intercept,
+    num_samples,
+    num_features,
+    num_targets,
+    num_blocks,
+    macro_block,
+    dtype,
+):
+    """Recover randomly drawn coefficients from noiseless data fed in blocks."""
+    rng = np.random.default_rng(42)
+    # Draw order (intercept, coef, train X, then test X) fixes the data.
+    intercept = rng.random(size=num_targets, dtype=dtype)
+    coef = rng.random(size=(num_targets, num_features), dtype=dtype).T
+    X = rng.random(size=(num_samples, num_features), dtype=dtype)
+    def linear_response(points):
+        # Ground-truth targets; the offset is applied only with an intercept.
+        if fit_intercept:
+            return points @ coef + intercept[np.newaxis, :]
+        return points @ coef
+    y = linear_response(X)
+    inclin = IncrementalLinearRegression(fit_intercept=fit_intercept)
+    if macro_block is not None:
+        hparams = inclin.get_hyperparameters("fit")
+        hparams.cpu_macro_block = macro_block
+        hparams.gpu_macro_block = macro_block
+    blocks = zip(np.array_split(X, num_blocks), np.array_split(y, num_blocks))
+    for X_block, y_block in blocks:
+        X_block_df = _convert_to_dataframe(
+            X_block, sycl_queue=queue, target_df=dataframe
+        )
+        y_block_df = _convert_to_dataframe(
+            y_block, sycl_queue=queue, target_df=dataframe
+        )
+        inclin.partial_fit(X_block_df, y_block_df)
+    tol = 1e-7 if dtype != np.float32 else 1e-4
+    assert_allclose(coef, inclin.coef_.T, atol=tol)
+    if fit_intercept:
+        assert_allclose(intercept, inclin.intercept_, atol=tol)
+    X_test = rng.random(size=(num_samples, num_features), dtype=dtype)
+    expected_y_pred = linear_response(X_test)
+    X_test_df = _convert_to_dataframe(X_test, sycl_queue=queue, target_df=dataframe)
+    y_pred = inclin.predict(X_test_df)
+    assert_allclose(expected_y_pred, _as_numpy(y_pred), atol=tol)