scikit-learn-intelex 2024.3.0__py38-none-manylinux1_x86_64.whl → 2024.5.0__py38-none-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/METADATA +2 -2
- {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/RECORD +43 -37
- sklearnex/_device_offload.py +39 -5
- sklearnex/basic_statistics/__init__.py +2 -1
- sklearnex/basic_statistics/incremental_basic_statistics.py +288 -0
- sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +384 -0
- sklearnex/covariance/incremental_covariance.py +217 -30
- sklearnex/covariance/tests/test_incremental_covariance.py +54 -17
- sklearnex/decomposition/pca.py +71 -19
- sklearnex/decomposition/tests/test_pca.py +2 -2
- sklearnex/dispatcher.py +33 -2
- sklearnex/ensemble/_forest.py +73 -79
- sklearnex/linear_model/__init__.py +5 -3
- sklearnex/linear_model/incremental_linear.py +387 -0
- sklearnex/linear_model/linear.py +275 -340
- sklearnex/linear_model/logistic_regression.py +50 -9
- sklearnex/linear_model/tests/test_incremental_linear.py +200 -0
- sklearnex/linear_model/tests/test_linear.py +40 -5
- sklearnex/neighbors/_lof.py +53 -36
- sklearnex/neighbors/common.py +4 -1
- sklearnex/neighbors/knn_classification.py +37 -122
- sklearnex/neighbors/knn_regression.py +10 -117
- sklearnex/neighbors/knn_unsupervised.py +6 -78
- sklearnex/neighbors/tests/test_neighbors.py +2 -2
- sklearnex/preview/cluster/k_means.py +5 -73
- sklearnex/preview/covariance/covariance.py +6 -5
- sklearnex/preview/covariance/tests/test_covariance.py +18 -5
- sklearnex/svm/_common.py +4 -7
- sklearnex/svm/nusvc.py +66 -50
- sklearnex/svm/nusvr.py +3 -49
- sklearnex/svm/svc.py +66 -51
- sklearnex/svm/svr.py +3 -49
- sklearnex/tests/_utils.py +34 -16
- sklearnex/tests/test_memory_usage.py +5 -1
- sklearnex/tests/test_n_jobs_support.py +12 -2
- sklearnex/tests/test_patching.py +87 -58
- sklearnex/tests/test_run_to_run_stability_tests.py +1 -1
- sklearnex/utils/__init__.py +2 -1
- sklearnex/utils/_namespace.py +97 -0
- sklearnex/utils/tests/test_finite.py +89 -0
- {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/LICENSE.txt +0 -0
- {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/WHEEL +0 -0
- {scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/top_level.txt +0 -0

sklearnex/covariance/incremental_covariance.py
CHANGED

@@ -14,18 +14,33 @@
 # limitations under the License.
 # ===============================================================================
 
+import numbers
+import warnings
+
 import numpy as np
+from scipy import linalg
+from sklearn.base import BaseEstimator
+from sklearn.covariance import EmpiricalCovariance as sklearn_EmpiricalCovariance
 from sklearn.utils import check_array, gen_batches
 
 from daal4py.sklearn._n_jobs_support import control_n_jobs
+from daal4py.sklearn._utils import daal_check_version, sklearn_check_version
 from onedal._device_offload import support_usm_ndarray
 from onedal.covariance import (
     IncrementalEmpiricalCovariance as onedal_IncrementalEmpiricalCovariance,
 )
+from sklearnex import config_context
+
+from .._device_offload import dispatch, wrap_output_data
+from .._utils import PatchingConditionsChain, register_hyperparameters
+from ..metrics import pairwise_distances
 
+if sklearn_check_version("1.2"):
+    from sklearn.utils._param_validation import Interval
 
-@control_n_jobs(decorated_methods=["partial_fit", "_onedal_finalize_fit"])
-class IncrementalEmpiricalCovariance:
+
+@control_n_jobs(decorated_methods=["partial_fit", "fit", "_onedal_finalize_fit"])
+class IncrementalEmpiricalCovariance(BaseEstimator):
     """
     Incremental estimator for covariance.
     Allows to compute empirical covariance estimated by maximum
@@ -33,12 +48,25 @@ class IncrementalEmpiricalCovariance:
 
     Parameters
     ----------
+    store_precision : bool, default=False
+        Specifies if the estimated precision is stored.
+
+    assume_centered : bool, default=False
+        If True, data are not centered before computation.
+        Useful when working with data whose mean is almost, but not exactly
+        zero.
+        If False (default), data are centered before computation.
+
     batch_size : int, default=None
        The number of samples to use for each batch. Only used when calling
        ``fit``. If ``batch_size`` is ``None``, then ``batch_size``
        is inferred from the data and set to ``5 * n_features``, to provide a
        balance between approximation accuracy and memory consumption.
 
+    copy : bool, default=True
+        If False, X will be overwritten. ``copy=False`` can be used to
+        save memory but is unsafe for general use.
+
     Attributes
     ----------
     location_ : ndarray of shape (n_features,)
@@ -46,44 +74,130 @@ class IncrementalEmpiricalCovariance:
 
     covariance_ : ndarray of shape (n_features, n_features)
         Estimated covariance matrix
+
+    n_samples_seen_ : int
+        The number of samples processed by the estimator. Will be reset on
+        new calls to fit, but increments across ``partial_fit`` calls.
+
+    batch_size_ : int
+        Inferred batch size from ``batch_size``.
+
+    n_features_in_ : int
+        Number of features seen during :term:`fit` `partial_fit`.
     """
 
     _onedal_incremental_covariance = staticmethod(onedal_IncrementalEmpiricalCovariance)
 
-
-
-
+    if sklearn_check_version("1.2"):
+        _parameter_constraints: dict = {
+            "store_precision": ["boolean"],
+            "assume_centered": ["boolean"],
+            "batch_size": [Interval(numbers.Integral, 1, None, closed="left"), None],
+            "copy": ["boolean"],
+        }
+
+    get_precision = sklearn_EmpiricalCovariance.get_precision
+    error_norm = wrap_output_data(sklearn_EmpiricalCovariance.error_norm)
+    score = wrap_output_data(sklearn_EmpiricalCovariance.score)
+
+    def __init__(
+        self, *, store_precision=False, assume_centered=False, batch_size=None, copy=True
+    ):
+        self.assume_centered = assume_centered
+        self.store_precision = store_precision
         self.batch_size = batch_size
+        self.copy = copy
+
+    def _onedal_supported(self, method_name, *data):
+        patching_status = PatchingConditionsChain(
+            f"sklearn.covariance.{self.__class__.__name__}.{method_name}"
+        )
+        return patching_status
 
     def _onedal_finalize_fit(self):
         assert hasattr(self, "_onedal_estimator")
         self._onedal_estimator.finalize_fit()
         self._need_to_finalize = False
 
-    def _onedal_partial_fit(self, X, queue):
+        if not daal_check_version((2024, "P", 400)) and self.assume_centered:
+            location = self._onedal_estimator.location_[None, :]
+            self._onedal_estimator.covariance_ += np.dot(location.T, location)
+            self._onedal_estimator.location_ = np.zeros_like(np.squeeze(location))
+        if self.store_precision:
+            self.precision_ = linalg.pinvh(
+                self._onedal_estimator.covariance_, check_finite=False
+            )
+        else:
+            self.precision_ = None
+
+    @property
+    def covariance_(self):
+        if hasattr(self, "_onedal_estimator"):
+            if self._need_to_finalize:
+                self._onedal_finalize_fit()
+            return self._onedal_estimator.covariance_
+        else:
+            raise AttributeError(
+                f"'{self.__class__.__name__}' object has no attribute 'covariance_'"
+            )
+
+    @property
+    def location_(self):
+        if hasattr(self, "_onedal_estimator"):
+            if self._need_to_finalize:
+                self._onedal_finalize_fit()
+            return self._onedal_estimator.location_
+        else:
+            raise AttributeError(
+                f"'{self.__class__.__name__}' object has no attribute 'location_'"
+            )
+
+    def _onedal_partial_fit(self, X, queue=None, check_input=True):
+
+        first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0
+
+        # finite check occurs on onedal side
+        if check_input:
+            if sklearn_check_version("1.2"):
+                self._validate_params()
+
+            if sklearn_check_version("1.0"):
+                X = self._validate_data(
+                    X,
+                    dtype=[np.float64, np.float32],
+                    reset=first_pass,
+                    copy=self.copy,
+                    force_all_finite=False,
+                )
+            else:
+                X = check_array(
+                    X,
+                    dtype=[np.float64, np.float32],
+                    copy=self.copy,
+                    force_all_finite=False,
+                )
+
         onedal_params = {
             "method": "dense",
             "bias": True,
+            "assume_centered": self.assume_centered,
         }
         if not hasattr(self, "_onedal_estimator"):
             self._onedal_estimator = self._onedal_incremental_covariance(**onedal_params)
-        self._onedal_estimator.partial_fit(X, queue)
-        self._need_to_finalize = True
+        try:
+            if first_pass:
+                self.n_samples_seen_ = X.shape[0]
+                self.n_features_in_ = X.shape[1]
+            else:
+                self.n_samples_seen_ += X.shape[0]
 
-    @property
-    def covariance_(self):
-        if self._need_to_finalize:
-            self._onedal_finalize_fit()
-        return self._onedal_estimator.covariance_
+            self._onedal_estimator.partial_fit(X, queue)
+        finally:
+            self._need_to_finalize = True
 
-    @property
-    def location_(self):
-        if self._need_to_finalize:
-            self._onedal_finalize_fit()
-        return self._onedal_estimator.location_
+        return self
 
-    @support_usm_ndarray()
-    def partial_fit(self, X, queue=None):
+    def partial_fit(self, X, y=None, check_input=True):
         """
         Incremental fit with X. All of X is processed as a single batch.
 
@@ -93,16 +207,29 @@ class IncrementalEmpiricalCovariance:
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.
 
+        y : Ignored
+            Not used, present for API consistency by convention.
+
+        check_input : bool, default=True
+            Run check_array on X.
+
         Returns
         -------
         self : object
            Returns the instance itself.
        """
-
-
-
+        return dispatch(
+            self,
+            "partial_fit",
+            {
+                "onedal": self.__class__._onedal_partial_fit,
+                "sklearn": None,
+            },
+            X,
+            check_input=check_input,
+        )
 
-    def fit(self, X, queue=None):
+    def fit(self, X, y=None):
        """
        Fit the model with X, using minibatches of size batch_size.
 
@@ -112,19 +239,79 @@ class IncrementalEmpiricalCovariance:
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.
 
+        y : Ignored
+            Not used, present for API consistency by convention.
+
         Returns
         -------
         self : object
            Returns the instance itself.
        """
-
-
-
+
+        return dispatch(
+            self,
+            "fit",
+            {
+                "onedal": self.__class__._onedal_fit,
+                "sklearn": None,
+            },
+            X,
+        )
+
+    def _onedal_fit(self, X, queue=None):
+        self.n_samples_seen_ = 0
+        if hasattr(self, "_onedal_estimator"):
+            self._onedal_estimator._reset()
+
+        if sklearn_check_version("1.2"):
+            self._validate_params()
+
+        # finite check occurs on onedal side
+        if sklearn_check_version("1.0"):
+            X = self._validate_data(
+                X, dtype=[np.float64, np.float32], copy=self.copy, force_all_finite=False
+            )
         else:
-
-
+            X = check_array(
+                X, dtype=[np.float64, np.float32], copy=self.copy, force_all_finite=False
+            )
+        self.n_features_in_ = X.shape[1]
+
+        self.batch_size_ = self.batch_size if self.batch_size else 5 * self.n_features_in_
+
+        if X.shape[0] == 1:
+            warnings.warn(
+                "Only one sample available. You may want to reshape your data array"
+            )
+
+        for batch in gen_batches(X.shape[0], self.batch_size_):
             X_batch = X[batch]
-            self._onedal_partial_fit(X_batch, queue)
+            self._onedal_partial_fit(X_batch, queue=queue, check_input=False)
 
         self._onedal_finalize_fit()
+
         return self
+
+    # expose sklearnex pairwise_distances if mahalanobis distance eventually supported
+    @wrap_output_data
+    def mahalanobis(self, X):
+        if sklearn_check_version("1.0"):
+            self._validate_data(X, reset=False, copy=self.copy)
+        else:
+            check_array(X, copy=self.copy)
+
+        precision = self.get_precision()
+        with config_context(assume_finite=True):
+            # compute mahalanobis distances
+            dist = pairwise_distances(
+                X, self.location_[np.newaxis, :], metric="mahalanobis", VI=precision
+            )
+
+        return np.reshape(dist, (len(X),)) ** 2
+
+    _onedal_cpu_supported = _onedal_supported
+    _onedal_gpu_supported = _onedal_supported
+
+    mahalanobis.__doc__ = sklearn_EmpiricalCovariance.mahalanobis.__doc__
+    error_norm.__doc__ = sklearn_EmpiricalCovariance.error_norm.__doc__
+    score.__doc__ = sklearn_EmpiricalCovariance.score.__doc__
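Taken together, these changes turn IncrementalEmpiricalCovariance into a scikit-learn-style estimator: keyword-only constructor parameters, fit/partial_fit routed through dispatch, lazily finalized covariance_/location_ properties, and mahalanobis/error_norm/score borrowed from sklearn.covariance.EmpiricalCovariance. A minimal usage sketch against the 2024.5.0 API follows; the data and the batch split are illustrative, not taken from the package.

import numpy as np

from sklearnex.covariance import IncrementalEmpiricalCovariance

rng = np.random.default_rng(0)
X = rng.standard_normal((1000, 10))

# Batch-by-batch accumulation; finalization is deferred until
# covariance_ / location_ are first read (the _need_to_finalize flag above).
est = IncrementalEmpiricalCovariance()
for X_batch in np.array_split(X, 4):
    est.partial_fit(X_batch)

# Single-call form: fit() resets the estimator and slices X into
# batches of batch_size (default 5 * n_features) via gen_batches.
est_full = IncrementalEmpiricalCovariance().fit(X)

# Both paths compute the same biased MLE estimate, up to float error.
np.testing.assert_allclose(est.covariance_, est_full.covariance_, rtol=1e-6)
np.testing.assert_allclose(est.location_, X.mean(axis=0), rtol=1e-6)

Because routing goes through dispatch, the same calls can in principle be offloaded to a supported device via sklearnex's config_context(target_offload=...) without code changes.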
sklearnex/covariance/tests/test_incremental_covariance.py
CHANGED

@@ -17,6 +17,10 @@
 import numpy as np
 import pytest
 from numpy.testing import assert_allclose
+from sklearn.covariance.tests.test_covariance import (
+    test_covariance,
+    test_EmpiricalCovariance_validates_mahalanobis,
+)
 
 from onedal.tests.utils._dataframes_support import (
     _convert_to_dataframe,
@@ -26,13 +30,14 @@ from onedal.tests.utils._dataframes_support import (
 
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
 @pytest.mark.parametrize("dtype", [np.float32, np.float64])
-def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype):
+@pytest.mark.parametrize("assume_centered", [True, False])
+def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype, assume_centered):
     from sklearnex.covariance import IncrementalEmpiricalCovariance
 
     X = np.array([[0, 1], [0, 1]])
     X = X.astype(dtype)
     X_split = np.array_split(X, 2)
-    inccov = IncrementalEmpiricalCovariance()
+    inccov = IncrementalEmpiricalCovariance(assume_centered=assume_centered)
 
     for i in range(2):
         X_split_df = _convert_to_dataframe(
@@ -40,8 +45,12 @@ def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype):
         )
         result = inccov.partial_fit(X_split_df)
 
-    expected_covariance = np.array([[0, 0], [0, 0]])
-    expected_means = np.array([0, 1])
+    if assume_centered:
+        expected_covariance = np.array([[0, 0], [0, 1]])
+        expected_means = np.array([0, 0])
+    else:
+        expected_covariance = np.array([[0, 0], [0, 0]])
+        expected_means = np.array([0, 1])
 
     assert_allclose(expected_covariance, result.covariance_)
     assert_allclose(expected_means, result.location_)
@@ -49,7 +58,7 @@ def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype):
     X = np.array([[1, 2], [3, 6]])
     X = X.astype(dtype)
     X_split = np.array_split(X, 2)
-    inccov = IncrementalEmpiricalCovariance()
+    inccov = IncrementalEmpiricalCovariance(assume_centered=assume_centered)
 
     for i in range(2):
         X_split_df = _convert_to_dataframe(
@@ -57,8 +66,12 @@ def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, dtype):
         )
         result = inccov.partial_fit(X_split_df)
 
-    expected_covariance = np.array([[1, 2], [2, 4]])
-    expected_means = np.array([2, 4])
+    if assume_centered:
+        expected_covariance = np.array([[5, 10], [10, 20]])
+        expected_means = np.array([0, 0])
+    else:
+        expected_covariance = np.array([[1, 2], [2, 4]])
+        expected_means = np.array([2, 4])
 
     assert_allclose(expected_covariance, result.covariance_)
     assert_allclose(expected_means, result.location_)
@@ -87,9 +100,9 @@ def test_sklearnex_fit_on_gold_data(dataframe, queue, batch_size, dtype):
 
 
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
-@pytest.mark.parametrize("num_batches", [2,
-@pytest.mark.parametrize("row_count", [100, 1000
-@pytest.mark.parametrize("column_count", [10, 100
+@pytest.mark.parametrize("num_batches", [2, 10])
+@pytest.mark.parametrize("row_count", [100, 1000])
+@pytest.mark.parametrize("column_count", [10, 100])
 @pytest.mark.parametrize("dtype", [np.float32, np.float64])
 def test_sklearnex_partial_fit_on_random_data(
     dataframe, queue, num_batches, row_count, column_count, dtype
@@ -117,12 +130,13 @@ def test_sklearnex_partial_fit_on_random_data(
 
 
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
-@pytest.mark.parametrize("num_batches", [2,
-@pytest.mark.parametrize("row_count", [100, 1000
-@pytest.mark.parametrize("column_count", [10, 100
+@pytest.mark.parametrize("num_batches", [2, 10])
+@pytest.mark.parametrize("row_count", [100, 1000])
+@pytest.mark.parametrize("column_count", [10, 100])
 @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+@pytest.mark.parametrize("assume_centered", [True, False])
 def test_sklearnex_fit_on_random_data(
-    dataframe, queue, num_batches, row_count, column_count, dtype
+    dataframe, queue, num_batches, row_count, column_count, dtype, assume_centered
 ):
     from sklearnex.covariance import IncrementalEmpiricalCovariance
 
@@ -132,12 +146,35 @@ def test_sklearnex_fit_on_random_data(
     X = X.astype(dtype)
     X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
     batch_size = row_count // num_batches
-    inccov = IncrementalEmpiricalCovariance(batch_size=batch_size)
+    inccov = IncrementalEmpiricalCovariance(
+        batch_size=batch_size, assume_centered=assume_centered
+    )
 
     result = inccov.fit(X_df)
 
-    expected_covariance = np.cov(X.T, bias=1)
-    expected_means = np.mean(X, axis=0)
+    if assume_centered:
+        expected_covariance = np.dot(X.T, X) / X.shape[0]
+        expected_means = np.zeros_like(X[0])
+    else:
+        expected_covariance = np.cov(X.T, bias=1)
+        expected_means = np.mean(X, axis=0)
 
     assert_allclose(expected_covariance, result.covariance_, atol=1e-6)
     assert_allclose(expected_means, result.location_, atol=1e-6)
+
+
+# Monkeypatch IncrementalEmpiricalCovariance into relevant sklearn.covariance tests
+@pytest.mark.allow_sklearn_fallback
+@pytest.mark.parametrize(
+    "sklearn_test",
+    [
+        test_covariance,
+        test_EmpiricalCovariance_validates_mahalanobis,
+    ],
+)
+def test_IncrementalEmpiricalCovariance_against_sklearn(monkeypatch, sklearn_test):
+    from sklearnex.covariance import IncrementalEmpiricalCovariance
+
+    class_name = ".".join([sklearn_test.__module__, "EmpiricalCovariance"])
+    monkeypatch.setattr(class_name, IncrementalEmpiricalCovariance)
+    sklearn_test()
sklearnex/decomposition/pca.py
CHANGED
@@ -21,6 +21,7 @@ from daal4py.sklearn._utils import daal_check_version
 if daal_check_version((2024, "P", 100)):
     import numbers
     from math import sqrt
+    from warnings import warn
 
     import numpy as np
     from scipy.sparse import issparse
@@ -35,9 +36,13 @@ if daal_check_version((2024, "P", 100)):
     if sklearn_check_version("1.1") and not sklearn_check_version("1.2"):
         from sklearn.utils import check_scalar
 
+    if sklearn_check_version("1.2"):
+        from sklearn.utils._param_validation import StrOptions
+
     from sklearn.decomposition import PCA as sklearn_PCA
 
     from onedal.decomposition import PCA as onedal_PCA
+    from sklearnex.utils import get_namespace
 
     @control_n_jobs(decorated_methods=["fit", "transform", "fit_transform"])
     class PCA(sklearn_PCA):
@@ -45,6 +50,16 @@ if daal_check_version((2024, "P", 100)):
 
         if sklearn_check_version("1.2"):
             _parameter_constraints: dict = {**sklearn_PCA._parameter_constraints}
+            # "onedal_svd" solver uses oneDAL's PCA-SVD algorithm
+            # and required for testing purposes to fully enable it in future.
+            # "covariance_eigh" solver is added for ability to explicitly request
+            # oneDAL's PCA-Covariance algorithm using any sklearn version < 1.5.
+            _parameter_constraints["svd_solver"] = [
+                StrOptions(
+                    _parameter_constraints["svd_solver"][0].options
+                    | {"onedal_svd", "covariance_eigh"}
+                )
+            ]
 
         if sklearn_check_version("1.1"):
 
@@ -95,6 +110,7 @@ if daal_check_version((2024, "P", 100)):
             self._fit(X)
             return self
 
+        @wrap_output_data
         def _fit(self, X):
             if sklearn_check_version("1.2"):
                 self._validate_params()
@@ -106,7 +122,7 @@ if daal_check_version((2024, "P", 100)):
                     target_type=numbers.Integral,
                 )
 
-            U, S, Vt = dispatch(
+            return dispatch(
                 self,
                 "fit",
                 {
@@ -115,7 +131,6 @@ if daal_check_version((2024, "P", 100)):
                 },
                 X,
             )
-            return U, S, Vt
 
         def _onedal_fit(self, X, queue=None):
             X = self._validate_data(
@@ -128,7 +143,7 @@ if daal_check_version((2024, "P", 100)):
             onedal_params = {
                 "n_components": self.n_components,
                 "is_deterministic": True,
-                "method": "cov",
+                "method": "svd" if self._fit_svd_solver == "onedal_svd" else "cov",
                 "whiten": self.whiten,
             }
             self._onedal_estimator = onedal_PCA(**onedal_params)
@@ -139,7 +154,13 @@ if daal_check_version((2024, "P", 100)):
             S = self.singular_values_
             Vt = self.components_
 
-            return U, S, Vt
+            if sklearn_check_version("1.5"):
+                xp, _ = get_namespace(X)
+                x_is_centered = not self.copy
+
+                return U, S, Vt, X, x_is_centered, xp
+            else:
+                return U, S, Vt
 
         @wrap_output_data
         def transform(self, X):
@@ -155,34 +176,39 @@ if daal_check_version((2024, "P", 100)):
 
         def _onedal_transform(self, X, queue=None):
             check_is_fitted(self)
+            if sklearn_check_version("1.0"):
+                self._check_feature_names(X, reset=False)
             X = self._validate_data(
                 X,
                 dtype=[np.float64, np.float32],
                 reset=False,
             )
             self._validate_n_features_in_after_fitting(X)
-            if sklearn_check_version("1.0"):
-                self._check_feature_names(X, reset=False)
 
             return self._onedal_estimator.predict(X, queue=queue)
 
-        @wrap_output_data
         def fit_transform(self, X, y=None):
-            U, S, Vt = self._fit(X)
-            if hasattr(self, "_onedal_estimator"):
-                # oneDAL PCA was fit
-                X_transformed = self._onedal_transform(X)
-                return X_transformed
+            if sklearn_check_version("1.5"):
+                U, S, Vt, X_fit, x_is_centered, xp = self._fit(X)
             else:
+                U, S, Vt = self._fit(X)
+                X_fit = X
+            if hasattr(self, "_onedal_estimator"):
+                # oneDAL PCA was fit
+                return self.transform(X)
+            elif U is not None:
                 # Scikit-learn PCA was fit
                 U = U[:, : self.n_components_]
 
                 if self.whiten:
-                    U *= sqrt(X.shape[0] - 1)
+                    U *= sqrt(X_fit.shape[0] - 1)
                 else:
                     U *= S[: self.n_components_]
 
                 return U
+            else:
+                # Scikit-learn PCA["covariance_eigh"] was fit
+                return self._transform(X_fit, xp, x_is_centered=x_is_centered)
 
         def _onedal_supported(self, method_name, X):
             class_name = self.__class__.__name__
@@ -200,7 +226,13 @@ if daal_check_version((2024, "P", 100)):
                     ),
                     (
                         self._is_solver_compatible_with_onedal(shape_tuple),
-
+                        (
+                            "Only 'covariance_eigh' and 'onedal_svd' "
+                            "solvers are supported."
+                            if sklearn_check_version("1.5")
+                            else "Only 'full', 'covariance_eigh' and 'onedal_svd' "
+                            "solvers are supported."
+                        ),
                     ),
                     (not issparse(X), "oneDAL PCA does not support sparse data"),
                 ]
@@ -255,7 +287,13 @@ if daal_check_version((2024, "P", 100)):
 
             if self._fit_svd_solver == "auto":
                 if sklearn_check_version("1.1"):
-                    if max(shape_tuple) <= 500 or n_components == "mle":
+                    if (
+                        sklearn_check_version("1.5")
+                        and shape_tuple[1] <= 1_000
+                        and shape_tuple[0] >= 10 * shape_tuple[1]
+                    ):
+                        self._fit_svd_solver = "covariance_eigh"
+                    elif max(shape_tuple) <= 500 or n_components == "mle":
                         self._fit_svd_solver = "full"
                     elif 1 <= n_components < 0.8 * n_sf_min:
                         self._fit_svd_solver = "randomized"
@@ -289,7 +327,23 @@ if daal_check_version((2024, "P", 100)):
             else:
                 self._fit_svd_solver = "full"
 
-            if self._fit_svd_solver == "full":
+            # Use oneDAL in next cases:
+            # 1. oneDAL SVD solver is explicitly set
+            # 2. solver is set or dispatched to "covariance_eigh"
+            # 3. solver is set or dispatched to "full" and sklearn version < 1.5
+            # 4. solver is set to "auto" and dispatched to "full"
+            if self._fit_svd_solver in ["onedal_svd", "covariance_eigh"]:
+                return True
+            elif not sklearn_check_version("1.5") and self._fit_svd_solver == "full":
+                self._fit_svd_solver = "covariance_eigh"
+                return True
+            elif self.svd_solver == "auto" and self._fit_svd_solver == "full":
+                warn(
+                    "Sklearnex always uses `covariance_eigh` solver instead of `full` "
+                    "when `svd_solver` parameter is set to `auto` "
+                    "for performance purposes."
+                )
+                self._fit_svd_solver = "covariance_eigh"
                 return True
             else:
                 return False
@@ -298,11 +352,9 @@ if daal_check_version((2024, "P", 100)):
             self.n_samples_ = self._onedal_estimator.n_samples_
             if sklearn_check_version("1.2"):
                 self.n_features_in_ = self._onedal_estimator.n_features_
-            elif sklearn_check_version("0.24"):
-                self.n_features_ = self._onedal_estimator.n_features_
-                self.n_features_in_ = self._onedal_estimator.n_features_
             else:
                 self.n_features_ = self._onedal_estimator.n_features_
+                self.n_features_in_ = self._onedal_estimator.n_features_
             self.n_components_ = self._onedal_estimator.n_components_
             self.components_ = self._onedal_estimator.components_
             self.mean_ = self._onedal_estimator.mean_
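Net effect on the public API: svd_solver now additionally accepts "covariance_eigh" on any supported sklearn version (stock scikit-learn only gained it in 1.5) and the testing-oriented "onedal_svd", which flips the oneDAL method from "cov" to "svd" in onedal_params. A sketch of the three entry points, assuming the 2024.5.0 wheel is installed; the data is illustrative.

import numpy as np

from sklearnex.decomposition import PCA

X = np.random.default_rng(0).standard_normal((200, 5))

# Explicitly request oneDAL's PCA-Covariance algorithm, including on
# sklearn < 1.5 where stock PCA has no "covariance_eigh" option.
pca_cov = PCA(n_components=2, svd_solver="covariance_eigh").fit(X)

# Explicitly request oneDAL's PCA-SVD algorithm.
pca_svd = PCA(n_components=2, svd_solver="onedal_svd").fit(X)

# With the default svd_solver="auto", a dispatch that lands on "full"
# is rerouted to "covariance_eigh" (with a performance warning); on
# sklearn >= 1.5 this shape already resolves to "covariance_eigh".
pca_auto = PCA(n_components=2).fit(X)

# The oneDAL paths span the same principal subspace; component signs
# are not guaranteed to match across methods.
print(np.abs(pca_cov.components_) - np.abs(pca_svd.components_))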
sklearnex/decomposition/tests/test_pca.py
CHANGED

@@ -41,10 +41,10 @@ def test_sklearnex_import(dataframe, queue):
         [3.6053038, 0.04224385],
     ]
 
-    pca = PCA(n_components=2, svd_solver="full")
+    pca = PCA(n_components=2, svd_solver="covariance_eigh")
     pca.fit(X)
     X_transformed = pca.transform(X)
-    X_fit_transformed = PCA(n_components=2, svd_solver="full").fit_transform(X)
+    X_fit_transformed = PCA(n_components=2, svd_solver="covariance_eigh").fit_transform(X)
 
     if daal_check_version((2024, "P", 100)):
         assert "sklearnex" in pca.__module__