PyPI - scikit-learn-intelex - Versions diffs - 2024.3.0__py312-none-manylinux1_x86_64.whl → 2024.5.0__py312-none-manylinux1_x86_64.whl - Mend

scikit-learn-intelex 2024.3.0__py312-none-manylinux1_x86_64.whl → 2024.5.0__py312-none-manylinux1_x86_64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (43)

{scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/METADATA +2 -2
{scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/RECORD +43 -37
sklearnex/_device_offload.py +39 -5
sklearnex/basic_statistics/__init__.py +2 -1
sklearnex/basic_statistics/incremental_basic_statistics.py +288 -0
sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +384 -0
sklearnex/covariance/incremental_covariance.py +217 -30
sklearnex/covariance/tests/test_incremental_covariance.py +54 -17
sklearnex/decomposition/pca.py +71 -19
sklearnex/decomposition/tests/test_pca.py +2 -2
sklearnex/dispatcher.py +33 -2
sklearnex/ensemble/_forest.py +73 -79
sklearnex/linear_model/__init__.py +5 -3
sklearnex/linear_model/incremental_linear.py +387 -0
sklearnex/linear_model/linear.py +275 -340
sklearnex/linear_model/logistic_regression.py +50 -9
sklearnex/linear_model/tests/test_incremental_linear.py +200 -0
sklearnex/linear_model/tests/test_linear.py +40 -5
sklearnex/neighbors/_lof.py +53 -36
sklearnex/neighbors/common.py +4 -1
sklearnex/neighbors/knn_classification.py +37 -122
sklearnex/neighbors/knn_regression.py +10 -117
sklearnex/neighbors/knn_unsupervised.py +6 -78
sklearnex/neighbors/tests/test_neighbors.py +2 -2
sklearnex/preview/cluster/k_means.py +5 -73
sklearnex/preview/covariance/covariance.py +6 -5
sklearnex/preview/covariance/tests/test_covariance.py +18 -5
sklearnex/svm/_common.py +4 -7
sklearnex/svm/nusvc.py +66 -50
sklearnex/svm/nusvr.py +3 -49
sklearnex/svm/svc.py +66 -51
sklearnex/svm/svr.py +3 -49
sklearnex/tests/_utils.py +34 -16
sklearnex/tests/test_memory_usage.py +5 -1
sklearnex/tests/test_n_jobs_support.py +12 -2
sklearnex/tests/test_patching.py +87 -58
sklearnex/tests/test_run_to_run_stability_tests.py +1 -1
sklearnex/utils/__init__.py +2 -1
sklearnex/utils/_namespace.py +97 -0
sklearnex/utils/tests/test_finite.py +89 -0
{scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/LICENSE.txt +0 -0
{scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/WHEEL +0 -0
{scikit_learn_intelex-2024.3.0.dist-info → scikit_learn_intelex-2024.5.0.dist-info}/top_level.txt +0 -0

sklearnex/neighbors/knn_classification.py CHANGED Viewed

@@ -14,129 +14,30 @@
 # limitations under the License.
 # ===============================================================================
-from daal4py.sklearn._n_jobs_support import control_n_jobs
-from daal4py.sklearn._utils import sklearn_check_version
-if not sklearn_check_version("1.2"):
-    from sklearn.neighbors._base import _check_weights
+from sklearn.metrics import accuracy_score
 from sklearn.neighbors._classification import (
     KNeighborsClassifier as sklearn_KNeighborsClassifier,
 )
 from sklearn.neighbors._unsupervised import NearestNeighbors as sklearn_NearestNeighbors
 from sklearn.utils.validation import _deprecate_positional_args, check_is_fitted
+from daal4py.sklearn._n_jobs_support import control_n_jobs
+from daal4py.sklearn._utils import sklearn_check_version
 from onedal.neighbors import KNeighborsClassifier as onedal_KNeighborsClassifier
 from .._device_offload import dispatch, wrap_output_data
 from .common import KNeighborsDispatchingBase
-if sklearn_check_version("0.24"):
-    class KNeighborsClassifier_(sklearn_KNeighborsClassifier):
-        if sklearn_check_version("1.2"):
-            _parameter_constraints: dict = {
-                **sklearn_KNeighborsClassifier._parameter_constraints
-            }
-        @_deprecate_positional_args
-        def __init__(
-            self,
-            n_neighbors=5,
-            *,
-            weights="uniform",
-            algorithm="auto",
-            leaf_size=30,
-            p=2,
-            metric="minkowski",
-            metric_params=None,
-            n_jobs=None,
-            **kwargs,
-        ):
-            super().__init__(
-                n_neighbors=n_neighbors,
-                algorithm=algorithm,
-                leaf_size=leaf_size,
-                metric=metric,
-                p=p,
-                metric_params=metric_params,
-                n_jobs=n_jobs,
-                **kwargs,
-            )
-            self.weights = (
-                weights if sklearn_check_version("1.0") else _check_weights(weights)
-            )
-elif sklearn_check_version("0.22"):
-    from sklearn.neighbors._base import (
-        SupervisedIntegerMixin as BaseSupervisedIntegerMixin,
-    )
-    class KNeighborsClassifier_(sklearn_KNeighborsClassifier, BaseSupervisedIntegerMixin):
-        @_deprecate_positional_args
-        def __init__(
-            self,
-            n_neighbors=5,
-            *,
-            weights="uniform",
-            algorithm="auto",
-            leaf_size=30,
-            p=2,
-            metric="minkowski",
-            metric_params=None,
-            n_jobs=None,
-            **kwargs,
-        ):
-            super().__init__(
-                n_neighbors=n_neighbors,
-                algorithm=algorithm,
-                leaf_size=leaf_size,
-                metric=metric,
-                p=p,
-                metric_params=metric_params,
-                n_jobs=n_jobs,
-                **kwargs,
-            )
-            self.weights = _check_weights(weights)
-else:
-    from sklearn.neighbors.base import (
-        SupervisedIntegerMixin as BaseSupervisedIntegerMixin,
-    )
-    class KNeighborsClassifier_(sklearn_KNeighborsClassifier, BaseSupervisedIntegerMixin):
-        @_deprecate_positional_args
-        def __init__(
-            self,
-            n_neighbors=5,
-            *,
-            weights="uniform",
-            algorithm="auto",
-            leaf_size=30,
-            p=2,
-            metric="minkowski",
-            metric_params=None,
-            n_jobs=None,
-            **kwargs,
-        ):
-            super().__init__(
-                n_neighbors=n_neighbors,
-                algorithm=algorithm,
-                leaf_size=leaf_size,
-                metric=metric,
-                p=p,
-                metric_params=metric_params,
-                n_jobs=n_jobs,
-                **kwargs,
-            )
-            self.weights = _check_weights(weights)
-@control_n_jobs(decorated_methods=["fit", "predict", "predict_proba", "kneighbors"])
-class KNeighborsClassifier(KNeighborsClassifier_, KNeighborsDispatchingBase):
+@control_n_jobs(
+    decorated_methods=["fit", "predict", "predict_proba", "kneighbors", "score"]
+)
+class KNeighborsClassifier(sklearn_KNeighborsClassifier, KNeighborsDispatchingBase):
     __doc__ = sklearn_KNeighborsClassifier.__doc__
     if sklearn_check_version("1.2"):
-        _parameter_constraints: dict = {**KNeighborsClassifier_._parameter_constraints}
+        _parameter_constraints: dict = {
+            **sklearn_KNeighborsClassifier._parameter_constraints
+        }
     if sklearn_check_version("1.0"):
@@ -192,7 +93,6 @@ class KNeighborsClassifier(KNeighborsClassifier_, KNeighborsDispatchingBase):
             )
     def fit(self, X, y):
-        self._fit_validation(X, y)
         dispatch(
             self,
             "fit",
@@ -235,6 +135,23 @@ class KNeighborsClassifier(KNeighborsClassifier_, KNeighborsDispatchingBase):
             X,
         )
+    @wrap_output_data
+    def score(self, X, y, sample_weight=None):
+        check_is_fitted(self)
+        if sklearn_check_version("1.0"):
+            self._check_feature_names(X, reset=False)
+        return dispatch(
+            self,
+            "score",
+            {
+                "onedal": self.__class__._onedal_score,
+                "sklearn": sklearn_KNeighborsClassifier.score,
+            },
+            X,
+            y,
+            sample_weight=sample_weight,
+        )
     @wrap_output_data
     def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
         check_is_fitted(self)
@@ -263,18 +180,10 @@ class KNeighborsClassifier(KNeighborsClassifier_, KNeighborsDispatchingBase):
             or getattr(self, "_tree", 0) is None
             and self._fit_method == "kd_tree"
         ):
-            if sklearn_check_version("0.24"):
-                sklearn_NearestNeighbors.fit(self, self._fit_X, getattr(self, "_y", None))
-            else:
-                sklearn_NearestNeighbors.fit(self, self._fit_X)
-        if sklearn_check_version("0.22"):
-            result = sklearn_NearestNeighbors.radius_neighbors(
-                self, X, radius, return_distance, sort_results
-            )
-        else:
-            result = sklearn_NearestNeighbors.radius_neighbors(
-                self, X, radius, return_distance
-            )
+            sklearn_NearestNeighbors.fit(self, self._fit_X, getattr(self, "_y", None))
+        result = sklearn_NearestNeighbors.radius_neighbors(
+            self, X, radius, return_distance, sort_results
+        )
         return result
@@ -313,6 +222,11 @@ class KNeighborsClassifier(KNeighborsClassifier_, KNeighborsDispatchingBase):
             X, n_neighbors, return_distance, queue=queue
         )
+    def _onedal_score(self, X, y, sample_weight=None, queue=None):
+        return accuracy_score(
+            y, self._onedal_predict(X, queue=queue), sample_weight=sample_weight
+        )
     def _save_attributes(self):
         self.classes_ = self._onedal_estimator.classes_
         self.n_features_in_ = self._onedal_estimator.n_features_in_
@@ -326,5 +240,6 @@ class KNeighborsClassifier(KNeighborsClassifier_, KNeighborsDispatchingBase):
     fit.__doc__ = sklearn_KNeighborsClassifier.fit.__doc__
     predict.__doc__ = sklearn_KNeighborsClassifier.predict.__doc__
     predict_proba.__doc__ = sklearn_KNeighborsClassifier.predict_proba.__doc__
+    score.__doc__ = sklearn_KNeighborsClassifier.score.__doc__
     kneighbors.__doc__ = sklearn_KNeighborsClassifier.kneighbors.__doc__
     radius_neighbors.__doc__ = sklearn_NearestNeighbors.radius_neighbors.__doc__

sklearnex/neighbors/knn_regression.py CHANGED Viewed

@@ -14,125 +14,27 @@
 # limitations under the License.
 # ==============================================================================
-from daal4py.sklearn._n_jobs_support import control_n_jobs
-from daal4py.sklearn._utils import sklearn_check_version
-if not sklearn_check_version("1.2"):
-    from sklearn.neighbors._base import _check_weights
 from sklearn.neighbors._regression import (
     KNeighborsRegressor as sklearn_KNeighborsRegressor,
 )
 from sklearn.neighbors._unsupervised import NearestNeighbors as sklearn_NearestNeighbors
 from sklearn.utils.validation import _deprecate_positional_args, check_is_fitted
+from daal4py.sklearn._n_jobs_support import control_n_jobs
+from daal4py.sklearn._utils import sklearn_check_version
 from onedal.neighbors import KNeighborsRegressor as onedal_KNeighborsRegressor
 from .._device_offload import dispatch, wrap_output_data
 from .common import KNeighborsDispatchingBase
-if sklearn_check_version("0.24"):
-    class KNeighborsRegressor_(sklearn_KNeighborsRegressor):
-        if sklearn_check_version("1.2"):
-            _parameter_constraints: dict = {
-                **sklearn_KNeighborsRegressor._parameter_constraints
-            }
-        @_deprecate_positional_args
-        def __init__(
-            self,
-            n_neighbors=5,
-            *,
-            weights="uniform",
-            algorithm="auto",
-            leaf_size=30,
-            p=2,
-            metric="minkowski",
-            metric_params=None,
-            n_jobs=None,
-            **kwargs,
-        ):
-            super().__init__(
-                n_neighbors=n_neighbors,
-                algorithm=algorithm,
-                leaf_size=leaf_size,
-                metric=metric,
-                p=p,
-                metric_params=metric_params,
-                n_jobs=n_jobs,
-                **kwargs,
-            )
-            self.weights = (
-                weights if sklearn_check_version("1.0") else _check_weights(weights)
-            )
-elif sklearn_check_version("0.22"):
-    from sklearn.neighbors._base import SupervisedFloatMixin as BaseSupervisedFloatMixin
-    class KNeighborsRegressor_(sklearn_KNeighborsRegressor, BaseSupervisedFloatMixin):
-        @_deprecate_positional_args
-        def __init__(
-            self,
-            n_neighbors=5,
-            *,
-            weights="uniform",
-            algorithm="auto",
-            leaf_size=30,
-            p=2,
-            metric="minkowski",
-            metric_params=None,
-            n_jobs=None,
-            **kwargs,
-        ):
-            super().__init__(
-                n_neighbors=n_neighbors,
-                algorithm=algorithm,
-                leaf_size=leaf_size,
-                metric=metric,
-                p=p,
-                metric_params=metric_params,
-                n_jobs=n_jobs,
-                **kwargs,
-            )
-            self.weights = _check_weights(weights)
-else:
-    from sklearn.neighbors.base import SupervisedFloatMixin as BaseSupervisedFloatMixin
-    class KNeighborsRegressor_(sklearn_KNeighborsRegressor, BaseSupervisedFloatMixin):
-        @_deprecate_positional_args
-        def __init__(
-            self,
-            n_neighbors=5,
-            *,
-            weights="uniform",
-            algorithm="auto",
-            leaf_size=30,
-            p=2,
-            metric="minkowski",
-            metric_params=None,
-            n_jobs=None,
-            **kwargs,
-        ):
-            super().__init__(
-                n_neighbors=n_neighbors,
-                algorithm=algorithm,
-                leaf_size=leaf_size,
-                metric=metric,
-                p=p,
-                metric_params=metric_params,
-                n_jobs=n_jobs,
-                **kwargs,
-            )
-            self.weights = _check_weights(weights)
 @control_n_jobs(decorated_methods=["fit", "predict", "kneighbors"])
-class KNeighborsRegressor(KNeighborsRegressor_, KNeighborsDispatchingBase):
+class KNeighborsRegressor(sklearn_KNeighborsRegressor, KNeighborsDispatchingBase):
     __doc__ = sklearn_KNeighborsRegressor.__doc__
     if sklearn_check_version("1.2"):
-        _parameter_constraints: dict = {**KNeighborsRegressor_._parameter_constraints}
+        _parameter_constraints: dict = {
+            **sklearn_KNeighborsRegressor._parameter_constraints
+        }
     if sklearn_check_version("1.0"):
@@ -188,7 +90,6 @@ class KNeighborsRegressor(KNeighborsRegressor_, KNeighborsDispatchingBase):
             )
     def fit(self, X, y):
-        self._fit_validation(X, y)
         dispatch(
             self,
             "fit",
@@ -244,18 +145,10 @@ class KNeighborsRegressor(KNeighborsRegressor_, KNeighborsDispatchingBase):
             or getattr(self, "_tree", 0) is None
             and self._fit_method == "kd_tree"
         ):
-            if sklearn_check_version("0.24"):
-                sklearn_NearestNeighbors.fit(self, self._fit_X, getattr(self, "_y", None))
-            else:
-                sklearn_NearestNeighbors.fit(self, self._fit_X)
-        if sklearn_check_version("0.22"):
-            result = sklearn_NearestNeighbors.radius_neighbors(
-                self, X, radius, return_distance, sort_results
-            )
-        else:
-            result = sklearn_NearestNeighbors.radius_neighbors(
-                self, X, radius, return_distance
-            )
+            sklearn_NearestNeighbors.fit(self, self._fit_X, getattr(self, "_y", None))
+        result = sklearn_NearestNeighbors.radius_neighbors(
+            self, X, radius, return_distance, sort_results
+        )
         return result

sklearnex/neighbors/knn_unsupervised.py CHANGED Viewed

@@ -14,12 +14,6 @@
 # limitations under the License.
 # ===============================================================================
-try:
-    from packaging.version import Version
-except ImportError:
-    from distutils.version import LooseVersion as Version
-from sklearn import __version__ as sklearn_version
 from sklearn.neighbors._unsupervised import NearestNeighbors as sklearn_NearestNeighbors
 from sklearn.utils.validation import _deprecate_positional_args, check_is_fitted
@@ -30,69 +24,12 @@ from onedal.neighbors import NearestNeighbors as onedal_NearestNeighbors
 from .._device_offload import dispatch, wrap_output_data
 from .common import KNeighborsDispatchingBase
-if sklearn_check_version("0.22") and Version(sklearn_version) < Version("0.23"):
-    class NearestNeighbors_(sklearn_NearestNeighbors):
-        def __init__(
-            self,
-            n_neighbors=5,
-            radius=1.0,
-            algorithm="auto",
-            leaf_size=30,
-            metric="minkowski",
-            p=2,
-            metric_params=None,
-            n_jobs=None,
-        ):
-            super().__init__(
-                n_neighbors=n_neighbors,
-                radius=radius,
-                algorithm=algorithm,
-                leaf_size=leaf_size,
-                metric=metric,
-                p=p,
-                metric_params=metric_params,
-                n_jobs=n_jobs,
-            )
-else:
-    class NearestNeighbors_(sklearn_NearestNeighbors):
-        if sklearn_check_version("1.2"):
-            _parameter_constraints: dict = {
-                **sklearn_NearestNeighbors._parameter_constraints
-            }
-        @_deprecate_positional_args
-        def __init__(
-            self,
-            *,
-            n_neighbors=5,
-            radius=1.0,
-            algorithm="auto",
-            leaf_size=30,
-            metric="minkowski",
-            p=2,
-            metric_params=None,
-            n_jobs=None,
-        ):
-            super().__init__(
-                n_neighbors=n_neighbors,
-                radius=radius,
-                algorithm=algorithm,
-                leaf_size=leaf_size,
-                metric=metric,
-                p=p,
-                metric_params=metric_params,
-                n_jobs=n_jobs,
-            )
 @control_n_jobs(decorated_methods=["fit", "kneighbors"])
-class NearestNeighbors(NearestNeighbors_, KNeighborsDispatchingBase):
+class NearestNeighbors(sklearn_NearestNeighbors, KNeighborsDispatchingBase):
     __doc__ = sklearn_NearestNeighbors.__doc__
     if sklearn_check_version("1.2"):
-        _parameter_constraints: dict = {**NearestNeighbors_._parameter_constraints}
+        _parameter_constraints: dict = {**sklearn_NearestNeighbors._parameter_constraints}
     @_deprecate_positional_args
     def __init__(
@@ -118,7 +55,6 @@ class NearestNeighbors(NearestNeighbors_, KNeighborsDispatchingBase):
         )
     def fit(self, X, y=None):
-        self._fit_validation(X, y)
         dispatch(
             self,
             "fit",
@@ -159,18 +95,10 @@ class NearestNeighbors(NearestNeighbors_, KNeighborsDispatchingBase):
             or getattr(self, "_tree", 0) is None
             and self._fit_method == "kd_tree"
         ):
-            if sklearn_check_version("0.24"):
-                sklearn_NearestNeighbors.fit(self, self._fit_X, getattr(self, "_y", None))
-            else:
-                sklearn_NearestNeighbors.fit(self, self._fit_X)
-        if sklearn_check_version("0.22"):
-            result = sklearn_NearestNeighbors.radius_neighbors(
-                self, X, radius, return_distance, sort_results
-            )
-        else:
-            result = sklearn_NearestNeighbors.radius_neighbors(
-                self, X, radius, return_distance
-            )
+            sklearn_NearestNeighbors.fit(self, self._fit_X, getattr(self, "_y", None))
+        result = sklearn_NearestNeighbors.radius_neighbors(
+            self, X, radius, return_distance, sort_results
+        )
         return result

sklearnex/neighbors/tests/test_neighbors.py CHANGED Viewed

@@ -47,9 +47,9 @@ def test_sklearnex_import_knn_regression(dataframe, queue):
     y = _convert_to_dataframe([0, 0, 1, 1], sycl_queue=queue, target_df=dataframe)
     neigh = KNeighborsRegressor(n_neighbors=2).fit(X, y)
     y_test = _convert_to_dataframe([[1.5]], sycl_queue=queue, target_df=dataframe)
-    pred = _as_numpy(neigh.predict(y_test))
+    pred = _as_numpy(neigh.predict(y_test)).squeeze()
     assert "sklearnex" in neigh.__module__
-    assert_allclose(pred, [0.5])
+    assert_allclose(pred, 0.5)
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())

sklearnex/preview/cluster/k_means.py CHANGED Viewed

@@ -172,24 +172,6 @@ if daal_check_version((2023, "P", 200)):
             return patching_status
         def fit(self, X, y=None, sample_weight=None):
-            """Compute k-means clustering.
-            Parameters
-            ----------
-            X : array-like or sparse matrix, shape=(n_samples, n_features)
-                Training instances to cluster. It must be noted that the data
-                will be converted to C ordering, which will cause a memory
-                copy if the given data is not C-contiguous.
-            y : Ignored
-                not used, present here for API consistency by convention.
-            sample_weight : array-like, shape (n_samples,), optional
-                The weights for each observation in X. If None, all observations
-                are assigned equal weight (default: None)
-            """
             if sklearn_check_version("1.0"):
                 self._check_feature_names(X, reset=True)
             if sklearn_check_version("1.2"):
@@ -257,24 +239,6 @@ if daal_check_version((2023, "P", 200)):
         @wrap_output_data
         def predict(self, X):
-            """Compute k-means clustering.
-            Parameters
-            ----------
-            X : array-like or sparse matrix, shape=(n_samples, n_features)
-                Training instances to cluster. It must be noted that the data
-                will be converted to C ordering, which will cause a memory
-                copy if the given data is not C-contiguous.
-            y : Ignored
-                not used, present here for API consistency by convention.
-            sample_weight : array-like, shape (n_samples,), optional
-                The weights for each observation in X. If None, all observations
-                are assigned equal weight (default: None)
-            """
             if sklearn_check_version("1.0"):
                 self._check_feature_names(X, reset=True)
             if sklearn_check_version("1.2"):
@@ -317,52 +281,20 @@ if daal_check_version((2023, "P", 200)):
         @wrap_output_data
         def fit_transform(self, X, y=None, sample_weight=None):
-            """Compute clustering and transform X to cluster-distance space.
-            Equivalent to fit(X).transform(X), but more efficiently implemented.
-            Parameters
-            ----------
-            X : {array-like, sparse matrix} of shape (n_samples, n_features)
-                New data to transform.
-            y : Ignored
-                Not used, present here for API consistency by convention.
-            sample_weight : array-like of shape (n_samples,), default=None
-                The weights for each observation in X. If None, all observations
-                are assigned equal weight.
-            Returns
-            -------
-            X_new : ndarray of shape (n_samples, n_clusters)
-                X transformed in the new space.
-            """
             return self.fit(X, sample_weight=sample_weight)._transform(X)
         @wrap_output_data
         def transform(self, X):
-            """Transform X to a cluster-distance space.
-            In the new space, each dimension is the distance to the cluster
-            centers. Note that even if X is sparse, the array returned by
-            `transform` will typically be dense.
-            Parameters
-            ----------
-            X : {array-like, sparse matrix} of shape (n_samples, n_features)
-                New data to transform.
-            Returns
-            -------
-            X_new : ndarray of shape (n_samples, n_clusters)
-                X transformed in the new space.
-            """
             check_is_fitted(self)
             X = self._check_test_data(X)
             return self._transform(X)
+        fit.__doc__ = sklearn_KMeans.fit.__doc__
+        predict.__doc__ = sklearn_KMeans.predict.__doc__
+        transform.__doc__ = sklearn_KMeans.transform.__doc__
+        fit_transform.__doc__ = sklearn_KMeans.fit_transform.__doc__
 else:
     from daal4py.sklearn.cluster import KMeans

sklearnex/preview/covariance/covariance.py CHANGED Viewed

@@ -22,7 +22,7 @@ from sklearn.covariance import EmpiricalCovariance as sklearn_EmpiricalCovarianc
 from sklearn.utils import check_array
 from daal4py.sklearn._n_jobs_support import control_n_jobs
-from daal4py.sklearn._utils import sklearn_check_version
+from daal4py.sklearn._utils import daal_check_version, sklearn_check_version
 from onedal.common.hyperparameters import get_hyperparameters
 from onedal.covariance import EmpiricalCovariance as onedal_EmpiricalCovariance
 from sklearnex import config_context
@@ -44,6 +44,10 @@ class EmpiricalCovariance(sklearn_EmpiricalCovariance):
     def _save_attributes(self):
         assert hasattr(self, "_onedal_estimator")
+        if not daal_check_version((2024, "P", 400)) and self.assume_centered:
+            location = self._onedal_estimator.location_[None, :]
+            self._onedal_estimator.covariance_ += np.dot(location.T, location)
+            self._onedal_estimator.location_ = np.zeros_like(np.squeeze(location))
         self._set_covariance(self._onedal_estimator.covariance_)
         self.location_ = self._onedal_estimator.location_
@@ -58,6 +62,7 @@ class EmpiricalCovariance(sklearn_EmpiricalCovariance):
         onedal_params = {
             "method": "dense",
             "bias": True,
+            "assume_centered": self.assume_centered,
         }
         self._onedal_estimator = self._onedal_covariance(**onedal_params)
@@ -73,10 +78,6 @@ class EmpiricalCovariance(sklearn_EmpiricalCovariance):
             (X,) = data
             patching_status.and_conditions(
                 [
-                    (
-                        self.assume_centered == False,
-                        "assume_centered parameter is not supported on oneDAL side",
-                    ),
                     (not sp.issparse(X), "X is sparse. Sparse input is not supported."),
                 ]
             )

sklearnex/preview/covariance/tests/test_covariance.py CHANGED Viewed

@@ -27,27 +27,40 @@ from onedal.tests.utils._dataframes_support import (
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
 @pytest.mark.parametrize("macro_block", [None, 1024])
-def test_sklearnex_import_covariance(dataframe, queue, macro_block):
+@pytest.mark.parametrize("assume_centered", [True, False])
+def test_sklearnex_import_covariance(dataframe, queue, macro_block, assume_centered):
     from sklearnex.preview.covariance import EmpiricalCovariance
     X = np.array([[0, 1], [0, 1]])
     X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
-    empcov = EmpiricalCovariance()
+    empcov = EmpiricalCovariance(assume_centered=assume_centered)
     if daal_check_version((2024, "P", 0)) and macro_block is not None:
         hparams = empcov.get_hyperparameters("fit")
         hparams.cpu_macro_block = macro_block
     result = empcov.fit(X)
     expected_covariance = np.array([[0, 0], [0, 0]])
-    expected_means = np.array([0, 1])
+    expected_means = np.array([0, 0])
+    if assume_centered:
+        expected_covariance = np.array([[0, 0], [0, 1]])
+    else:
+        expected_means = np.array([0, 1])
     assert_allclose(expected_covariance, result.covariance_)
     assert_allclose(expected_means, result.location_)
     X = np.array([[1, 2], [3, 6]])
     X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
     result = empcov.fit(X)
-    expected_covariance = np.array([[1, 2], [2, 4]])
-    expected_means = np.array([2, 4])
+    if assume_centered:
+        expected_covariance = np.array([[5, 10], [10, 20]])
+    else:
+        expected_covariance = np.array([[1, 2], [2, 4]])
+        expected_means = np.array([2, 4])
     assert_allclose(expected_covariance, result.covariance_)
     assert_allclose(expected_means, result.location_)