PyPI - scikit-learn-intelex - Versions diffs - 2024.1.0__py38-none-win_amd64.whl → 2024.2.0__py38-none-win_amd64.whl - Mend

scikit-learn-intelex 2024.1.0__py38-none-win_amd64.whl → 2024.2.0__py38-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (107) hide show

scikit_learn_intelex-2024.2.0.data/data/Lib/site-packages/sklearnex/linear_model/tests/test_logreg.py ADDED Viewed

@@ -0,0 +1,93 @@
+# ===============================================================================
+# Copyright 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===============================================================================
+import numpy as np
+import pytest
+from numpy.testing import assert_allclose
+from sklearn.datasets import load_breast_cancer, load_iris
+from sklearn.metrics import accuracy_score
+from sklearn.model_selection import train_test_split
+from daal4py.sklearn._utils import daal_check_version
+from onedal.tests.utils._dataframes_support import (
+    _as_numpy,
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
+def prepare_input(X, y, dataframe, queue):
+    """Split X/y 80/20 (random_state=42) and convert the model inputs."""
+    train_X, test_X, train_y, test_y = train_test_split(
+        X, y, train_size=0.8, random_state=42
+    )
+    def to_target(data):
+        # Same conversion settings for every array that is handed to the estimator.
+        return _convert_to_dataframe(data, sycl_queue=queue, target_df=dataframe)
+    # Conversion order kept as X_train, y_train, X_test; the test labels are
+    # returned exactly as train_test_split produced them.
+    train_X = to_target(train_X)
+    train_y = to_target(train_y)
+    test_X = to_target(test_X)
+    return train_X, test_X, train_y, test_y
+@pytest.mark.parametrize(
+    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
+)
+def test_sklearnex_multiclass_classification(dataframe, queue):
+    """Fit lbfgs LogisticRegression on iris and check module origin and accuracy."""
+    from sklearnex.linear_model import LogisticRegression
+    features, labels = load_iris(return_X_y=True)
+    X_train, X_test, y_train, y_test = prepare_input(
+        features, labels, dataframe, queue
+    )
+    estimator = LogisticRegression(fit_intercept=True, solver="lbfgs", max_iter=200)
+    model = estimator.fit(X_train, y_train)
+    # The expected defining module depends on the daal_check_version result.
+    expected_origin = (
+        "sklearnex" if daal_check_version((2024, "P", 1)) else "daal4py"
+    )
+    assert expected_origin in model.__module__
+    predictions = _as_numpy(model.predict(X_test))
+    assert accuracy_score(y_test, predictions) > 0.99
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_binary_classification(dataframe, queue):
+    """Fit newton-cg LogisticRegression on breast_cancer and check the result."""
+    from sklearnex.linear_model import LogisticRegression
+    features, labels = load_breast_cancer(return_X_y=True)
+    X_train, X_test, y_train, y_test = prepare_input(
+        features, labels, dataframe, queue
+    )
+    estimator = LogisticRegression(
+        fit_intercept=True, solver="newton-cg", max_iter=100
+    )
+    model = estimator.fit(X_train, y_train)
+    # The expected defining module depends on the daal_check_version result.
+    meets_2024_p1 = daal_check_version((2024, "P", 1))
+    expected_origin = "sklearnex" if meets_2024_p1 else "daal4py"
+    assert expected_origin in model.__module__
+    on_gpu_queue = (
+        dataframe != "numpy" and queue is not None and queue.sycl_device.is_gpu
+    )
+    if on_gpu_queue and meets_2024_p1:
+        # fit was done on gpu
+        assert hasattr(model, "_onedal_estimator")
+    predictions = _as_numpy(model.predict(X_test))
+    assert accuracy_score(y_test, predictions) > 0.95

{scikit_learn_intelex-2024.1.0.data → scikit_learn_intelex-2024.2.0.data}/data/Lib/site-packages/sklearnex/neighbors/__init__.py RENAMED Viewed

@@ -14,10 +14,10 @@
 # limitations under the License.
 # ===============================================================================
+from ._lof import LocalOutlierFactor
 from .knn_classification import KNeighborsClassifier
 from .knn_regression import KNeighborsRegressor
 from .knn_unsupervised import NearestNeighbors
-from .lof import LocalOutlierFactor
 __all__ = [
     "KNeighborsClassifier",

scikit_learn_intelex-2024.2.0.data/data/Lib/site-packages/sklearnex/neighbors/_lof.py ADDED Viewed

@@ -0,0 +1,167 @@
+# ===============================================================================
+# Copyright 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===============================================================================
+import warnings
+import numpy as np
+from sklearn.neighbors import LocalOutlierFactor as sklearn_LocalOutlierFactor
+from sklearn.utils.metaestimators import available_if
+from sklearn.utils.validation import check_is_fitted
+from daal4py.sklearn._n_jobs_support import control_n_jobs
+from daal4py.sklearn._utils import sklearn_check_version
+from .._device_offload import dispatch, wrap_output_data
+from .common import KNeighborsDispatchingBase
+from .knn_unsupervised import NearestNeighbors
+# sklearnex LocalOutlierFactor: fit and kneighbors go through dispatch(), which
+# is handed both a oneDAL-backed implementation and the stock scikit-learn one.
+@control_n_jobs(decorated_methods=["fit", "kneighbors"])
+class LocalOutlierFactor(KNeighborsDispatchingBase, sklearn_LocalOutlierFactor):
+    # Stock scikit-learn docstring with a note about the X=None limitation appended.
+    __doc__ = (
+        sklearn_LocalOutlierFactor.__doc__
+        + "\n NOTE: When X=None, methods kneighbors, kneighbors_graph, and predict will"
+        + "\n only output numpy arrays. In that case, the only way to offload to gpu"
+        + "\n is to use a global queue (e.g. using config_context)"
+    )
+    if sklearn_check_version("1.2"):
+        # Shallow copy of the stock estimator's parameter-constraint dict.
+        _parameter_constraints: dict = {
+            **sklearn_LocalOutlierFactor._parameter_constraints
+        }
+    # Only certain methods should be taken from knn to prevent code
+    # duplication. Inheriting would yield a complicated inheritance
+    # structure and violate the sklearn inheritance path.
+    _save_attributes = NearestNeighbors._save_attributes
+    _onedal_knn_fit = NearestNeighbors._onedal_fit
+    _onedal_kneighbors = NearestNeighbors._onedal_kneighbors
+    def _onedal_fit(self, X, y, queue=None):
+        """Fit the neighbors model, then derive the LOF attributes.
+
+        Sets ``n_neighbors_``, ``_distances_fit_X_``, ``_lrd``,
+        ``negative_outlier_factor_`` and ``offset_`` and returns ``self``.
+
+        Raises ``ValueError`` when ``contamination`` is not ``"auto"`` and
+        lies outside ``(0, 0.5]``.
+        """
+        if sklearn_check_version("1.2"):
+            self._validate_params()
+        # Borrowed NearestNeighbors._onedal_fit (aliased at class level).
+        self._onedal_knn_fit(X, y, queue)
+        # NOTE: this range check runs after the neighbors fit above has completed.
+        if self.contamination != "auto":
+            if not (0.0 < self.contamination <= 0.5):
+                raise ValueError(
+                    "contamination must be in (0, 0.5], " "got: %f" % self.contamination
+                )
+        n_samples = self.n_samples_fit_
+        if self.n_neighbors > n_samples:
+            warnings.warn(
+                "n_neighbors (%s) is greater than the "
+                "total number of samples (%s). n_neighbors "
+                "will be set to (n_samples - 1) for estimation."
+                % (self.n_neighbors, n_samples)
+            )
+        # Clamp the effective neighbor count to the range [1, n_samples - 1].
+        self.n_neighbors_ = max(1, min(self.n_neighbors, n_samples - 1))
+        # No X is passed here; presumably the training samples are queried
+        # against themselves -- confirm in NearestNeighbors._onedal_kneighbors.
+        (
+            self._distances_fit_X_,
+            _neighbors_indices_fit_X_,
+        ) = self._onedal_kneighbors(n_neighbors=self.n_neighbors_, queue=queue)
+        # Sklearn includes a check for float32 at this point which may not be
+        # necessary for onedal
+        self._lrd = self._local_reachability_density(
+            self._distances_fit_X_, _neighbors_indices_fit_X_
+        )
+        # Compute lof score over training samples to define offset_:
+        # each neighbor's lrd is divided by the sample's own lrd, then the
+        # negated mean over axis 1 is stored.
+        lrd_ratios_array = self._lrd[_neighbors_indices_fit_X_] / self._lrd[:, np.newaxis]
+        self.negative_outlier_factor_ = -np.mean(lrd_ratios_array, axis=1)
+        if self.contamination == "auto":
+            # inliers score around -1 (the higher, the less abnormal).
+            self.offset_ = -1.5
+        else:
+            # Threshold at the (100 * contamination)-th percentile of the scores.
+            self.offset_ = np.percentile(
+                self.negative_outlier_factor_, 100.0 * self.contamination
+            )
+        return self
+    def fit(self, X, y=None):
+        # y is handed to _fit_validation, but dispatch receives None in its place.
+        self._fit_validation(X, y)
+        result = dispatch(
+            self,
+            "fit",
+            {
+                "onedal": self.__class__._onedal_fit,
+                "sklearn": sklearn_LocalOutlierFactor.fit,
+            },
+            X,
+            None,
+        )
+        return result
+    # Subtle order change to remove check_array and preserve dpnp and
+    # dpctl conformance. decision_function will return a dpnp or dpctl
+    # instance via kneighbors and an equivalent check_array exists in
+    # that call already in sklearn so no loss of functionality occurs
+    def _predict(self, X=None):
+        """Label samples as inliers (1) or outliers (-1) in a numpy int array.
+
+        With ``X`` given, a sample is an outlier when ``decision_function(X)``
+        is negative. Without ``X``, the training samples are labelled by
+        comparing ``negative_outlier_factor_`` against ``offset_``.
+        """
+        check_is_fitted(self)
+        if X is not None:
+            output = self.decision_function(X) < 0
+            is_inlier = np.ones(output.shape[0], dtype=int)
+            is_inlier[output] = -1
+        else:
+            is_inlier = np.ones(self.n_samples_fit_, dtype=int)
+            is_inlier[self.negative_outlier_factor_ < self.offset_] = -1
+        return is_inlier
+    # This had to be done because predict loses the queue when no
+    # argument is given and it is a dpctl tensor or dpnp array.
+    # This would cause issues in fit_predict. Also, available_if
+    # is hard to unwrap, and this is the most straightforward way.
+    @available_if(sklearn_LocalOutlierFactor._check_novelty_fit_predict)
+    @wrap_output_data
+    def fit_predict(self, X, y=None):
+        # Fits on X, then labels the training samples (_predict is called without X).
+        return self.fit(X)._predict()
+    @available_if(sklearn_LocalOutlierFactor._check_novelty_predict)
+    @wrap_output_data
+    def predict(self, X=None):
+        return self._predict(X)
+    @wrap_output_data
+    def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
+        check_is_fitted(self)
+        # The feature-name check is skipped when no query X is supplied.
+        if sklearn_check_version("1.0") and X is not None:
+            self._check_feature_names(X, reset=False)
+        return dispatch(
+            self,
+            "kneighbors",
+            {
+                "onedal": self.__class__._onedal_kneighbors,
+                "sklearn": sklearn_LocalOutlierFactor.kneighbors,
+            },
+            X,
+            n_neighbors=n_neighbors,
+            return_distance=return_distance,
+        )
+    # Reuse the stock scikit-learn docstrings for the overridden public methods.
+    fit.__doc__ = sklearn_LocalOutlierFactor.fit.__doc__
+    fit_predict.__doc__ = sklearn_LocalOutlierFactor.fit_predict.__doc__
+    predict.__doc__ = sklearn_LocalOutlierFactor.predict.__doc__
+    kneighbors.__doc__ = sklearn_LocalOutlierFactor.kneighbors.__doc__

{scikit_learn_intelex-2024.1.0.data → scikit_learn_intelex-2024.2.0.data}/data/Lib/site-packages/sklearnex/neighbors/knn_classification.py RENAMED Viewed

@@ -20,7 +20,8 @@ from sklearn.neighbors._ball_tree import BallTree
 from sklearn.neighbors._base import NeighborsBase as sklearn_NeighborsBase
 from sklearn.neighbors._kd_tree import KDTree
-from daal4py.sklearn._utils import control_n_jobs, run_with_n_jobs, sklearn_check_version
+from daal4py.sklearn._n_jobs_support import control_n_jobs
+from daal4py.sklearn._utils import sklearn_check_version
 if not sklearn_check_version("1.2"):
     from sklearn.neighbors._base import _check_weights
@@ -140,7 +141,7 @@ else:
             self.weights = _check_weights(weights)
-@control_n_jobs
+@control_n_jobs(decorated_methods=["fit", "predict", "predict_proba", "kneighbors"])
 class KNeighborsClassifier(KNeighborsClassifier_, KNeighborsDispatchingBase):
     if sklearn_check_version("1.2"):
         _parameter_constraints: dict = {**KNeighborsClassifier_._parameter_constraints}
@@ -245,7 +246,7 @@ class KNeighborsClassifier(KNeighborsClassifier_, KNeighborsDispatchingBase):
     @wrap_output_data
     def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
         check_is_fitted(self)
-        if sklearn_check_version("1.0"):
+        if sklearn_check_version("1.0") and X is not None:
             self._check_feature_names(X, reset=False)
         return dispatch(
             self,
@@ -255,8 +256,8 @@ class KNeighborsClassifier(KNeighborsClassifier_, KNeighborsDispatchingBase):
                 "sklearn": sklearn_KNeighborsClassifier.kneighbors,
             },
             X,
-            n_neighbors,
-            return_distance,
+            n_neighbors=n_neighbors,
+            return_distance=return_distance,
         )
     @wrap_output_data
@@ -285,7 +286,6 @@ class KNeighborsClassifier(KNeighborsClassifier_, KNeighborsDispatchingBase):
         return result
-    @run_with_n_jobs
     def _onedal_fit(self, X, y, queue=None):
         onedal_params = {
             "n_neighbors": self.n_neighbors,
@@ -308,15 +308,12 @@ class KNeighborsClassifier(KNeighborsClassifier_, KNeighborsDispatchingBase):
         self._save_attributes()
-    @run_with_n_jobs
     def _onedal_predict(self, X, queue=None):
         return self._onedal_estimator.predict(X, queue=queue)
-    @run_with_n_jobs
     def _onedal_predict_proba(self, X, queue=None):
         return self._onedal_estimator.predict_proba(X, queue=queue)
-    @run_with_n_jobs
     def _onedal_kneighbors(
         self, X=None, n_neighbors=None, return_distance=True, queue=None
     ):

{scikit_learn_intelex-2024.1.0.data → scikit_learn_intelex-2024.2.0.data}/data/Lib/site-packages/sklearnex/neighbors/knn_regression.py RENAMED Viewed

@@ -20,7 +20,8 @@ from sklearn.neighbors._ball_tree import BallTree
 from sklearn.neighbors._base import NeighborsBase as sklearn_NeighborsBase
 from sklearn.neighbors._kd_tree import KDTree
-from daal4py.sklearn._utils import control_n_jobs, run_with_n_jobs, sklearn_check_version
+from daal4py.sklearn._n_jobs_support import control_n_jobs
+from daal4py.sklearn._utils import sklearn_check_version
 if not sklearn_check_version("1.2"):
     from sklearn.neighbors._base import _check_weights
@@ -136,7 +137,7 @@ else:
             self.weights = _check_weights(weights)
-@control_n_jobs
+@control_n_jobs(decorated_methods=["fit", "predict", "kneighbors"])
 class KNeighborsRegressor(KNeighborsRegressor_, KNeighborsDispatchingBase):
     if sklearn_check_version("1.2"):
         _parameter_constraints: dict = {**KNeighborsRegressor_._parameter_constraints}
@@ -226,7 +227,7 @@ class KNeighborsRegressor(KNeighborsRegressor_, KNeighborsDispatchingBase):
     @wrap_output_data
     def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
         check_is_fitted(self)
-        if sklearn_check_version("1.0"):
+        if sklearn_check_version("1.0") and X is not None:
             self._check_feature_names(X, reset=False)
         return dispatch(
             self,
@@ -236,8 +237,8 @@ class KNeighborsRegressor(KNeighborsRegressor_, KNeighborsDispatchingBase):
                 "sklearn": sklearn_KNeighborsRegressor.kneighbors,
             },
             X,
-            n_neighbors,
-            return_distance,
+            n_neighbors=n_neighbors,
+            return_distance=return_distance,
         )
     @wrap_output_data
@@ -266,7 +267,6 @@ class KNeighborsRegressor(KNeighborsRegressor_, KNeighborsDispatchingBase):
         return result
-    @run_with_n_jobs
     def _onedal_fit(self, X, y, queue=None):
         onedal_params = {
             "n_neighbors": self.n_neighbors,
@@ -289,11 +289,9 @@ class KNeighborsRegressor(KNeighborsRegressor_, KNeighborsDispatchingBase):
         self._save_attributes()
-    @run_with_n_jobs
     def _onedal_predict(self, X, queue=None):
         return self._onedal_estimator.predict(X, queue=queue)
-    @run_with_n_jobs
     def _onedal_kneighbors(
         self, X=None, n_neighbors=None, return_distance=True, queue=None
     ):

{scikit_learn_intelex-2024.1.0.data → scikit_learn_intelex-2024.2.0.data}/data/Lib/site-packages/sklearnex/neighbors/knn_unsupervised.py RENAMED Viewed

@@ -30,7 +30,8 @@ from sklearn.neighbors._kd_tree import KDTree
 from sklearn.neighbors._unsupervised import NearestNeighbors as sklearn_NearestNeighbors
 from sklearn.utils.validation import _deprecate_positional_args, check_is_fitted
-from daal4py.sklearn._utils import control_n_jobs, run_with_n_jobs, sklearn_check_version
+from daal4py.sklearn._n_jobs_support import control_n_jobs
+from daal4py.sklearn._utils import sklearn_check_version
 from onedal.neighbors import NearestNeighbors as onedal_NearestNeighbors
 from onedal.utils import _check_array, _num_features, _num_samples
@@ -95,7 +96,7 @@ else:
             )
-@control_n_jobs
+@control_n_jobs(decorated_methods=["fit", "kneighbors"])
 class NearestNeighbors(NearestNeighbors_, KNeighborsDispatchingBase):
     if sklearn_check_version("1.2"):
         _parameter_constraints: dict = {**NearestNeighbors_._parameter_constraints}
@@ -150,8 +151,8 @@ class NearestNeighbors(NearestNeighbors_, KNeighborsDispatchingBase):
                 "sklearn": sklearn_NearestNeighbors.kneighbors,
             },
             X,
-            n_neighbors,
-            return_distance,
+            n_neighbors=n_neighbors,
+            return_distance=return_distance,
         )
     @wrap_output_data
@@ -180,7 +181,6 @@ class NearestNeighbors(NearestNeighbors_, KNeighborsDispatchingBase):
         return result
-    @run_with_n_jobs
     def _onedal_fit(self, X, y=None, queue=None):
         onedal_params = {
             "n_neighbors": self.n_neighbors,
@@ -202,11 +202,9 @@ class NearestNeighbors(NearestNeighbors_, KNeighborsDispatchingBase):
         self._save_attributes()
-    @run_with_n_jobs
     def _onedal_predict(self, X, queue=None):
         return self._onedal_estimator.predict(X, queue=queue)
-    @run_with_n_jobs
     def _onedal_kneighbors(
         self, X=None, n_neighbors=None, return_distance=True, queue=None
     ):

{scikit_learn_intelex-2024.1.0.data → scikit_learn_intelex-2024.2.0.data}/data/Lib/site-packages/sklearnex/neighbors/tests/test_neighbors.py RENAMED Viewed

@@ -23,11 +23,16 @@ from onedal.tests.utils._dataframes_support import (
     _convert_to_dataframe,
     get_dataframes_and_queues,
 )
+from sklearnex.neighbors import (
+    KNeighborsClassifier,
+    KNeighborsRegressor,
+    LocalOutlierFactor,
+    NearestNeighbors,
+)
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
 def test_sklearnex_import_knn_classifier(dataframe, queue):
-    from sklearnex.neighbors import KNeighborsClassifier
     X = _convert_to_dataframe([[0], [1], [2], [3]], sycl_queue=queue, target_df=dataframe)
     y = _convert_to_dataframe([0, 0, 1, 1], sycl_queue=queue, target_df=dataframe)
@@ -40,7 +45,6 @@ def test_sklearnex_import_knn_classifier(dataframe, queue):
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
 def test_sklearnex_import_knn_regression(dataframe, queue):
-    from sklearnex.neighbors import KNeighborsRegressor
     X = _convert_to_dataframe([[0], [1], [2], [3]], sycl_queue=queue, target_df=dataframe)
     y = _convert_to_dataframe([0, 0, 1, 1], sycl_queue=queue, target_df=dataframe)
@@ -51,18 +55,17 @@ def test_sklearnex_import_knn_regression(dataframe, queue):
     assert_allclose(pred, [0.5])
-# TODO:
-# investigate failure for `dpnp.ndarrays` and `dpctl.tensors`.
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
 @pytest.mark.parametrize(
-    "dataframe,queue", get_dataframes_and_queues(dataframe_filter_="numpy")
+    "estimator",
+    [LocalOutlierFactor, NearestNeighbors],
 )
-def test_sklearnex_import_nn(dataframe, queue):
-    from sklearnex.neighbors import NearestNeighbors
+def test_sklearnex_kneighbors(estimator, dataframe, queue):
     X = [[0, 0, 2], [1, 0, 0], [0, 0, 1]]
     X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
     test = _convert_to_dataframe([[0, 0, 1.3]], sycl_queue=queue, target_df=dataframe)
-    neigh = NearestNeighbors(n_neighbors=2).fit(X)
+    neigh = estimator(n_neighbors=2).fit(X)
     result = neigh.kneighbors(test, 2, return_distance=False)
     result = _as_numpy(result)
     assert "sklearnex" in neigh.__module__
@@ -71,14 +74,12 @@ def test_sklearnex_import_nn(dataframe, queue):
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
 def test_sklearnex_import_lof(dataframe, queue):
-    from sklearnex.neighbors import LocalOutlierFactor
     X = [[7, 7, 7], [1, 0, 0], [0, 0, 1], [0, 0, 1]]
     X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
     lof = LocalOutlierFactor(n_neighbors=2)
     result = lof.fit_predict(X)
     result = _as_numpy(result)
-    assert hasattr(lof, "_knn")
+    assert hasattr(lof, "_onedal_estimator")
     assert "sklearnex" in lof.__module__
-    assert "sklearnex" in lof._knn.__module__
     assert_allclose(result, [-1, 1, 1, 1])

{scikit_learn_intelex-2024.1.0.data → scikit_learn_intelex-2024.2.0.data}/data/Lib/site-packages/sklearnex/preview/__init__.py RENAMED Viewed

@@ -14,4 +14,4 @@
 # limitations under the License.
 # ==============================================================================
-__all__ = ["cluster", "covariance", "decomposition", "linear_model"]
+__all__ = ["cluster", "covariance", "decomposition"]

{scikit_learn_intelex-2024.1.0.data → scikit_learn_intelex-2024.2.0.data}/data/Lib/site-packages/sklearnex/preview/cluster/k_means.py RENAMED Viewed

@@ -29,18 +29,15 @@ if daal_check_version((2023, "P", 200)):
         check_is_fitted,
     )
-    from daal4py.sklearn._utils import (
-        control_n_jobs,
-        run_with_n_jobs,
-        sklearn_check_version,
-    )
+    from daal4py.sklearn._n_jobs_support import control_n_jobs
+    from daal4py.sklearn._utils import sklearn_check_version
     from onedal.cluster import KMeans as onedal_KMeans
     from ..._device_offload import dispatch, wrap_output_data
     from ..._utils import PatchingConditionsChain
     from ._common import BaseKMeans
-    @control_n_jobs
+    @control_n_jobs(decorated_methods=["fit", "predict"])
     class KMeans(sklearn_KMeans, BaseKMeans):
         __doc__ = sklearn_KMeans.__doc__
         n_iter_, inertia_ = None, None
@@ -212,7 +209,6 @@ if daal_check_version((2023, "P", 200)):
             return self
-        @run_with_n_jobs
         def _onedal_fit(self, X, _, sample_weight, queue=None):
             assert sample_weight is None
@@ -294,7 +290,6 @@ if daal_check_version((2023, "P", 200)):
                 X,
             )
-        @run_with_n_jobs
         def _onedal_predict(self, X, queue=None):
             X = self._validate_data(
                 X, accept_sparse=False, reset=False, dtype=[np.float64, np.float32]

{scikit_learn_intelex-2024.1.0.data → scikit_learn_intelex-2024.2.0.data}/data/Lib/site-packages/sklearnex/preview/covariance/covariance.py RENAMED Viewed

@@ -14,32 +14,47 @@
 # limitations under the License.
 # ===============================================================================
+import warnings
+import numpy as np
 from scipy import sparse as sp
 from sklearn.covariance import EmpiricalCovariance as sklearn_EmpiricalCovariance
 from sklearn.utils import check_array
-from daal4py.sklearn._utils import control_n_jobs, run_with_n_jobs, sklearn_check_version
+from daal4py.sklearn._n_jobs_support import control_n_jobs
+from daal4py.sklearn._utils import sklearn_check_version
 from onedal.common.hyperparameters import get_hyperparameters
 from onedal.covariance import EmpiricalCovariance as onedal_EmpiricalCovariance
+from sklearnex import config_context
+from sklearnex.metrics import pairwise_distances
-from ..._device_offload import dispatch
+from ..._device_offload import dispatch, wrap_output_data
 from ..._utils import PatchingConditionsChain, register_hyperparameters
 @register_hyperparameters({"fit": get_hyperparameters("covariance", "compute")})
-@control_n_jobs
+@control_n_jobs(decorated_methods=["fit", "mahalanobis"])
 class EmpiricalCovariance(sklearn_EmpiricalCovariance):
     __doc__ = sklearn_EmpiricalCovariance.__doc__
+    if sklearn_check_version("1.2"):
+        _parameter_constraints: dict = {
+            **sklearn_EmpiricalCovariance._parameter_constraints,
+        }
     def _save_attributes(self):
         assert hasattr(self, "_onedal_estimator")
-        self.covariance_ = self._onedal_estimator.covariance_
+        self._set_covariance(self._onedal_estimator.covariance_)
         self.location_ = self._onedal_estimator.location_
     _onedal_covariance = staticmethod(onedal_EmpiricalCovariance)
-    @run_with_n_jobs
     def _onedal_fit(self, X, queue=None):
+        if X.shape[0] == 1:
+            warnings.warn(
+                "Only one sample available. You may want to reshape your data array"
+            )
         onedal_params = {
             "method": "dense",
             "bias": True,
@@ -54,7 +69,7 @@ class EmpiricalCovariance(sklearn_EmpiricalCovariance):
         patching_status = PatchingConditionsChain(
             f"sklearn.covariance.{class_name}.{method_name}"
         )
-        if method_name == "fit":
+        if method_name in ["fit", "mahalanobis"]:
             (X,) = data
             patching_status.and_conditions(
                 [
@@ -62,10 +77,6 @@ class EmpiricalCovariance(sklearn_EmpiricalCovariance):
                         self.assume_centered == False,
                         "assume_centered parameter is not supported on oneDAL side",
                     ),
-                    (
-                        self.store_precision == False,
-                        "precision matrix calculation is not supported on oneDAL side",
-                    ),
                     (not sp.issparse(X), "X is sparse. Sparse input is not supported."),
                 ]
             )
@@ -79,9 +90,9 @@ class EmpiricalCovariance(sklearn_EmpiricalCovariance):
         if sklearn_check_version("1.2"):
             self._validate_params()
         if sklearn_check_version("0.23"):
-            self._validate_data(X)
+            X = self._validate_data(X, force_all_finite=False)
         else:
-            check_array(X)
+            X = check_array(X, force_all_finite=False)
         dispatch(
             self,
@@ -95,4 +106,27 @@ class EmpiricalCovariance(sklearn_EmpiricalCovariance):
         return self
+    # expose sklearnex pairwise_distances if mahalanobis distance eventually supported
+    @wrap_output_data
+    def mahalanobis(self, X):
+        # Squared Mahalanobis distance from every row of X to location_.
+        # The validation call depends on the installed scikit-learn version.
+        X = (
+            self._validate_data(X, reset=False)
+            if sklearn_check_version("1.0")
+            else check_array(X)
+        )
+        inverse_covariance = self.get_precision()
+        # assume_finite=True is set only for the distance computation;
+        # X has already been validated above.
+        with config_context(assume_finite=True):
+            center = self.location_[np.newaxis, :]
+            distances = pairwise_distances(
+                X, center, metric="mahalanobis", VI=inverse_covariance
+            )
+        n_rows = len(X)
+        return np.reshape(distances, (n_rows,)) ** 2
+    # Stock scikit-learn implementations, wrapped with wrap_output_data.
+    error_norm = wrap_output_data(sklearn_EmpiricalCovariance.error_norm)
+    score = wrap_output_data(sklearn_EmpiricalCovariance.score)
+    # Reuse the stock scikit-learn docstrings for the public methods. Each
+    # right-hand side must be the docstring itself (``.__doc__``), not the
+    # function object.
     fit.__doc__ = sklearn_EmpiricalCovariance.fit.__doc__
+    mahalanobis.__doc__ = sklearn_EmpiricalCovariance.mahalanobis.__doc__
+    error_norm.__doc__ = sklearn_EmpiricalCovariance.error_norm.__doc__
+    score.__doc__ = sklearn_EmpiricalCovariance.score.__doc__