PyPI - scikit-learn-intelex - Versions diffs - 2024.5.0__py39-none-manylinux1_x86_64.whl → 2024.7.0__py39-none-manylinux1_x86_64.whl - Mend

scikit-learn-intelex 2024.5.0__py39-none-manylinux1_x86_64.whl → 2024.7.0__py39-none-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (73)

{scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/METADATA +2 -2
scikit_learn_intelex-2024.7.0.dist-info/RECORD +122 -0
sklearnex/_config.py +3 -15
sklearnex/_device_offload.py +9 -168
sklearnex/basic_statistics/basic_statistics.py +127 -1
sklearnex/basic_statistics/tests/test_basic_statistics.py +251 -0
sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +1 -1
sklearnex/cluster/dbscan.py +3 -1
sklearnex/cluster/k_means.py +8 -0
sklearnex/cluster/tests/test_dbscan.py +8 -6
sklearnex/cluster/tests/test_kmeans.py +15 -3
sklearnex/conftest.py +11 -1
sklearnex/covariance/incremental_covariance.py +64 -13
sklearnex/covariance/tests/test_incremental_covariance.py +35 -0
sklearnex/decomposition/pca.py +25 -1
sklearnex/decomposition/tests/test_pca.py +4 -2
sklearnex/dispatcher.py +109 -1
sklearnex/ensemble/_forest.py +121 -57
sklearnex/ensemble/tests/test_forest.py +7 -0
sklearnex/glob/dispatcher.py +16 -2
sklearnex/linear_model/coordinate_descent.py +13 -0
sklearnex/linear_model/incremental_linear.py +102 -25
sklearnex/linear_model/linear.py +25 -39
sklearnex/linear_model/logistic_regression.py +92 -74
sklearnex/linear_model/ridge.py +7 -0
sklearnex/linear_model/tests/test_incremental_linear.py +10 -10
sklearnex/linear_model/tests/test_linear.py +30 -5
sklearnex/linear_model/tests/test_logreg.py +45 -3
sklearnex/manifold/t_sne.py +4 -0
sklearnex/metrics/pairwise.py +5 -0
sklearnex/metrics/ranking.py +3 -0
sklearnex/model_selection/split.py +3 -0
sklearnex/neighbors/_lof.py +9 -0
sklearnex/neighbors/common.py +45 -1
sklearnex/neighbors/knn_classification.py +1 -20
sklearnex/neighbors/knn_regression.py +25 -20
sklearnex/neighbors/knn_unsupervised.py +31 -7
sklearnex/preview/__init__.py +1 -1
sklearnex/preview/decomposition/__init__.py +19 -0
sklearnex/preview/decomposition/incremental_pca.py +228 -0
sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
sklearnex/preview/linear_model/__init__.py +19 -0
sklearnex/preview/linear_model/ridge.py +419 -0
sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +163 -0
sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
sklearnex/svm/_common.py +163 -20
sklearnex/svm/nusvc.py +40 -4
sklearnex/svm/nusvr.py +31 -2
sklearnex/svm/svc.py +40 -4
sklearnex/svm/svr.py +31 -2
sklearnex/svm/tests/test_svm.py +12 -20
sklearnex/tests/_utils.py +185 -30
sklearnex/tests/_utils_spmd.py +185 -0
sklearnex/tests/test_common.py +54 -0
sklearnex/tests/test_config.py +4 -0
sklearnex/tests/test_memory_usage.py +185 -126
sklearnex/tests/test_monkeypatch.py +12 -4
sklearnex/tests/test_patching.py +21 -25
sklearnex/tests/test_run_to_run_stability.py +295 -0
sklearnex/utils/_namespace.py +1 -1
scikit_learn_intelex-2024.5.0.dist-info/RECORD +0 -104
sklearnex/tests/test_run_to_run_stability_tests.py +0 -428
{scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/LICENSE.txt +0 -0
{scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/WHEEL +0 -0
{scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/top_level.txt +0 -0

sklearnex/basic_statistics/tests/test_basic_statistics.py ADDED Viewed

@@ -0,0 +1,251 @@
+# ==============================================================================
+# Copyright 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import numpy as np
+import pytest
+from numpy.testing import assert_allclose
+from onedal.basic_statistics.tests.test_basic_statistics import (
+    expected_max,
+    expected_mean,
+    expected_sum,
+    options_and_tests,
+)
+from onedal.tests.utils._dataframes_support import (
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
+from sklearnex.basic_statistics import BasicStatistics
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import_basic_statistics(dataframe, queue):
+    X = np.array([[0, 0], [1, 1]])
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    weights = np.array([1, 0.5])
+    weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+    result = BasicStatistics().fit(X_df)
+    expected_mean = np.array([0.5, 0.5])
+    expected_min = np.array([0, 0])
+    expected_max = np.array([1, 1])
+    assert_allclose(expected_mean, result.mean)
+    assert_allclose(expected_max, result.max)
+    assert_allclose(expected_min, result.min)
+    result = BasicStatistics().fit(X_df, sample_weight=weights_df)
+    expected_weighted_mean = np.array([0.25, 0.25])
+    expected_weighted_min = np.array([0, 0])
+    expected_weighted_max = np.array([0.5, 0.5])
+    assert_allclose(expected_weighted_mean, result.mean)
+    assert_allclose(expected_weighted_min, result.min)
+    assert_allclose(expected_weighted_max, result.max)
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_multiple_options_on_gold_data(dataframe, queue, weighted, dtype):
+    X = np.array([[0, 0], [1, 1]])
+    X = X.astype(dtype=dtype)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    if weighted:
+        weights = np.array([1, 0.5])
+        weights = weights.astype(dtype=dtype)
+        weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+    basicstat = BasicStatistics()
+    if weighted:
+        result = basicstat.fit(X_df, sample_weight=weights_df)
+    else:
+        result = basicstat.fit(X_df)
+    if weighted:
+        expected_weighted_mean = np.array([0.25, 0.25])
+        expected_weighted_min = np.array([0, 0])
+        expected_weighted_max = np.array([0.5, 0.5])
+        assert_allclose(expected_weighted_mean, result.mean)
+        assert_allclose(expected_weighted_max, result.max)
+        assert_allclose(expected_weighted_min, result.min)
+    else:
+        expected_mean = np.array([0.5, 0.5])
+        expected_min = np.array([0, 0])
+        expected_max = np.array([1, 1])
+        assert_allclose(expected_mean, result.mean)
+        assert_allclose(expected_max, result.max)
+        assert_allclose(expected_min, result.min)
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("option", options_and_tests)
+@pytest.mark.parametrize("row_count", [100, 1000])
+@pytest.mark.parametrize("column_count", [10, 100])
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_single_option_on_random_data(
+    dataframe, queue, option, row_count, column_count, weighted, dtype
+):
+    result_option, function, tols = option
+    fp32tol, fp64tol = tols
+    seed = 77
+    gen = np.random.default_rng(seed)
+    X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+    X = X.astype(dtype=dtype)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    if weighted:
+        weights = gen.uniform(low=-0.5, high=1.0, size=row_count)
+        weights = weights.astype(dtype=dtype)
+        weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+    basicstat = BasicStatistics(result_options=result_option)
+    if weighted:
+        result = basicstat.fit(X_df, sample_weight=weights_df)
+    else:
+        result = basicstat.fit(X_df)
+    res = getattr(result, result_option)
+    if weighted:
+        weighted_data = np.diag(weights) @ X
+        gtr = function(weighted_data)
+    else:
+        gtr = function(X)
+    tol = fp32tol if res.dtype == np.float32 else fp64tol
+    assert_allclose(gtr, res, atol=tol)
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("row_count", [100, 1000])
+@pytest.mark.parametrize("column_count", [10, 100])
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_multiple_options_on_random_data(
+    dataframe, queue, row_count, column_count, weighted, dtype
+):
+    seed = 77
+    gen = np.random.default_rng(seed)
+    X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+    X = X.astype(dtype=dtype)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    if weighted:
+        weights = gen.uniform(low=-0.5, high=1.0, size=row_count)
+        weights = weights.astype(dtype=dtype)
+        weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+    basicstat = BasicStatistics(result_options=["mean", "max", "sum"])
+    if weighted:
+        result = basicstat.fit(X_df, sample_weight=weights_df)
+    else:
+        result = basicstat.fit(X_df)
+    res_mean, res_max, res_sum = result.mean, result.max, result.sum
+    if weighted:
+        weighted_data = np.diag(weights) @ X
+        gtr_mean, gtr_max, gtr_sum = (
+            expected_mean(weighted_data),
+            expected_max(weighted_data),
+            expected_sum(weighted_data),
+        )
+    else:
+        gtr_mean, gtr_max, gtr_sum = (
+            expected_mean(X),
+            expected_max(X),
+            expected_sum(X),
+        )
+    tol = 5e-4 if res_mean.dtype == np.float32 else 1e-7
+    assert_allclose(gtr_mean, res_mean, atol=tol)
+    assert_allclose(gtr_max, res_max, atol=tol)
+    assert_allclose(gtr_sum, res_sum, atol=tol)
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("row_count", [100, 1000])
+@pytest.mark.parametrize("column_count", [10, 100])
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_all_option_on_random_data(
+    dataframe, queue, row_count, column_count, weighted, dtype
+):
+    seed = 77
+    gen = np.random.default_rng(seed)
+    X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+    X = X.astype(dtype=dtype)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    if weighted:
+        weights = gen.uniform(low=-0.5, high=+1.0, size=row_count)
+        weights = weights.astype(dtype=dtype)
+        weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+    basicstat = BasicStatistics(result_options="all")
+    if weighted:
+        result = basicstat.fit(X_df, sample_weight=weights_df)
+    else:
+        result = basicstat.fit(X_df)
+    if weighted:
+        weighted_data = np.diag(weights) @ X
+    for option in options_and_tests:
+        result_option, function, tols = option
+        fp32tol, fp64tol = tols
+        res = getattr(result, result_option)
+        if weighted:
+            gtr = function(weighted_data)
+        else:
+            gtr = function(X)
+        tol = fp32tol if res.dtype == np.float32 else fp64tol
+        assert_allclose(gtr, res, atol=tol)
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("option", options_and_tests)
+@pytest.mark.parametrize("data_size", [100, 1000])
+@pytest.mark.parametrize("weighted", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_1d_input_on_random_data(dataframe, queue, option, data_size, weighted, dtype):
+    result_option, function, tols = option
+    fp32tol, fp64tol = tols
+    seed = 77
+    gen = np.random.default_rng(seed)
+    X = gen.uniform(low=-0.3, high=+0.7, size=data_size)
+    X = X.astype(dtype=dtype)
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    if weighted:
+        weights = gen.uniform(low=-0.5, high=1.0, size=data_size)
+        weights = weights.astype(dtype=dtype)
+        weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+    basicstat = BasicStatistics(result_options=result_option)
+    if weighted:
+        result = basicstat.fit(X_df, sample_weight=weights_df)
+    else:
+        result = basicstat.fit(X_df)
+    res = getattr(result, result_option)
+    if weighted:
+        weighted_data = weights * X
+        gtr = function(weighted_data)
+    else:
+        gtr = function(X)
+    tol = fp32tol if res.dtype == np.float32 else fp64tol
+    assert_allclose(gtr, res, atol=tol)

sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py CHANGED Viewed

@@ -18,7 +18,7 @@ import numpy as np
 import pytest
 from numpy.testing import assert_allclose
-from onedal.basic_statistics.tests.test_incremental_basic_statistics import (
+from onedal.basic_statistics.tests.test_basic_statistics import (
     expected_max,
     expected_mean,
     expected_sum,

sklearnex/cluster/dbscan.py CHANGED Viewed

@@ -17,7 +17,6 @@
 import numbers
 from abc import ABC
-import numpy as np
 from scipy import sparse as sp
 from sklearn.cluster import DBSCAN as sklearn_DBSCAN
 from sklearn.utils.validation import _check_sample_weight
@@ -85,6 +84,9 @@ class DBSCAN(sklearn_DBSCAN, BaseDBSCAN):
         self.n_jobs = n_jobs
     def _onedal_fit(self, X, y, sample_weight=None, queue=None):
+        if sklearn_check_version("1.0"):
+            X = self._validate_data(X, force_all_finite=False)
         onedal_params = {
             "eps": self.eps,
             "min_samples": self.min_samples,

sklearnex/cluster/k_means.py CHANGED Viewed

@@ -15,3 +15,11 @@
 # ===============================================================================
 from daal4py.sklearn.cluster import KMeans
+from onedal._device_offload import support_usm_ndarray
+# Note: `sklearnex.cluster.KMeans` only has functional
+# SYCL GPU support: computation is not offloaded to a GPU device.
+KMeans.fit = support_usm_ndarray(queue_param=False)(KMeans.fit)
+KMeans.fit_predict = support_usm_ndarray(queue_param=False)(KMeans.fit_predict)
+KMeans.predict = support_usm_ndarray(queue_param=False)(KMeans.predict)
+KMeans.score = support_usm_ndarray(queue_param=False)(KMeans.score)

sklearnex/cluster/tests/test_dbscan.py CHANGED Viewed

@@ -18,16 +18,18 @@ import numpy as np
 import pytest
 from numpy.testing import assert_allclose
+from onedal.tests.utils._dataframes_support import (
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
-# TODO:
-# adding this parameterized testing
-# somehow breaks other test with preview module patch:
-# sklearnex/tests/test_monkeypatch.py::test_preview_namespace.
-# @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
-def test_sklearnex_import_dbscan():
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import_dbscan(dataframe, queue):
     from sklearnex.cluster import DBSCAN
     X = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]])
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
     dbscan = DBSCAN(eps=3, min_samples=2).fit(X)
     assert "sklearnex" in dbscan.__module__

sklearnex/cluster/tests/test_kmeans.py CHANGED Viewed

@@ -15,16 +15,28 @@
 # ===============================================================================
 import numpy as np
+import pytest
 from numpy.testing import assert_allclose
+from onedal.tests.utils._dataframes_support import (
+    _as_numpy,
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import(dataframe, queue):
-def test_sklearnex_import():
     from sklearnex.cluster import KMeans
     X = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
     kmeans = KMeans(n_clusters=2, random_state=0).fit(X)
     assert "daal4py" in kmeans.__module__
-    result = kmeans.predict([[0, 0], [12, 3]])
+    X_test = [[0, 0], [12, 3]]
+    X_test = _convert_to_dataframe(X_test, sycl_queue=queue, target_df=dataframe)
+    result = kmeans.predict(X_test)
     expected = np.array([1, 0], dtype=np.int32)
-    assert_allclose(expected, result)
+    assert_allclose(expected, _as_numpy(result))

sklearnex/conftest.py CHANGED Viewed

@@ -19,7 +19,8 @@ import logging
 import pytest
-from sklearnex import patch_sklearn, unpatch_sklearn
+from daal4py.sklearn._utils import sklearn_check_version
+from sklearnex import config_context, patch_sklearn, unpatch_sklearn
 def pytest_configure(config):
@@ -61,3 +62,12 @@ def with_sklearnex():
     patch_sklearn()
     yield
     unpatch_sklearn()
+@pytest.fixture
+def with_array_api():
+    if sklearn_check_version("1.2"):
+        with config_context(array_api_dispatch=True):
+            yield
+    else:
+        yield

sklearnex/covariance/incremental_covariance.py CHANGED Viewed

@@ -19,13 +19,14 @@ import warnings
 import numpy as np
 from scipy import linalg
-from sklearn.base import BaseEstimator
+from sklearn.base import BaseEstimator, clone
 from sklearn.covariance import EmpiricalCovariance as sklearn_EmpiricalCovariance
+from sklearn.covariance import log_likelihood
 from sklearn.utils import check_array, gen_batches
+from sklearn.utils.validation import _num_features
 from daal4py.sklearn._n_jobs_support import control_n_jobs
 from daal4py.sklearn._utils import daal_check_version, sklearn_check_version
-from onedal._device_offload import support_usm_ndarray
 from onedal.covariance import (
     IncrementalEmpiricalCovariance as onedal_IncrementalEmpiricalCovariance,
 )
@@ -34,6 +35,7 @@ from sklearnex import config_context
 from .._device_offload import dispatch, wrap_output_data
 from .._utils import PatchingConditionsChain, register_hyperparameters
 from ..metrics import pairwise_distances
+from ..utils import get_namespace
 if sklearn_check_version("1.2"):
     from sklearn.utils._param_validation import Interval
@@ -98,7 +100,6 @@ class IncrementalEmpiricalCovariance(BaseEstimator):
     get_precision = sklearn_EmpiricalCovariance.get_precision
     error_norm = wrap_output_data(sklearn_EmpiricalCovariance.error_norm)
-    score = wrap_output_data(sklearn_EmpiricalCovariance.score)
     def __init__(
         self, *, store_precision=False, assume_centered=False, batch_size=None, copy=True
@@ -197,6 +198,43 @@ class IncrementalEmpiricalCovariance(BaseEstimator):
         return self
+    @wrap_output_data
+    def score(self, X_test, y=None):
+        xp, _ = get_namespace(X_test)
+        location = self.location_
+        if sklearn_check_version("1.0"):
+            X = self._validate_data(
+                X_test,
+                dtype=[np.float64, np.float32],
+                reset=False,
+            )
+        else:
+            X = check_array(
+                X_test,
+                dtype=[np.float64, np.float32],
+            )
+        if "numpy" not in xp.__name__:
+            location = xp.asarray(location, device=X_test.device)
+            # depending on the sklearn version, check_array
+            # and validate_data will return only numpy arrays
+            # which will break dpnp/dpctl support. If the
+            # array namespace isn't from numpy and the data
+            # is now a numpy array, it has been validated and
+            # the original can be used.
+            if isinstance(X, np.ndarray):
+                X = X_test
+        est = clone(self)
+        est.set_params(**{"assume_centered": True})
+        # test_cov is a numpy array, but calculated on device
+        test_cov = est.fit(X - location).covariance_
+        res = log_likelihood(test_cov, self.get_precision())
+        return res
     def partial_fit(self, X, y=None, check_input=True):
         """
         Incremental fit with X. All of X is processed as a single batch.
@@ -293,21 +331,34 @@ class IncrementalEmpiricalCovariance(BaseEstimator):
         return self
     # expose sklearnex pairwise_distances if mahalanobis distance eventually supported
-    @wrap_output_data
     def mahalanobis(self, X):
         if sklearn_check_version("1.0"):
-            self._validate_data(X, reset=False, copy=self.copy)
-        else:
-            check_array(X, copy=self.copy)
+            self._check_feature_names(X, reset=False)
+        xp, _ = get_namespace(X)
         precision = self.get_precision()
-        with config_context(assume_finite=True):
-            # compute mahalanobis distances
-            dist = pairwise_distances(
-                X, self.location_[np.newaxis, :], metric="mahalanobis", VI=precision
-            )
+        # compute mahalanobis distances
+        # pairwise_distances will check n_features (via n_features matching with
+        # self.location_), and will check for finiteness via check_array.
+        # _check_feature_names will match _validate_data functionally
+        location = self.location_[np.newaxis, :]
+        if "numpy" not in xp.__name__:
+            # Guarantee that inputs to pairwise_distances match in type and location
+            location = xp.asarray(location, device=X.device)
+        try:
+            dist = pairwise_distances(X, location, metric="mahalanobis", VI=precision)
+        except ValueError as e:
+            # Raise the expected sklearn error on an n_features length violation
+            if "Incompatible dimension for X and Y matrices: X.shape[1] ==" in str(e):
+                raise ValueError(
+                    f"X has {_num_features(X)} features, but {self.__class__.__name__} "
+                    f"is expecting {self.n_features_in_} features as input."
+                )
+            else:
+                raise e
-        return np.reshape(dist, (len(X),)) ** 2
+        return (xp.reshape(dist, (-1,))) ** 2
     _onedal_cpu_supported = _onedal_supported
     _onedal_gpu_supported = _onedal_supported

sklearnex/covariance/tests/test_incremental_covariance.py CHANGED Viewed

@@ -16,13 +16,18 @@
 import numpy as np
 import pytest
+from numpy.linalg import slogdet
 from numpy.testing import assert_allclose
+from scipy.linalg import pinvh
 from sklearn.covariance.tests.test_covariance import (
     test_covariance,
     test_EmpiricalCovariance_validates_mahalanobis,
 )
+from sklearn.datasets import load_diabetes
+from sklearn.decomposition import PCA
 from onedal.tests.utils._dataframes_support import (
+    _as_numpy,
     _convert_to_dataframe,
     get_dataframes_and_queues,
 )
@@ -163,6 +168,36 @@ def test_sklearnex_fit_on_random_data(
     assert_allclose(expected_means, result.location_, atol=1e-6)
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_whitened_toy_score(dataframe, queue):
+    from sklearnex.covariance import IncrementalEmpiricalCovariance
+    # Load a sklearn toy dataset with sufficient data
+    X, _ = load_diabetes(return_X_y=True)
+    n = X.shape[1]
+    # Transform the data into uncorrelated, unity variance components
+    X = PCA(whiten=True).fit_transform(X)
+    # change dataframe
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    # fit data
+    est = IncrementalEmpiricalCovariance()
+    est.fit(X_df)
+    # location_ attribute approximately zero (10,), covariance_ identity (10,10)
+    # The log-likelihood can be calculated simply because covariance_ is identity;
+    # scipy.linalg.pinvh, np.linalg.slogdet and np.cov are used to keep the
+    # expected value independent of the estimator
+    expected_result = (
+        -(n - slogdet(pinvh(np.cov(X.T, bias=1)))[1] + n * np.log(2 * np.pi)) / 2
+    )
+    # expected_result = -14.1780602988
+    result = _as_numpy(est.score(X_df))
+    assert_allclose(expected_result, result, atol=1e-6)
 # Monkeypatch IncrementalEmpiricalCovariance into relevant sklearn.covariance tests
 @pytest.mark.allow_sklearn_fallback
 @pytest.mark.parametrize(

sklearnex/decomposition/pca.py CHANGED Viewed

@@ -32,6 +32,7 @@ if daal_check_version((2024, "P", 100)):
     from .._device_offload import dispatch, wrap_output_data
     from .._utils import PatchingConditionsChain
+    from ..utils import get_namespace
     if sklearn_check_version("1.1") and not sklearn_check_version("1.2"):
         from sklearn.utils import check_scalar
@@ -42,7 +43,6 @@ if daal_check_version((2024, "P", 100)):
     from sklearn.decomposition import PCA as sklearn_PCA
     from onedal.decomposition import PCA as onedal_PCA
-    from sklearnex.utils import get_namespace
     @control_n_jobs(decorated_methods=["fit", "transform", "fit_transform"])
     class PCA(sklearn_PCA):
@@ -210,6 +210,29 @@ if daal_check_version((2024, "P", 100)):
                 # Scikit-learn PCA["covariance_eigh"] was fit
                 return self._transform(X_fit, xp, x_is_centered=x_is_centered)
+        @wrap_output_data
+        def inverse_transform(self, X):
+            xp, _ = get_namespace(X)
+            mean = self.mean_
+            if self.whiten:
+                components = (
+                    xp.sqrt(self.explained_variance_[:, np.newaxis]) * self.components_
+                )
+            else:
+                components = self.components_
+            if "numpy" not in xp.__name__:
+                # dpctl and dpnp require inputs to be on the same device for
+                # matrix multiplication and addition. The type and location
+                # of the components and mean depend on the sklearn version;
+                # this makes sure they are of the same type and on the same
+                # device as the data (compute follows data).
+                components = xp.asarray(components, device=X.device)
+                mean = xp.asarray(mean, device=X.device)
+            return X @ components + mean
         def _onedal_supported(self, method_name, X):
             class_name = self.__class__.__name__
             patching_status = PatchingConditionsChain(
@@ -381,6 +404,7 @@ if daal_check_version((2024, "P", 100)):
         fit.__doc__ = sklearn_PCA.fit.__doc__
         transform.__doc__ = sklearn_PCA.transform.__doc__
         fit_transform.__doc__ = sklearn_PCA.fit_transform.__doc__
+        inverse_transform.__doc__ = sklearn_PCA.inverse_transform.__doc__
 else:
     from daal4py.sklearn.decomposition import PCA

sklearnex/decomposition/tests/test_pca.py CHANGED Viewed

@@ -51,6 +51,8 @@ def test_sklearnex_import(dataframe, queue):
         assert hasattr(pca, "_onedal_estimator")
     else:
         assert "daal4py" in pca.__module__
+    tol = 1e-5 if _as_numpy(X_transformed).dtype == np.float32 else 1e-7
     assert_allclose([6.30061232, 0.54980396], _as_numpy(pca.singular_values_))
-    assert_allclose(X_transformed_expected, _as_numpy(X_transformed))
-    assert_allclose(X_transformed_expected, _as_numpy(X_fit_transformed))
+    assert_allclose(X_transformed_expected, _as_numpy(X_transformed), rtol=tol)
+    assert_allclose(X_transformed_expected, _as_numpy(X_fit_transformed), rtol=tol)