PyPI - scikit-learn-intelex - Versions diffs - 2025.1.0__py312-none-manylinux_2_28_x86_64.whl → 2025.4.0__py312-none-manylinux_2_28_x86_64.whl - Mend

scikit-learn-intelex 2025.1.0__py312-none-manylinux_2_28_x86_64.whl → 2025.4.0__py312-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (98)

daal4py/_daal4py.cpython-312-x86_64-linux-gnu.so +0 -0
daal4py/mpi_transceiver.cpython-312-x86_64-linux-gnu.so +0 -0
daal4py/sklearn/utils/validation.py +6 -3
onedal/_device_offload.py +10 -28
onedal/_onedal_py_dpc.cpython-312-x86_64-linux-gnu.so +0 -0
onedal/_onedal_py_host.cpython-312-x86_64-linux-gnu.so +0 -0
onedal/_onedal_py_spmd_dpc.cpython-312-x86_64-linux-gnu.so +0 -0
onedal/basic_statistics/basic_statistics.py +5 -5
onedal/basic_statistics/incremental_basic_statistics.py +34 -19
onedal/basic_statistics/tests/test_basic_statistics.py +16 -72
onedal/basic_statistics/tests/test_incremental_basic_statistics.py +100 -17
onedal/basic_statistics/tests/utils.py +50 -0
onedal/cluster/dbscan.py +5 -10
onedal/cluster/kmeans.py +9 -16
onedal/cluster/kmeans_init.py +7 -10
onedal/common/_policy.py +0 -4
onedal/common/tests/test_sycl.py +128 -0
onedal/covariance/covariance.py +6 -9
onedal/covariance/incremental_covariance.py +41 -26
onedal/covariance/tests/test_incremental_covariance.py +69 -1
onedal/datatypes/__init__.py +2 -2
onedal/datatypes/_data_conversion.py +10 -43
onedal/datatypes/tests/test_data.py +83 -22
onedal/decomposition/incremental_pca.py +42 -32
onedal/decomposition/pca.py +7 -7
onedal/decomposition/tests/test_incremental_pca.py +87 -0
onedal/ensemble/forest.py +20 -11
onedal/linear_model/incremental_linear_model.py +86 -52
onedal/linear_model/linear_model.py +19 -23
onedal/linear_model/logistic_regression.py +9 -11
onedal/linear_model/tests/test_incremental_linear_regression.py +70 -25
onedal/linear_model/tests/test_incremental_ridge_regression.py +64 -0
onedal/linear_model/tests/test_linear_regression.py +13 -4
onedal/neighbors/neighbors.py +21 -25
onedal/primitives/kernel_functions.py +3 -4
onedal/spmd/basic_statistics/incremental_basic_statistics.py +7 -5
onedal/spmd/covariance/incremental_covariance.py +6 -5
onedal/spmd/decomposition/incremental_pca.py +14 -7
onedal/spmd/linear_model/incremental_linear_model.py +12 -8
onedal/svm/svm.py +9 -9
onedal/utils/tests/test_validation.py +142 -0
onedal/utils/validation.py +38 -14
{scikit_learn_intelex-2025.1.0.dist-info → scikit_learn_intelex-2025.4.0.dist-info}/METADATA +39 -80
{scikit_learn_intelex-2025.1.0.dist-info → scikit_learn_intelex-2025.4.0.dist-info}/RECORD +94 -92
sklearnex/_utils.py +58 -13
sklearnex/basic_statistics/basic_statistics.py +39 -8
sklearnex/basic_statistics/incremental_basic_statistics.py +7 -0
sklearnex/basic_statistics/tests/test_basic_statistics.py +171 -36
sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +79 -28
sklearnex/cluster/dbscan.py +2 -2
sklearnex/cluster/k_means.py +4 -2
sklearnex/cluster/tests/test_kmeans.py +0 -2
sklearnex/covariance/incremental_covariance.py +7 -0
sklearnex/covariance/tests/test_incremental_covariance.py +50 -0
sklearnex/decomposition/pca.py +3 -1
sklearnex/dispatcher.py +1 -10
sklearnex/ensemble/_forest.py +2 -2
sklearnex/ensemble/tests/test_forest.py +23 -18
sklearnex/linear_model/incremental_linear.py +25 -12
sklearnex/linear_model/incremental_ridge.py +15 -8
sklearnex/linear_model/linear.py +8 -3
sklearnex/linear_model/logistic_regression.py +15 -13
sklearnex/linear_model/ridge.py +374 -8
sklearnex/linear_model/tests/test_incremental_linear.py +65 -5
sklearnex/linear_model/tests/test_incremental_ridge.py +61 -0
sklearnex/linear_model/tests/test_linear.py +11 -36
sklearnex/linear_model/tests/test_ridge.py +256 -0
sklearnex/manifold/t_sne.py +5 -0
sklearnex/manifold/tests/test_tsne.py +226 -2
sklearnex/neighbors/common.py +2 -2
sklearnex/preview/__init__.py +1 -1
sklearnex/preview/covariance/covariance.py +6 -2
sklearnex/preview/covariance/tests/test_covariance.py +1 -1
sklearnex/preview/decomposition/incremental_pca.py +14 -3
sklearnex/preview/decomposition/tests/test_incremental_pca.py +70 -0
sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +3 -3
sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +6 -7
sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +3 -2
sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +16 -14
sklearnex/svm/_common.py +2 -2
sklearnex/tests/test_common.py +130 -29
sklearnex/tests/test_hyperparameters.py +43 -0
sklearnex/tests/test_memory_usage.py +7 -39
sklearnex/tests/test_monkeypatch.py +4 -11
sklearnex/tests/test_patching.py +0 -8
sklearnex/tests/test_run_to_run_stability.py +7 -2
sklearnex/tests/utils/__init__.py +4 -0
sklearnex/tests/utils/base.py +65 -0
sklearnex/utils/__init__.py +2 -2
sklearnex/utils/tests/test_validation.py +238 -0
sklearnex/utils/validation.py +192 -1
sklearnex/preview/linear_model/__init__.py +0 -19
sklearnex/preview/linear_model/ridge.py +0 -424
sklearnex/preview/linear_model/tests/test_ridge.py +0 -102
sklearnex/utils/tests/test_finite.py +0 -89
{scikit_learn_intelex-2025.1.0.dist-info → scikit_learn_intelex-2025.4.0.dist-info}/LICENSE.txt +0 -0
{scikit_learn_intelex-2025.1.0.dist-info → scikit_learn_intelex-2025.4.0.dist-info}/WHEEL +0 -0
{scikit_learn_intelex-2025.1.0.dist-info → scikit_learn_intelex-2025.4.0.dist-info}/top_level.txt +0 -0

daal4py/_daal4py.cpython-312-x86_64-linux-gnu.so CHANGED Viewed

Binary file

daal4py/mpi_transceiver.cpython-312-x86_64-linux-gnu.so CHANGED Viewed

Binary file

daal4py/sklearn/utils/validation.py CHANGED Viewed

@@ -98,9 +98,12 @@ def _assert_all_finite(
     )
     _dal_ready = _patching_status.and_conditions(
         [
-            (X.ndim in [1, 2], "X has not 1 or 2 dimensions."),
-            (not np.any(np.equal(X.shape, 0)), "X shape contains 0."),
-            (dt in [np.float32, np.float64], "X dtype is not float32 or float64."),
+            (X.ndim in [1, 2], f"Input {input_name} does not have 1 or 2 dimensions."),
+            (not np.any(np.equal(X.shape, 0)), f"Input {input_name} shape contains a 0."),
+            (
+                dt in [np.float32, np.float64],
+                f"Input {input_name} dtype is not float32 or float64.",
+            ),
         ]
     )
     _patching_status.write_log()

onedal/_device_offload.py CHANGED Viewed

@@ -29,6 +29,14 @@ if dpctl_available:
     from dpctl import SyclQueue
     from dpctl.memory import MemoryUSMDevice, as_usm_memory
     from dpctl.tensor import usm_ndarray
+else:
+    import onedal
+    # setting fallback to `object` will make if isinstance call
+    # in _get_global_queue always true for situations without the
+    # dpc backend when `device_offload` is used. Instead, it will
+    # fail at the policy check phase yielding a RuntimeError
+    SyclQueue = getattr(onedal._backend, "SyclQueue", object)
 if dpnp_available:
     import dpnp
@@ -36,30 +44,6 @@ if dpnp_available:
     from .utils._array_api import _convert_to_dpnp
-class DummySyclQueue:
-    """This class is designed to act like dpctl.SyclQueue
-    to allow device dispatching in scenarios when dpctl is not available"""
-    class DummySyclDevice:
-        def __init__(self, filter_string):
-            self._filter_string = filter_string
-            self.is_cpu = "cpu" in filter_string
-            self.is_gpu = "gpu" in filter_string
-            self.has_aspect_fp64 = self.is_cpu
-            if not (self.is_cpu):
-                logging.warning(
-                    "Device support is limited. "
-                    "Please install dpctl for full experience"
-                )
-        def get_filter_string(self):
-            return self._filter_string
-    def __init__(self, filter_string):
-        self.sycl_device = self.DummySyclDevice(filter_string)
 def _copy_to_usm(queue, array):
     if not dpctl_available:
         raise RuntimeError(
@@ -139,12 +123,10 @@ def _transfer_to_host(queue, *data):
 def _get_global_queue():
     target = _get_config()["target_offload"]
-    QueueClass = DummySyclQueue if not dpctl_available else SyclQueue
     if target != "auto":
-        if isinstance(target, QueueClass):
+        if isinstance(target, SyclQueue):
             return target
-        return QueueClass(target)
+        return SyclQueue(target)
     return None

onedal/_onedal_py_dpc.cpython-312-x86_64-linux-gnu.so CHANGED Viewed

Binary file

onedal/_onedal_py_host.cpython-312-x86_64-linux-gnu.so CHANGED Viewed

Binary file

onedal/_onedal_py_spmd_dpc.cpython-312-x86_64-linux-gnu.so CHANGED Viewed

Binary file

onedal/basic_statistics/basic_statistics.py CHANGED Viewed

@@ -20,7 +20,7 @@ from abc import ABCMeta, abstractmethod
 import numpy as np
 from ..common._base import BaseEstimator
-from ..datatypes import _convert_to_supported, from_table, to_table
+from ..datatypes import from_table, to_table
 from ..utils import _is_csr
 from ..utils.validation import _check_array
@@ -57,7 +57,7 @@ class BaseBasicStatistics(BaseEstimator, metaclass=ABCMeta):
     def _get_onedal_params(self, is_csr, dtype=np.float32):
         options = self._get_result_options(self.options)
         return {
-            "fptype": "float" if dtype == np.float32 else "double",
+            "fptype": dtype,
             "method": "sparse" if is_csr else self.algorithm,
             "result_option": options,
         }
@@ -81,11 +81,11 @@ class BasicStatistics(BaseBasicStatistics):
         if sample_weight is not None:
             sample_weight = _check_array(sample_weight, ensure_2d=False)
-        data, sample_weight = _convert_to_supported(policy, data, sample_weight)
         is_single_dim = data.ndim == 1
-        data_table, weights_table = to_table(data, sample_weight)
-        dtype = data.dtype
+        data_table, weights_table = to_table(data, sample_weight, queue=queue)
+        dtype = data_table.dtype
         raw_result = self._compute_raw(data_table, weights_table, policy, dtype, is_csr)
         for opt, raw_value in raw_result.items():
             value = from_table(raw_value).ravel()

onedal/basic_statistics/incremental_basic_statistics.py CHANGED Viewed

@@ -18,7 +18,7 @@ import numpy as np
 from daal4py.sklearn._utils import get_dtype
-from ..datatypes import _convert_to_supported, from_table, to_table
+from ..datatypes import from_table, to_table
 from ..utils import _check_array
 from .basic_statistics import BaseBasicStatistics
@@ -70,10 +70,21 @@ class IncrementalBasicStatistics(BaseBasicStatistics):
         self._reset()
     def _reset(self):
+        self._need_to_finalize = False
         self._partial_result = self._get_backend(
             "basic_statistics", None, "partial_compute_result"
         )
+    def __getstate__(self):
+        # Since finalize_fit can't be dispatched without directly provided queue
+        # and the dispatching policy can't be serialized, the computation is finalized
+        # here and the policy is not saved in serialized data.
+        self.finalize_fit()
+        data = self.__dict__.copy()
+        data.pop("_queue", None)
+        return data
     def partial_fit(self, X, weights=None, queue=None):
         """
         Computes partial data for basic statistics
@@ -95,7 +106,6 @@ class IncrementalBasicStatistics(BaseBasicStatistics):
         """
         self._queue = queue
         policy = self._get_policy(queue, X)
-        X, weights = _convert_to_supported(policy, X, weights)
         X = _check_array(
             X, dtype=[np.float64, np.float32], ensure_2d=False, force_all_finite=False
@@ -112,7 +122,7 @@ class IncrementalBasicStatistics(BaseBasicStatistics):
             dtype = get_dtype(X)
             self._onedal_params = self._get_onedal_params(False, dtype=dtype)
-        X_table, weights_table = to_table(X, weights)
+        X_table, weights_table = to_table(X, weights, queue=queue)
         self._partial_result = self._get_backend(
             "basic_statistics",
             None,
@@ -124,6 +134,9 @@ class IncrementalBasicStatistics(BaseBasicStatistics):
             weights_table,
         )
+        self._need_to_finalize = True
+        return self
     def finalize_fit(self, queue=None):
         """
         Finalizes basic statistics computation and obtains result
@@ -139,22 +152,24 @@ class IncrementalBasicStatistics(BaseBasicStatistics):
         self : object
             Returns the instance itself.
         """
+        if self._need_to_finalize:
+            if queue is not None:
+                policy = self._get_policy(queue)
+            else:
+                policy = self._get_policy(self._queue)
+            result = self._get_backend(
+                "basic_statistics",
+                None,
+                "finalize_compute",
+                policy,
+                self._onedal_params,
+                self._partial_result,
+            )
+            options = self._get_result_options(self.options).split("|")
+            for opt in options:
+                setattr(self, opt, from_table(getattr(result, opt)).ravel())
-        if queue is not None:
-            policy = self._get_policy(queue)
-        else:
-            policy = self._get_policy(self._queue)
-        result = self._get_backend(
-            "basic_statistics",
-            None,
-            "finalize_compute",
-            policy,
-            self._onedal_params,
-            self._partial_result,
-        )
-        options = self._get_result_options(self.options).split("|")
-        for opt in options:
-            setattr(self, opt, from_table(getattr(result, opt)).ravel())
+            self._need_to_finalize = False
         return self

onedal/basic_statistics/tests/test_basic_statistics.py CHANGED Viewed

@@ -21,66 +21,9 @@ from scipy import sparse as sp
 from daal4py.sklearn._utils import daal_check_version
 from onedal.basic_statistics import BasicStatistics
+from onedal.basic_statistics.tests.utils import options_and_tests
 from onedal.tests.utils._device_selection import get_queues
-def expected_sum(X):
-    return np.sum(X, axis=0)
-def expected_max(X):
-    return np.max(X, axis=0)
-def expected_min(X):
-    return np.min(X, axis=0)
-def expected_mean(X):
-    return np.mean(X, axis=0)
-def expected_standard_deviation(X):
-    return np.std(X, axis=0)
-def expected_variance(X):
-    return np.var(X, axis=0)
-def expected_variation(X):
-    return expected_standard_deviation(X) / expected_mean(X)
-def expected_sum_squares(X):
-    return np.sum(np.square(X), axis=0)
-def expected_sum_squares_centered(X):
-    return np.sum(np.square(X - expected_mean(X)), axis=0)
-def expected_standard_deviation(X):
-    return np.sqrt(expected_variance(X))
-def expected_second_order_raw_moment(X):
-    return np.mean(np.square(X), axis=0)
-options_and_tests = [
-    ("sum", expected_sum, (5e-4, 1e-7)),
-    ("min", expected_min, (1e-7, 1e-7)),
-    ("max", expected_max, (1e-7, 1e-7)),
-    ("mean", expected_mean, (5e-7, 1e-7)),
-    ("variance", expected_variance, (2e-3, 2e-3)),
-    ("variation", expected_variation, (5e-2, 5e-2)),
-    ("sum_squares", expected_sum_squares, (2e-4, 1e-7)),
-    ("sum_squares_centered", expected_sum_squares_centered, (2e-4, 1e-7)),
-    ("standard_deviation", expected_standard_deviation, (2e-3, 2e-3)),
-    ("second_order_raw_moment", expected_second_order_raw_moment, (1e-6, 1e-7)),
-]
 options_and_tests_csr = [
     ("sum", "sum", (5e-6, 1e-9)),
     ("min", "min", (0, 0)),
@@ -90,15 +33,15 @@ options_and_tests_csr = [
 @pytest.mark.parametrize("queue", get_queues())
-@pytest.mark.parametrize("option", options_and_tests)
+@pytest.mark.parametrize("result_option", options_and_tests.keys())
 @pytest.mark.parametrize("row_count", [100, 1000])
 @pytest.mark.parametrize("column_count", [10, 100])
 @pytest.mark.parametrize("weighted", [True, False])
 @pytest.mark.parametrize("dtype", [np.float32, np.float64])
 def test_single_option_on_random_data(
-    queue, option, row_count, column_count, weighted, dtype
+    queue, result_option, row_count, column_count, weighted, dtype
 ):
-    result_option, function, tols = option
+    function, tols = options_and_tests[result_option]
     fp32tol, fp64tol = tols
     seed = 77
     gen = np.random.default_rng(seed)
@@ -150,15 +93,15 @@ def test_multiple_options_on_random_data(queue, row_count, column_count, weighte
     if weighted:
         weighted_data = np.diag(weights) @ data
         gtr_mean, gtr_max, gtr_sum = (
-            expected_mean(weighted_data),
-            expected_max(weighted_data),
-            expected_sum(weighted_data),
+            options_and_tests["mean"][0](weighted_data),
+            options_and_tests["max"][0](weighted_data),
+            options_and_tests["sum"][0](weighted_data),
         )
     else:
         gtr_mean, gtr_max, gtr_sum = (
-            expected_mean(data),
-            expected_max(data),
-            expected_sum(data),
+            options_and_tests["mean"][0](data),
+            options_and_tests["max"][0](data),
+            options_and_tests["sum"][0](data),
         )
     tol = 5e-4 if res_mean.dtype == np.float32 else 1e-7
@@ -190,8 +133,8 @@ def test_all_option_on_random_data(queue, row_count, column_count, weighted, dty
     if weighted:
         weighted_data = np.diag(weights) @ data
-    for option in options_and_tests:
-        result_option, function, tols = option
+    for result_option in options_and_tests:
+        function, tols = options_and_tests[result_option]
         fp32tol, fp64tol = tols
         res = getattr(result, result_option)
         if weighted:
@@ -203,12 +146,13 @@ def test_all_option_on_random_data(queue, row_count, column_count, weighted, dty
 @pytest.mark.parametrize("queue", get_queues())
-@pytest.mark.parametrize("option", options_and_tests)
+@pytest.mark.parametrize("result_option", options_and_tests.keys())
 @pytest.mark.parametrize("data_size", [100, 1000])
 @pytest.mark.parametrize("weighted", [True, False])
 @pytest.mark.parametrize("dtype", [np.float32, np.float64])
-def test_1d_input_on_random_data(queue, option, data_size, weighted, dtype):
-    result_option, function, tols = option
+def test_1d_input_on_random_data(queue, result_option, data_size, weighted, dtype):
+    function, tols = options_and_tests[result_option]
     fp32tol, fp64tol = tols
     seed = 77
     gen = np.random.default_rng(seed)

onedal/basic_statistics/tests/test_incremental_basic_statistics.py CHANGED Viewed

@@ -19,12 +19,8 @@ import pytest
 from numpy.testing import assert_allclose
 from onedal.basic_statistics import IncrementalBasicStatistics
-from onedal.basic_statistics.tests.test_basic_statistics import (
-    expected_max,
-    expected_mean,
-    expected_sum,
-    options_and_tests,
-)
+from onedal.basic_statistics.tests.utils import options_and_tests
+from onedal.datatypes import from_table
 from onedal.tests.utils._device_selection import get_queues
@@ -67,15 +63,15 @@ def test_multiple_options_on_gold_data(queue, weighted, dtype):
 @pytest.mark.parametrize("queue", get_queues())
 @pytest.mark.parametrize("num_batches", [2, 10])
-@pytest.mark.parametrize("option", options_and_tests)
+@pytest.mark.parametrize("result_option", options_and_tests.keys())
 @pytest.mark.parametrize("row_count", [100, 1000])
 @pytest.mark.parametrize("column_count", [10, 100])
 @pytest.mark.parametrize("weighted", [True, False])
 @pytest.mark.parametrize("dtype", [np.float32, np.float64])
 def test_single_option_on_random_data(
-    queue, num_batches, option, row_count, column_count, weighted, dtype
+    queue, num_batches, result_option, row_count, column_count, weighted, dtype
 ):
-    result_option, function, tols = option
+    function, tols = options_and_tests[result_option]
     fp32tol, fp64tol = tols
     seed = 77
     gen = np.random.default_rng(seed)
@@ -137,15 +133,15 @@ def test_multiple_options_on_random_data(
     if weighted:
         weighted_data = np.diag(weights) @ data
         gtr_mean, gtr_max, gtr_sum = (
-            expected_mean(weighted_data),
-            expected_max(weighted_data),
-            expected_sum(weighted_data),
+            options_and_tests["mean"][0](weighted_data),
+            options_and_tests["max"][0](weighted_data),
+            options_and_tests["sum"][0](weighted_data),
         )
     else:
         gtr_mean, gtr_max, gtr_sum = (
-            expected_mean(data),
-            expected_max(data),
-            expected_sum(data),
+            options_and_tests["mean"][0](data),
+            options_and_tests["max"][0](data),
+            options_and_tests["sum"][0](data),
         )
     tol = 3e-4 if res_mean.dtype == np.float32 else 1e-7
@@ -184,8 +180,8 @@ def test_all_option_on_random_data(
     if weighted:
         weighted_data = np.diag(weights) @ data
-    for option in options_and_tests:
-        result_option, function, tols = option
+    for result_option in options_and_tests:
+        function, tols = options_and_tests[result_option]
         fp32tol, fp64tol = tols
         res = getattr(result, result_option)
         if weighted:
@@ -194,3 +190,90 @@ def test_all_option_on_random_data(
             gtr = function(data)
         tol = fp32tol if res.dtype == np.float32 else fp64tol
         assert_allclose(gtr, res, atol=tol)
+@pytest.mark.parametrize("queue", get_queues())
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_incremental_estimator_pickle(queue, dtype):
+    import pickle
+    from onedal.basic_statistics import IncrementalBasicStatistics
+    incbs = IncrementalBasicStatistics()
+    # Check that estimator can be serialized without any data.
+    dump = pickle.dumps(incbs)
+    incbs_loaded = pickle.loads(dump)
+    seed = 77
+    gen = np.random.default_rng(seed)
+    X = gen.uniform(low=-0.3, high=+0.7, size=(10, 10))
+    X = X.astype(dtype)
+    X_split = np.array_split(X, 2)
+    incbs.partial_fit(X_split[0], queue=queue)
+    incbs_loaded.partial_fit(X_split[0], queue=queue)
+    assert incbs._need_to_finalize == True
+    assert incbs_loaded._need_to_finalize == True
+    # Check that estimator can be serialized after partial_fit call.
+    dump = pickle.dumps(incbs)
+    incbs_loaded = pickle.loads(dump)
+    assert incbs._need_to_finalize == False
+    # Finalize is called during serialization to make sure partial results are finalized correctly.
+    assert incbs_loaded._need_to_finalize == False
+    partial_n_rows = from_table(incbs._partial_result.partial_n_rows)
+    partial_n_rows_loaded = from_table(incbs_loaded._partial_result.partial_n_rows)
+    assert_allclose(partial_n_rows, partial_n_rows_loaded)
+    partial_min = from_table(incbs._partial_result.partial_min)
+    partial_min_loaded = from_table(incbs_loaded._partial_result.partial_min)
+    assert_allclose(partial_min, partial_min_loaded)
+    partial_max = from_table(incbs._partial_result.partial_max)
+    partial_max_loaded = from_table(incbs_loaded._partial_result.partial_max)
+    assert_allclose(partial_max, partial_max_loaded)
+    partial_sum = from_table(incbs._partial_result.partial_sum)
+    partial_sum_loaded = from_table(incbs_loaded._partial_result.partial_sum)
+    assert_allclose(partial_sum, partial_sum_loaded)
+    partial_sum_squares = from_table(incbs._partial_result.partial_sum_squares)
+    partial_sum_squares_loaded = from_table(
+        incbs_loaded._partial_result.partial_sum_squares
+    )
+    assert_allclose(partial_sum_squares, partial_sum_squares_loaded)
+    partial_sum_squares_centered = from_table(
+        incbs._partial_result.partial_sum_squares_centered
+    )
+    partial_sum_squares_centered_loaded = from_table(
+        incbs_loaded._partial_result.partial_sum_squares_centered
+    )
+    assert_allclose(partial_sum_squares_centered, partial_sum_squares_centered_loaded)
+    incbs.partial_fit(X_split[1], queue=queue)
+    incbs_loaded.partial_fit(X_split[1], queue=queue)
+    assert incbs._need_to_finalize == True
+    assert incbs_loaded._need_to_finalize == True
+    dump = pickle.dumps(incbs_loaded)
+    incbs_loaded = pickle.loads(dump)
+    assert incbs._need_to_finalize == True
+    assert incbs_loaded._need_to_finalize == False
+    incbs.finalize_fit()
+    incbs_loaded.finalize_fit()
+    # Check that finalized estimator can be serialized.
+    dump = pickle.dumps(incbs_loaded)
+    incbs_loaded = pickle.loads(dump)
+    for result_option in options_and_tests:
+        _, tols = options_and_tests[result_option]
+        fp32tol, fp64tol = tols
+        res = getattr(incbs, result_option)
+        res_loaded = getattr(incbs_loaded, result_option)
+        tol = fp32tol if res.dtype == np.float32 else fp64tol
+        assert_allclose(res, res_loaded, atol=tol)

onedal/basic_statistics/tests/utils.py ADDED Viewed

@@ -0,0 +1,50 @@
+# ===============================================================================
+# Copyright 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===============================================================================
+import numpy as np
+# Compute unbiased variation for the columns of array-like X
+def variation(X):
+    X_mean = np.mean(X, axis=0)
+    if np.all(X_mean):
+        # Avoid division by zero
+        return np.std(X, axis=0, ddof=1) / X_mean
+    else:
+        return np.array(
+            [
+                x / y if y != 0 else np.nan
+                for x, y in zip(np.std(X, axis=0, ddof=1), X_mean)
+            ]
+        )
+options_and_tests = {
+    "sum": (lambda X: np.sum(X, axis=0), (5e-4, 1e-7)),
+    "min": (lambda X: np.min(X, axis=0), (1e-7, 1e-7)),
+    "max": (lambda X: np.max(X, axis=0), (1e-7, 1e-7)),
+    "mean": (lambda X: np.mean(X, axis=0), (5e-7, 1e-7)),
+    # sklearnex computes unbiased variance and standard deviation that is why ddof=1
+    "variance": (lambda X: np.var(X, axis=0, ddof=1), (2e-4, 1e-7)),
+    "variation": (lambda X: variation(X), (1e-3, 1e-6)),
+    "sum_squares": (lambda X: np.sum(np.square(X), axis=0), (2e-4, 1e-7)),
+    "sum_squares_centered": (
+        lambda X: np.sum(np.square(X - np.mean(X, axis=0)), axis=0),
+        (1e-3, 1e-7),
+    ),
+    "standard_deviation": (lambda X: np.std(X, axis=0, ddof=1), (2e-3, 1e-7)),
+    "second_order_raw_moment": (lambda X: np.mean(np.square(X), axis=0), (1e-6, 1e-7)),
+}

onedal/cluster/dbscan.py CHANGED Viewed

@@ -20,7 +20,7 @@ from daal4py.sklearn._utils import get_dtype, make2d
 from ..common._base import BaseEstimator
 from ..common._mixin import ClusterMixin
-from ..datatypes import _convert_to_supported, from_table, to_table
+from ..datatypes import from_table, to_table
 from ..utils import _check_array
@@ -48,7 +48,7 @@ class BaseDBSCAN(BaseEstimator, ClusterMixin):
     def _get_onedal_params(self, dtype=np.float32):
         return {
-            "fptype": "float" if dtype == np.float32 else "double",
+            "fptype": dtype,
             "method": "by_default",
             "min_observations": int(self.min_samples),
             "epsilon": float(self.eps),
@@ -60,15 +60,10 @@ class BaseDBSCAN(BaseEstimator, ClusterMixin):
         policy = self._get_policy(queue, X)
         X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32])
         sample_weight = make2d(sample_weight) if sample_weight is not None else None
-        X = make2d(X)
+        X_table, sample_weight_table = to_table(X, sample_weight, queue=queue)
-        types = [np.float32, np.float64]
-        if get_dtype(X) not in types:
-            X = X.astype(np.float64)
-        X = _convert_to_supported(policy, X)
-        dtype = get_dtype(X)
-        params = self._get_onedal_params(dtype)
-        result = module.compute(policy, params, to_table(X), to_table(sample_weight))
+        params = self._get_onedal_params(X_table.dtype)
+        result = module.compute(policy, params, X_table, sample_weight_table)
         self.labels_ = from_table(result.responses).ravel()
         if result.core_observation_indices is not None:

onedal/cluster/kmeans.py CHANGED Viewed

@@ -34,7 +34,7 @@ from sklearn.utils import check_random_state
 from ..common._base import BaseEstimator as onedal_BaseEstimator
 from ..common._mixin import ClusterMixin, TransformerMixin
-from ..datatypes import _convert_to_supported, from_table, to_table
+from ..datatypes import from_table, to_table
 from ..utils import _check_array, _is_arraylike_not_scalar, _is_csr
@@ -145,7 +145,7 @@ class _BaseKMeans(onedal_BaseEstimator, TransformerMixin, ClusterMixin, ABC):
     def _get_onedal_params(self, is_csr=False, dtype=np.float32, result_options=None):
         thr = self._tol if hasattr(self, "_tol") else self.tol
         return {
-            "fptype": "float" if dtype == np.float32 else "double",
+            "fptype": dtype,
             "method": "lloyd_csr" if is_csr else "by_default",
             "seed": -1,
             "max_iteration_count": self.max_iter,
@@ -205,8 +205,7 @@ class _BaseKMeans(onedal_BaseEstimator, TransformerMixin, ClusterMixin, ABC):
             assert centers.shape[1] == X_table.column_count
             # KMeans is implemented on both CPU and GPU for Dense and CSR data
             # The original policy can be used here
-            centers = _convert_to_supported(policy, centers)
-            centers_table = to_table(centers)
+            centers_table = to_table(centers, queue=getattr(policy, "_queue", None))
         else:
             raise TypeError("Unsupported type of the `init` value")
@@ -240,16 +239,14 @@ class _BaseKMeans(onedal_BaseEstimator, TransformerMixin, ClusterMixin, ABC):
                 f"callable, got '{ init }' instead."
             )
-        centers = _convert_to_supported(policy, centers)
-        return to_table(centers)
+        return to_table(centers, queue=getattr(policy, "_queue", None))
     def _fit_backend(
         self, X_table, centroids_table, module, policy, dtype=np.float32, is_csr=False
     ):
         params = self._get_onedal_params(is_csr, dtype)
-        meta = _backend.get_table_metadata(X_table)
-        assert meta.get_npy_dtype(0) == dtype
+        assert X_table.dtype == dtype
         result = module.train(policy, params, X_table, centroids_table)
@@ -266,14 +263,11 @@ class _BaseKMeans(onedal_BaseEstimator, TransformerMixin, ClusterMixin, ABC):
         X = _check_array(
             X, dtype=[np.float64, np.float32], accept_sparse="csr", force_all_finite=False
         )
-        X = _convert_to_supported(policy, X)
-        dtype = get_dtype(X)
-        X_table = to_table(X)
+        X_table = to_table(X, queue=queue)
+        dtype = X_table.dtype
         self._check_params_vs_input(X_table, is_csr, policy, dtype=dtype)
-        params = self._get_onedal_params(is_csr, dtype)
         self.n_features_in_ = X_table.column_count
         best_model, best_n_iter = None, None
@@ -381,9 +375,8 @@ class _BaseKMeans(onedal_BaseEstimator, TransformerMixin, ClusterMixin, ABC):
         is_csr = _is_csr(X)
         policy = self._get_policy(queue, X)
-        X = _convert_to_supported(policy, X)
-        X_table, dtype = to_table(X), X.dtype
-        params = self._get_onedal_params(is_csr, dtype, result_options)
+        X_table = to_table(X, queue=queue)
+        params = self._get_onedal_params(is_csr, X_table.dtype, result_options)
         result = module.infer(policy, params, self.model_, X_table)