PyPI - scikit-learn-intelex - Versions diffs - 2025.1.0__py311-none-manylinux_2_28_x86_64.whl - Mend

scikit-learn-intelex 2025.1.0__py311-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (280) hide show

daal4py/__init__.py +73 -0
daal4py/__main__.py +58 -0
daal4py/_daal4py.cpython-311-x86_64-linux-gnu.so +0 -0
daal4py/doc/third-party-programs.txt +424 -0
daal4py/mb/__init__.py +19 -0
daal4py/mb/model_builders.py +377 -0
daal4py/mpi_transceiver.cpython-311-x86_64-linux-gnu.so +0 -0
daal4py/sklearn/__init__.py +40 -0
daal4py/sklearn/_n_jobs_support.py +248 -0
daal4py/sklearn/_utils.py +245 -0
daal4py/sklearn/cluster/__init__.py +20 -0
daal4py/sklearn/cluster/dbscan.py +165 -0
daal4py/sklearn/cluster/k_means.py +597 -0
daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
daal4py/sklearn/decomposition/__init__.py +19 -0
daal4py/sklearn/decomposition/_pca.py +524 -0
daal4py/sklearn/ensemble/AdaBoostClassifier.py +196 -0
daal4py/sklearn/ensemble/GBTDAAL.py +337 -0
daal4py/sklearn/ensemble/__init__.py +27 -0
daal4py/sklearn/ensemble/_forest.py +1397 -0
daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
daal4py/sklearn/linear_model/__init__.py +29 -0
daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
daal4py/sklearn/linear_model/_linear.py +272 -0
daal4py/sklearn/linear_model/_ridge.py +325 -0
daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
daal4py/sklearn/linear_model/linear.py +17 -0
daal4py/sklearn/linear_model/logistic_loss.py +195 -0
daal4py/sklearn/linear_model/logistic_path.py +1026 -0
daal4py/sklearn/linear_model/ridge.py +17 -0
daal4py/sklearn/linear_model/tests/test_linear.py +208 -0
daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
daal4py/sklearn/manifold/__init__.py +19 -0
daal4py/sklearn/manifold/_t_sne.py +405 -0
daal4py/sklearn/metrics/__init__.py +20 -0
daal4py/sklearn/metrics/_pairwise.py +236 -0
daal4py/sklearn/metrics/_ranking.py +210 -0
daal4py/sklearn/model_selection/__init__.py +19 -0
daal4py/sklearn/model_selection/_split.py +309 -0
daal4py/sklearn/model_selection/tests/test_split.py +56 -0
daal4py/sklearn/monkeypatch/__init__.py +0 -0
daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
daal4py/sklearn/monkeypatch/tests/test_patching.py +90 -0
daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +117 -0
daal4py/sklearn/neighbors/__init__.py +21 -0
daal4py/sklearn/neighbors/_base.py +503 -0
daal4py/sklearn/neighbors/_classification.py +139 -0
daal4py/sklearn/neighbors/_regression.py +74 -0
daal4py/sklearn/neighbors/_unsupervised.py +55 -0
daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
daal4py/sklearn/svm/__init__.py +19 -0
daal4py/sklearn/svm/svm.py +734 -0
daal4py/sklearn/utils/__init__.py +21 -0
daal4py/sklearn/utils/base.py +75 -0
daal4py/sklearn/utils/tests/test_utils.py +51 -0
daal4py/sklearn/utils/validation.py +693 -0
onedal/__init__.py +83 -0
onedal/_config.py +54 -0
onedal/_device_offload.py +222 -0
onedal/_onedal_py_dpc.cpython-311-x86_64-linux-gnu.so +0 -0
onedal/_onedal_py_host.cpython-311-x86_64-linux-gnu.so +0 -0
onedal/_onedal_py_spmd_dpc.cpython-311-x86_64-linux-gnu.so +0 -0
onedal/basic_statistics/__init__.py +20 -0
onedal/basic_statistics/basic_statistics.py +107 -0
onedal/basic_statistics/incremental_basic_statistics.py +160 -0
onedal/basic_statistics/tests/test_basic_statistics.py +298 -0
onedal/basic_statistics/tests/test_incremental_basic_statistics.py +196 -0
onedal/cluster/__init__.py +27 -0
onedal/cluster/dbscan.py +110 -0
onedal/cluster/kmeans.py +564 -0
onedal/cluster/kmeans_init.py +115 -0
onedal/cluster/tests/test_dbscan.py +125 -0
onedal/cluster/tests/test_kmeans.py +88 -0
onedal/cluster/tests/test_kmeans_init.py +93 -0
onedal/common/_base.py +38 -0
onedal/common/_estimator_checks.py +47 -0
onedal/common/_mixin.py +62 -0
onedal/common/_policy.py +59 -0
onedal/common/_spmd_policy.py +30 -0
onedal/common/hyperparameters.py +125 -0
onedal/common/tests/test_policy.py +76 -0
onedal/covariance/__init__.py +20 -0
onedal/covariance/covariance.py +125 -0
onedal/covariance/incremental_covariance.py +146 -0
onedal/covariance/tests/test_covariance.py +50 -0
onedal/covariance/tests/test_incremental_covariance.py +122 -0
onedal/datatypes/__init__.py +19 -0
onedal/datatypes/_data_conversion.py +154 -0
onedal/datatypes/tests/common.py +126 -0
onedal/datatypes/tests/test_data.py +414 -0
onedal/decomposition/__init__.py +20 -0
onedal/decomposition/incremental_pca.py +204 -0
onedal/decomposition/pca.py +186 -0
onedal/decomposition/tests/test_incremental_pca.py +198 -0
onedal/ensemble/__init__.py +29 -0
onedal/ensemble/forest.py +727 -0
onedal/ensemble/tests/test_random_forest.py +97 -0
onedal/linear_model/__init__.py +27 -0
onedal/linear_model/incremental_linear_model.py +258 -0
onedal/linear_model/linear_model.py +329 -0
onedal/linear_model/logistic_regression.py +249 -0
onedal/linear_model/tests/test_incremental_linear_regression.py +168 -0
onedal/linear_model/tests/test_incremental_ridge_regression.py +107 -0
onedal/linear_model/tests/test_linear_regression.py +250 -0
onedal/linear_model/tests/test_logistic_regression.py +95 -0
onedal/linear_model/tests/test_ridge.py +95 -0
onedal/neighbors/__init__.py +19 -0
onedal/neighbors/neighbors.py +767 -0
onedal/neighbors/tests/test_knn_classification.py +49 -0
onedal/primitives/__init__.py +27 -0
onedal/primitives/get_tree.py +25 -0
onedal/primitives/kernel_functions.py +153 -0
onedal/primitives/tests/test_kernel_functions.py +159 -0
onedal/spmd/__init__.py +25 -0
onedal/spmd/_base.py +30 -0
onedal/spmd/basic_statistics/__init__.py +20 -0
onedal/spmd/basic_statistics/basic_statistics.py +30 -0
onedal/spmd/basic_statistics/incremental_basic_statistics.py +69 -0
onedal/spmd/cluster/__init__.py +28 -0
onedal/spmd/cluster/dbscan.py +23 -0
onedal/spmd/cluster/kmeans.py +56 -0
onedal/spmd/covariance/__init__.py +20 -0
onedal/spmd/covariance/covariance.py +26 -0
onedal/spmd/covariance/incremental_covariance.py +82 -0
onedal/spmd/decomposition/__init__.py +20 -0
onedal/spmd/decomposition/incremental_pca.py +117 -0
onedal/spmd/decomposition/pca.py +26 -0
onedal/spmd/ensemble/__init__.py +19 -0
onedal/spmd/ensemble/forest.py +28 -0
onedal/spmd/linear_model/__init__.py +21 -0
onedal/spmd/linear_model/incremental_linear_model.py +97 -0
onedal/spmd/linear_model/linear_model.py +30 -0
onedal/spmd/linear_model/logistic_regression.py +38 -0
onedal/spmd/neighbors/__init__.py +19 -0
onedal/spmd/neighbors/neighbors.py +75 -0
onedal/svm/__init__.py +19 -0
onedal/svm/svm.py +556 -0
onedal/svm/tests/test_csr_svm.py +351 -0
onedal/svm/tests/test_nusvc.py +204 -0
onedal/svm/tests/test_nusvr.py +210 -0
onedal/svm/tests/test_svc.py +176 -0
onedal/svm/tests/test_svr.py +243 -0
onedal/tests/test_common.py +57 -0
onedal/tests/utils/_dataframes_support.py +162 -0
onedal/tests/utils/_device_selection.py +102 -0
onedal/utils/__init__.py +49 -0
onedal/utils/_array_api.py +81 -0
onedal/utils/_dpep_helpers.py +56 -0
onedal/utils/validation.py +440 -0
scikit_learn_intelex-2025.1.0.dist-info/LICENSE.txt +202 -0
scikit_learn_intelex-2025.1.0.dist-info/METADATA +231 -0
scikit_learn_intelex-2025.1.0.dist-info/RECORD +280 -0
scikit_learn_intelex-2025.1.0.dist-info/WHEEL +5 -0
scikit_learn_intelex-2025.1.0.dist-info/top_level.txt +3 -0
sklearnex/__init__.py +66 -0
sklearnex/__main__.py +58 -0
sklearnex/_config.py +116 -0
sklearnex/_device_offload.py +126 -0
sklearnex/_utils.py +132 -0
sklearnex/basic_statistics/__init__.py +20 -0
sklearnex/basic_statistics/basic_statistics.py +230 -0
sklearnex/basic_statistics/incremental_basic_statistics.py +345 -0
sklearnex/basic_statistics/tests/test_basic_statistics.py +270 -0
sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +404 -0
sklearnex/cluster/__init__.py +20 -0
sklearnex/cluster/dbscan.py +197 -0
sklearnex/cluster/k_means.py +395 -0
sklearnex/cluster/tests/test_dbscan.py +38 -0
sklearnex/cluster/tests/test_kmeans.py +159 -0
sklearnex/conftest.py +82 -0
sklearnex/covariance/__init__.py +19 -0
sklearnex/covariance/incremental_covariance.py +398 -0
sklearnex/covariance/tests/test_incremental_covariance.py +237 -0
sklearnex/decomposition/__init__.py +19 -0
sklearnex/decomposition/pca.py +425 -0
sklearnex/decomposition/tests/test_pca.py +58 -0
sklearnex/dispatcher.py +543 -0
sklearnex/doc/third-party-programs.txt +424 -0
sklearnex/ensemble/__init__.py +29 -0
sklearnex/ensemble/_forest.py +2029 -0
sklearnex/ensemble/tests/test_forest.py +135 -0
sklearnex/glob/__main__.py +72 -0
sklearnex/glob/dispatcher.py +101 -0
sklearnex/linear_model/__init__.py +32 -0
sklearnex/linear_model/coordinate_descent.py +30 -0
sklearnex/linear_model/incremental_linear.py +482 -0
sklearnex/linear_model/incremental_ridge.py +425 -0
sklearnex/linear_model/linear.py +341 -0
sklearnex/linear_model/logistic_regression.py +413 -0
sklearnex/linear_model/ridge.py +24 -0
sklearnex/linear_model/tests/test_incremental_linear.py +207 -0
sklearnex/linear_model/tests/test_incremental_ridge.py +153 -0
sklearnex/linear_model/tests/test_linear.py +167 -0
sklearnex/linear_model/tests/test_logreg.py +134 -0
sklearnex/manifold/__init__.py +19 -0
sklearnex/manifold/t_sne.py +21 -0
sklearnex/manifold/tests/test_tsne.py +26 -0
sklearnex/metrics/__init__.py +23 -0
sklearnex/metrics/pairwise.py +22 -0
sklearnex/metrics/ranking.py +20 -0
sklearnex/metrics/tests/test_metrics.py +39 -0
sklearnex/model_selection/__init__.py +21 -0
sklearnex/model_selection/split.py +22 -0
sklearnex/model_selection/tests/test_model_selection.py +34 -0
sklearnex/neighbors/__init__.py +27 -0
sklearnex/neighbors/_lof.py +236 -0
sklearnex/neighbors/common.py +310 -0
sklearnex/neighbors/knn_classification.py +231 -0
sklearnex/neighbors/knn_regression.py +207 -0
sklearnex/neighbors/knn_unsupervised.py +178 -0
sklearnex/neighbors/tests/test_neighbors.py +82 -0
sklearnex/preview/__init__.py +17 -0
sklearnex/preview/covariance/__init__.py +19 -0
sklearnex/preview/covariance/covariance.py +138 -0
sklearnex/preview/covariance/tests/test_covariance.py +66 -0
sklearnex/preview/decomposition/__init__.py +19 -0
sklearnex/preview/decomposition/incremental_pca.py +233 -0
sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
sklearnex/preview/linear_model/__init__.py +19 -0
sklearnex/preview/linear_model/ridge.py +424 -0
sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
sklearnex/spmd/__init__.py +25 -0
sklearnex/spmd/basic_statistics/__init__.py +20 -0
sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +307 -0
sklearnex/spmd/cluster/__init__.py +30 -0
sklearnex/spmd/cluster/dbscan.py +50 -0
sklearnex/spmd/cluster/kmeans.py +21 -0
sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
sklearnex/spmd/covariance/__init__.py +20 -0
sklearnex/spmd/covariance/covariance.py +21 -0
sklearnex/spmd/covariance/incremental_covariance.py +37 -0
sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
sklearnex/spmd/decomposition/__init__.py +20 -0
sklearnex/spmd/decomposition/incremental_pca.py +30 -0
sklearnex/spmd/decomposition/pca.py +21 -0
sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
sklearnex/spmd/ensemble/__init__.py +19 -0
sklearnex/spmd/ensemble/forest.py +71 -0
sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
sklearnex/spmd/linear_model/__init__.py +21 -0
sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
sklearnex/spmd/linear_model/linear_model.py +21 -0
sklearnex/spmd/linear_model/logistic_regression.py +21 -0
sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +329 -0
sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +162 -0
sklearnex/spmd/neighbors/__init__.py +19 -0
sklearnex/spmd/neighbors/neighbors.py +25 -0
sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
sklearnex/svm/__init__.py +29 -0
sklearnex/svm/_common.py +339 -0
sklearnex/svm/nusvc.py +371 -0
sklearnex/svm/nusvr.py +170 -0
sklearnex/svm/svc.py +399 -0
sklearnex/svm/svr.py +167 -0
sklearnex/svm/tests/test_svm.py +93 -0
sklearnex/tests/test_common.py +390 -0
sklearnex/tests/test_config.py +123 -0
sklearnex/tests/test_memory_usage.py +379 -0
sklearnex/tests/test_monkeypatch.py +276 -0
sklearnex/tests/test_n_jobs_support.py +108 -0
sklearnex/tests/test_parallel.py +48 -0
sklearnex/tests/test_patching.py +385 -0
sklearnex/tests/test_run_to_run_stability.py +321 -0
sklearnex/tests/utils/__init__.py +44 -0
sklearnex/tests/utils/base.py +371 -0
sklearnex/tests/utils/spmd.py +198 -0
sklearnex/utils/__init__.py +19 -0
sklearnex/utils/_array_api.py +82 -0
sklearnex/utils/parallel.py +59 -0
sklearnex/utils/tests/test_finite.py +89 -0
sklearnex/utils/validation.py +17 -0

onedal/cluster/tests/test_dbscan.py ADDED Viewed

@@ -0,0 +1,125 @@
+# ===============================================================================
+# Copyright 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===============================================================================
+import numpy as np
+import pytest
+from sklearn.cluster import DBSCAN as DBSCAN_SKLEARN
+from sklearn.cluster.tests.common import generate_clustered_data
+from onedal.cluster import DBSCAN as ONEDAL_DBSCAN
+from onedal.tests.utils._device_selection import get_queues
+def generate_data(
+    low: float, high: float, samples_number: int, sample_dimension: int, seed=None
+) -> tuple:
+    """Generate a random feature table and one random weight per sample.
+
+    Parameters
+    ----------
+    low, high : float
+        Bounds passed to the uniform sampler for the feature table.
+    samples_number : int
+        Number of rows in the table and number of weights.
+    sample_dimension : int
+        Number of columns in the table (a scalar, not a shape tuple).
+    seed : int or None, default=None
+        Seed for ``np.random.RandomState``. ``None`` keeps the original
+        unseeded behaviour; pass a value to reproduce a failing run.
+
+    Returns
+    -------
+    tuple
+        ``(data, weights)`` where ``data`` has shape
+        ``(samples_number, sample_dimension)`` and ``weights`` has shape
+        ``(samples_number,)`` drawn with the sampler's default bounds.
+    """
+    generator = np.random.RandomState(seed)
+    table_size = (samples_number, sample_dimension)
+    # Draw the table first, then the weights, so a given seed always yields
+    # the same pair.
+    data = generator.uniform(low=low, high=high, size=table_size)
+    weights = generator.uniform(size=samples_number)
+    return data, weights
+def check_labels_equals(left_labels: np.ndarray, right_labels: np.ndarray) -> bool:
+    """Check that two 1-D label arrays describe the same partition.
+
+    Label values may differ between the arrays; what must hold is that every
+    left label is always paired with the same right label and that both
+    arrays use the same number of distinct labels.
+
+    Returns ``True`` on success and raises ``Exception`` with a short message
+    on the first violated condition.
+    """
+    if left_labels.shape != right_labels.shape:
+        raise Exception("Shapes not equal")
+    if len(left_labels.shape) != 1:
+        raise Exception("Shapes size not equals 1")
+    if len(set(left_labels)) != len(set(right_labels)):
+        raise Exception("Cluster counts not equal")
+    # First right label seen for each left label; later pairs must agree.
+    left_to_right = {}
+    for left, right in zip(left_labels, right_labels):
+        if left in left_to_right:
+            if left_to_right[left] != right:
+                raise Exception("Wrong clustering")
+        else:
+            left_to_right[left] = right
+    return True
+def _test_dbscan_big_data_numpy_gen(
+    queue,
+    eps: float,
+    min_samples: int,
+    metric: str,
+    use_weights: bool,
+    low=-100.0,
+    high=100.0,
+    samples_number=1000,
+    sample_dimension=4,
+):
+    """Fit oneDAL and scikit-learn DBSCAN on the same random data and compare labels.
+
+    Data and weights come from ``generate_data``; the weights are dropped when
+    ``use_weights`` is ``False``. The oneDAL estimator is fitted on ``queue``;
+    the scikit-learn estimator serves as the reference.
+    """
+    data, weights = generate_data(
+        low=low,
+        high=high,
+        samples_number=samples_number,
+        sample_dimension=sample_dimension,
+    )
+    sample_weight = None if use_weights is False else weights
+    onedal_model = ONEDAL_DBSCAN(eps=eps, min_samples=min_samples, metric=metric)
+    onedal_model = onedal_model.fit(X=data, sample_weight=sample_weight, queue=queue)
+    sklearn_model = DBSCAN_SKLEARN(metric=metric, eps=eps, min_samples=min_samples)
+    sklearn_model = sklearn_model.fit(X=data, sample_weight=sample_weight)
+    check_labels_equals(onedal_model.labels_, sklearn_model.labels_)
+@pytest.mark.parametrize("metric", ["euclidean"])
+@pytest.mark.parametrize("use_weights", [True, False])
+@pytest.mark.parametrize("queue", get_queues())
+def test_dbscan_big_data_numpy_gen(queue, metric, use_weights: bool):
+    """Compare oneDAL and scikit-learn DBSCAN once, with eps=35.0 and min_samples=6."""
+    _test_dbscan_big_data_numpy_gen(
+        queue,
+        eps=35.0,
+        min_samples=6,
+        metric=metric,
+        use_weights=use_weights,
+    )
+def _test_across_grid_parameter_numpy_gen(queue, metric, use_weights: bool):
+    """Run the DBSCAN comparison for every (eps, min_samples) pair of a fixed grid.
+
+    ``eps`` iterates over ``np.arange(0.05, 0.5, 0.05)`` and ``min_samples``
+    over ``range(5, 15, 1)``; each pair regenerates data inside
+    ``_test_dbscan_big_data_numpy_gen``.
+    """
+    eps_grid = np.arange(0.05, 0.5, 0.05)
+    min_samples_grid = range(5, 15, 1)
+    for eps in eps_grid:
+        for min_samples in min_samples_grid:
+            _test_dbscan_big_data_numpy_gen(
+                queue,
+                eps=eps,
+                min_samples=min_samples,
+                metric=metric,
+                use_weights=use_weights,
+            )
+@pytest.mark.parametrize(
+    "metric",
+    [
+        "euclidean",
+    ],
+)
+@pytest.mark.parametrize("use_weights", [True, False])
+@pytest.mark.parametrize("queue", get_queues())
+def test_across_grid_parameter_numpy_gen(queue, metric, use_weights: bool):
+    """Run the eps/min_samples grid comparison for each queue, metric and weight mode."""
+    _test_across_grid_parameter_numpy_gen(queue, metric=metric, use_weights=use_weights)

onedal/cluster/tests/test_kmeans.py ADDED Viewed

@@ -0,0 +1,88 @@
+# ==============================================================================
+# Copyright 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import numpy as np
+import pytest
+from numpy.testing import assert_array_equal
+from daal4py.sklearn._utils import daal_check_version
+if daal_check_version((2023, "P", 200)):
+    from sklearn.cluster import kmeans_plusplus as init_external
+    from sklearn.neighbors import NearestNeighbors
+    from onedal.cluster import KMeans
+    from onedal.cluster import kmeans_plusplus as init_internal
+    from onedal.tests.utils._device_selection import get_queues
+    def generate_dataset(n_dim, n_cluster, n_points=None, seed=777, dtype=np.float32):
+        """Build a seeded synthetic clustering dataset.
+
+        Cluster centres are drawn uniformly from [-1, 1) in ``n_dim``
+        dimensions. Each cluster's standard deviation is one third of the
+        distance from its centre to the nearest other centre, and
+        ``n_points`` normal samples (default ``n_dim * n_cluster``) are drawn
+        per cluster. The samples are shuffled and cast to ``dtype``.
+
+        Returns ``(centres, scales, data)``.
+        """
+        if n_points is None:
+            # Reference number of points per cluster when none is requested
+            n_points = n_dim * n_cluster
+        rng = np.random.Generator(np.random.MT19937(seed))
+        centers = rng.uniform(low=-1.0, high=+1.0, size=(n_cluster, n_dim))
+        # With two neighbours requested, column 0 must be the centre itself
+        # and column 1 its nearest other centre.
+        neighbors = NearestNeighbors(n_neighbors=2)
+        distances, indices = neighbors.fit(centers).kneighbors(centers)
+        assert_array_equal(indices[:, 0], np.arange(n_cluster))
+        # 3 sigma criterion: three standard deviations span the gap to the
+        # nearest other centre.
+        scales = distances[:, 1] / 3
+        clouds = [
+            rng.normal(loc=centers[c, :], scale=scales[c], size=(n_points, n_dim))
+            for c in range(n_cluster)
+        ]
+        samples = np.concatenate(clouds, axis=0)
+        rng.shuffle(samples, axis=0)
+        return (centers, scales, samples.astype(dtype))
+    @pytest.mark.parametrize("queue", get_queues())
+    @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+    @pytest.mark.parametrize("n_dim", [3, 4, 17, 24])
+    @pytest.mark.parametrize("n_cluster", [9, 11, 32])
+    @pytest.mark.parametrize("pipeline", ["implicit", "external", "internal"])
+    def test_generated_dataset(queue, dtype, n_dim, n_cluster, pipeline):
+        """Check that KMeans recovers the generated centres for each init pipeline.
+
+        ``external`` seeds KMeans with scikit-learn's ``kmeans_plusplus``,
+        ``internal`` with the oneDAL one, and any other value lets KMeans run
+        its own ``"k-means++"`` initialisation.
+        """
+        seed = 777 * n_dim * n_cluster
+        cs, vs, X = generate_dataset(n_dim, n_cluster, seed=seed, dtype=dtype)
+        if pipeline == "external":
+            init, _ = init_external(X, n_cluster)
+        elif pipeline == "internal":
+            init, _ = init_internal(X, n_cluster, queue=queue)
+        else:
+            init = "k-means++"
+        model = KMeans(n_cluster, init=init, max_iter=5)
+        model.fit(X, queue=queue)
+        # Distance from every true centre to the closest fitted centroid
+        nn = NearestNeighbors(n_neighbors=1)
+        distances, _ = nn.fit(model.cluster_centers_).kneighbors(cs)
+        # We have applied 3 sigma rule once
+        within_three_sigma = distances.reshape(-1) <= (vs * 3)
+        hits = np.count_nonzero(within_three_sigma)
+        # TODO: investigate accuracy with kmeans++ init and remove - 1
+        required = int(0.9973 * n_cluster) - 1
+        assert required <= hits

onedal/cluster/tests/test_kmeans_init.py ADDED Viewed

@@ -0,0 +1,93 @@
+# ===============================================================================
+# Copyright 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===============================================================================
+import numpy as np
+import pytest
+from numpy.testing import assert_array_equal
+from daal4py.sklearn._utils import daal_check_version
+if daal_check_version((2023, "P", 200)):
+    from sklearn.datasets import load_breast_cancer
+    from sklearn.metrics import davies_bouldin_score
+    from onedal.cluster import KMeans, kmeans_plusplus
+    from onedal.tests.utils._device_selection import get_queues
+    @pytest.mark.parametrize("queue", get_queues())
+    @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+    @pytest.mark.parametrize("n_cluster", [2, 5, 11, 128])
+    def test_breast_cancer(queue, dtype, n_cluster):
+        """Seed KMeans with kmeans_plusplus on the breast-cancer data and check the score.
+
+        After a single KMeans iteration the Davies-Bouldin score must exceed
+        0.45 for fewer than 20 clusters and 0.55 otherwise.
+        """
+        features, _ = load_breast_cancer(return_X_y=True)
+        features = np.asarray(features).astype(dtype=dtype)
+        centroids, _ = kmeans_plusplus(
+            features, n_cluster, random_state=777, queue=queue
+        )
+        model = KMeans(n_cluster, init=centroids, max_iter=1)
+        labels = model.fit(features).predict(features)
+        score = davies_bouldin_score(features, labels)
+        if n_cluster < 20:
+            threshold = 0.45
+        else:
+            threshold = 0.55
+        assert score > threshold
+    from sklearn.neighbors import NearestNeighbors
+    def generate_dataset(n_dim, n_cluster, n_points=None, seed=777, dtype=np.float32):
+        """Create seeded, well-separated normal clusters for the init tests.
+
+        Returns ``(centres, scales, data)``: uniformly drawn centres in
+        [-1, 1), a per-cluster standard deviation equal to one third of the
+        distance to the nearest other centre, and the shuffled samples cast
+        to ``dtype``. ``n_points`` per cluster defaults to
+        ``n_dim * n_cluster``.
+        """
+        points_per_cluster = (n_dim * n_cluster) if n_points is None else n_points
+        sample_shape = (points_per_cluster, n_dim)
+        rng = np.random.Generator(np.random.MT19937(seed))
+        centers = rng.uniform(low=-1.0, high=+1.0, size=(n_cluster, n_dim))
+        # Each centre's closest neighbour must be itself; the second closest
+        # gives the gap used for the 3 sigma criterion.
+        knn = NearestNeighbors(n_neighbors=2).fit(centers)
+        distances, indices = knn.kneighbors(centers)
+        assert_array_equal(indices[:, 0], np.arange(n_cluster))
+        scales = distances[:, 1] / 3
+        blobs = []
+        for c in range(n_cluster):
+            blobs.append(rng.normal(loc=centers[c, :], scale=scales[c], size=sample_shape))
+        stacked = np.concatenate(blobs, axis=0)
+        rng.shuffle(stacked, axis=0)
+        stacked = stacked.astype(dtype)
+        return (centers, scales, stacked)
+    @pytest.mark.parametrize("queue", get_queues())
+    @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+    @pytest.mark.parametrize("n_dim", [3, 12, 17])
+    @pytest.mark.parametrize("n_cluster", [2, 15, 61])
+    def test_generated_dataset(queue, dtype, n_dim, n_cluster):
+        """Check that kmeans_plusplus init plus three Lloyd iterations recovers the centres.
+
+        A true centre counts as recovered when its closest fitted centroid
+        lies within three of that cluster's standard deviations.
+        """
+        seed = 777 * n_dim * n_cluster
+        cs, vs, X = generate_dataset(n_dim, n_cluster, seed=seed, dtype=dtype)
+        init_data, _ = kmeans_plusplus(X, n_cluster, random_state=seed, queue=queue)
+        # NOTE(review): ``fit`` is called without ``queue`` here although the
+        # initialisation above receives it — confirm this is intended.
+        m = KMeans(n_cluster, init=init_data, max_iter=3, algorithm="lloyd").fit(X)
+        rs_centroids = m.cluster_centers_
+        nn = NearestNeighbors(n_neighbors=1)
+        d, i = nn.fit(rs_centroids).kneighbors(cs)
+        # We have applied 3 sigma rule once (0.9973 coverage, radius vs * 3)
+        desired_accuracy = int(0.9973 * n_cluster)
+        # One miss is tolerated when the neighbour distances are float64
+        if d.dtype == np.float64:
+            desired_accuracy = desired_accuracy - 1
+        correctness = d.reshape(-1) <= (vs * 3)
+        exp_accuracy = np.count_nonzero(correctness)
+        assert desired_accuracy <= exp_accuracy

onedal/common/_base.py ADDED Viewed

@@ -0,0 +1,38 @@
+# ==============================================================================
+# Copyright 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from abc import ABC
+from onedal import _backend
+from ._policy import _get_policy
+def _get_backend(backend, module, submodule=None, method=None, *args, **kwargs):
+    """Look up ``backend.module[.submodule]`` and optionally call ``method`` on it.
+
+    Without ``method`` the resolved object is returned as is. With ``method``
+    the attribute of that name is called with ``*args``/``**kwargs`` and the
+    call's result is returned.
+    """
+    target = getattr(backend, module)
+    if submodule:
+        target = getattr(target, submodule)
+    if not method:
+        return target
+    return getattr(target, method)(*args, **kwargs)
+class BaseEstimator(ABC):
+    """Abstract base exposing backend lookup and policy creation as methods."""
+    def _get_backend(self, module, submodule=None, method=None, *args, **kwargs):
+        """Resolve ``module``/``submodule``/``method`` on ``onedal._backend``.
+
+        Delegates to the module-level ``_get_backend`` helper, passing the
+        imported ``_backend`` object as the lookup root.
+        """
+        return _get_backend(_backend, module, submodule, method, *args, **kwargs)
+    def _get_policy(self, queue, *data):
+        """Delegate to ``_get_policy`` imported from ``._policy``."""
+        return _get_policy(queue, *data)

onedal/common/_estimator_checks.py ADDED Viewed

@@ -0,0 +1,47 @@
+# ===============================================================================
+# Copyright 2022 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===============================================================================
+def _check_is_fitted(estimator, attributes=None, *, msg=None):
+    """Raise if ``estimator`` does not look fitted.
+
+    Parameters
+    ----------
+    estimator : object
+        Must expose ``fit``, or both ``partial_fit`` and ``finalize_fit``.
+    attributes : str, list or tuple of str, optional
+        Attribute names that must all be present. When omitted, the estimator
+        counts as fitted if any instance attribute ends with ``_`` and does
+        not start with ``__``.
+    msg : str, optional
+        Error message template; ``%(name)s`` is replaced with the estimator's
+        class name.
+
+    Raises
+    ------
+    TypeError
+        If ``estimator`` has none of the required fitting methods.
+    AttributeError
+        If the estimator is not fitted.
+    """
+    if msg is None:
+        msg = (
+            "This %(name)s instance is not fitted yet. Call 'fit' with "
+            "appropriate arguments before using this estimator."
+        )
+    is_estimator = hasattr(estimator, "fit") or (
+        hasattr(estimator, "partial_fit") and hasattr(estimator, "finalize_fit")
+    )
+    if not is_estimator:
+        # Wrap in a one-element tuple: with a bare ``% estimator`` a tuple
+        # argument would be unpacked as several format arguments and the
+        # intended message would be lost.
+        raise TypeError("%s is not an estimator instance." % (estimator,))
+    if attributes is not None:
+        if not isinstance(attributes, (list, tuple)):
+            attributes = [attributes]
+        fitted = all(hasattr(estimator, attr) for attr in attributes)
+    else:
+        fitted = any(
+            v.endswith("_") and not v.startswith("__") for v in vars(estimator)
+        )
+    if not fitted:
+        raise AttributeError(msg % {"name": type(estimator).__name__})
+def _is_classifier(estimator):
+    """Return True if ``estimator._estimator_type`` equals ``"classifier"``.
+
+    A missing attribute is treated as ``None``, so the result is False.
+    """
+    return getattr(estimator, "_estimator_type", None) == "classifier"
+def _is_regressor(estimator):
+    """Return True if ``estimator._estimator_type`` equals ``"regressor"``.
+
+    A missing attribute is treated as ``None``, so the result is False.
+    """
+    return getattr(estimator, "_estimator_type", None) == "regressor"

onedal/common/_mixin.py ADDED Viewed

@@ -0,0 +1,62 @@
+# ==============================================================================
+# Copyright 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+class ClusterMixin:
+    """Mixin that tags an estimator as a clusterer and adds ``fit_predict``."""
+    _estimator_type = "clusterer"
+    def fit_predict(self, X, y=None, queue=None, **kwargs):
+        """Fit on ``X`` and return the resulting ``labels_`` attribute.
+
+        ``y`` is accepted but not forwarded to ``fit``; ``queue`` and any
+        extra keyword arguments are passed through.
+        """
+        self.fit(X, queue=queue, **kwargs)
+        return self.labels_
+    def _more_tags(self):
+        """Return the extra estimator tags: an empty ``preserves_dtype`` list."""
+        return {"preserves_dtype": []}
+class ClassifierMixin:
+    """Mixin that tags an estimator as a classifier and adds accuracy scoring."""
+    _estimator_type = "classifier"
+    def score(self, X, y, sample_weight=None, queue=None):
+        """Return ``accuracy_score`` of ``self.predict(X, queue=queue)`` against ``y``.
+
+        ``sample_weight`` is forwarded to ``accuracy_score`` unchanged.
+        """
+        # Imported inside the method rather than at module level —
+        # presumably to keep sklearn.metrics off the import path; confirm.
+        from sklearn.metrics import accuracy_score
+        return accuracy_score(
+            y, self.predict(X, queue=queue), sample_weight=sample_weight
+        )
+    def _more_tags(self):
+        """Return the extra estimator tags: ``requires_y`` set to True."""
+        return {"requires_y": True}
+class RegressorMixin:
+    """Mixin that tags an estimator as a regressor and adds R^2 scoring."""
+    _estimator_type = "regressor"
+    def score(self, X, y, sample_weight=None, queue=None):
+        """Return ``r2_score`` of ``self.predict(X, queue=queue)`` against ``y``.
+
+        ``sample_weight`` is forwarded to ``r2_score`` unchanged.
+        """
+        # Imported inside the method rather than at module level —
+        # presumably to keep sklearn.metrics off the import path; confirm.
+        from sklearn.metrics import r2_score
+        return r2_score(y, self.predict(X, queue=queue), sample_weight=sample_weight)
+    def _more_tags(self):
+        """Return the extra estimator tags: ``requires_y`` set to True."""
+        return {"requires_y": True}
+class TransformerMixin:
+    """Mixin that tags an estimator as a transformer and adds ``fit_transform``."""
+    _estimator_type = "transformer"
+    def fit_transform(self, X, y=None, queue=None, **fit_params):
+        """Fit on ``X`` (and ``y`` when given), then return ``transform(X)``.
+
+        ``y`` is passed to ``fit`` positionally only when it is not ``None``.
+        ``queue`` goes to both ``fit`` and ``transform``; ``fit_params`` go
+        to ``fit`` only.
+        """
+        positional = (X,) if y is None else (X, y)
+        fitted = self.fit(*positional, queue=queue, **fit_params)
+        return fitted.transform(X, queue=queue)

onedal/common/_policy.py ADDED Viewed

@@ -0,0 +1,59 @@
+# ==============================================================================
+# Copyright 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import sys
+from onedal import _backend, _is_dpc_backend
+def _get_policy(queue, *data):
+    """Pick the execution policy for an explicit ``queue`` and the given data.
+    An explicit ``queue`` takes precedence over the queue that ``_get_queue``
+    extracts from ``data``. With the DPC backend a data-parallel policy is
+    returned for whichever queue is available, and a host policy when there is
+    none. Without the DPC backend only the host policy can be returned.
+    Raises RuntimeError when a queue is requested (explicitly or through the
+    data) and the DPC backend is not available.
+    """
+    data_queue = _get_queue(*data)
+    if not _is_dpc_backend:
+        if queue is not None or data_queue is not None:
+            raise RuntimeError(
+                "Operation using the requested SYCL queue requires the DPC backend"
+            )
+        return _HostInteropPolicy()
+    selected_queue = data_queue if queue is None else queue
+    if selected_queue is None:
+        return _HostInteropPolicy()
+    return _DataParallelInteropPolicy(selected_queue)
+def _get_queue(*data):
+    """Return the ``syclobj`` entry of the first argument's USM interface, or None.
+    Only ``data[0]`` is inspected. None is returned when no arguments are given
+    or when the first one has no ``__sycl_usm_array_interface__`` attribute.
+    """
+    if not data:
+        return None
+    leading = data[0]
+    if not hasattr(leading, "__sycl_usm_array_interface__"):
+        return None
+    # Assume that all data reside on the same device
+    return leading.__sycl_usm_array_interface__["syclobj"]
+class _HostInteropPolicy(_backend.host_policy):
+    """Thin Python subclass of the backend ``host_policy``."""
+    def __init__(self):
+        """Initialize the backend base class with no arguments."""
+        super(_HostInteropPolicy, self).__init__()
+if _is_dpc_backend:
+    from onedal._device_offload import DummySyclQueue
+    class _DataParallelInteropPolicy(_backend.data_parallel_policy):
+        """Subclass of the backend ``data_parallel_policy`` that remembers its queue."""
+        def __init__(self, queue):
+            """Keep ``queue`` on ``self._queue`` and initialize the backend policy.
+            A ``DummySyclQueue`` is not handed to the backend directly: the filter
+            string of its ``sycl_device`` is passed instead. Any other queue
+            object is passed through unchanged.
+            """
+            self._queue = queue
+            if isinstance(queue, DummySyclQueue):
+                backend_argument = self._queue.sycl_device.get_filter_string()
+            else:
+                backend_argument = self._queue
+            super().__init__(backend_argument)

onedal/common/_spmd_policy.py ADDED Viewed

@@ -0,0 +1,30 @@
+# ==============================================================================
+# Copyright 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from onedal import _is_spmd_backend
+if _is_spmd_backend:
+    from onedal import _spmd_backend
+    class _SPMDDataParallelInteropPolicy(_spmd_backend.spmd_data_parallel_policy):
+        """Subclass of the SPMD backend policy that remembers its queue."""
+        def __init__(self, queue):
+            """Keep ``queue`` on ``self._queue`` and pass it to the backend policy."""
+            self._queue = queue
+            super().__init__(queue)
+    def _get_spmd_policy(queue):
+        """Return an SPMD data-parallel policy built from ``queue``."""
+        # TODO:
+        # cases when queue is None
+        spmd_policy = _SPMDDataParallelInteropPolicy(queue)
+        return spmd_policy

onedal/common/hyperparameters.py ADDED Viewed

@@ -0,0 +1,125 @@
+# ==============================================================================
+# Copyright 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import logging
+from typing import Any, Dict, Tuple
+from warnings import warn
+from daal4py.sklearn._utils import daal_check_version
+from onedal import _backend
+# Hyperparameter wrappers are built only when the version check below passes
+# (oneDAL 2024.0.0 or newer, per the warning text). Otherwise a warning is
+# issued at import time and the map is left empty.
+if not daal_check_version((2024, "P", 0)):
+    warn("Hyperparameters are supported in oneDAL starting from 2024.0.0 version.")
+    hyperparameters_map = {}
+else:
+    # Attribute names that HyperParameters reads and writes on the instance
+    # itself instead of routing them through the backend getters/setters.
+    _hparams_reserved_words = [
+        "algorithm",
+        "op",
+        "setters",
+        "getters",
+        "backend",
+        "is_default",
+        "to_dict",
+    ]
+    class HyperParameters:
+        """Class for simplified interaction with oneDAL hyperparameters.
+        Overrides `__getattribute__` and `__setattr__` to utilize getters and setters
+        of hyperparameter class from onedal backend.
+        Names listed in `_hparams_reserved_words` are ordinary instance attributes.
+        Any other name is looked up in `getters` (on read) or `setters` (on write).
+        """
+        def __init__(self, algorithm, op, setters, getters, backend):
+            """Store the wrapper state.
+            Parameters
+            ----------
+            algorithm, op
+                Identify the wrapped hyperparameters; used in log and error messages.
+            setters, getters
+                Mappings from hyperparameter name to the callable that sets or
+                returns its value.
+            backend
+                The wrapped backend hyperparameters object.
+            """
+            self.algorithm = algorithm
+            self.op = op
+            self.setters = setters
+            self.getters = getters
+            self.backend = backend
+            # Flipped to False by the first assignment that goes through a setter.
+            self.is_default = True
+        def __getattribute__(self, __name):
+            """Return a reserved attribute, a hyperparameter value or a builtin attribute.
+            Raises AttributeError with a hyperparameter-specific message when the
+            name is none of those.
+            """
+            if __name in _hparams_reserved_words:
+                if __name == "backend":
+                    # `backend` attribute accessed only for oneDAL kernel calls
+                    logger = logging.getLogger("sklearnex")
+                    # Formatting the message calls every getter through to_dict(),
+                    # so build it only when the record will actually be emitted.
+                    if logger.isEnabledFor(logging.DEBUG):
+                        logger.debug(
+                            "Using next hyperparameters for "
+                            f"'{self.algorithm}.{self.op}': {self.to_dict()}"
+                        )
+                return super().__getattribute__(__name)
+            elif __name in self.getters.keys():
+                return self.getters[__name]()
+            try:
+                # try to return attribute from base class
+                # required to read builtin attributes like __class__, __doc__, etc.
+                # which are used in debuggers
+                return super().__getattribute__(__name)
+            except AttributeError:
+                # raise an AttributeError with a hyperparameter-specific message
+                # for easier debugging
+                raise AttributeError(
+                    f"Unknown attribute '{__name}' in "
+                    f"'{self.algorithm}.{self.op}' hyperparameters"
+                )
+        def __setattr__(self, __name, __value):
+            """Assign a reserved attribute or forward the value to the matching setter.
+            Raises ValueError (not AttributeError) when the name is neither
+            reserved nor a key of `setters`.
+            """
+            if __name in _hparams_reserved_words:
+                super().__setattr__(__name, __value)
+            elif __name in self.setters.keys():
+                # Mark as non-default before calling the setter, as before.
+                self.is_default = False
+                self.setters[__name](__value)
+            else:
+                raise ValueError(
+                    f"Unknown attribute '{__name}' in "
+                    f"'{self.algorithm}.{self.op}' hyperparameters"
+                )
+        def to_dict(self):
+            """Return a dict mapping every hyperparameter name to its current value."""
+            return {name: getter() for name, getter in self.getters.items()}
+    def get_methods_with_prefix(obj, prefix):
+        """Collect the attributes of ``obj`` whose names start with ``prefix``.
+        Returns a dict that maps each matching name, with the leading ``prefix``
+        removed, to the attribute obtained through ``getattr``. Names come from
+        ``dir(obj)``, so non-callable attributes that match are included too.
+        """
+        # Strip only the leading prefix. str.replace would also delete later
+        # occurrences, e.g. the "set_" hidden inside "set_reset_count".
+        prefix_length = len(prefix)
+        return {
+            name[prefix_length:]: getattr(obj, name)
+            for name in dir(obj)
+            if name.startswith(prefix)
+        }
+    # Backend hyperparameter objects keyed by (algorithm, operation).
+    hyperparameters_backend: Dict[Tuple[str, str], Any] = {
+        (
+            "linear_regression",
+            "train",
+        ): _backend.linear_model.regression.train_hyperparameters(),
+        ("covariance", "compute"): _backend.covariance.compute_hyperparameters(),
+    }
+    # Decision forest inference hyperparameters are registered only when this
+    # additional version check passes.
+    if daal_check_version((2024, "P", 300)):
+        df_infer_hp = _backend.decision_forest.infer_hyperparameters
+        hyperparameters_backend[("decision_forest", "infer")] = df_infer_hp()
+    # Wrap every backend object in a HyperParameters instance under the same key.
+    hyperparameters_map = {}
+    for (algorithm, op), hyperparameters in hyperparameters_backend.items():
+        # Accessors found on the backend object, keyed by name without the prefix.
+        setters = get_methods_with_prefix(hyperparameters, "set_")
+        getters = get_methods_with_prefix(hyperparameters, "get_")
+        # Every hyperparameter must expose both a setter and a getter.
+        if set(setters.keys()) != set(getters.keys()):
+            raise ValueError(
+                f"Setters and getters in '{algorithm}.{op}' "
+                "hyperparameters wrapper do not correspond."
+            )
+        hyperparameters_map[(algorithm, op)] = HyperParameters(
+            algorithm, op, setters, getters, hyperparameters
+        )
+def get_hyperparameters(algorithm, op):
+    """Return the entry of ``hyperparameters_map`` for ``(algorithm, op)``.
+    None is returned when the pair has no entry in the map.
+    """
+    lookup_key = (algorithm, op)
+    return hyperparameters_map.get(lookup_key)