scikit-learn-intelex 2025.4.0__py313-none-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-learn-intelex might be problematic. Click here for more details.
- daal4py/__init__.py +73 -0
- daal4py/__main__.py +58 -0
- daal4py/_daal4py.cpython-313-x86_64-linux-gnu.so +0 -0
- daal4py/doc/third-party-programs.txt +424 -0
- daal4py/mb/__init__.py +19 -0
- daal4py/mb/model_builders.py +377 -0
- daal4py/mpi_transceiver.cpython-313-x86_64-linux-gnu.so +0 -0
- daal4py/sklearn/__init__.py +40 -0
- daal4py/sklearn/_n_jobs_support.py +248 -0
- daal4py/sklearn/_utils.py +245 -0
- daal4py/sklearn/cluster/__init__.py +20 -0
- daal4py/sklearn/cluster/dbscan.py +165 -0
- daal4py/sklearn/cluster/k_means.py +597 -0
- daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
- daal4py/sklearn/decomposition/__init__.py +19 -0
- daal4py/sklearn/decomposition/_pca.py +524 -0
- daal4py/sklearn/ensemble/AdaBoostClassifier.py +196 -0
- daal4py/sklearn/ensemble/GBTDAAL.py +337 -0
- daal4py/sklearn/ensemble/__init__.py +27 -0
- daal4py/sklearn/ensemble/_forest.py +1397 -0
- daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
- daal4py/sklearn/linear_model/__init__.py +29 -0
- daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
- daal4py/sklearn/linear_model/_linear.py +272 -0
- daal4py/sklearn/linear_model/_ridge.py +325 -0
- daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
- daal4py/sklearn/linear_model/linear.py +17 -0
- daal4py/sklearn/linear_model/logistic_loss.py +195 -0
- daal4py/sklearn/linear_model/logistic_path.py +1026 -0
- daal4py/sklearn/linear_model/ridge.py +17 -0
- daal4py/sklearn/linear_model/tests/test_linear.py +208 -0
- daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
- daal4py/sklearn/manifold/__init__.py +19 -0
- daal4py/sklearn/manifold/_t_sne.py +405 -0
- daal4py/sklearn/metrics/__init__.py +20 -0
- daal4py/sklearn/metrics/_pairwise.py +236 -0
- daal4py/sklearn/metrics/_ranking.py +210 -0
- daal4py/sklearn/model_selection/__init__.py +19 -0
- daal4py/sklearn/model_selection/_split.py +309 -0
- daal4py/sklearn/model_selection/tests/test_split.py +56 -0
- daal4py/sklearn/monkeypatch/__init__.py +0 -0
- daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
- daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
- daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
- daal4py/sklearn/monkeypatch/tests/test_patching.py +90 -0
- daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +117 -0
- daal4py/sklearn/neighbors/__init__.py +21 -0
- daal4py/sklearn/neighbors/_base.py +503 -0
- daal4py/sklearn/neighbors/_classification.py +139 -0
- daal4py/sklearn/neighbors/_regression.py +74 -0
- daal4py/sklearn/neighbors/_unsupervised.py +55 -0
- daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
- daal4py/sklearn/svm/__init__.py +19 -0
- daal4py/sklearn/svm/svm.py +734 -0
- daal4py/sklearn/utils/__init__.py +21 -0
- daal4py/sklearn/utils/base.py +75 -0
- daal4py/sklearn/utils/tests/test_utils.py +51 -0
- daal4py/sklearn/utils/validation.py +696 -0
- onedal/__init__.py +83 -0
- onedal/_config.py +54 -0
- onedal/_device_offload.py +204 -0
- onedal/_onedal_py_dpc.cpython-313-x86_64-linux-gnu.so +0 -0
- onedal/_onedal_py_host.cpython-313-x86_64-linux-gnu.so +0 -0
- onedal/_onedal_py_spmd_dpc.cpython-313-x86_64-linux-gnu.so +0 -0
- onedal/basic_statistics/__init__.py +20 -0
- onedal/basic_statistics/basic_statistics.py +107 -0
- onedal/basic_statistics/incremental_basic_statistics.py +175 -0
- onedal/basic_statistics/tests/test_basic_statistics.py +242 -0
- onedal/basic_statistics/tests/test_incremental_basic_statistics.py +279 -0
- onedal/basic_statistics/tests/utils.py +50 -0
- onedal/cluster/__init__.py +27 -0
- onedal/cluster/dbscan.py +105 -0
- onedal/cluster/kmeans.py +557 -0
- onedal/cluster/kmeans_init.py +112 -0
- onedal/cluster/tests/test_dbscan.py +125 -0
- onedal/cluster/tests/test_kmeans.py +88 -0
- onedal/cluster/tests/test_kmeans_init.py +93 -0
- onedal/common/_base.py +38 -0
- onedal/common/_estimator_checks.py +47 -0
- onedal/common/_mixin.py +62 -0
- onedal/common/_policy.py +55 -0
- onedal/common/_spmd_policy.py +30 -0
- onedal/common/hyperparameters.py +125 -0
- onedal/common/tests/test_policy.py +76 -0
- onedal/common/tests/test_sycl.py +128 -0
- onedal/covariance/__init__.py +20 -0
- onedal/covariance/covariance.py +122 -0
- onedal/covariance/incremental_covariance.py +161 -0
- onedal/covariance/tests/test_covariance.py +50 -0
- onedal/covariance/tests/test_incremental_covariance.py +190 -0
- onedal/datatypes/__init__.py +19 -0
- onedal/datatypes/_data_conversion.py +121 -0
- onedal/datatypes/tests/common.py +126 -0
- onedal/datatypes/tests/test_data.py +475 -0
- onedal/decomposition/__init__.py +20 -0
- onedal/decomposition/incremental_pca.py +214 -0
- onedal/decomposition/pca.py +186 -0
- onedal/decomposition/tests/test_incremental_pca.py +285 -0
- onedal/ensemble/__init__.py +29 -0
- onedal/ensemble/forest.py +736 -0
- onedal/ensemble/tests/test_random_forest.py +97 -0
- onedal/linear_model/__init__.py +27 -0
- onedal/linear_model/incremental_linear_model.py +292 -0
- onedal/linear_model/linear_model.py +325 -0
- onedal/linear_model/logistic_regression.py +247 -0
- onedal/linear_model/tests/test_incremental_linear_regression.py +213 -0
- onedal/linear_model/tests/test_incremental_ridge_regression.py +171 -0
- onedal/linear_model/tests/test_linear_regression.py +259 -0
- onedal/linear_model/tests/test_logistic_regression.py +95 -0
- onedal/linear_model/tests/test_ridge.py +95 -0
- onedal/neighbors/__init__.py +19 -0
- onedal/neighbors/neighbors.py +763 -0
- onedal/neighbors/tests/test_knn_classification.py +49 -0
- onedal/primitives/__init__.py +27 -0
- onedal/primitives/get_tree.py +25 -0
- onedal/primitives/kernel_functions.py +152 -0
- onedal/primitives/tests/test_kernel_functions.py +159 -0
- onedal/spmd/__init__.py +25 -0
- onedal/spmd/_base.py +30 -0
- onedal/spmd/basic_statistics/__init__.py +20 -0
- onedal/spmd/basic_statistics/basic_statistics.py +30 -0
- onedal/spmd/basic_statistics/incremental_basic_statistics.py +71 -0
- onedal/spmd/cluster/__init__.py +28 -0
- onedal/spmd/cluster/dbscan.py +23 -0
- onedal/spmd/cluster/kmeans.py +56 -0
- onedal/spmd/covariance/__init__.py +20 -0
- onedal/spmd/covariance/covariance.py +26 -0
- onedal/spmd/covariance/incremental_covariance.py +83 -0
- onedal/spmd/decomposition/__init__.py +20 -0
- onedal/spmd/decomposition/incremental_pca.py +124 -0
- onedal/spmd/decomposition/pca.py +26 -0
- onedal/spmd/ensemble/__init__.py +19 -0
- onedal/spmd/ensemble/forest.py +28 -0
- onedal/spmd/linear_model/__init__.py +21 -0
- onedal/spmd/linear_model/incremental_linear_model.py +101 -0
- onedal/spmd/linear_model/linear_model.py +30 -0
- onedal/spmd/linear_model/logistic_regression.py +38 -0
- onedal/spmd/neighbors/__init__.py +19 -0
- onedal/spmd/neighbors/neighbors.py +75 -0
- onedal/svm/__init__.py +19 -0
- onedal/svm/svm.py +556 -0
- onedal/svm/tests/test_csr_svm.py +351 -0
- onedal/svm/tests/test_nusvc.py +204 -0
- onedal/svm/tests/test_nusvr.py +210 -0
- onedal/svm/tests/test_svc.py +176 -0
- onedal/svm/tests/test_svr.py +243 -0
- onedal/tests/test_common.py +57 -0
- onedal/tests/utils/_dataframes_support.py +162 -0
- onedal/tests/utils/_device_selection.py +102 -0
- onedal/utils/__init__.py +49 -0
- onedal/utils/_array_api.py +81 -0
- onedal/utils/_dpep_helpers.py +56 -0
- onedal/utils/tests/test_validation.py +142 -0
- onedal/utils/validation.py +464 -0
- scikit_learn_intelex-2025.4.0.dist-info/LICENSE.txt +202 -0
- scikit_learn_intelex-2025.4.0.dist-info/METADATA +190 -0
- scikit_learn_intelex-2025.4.0.dist-info/RECORD +282 -0
- scikit_learn_intelex-2025.4.0.dist-info/WHEEL +5 -0
- scikit_learn_intelex-2025.4.0.dist-info/top_level.txt +3 -0
- sklearnex/__init__.py +66 -0
- sklearnex/__main__.py +58 -0
- sklearnex/_config.py +116 -0
- sklearnex/_device_offload.py +126 -0
- sklearnex/_utils.py +177 -0
- sklearnex/basic_statistics/__init__.py +20 -0
- sklearnex/basic_statistics/basic_statistics.py +261 -0
- sklearnex/basic_statistics/incremental_basic_statistics.py +352 -0
- sklearnex/basic_statistics/tests/test_basic_statistics.py +405 -0
- sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +455 -0
- sklearnex/cluster/__init__.py +20 -0
- sklearnex/cluster/dbscan.py +197 -0
- sklearnex/cluster/k_means.py +397 -0
- sklearnex/cluster/tests/test_dbscan.py +38 -0
- sklearnex/cluster/tests/test_kmeans.py +157 -0
- sklearnex/conftest.py +82 -0
- sklearnex/covariance/__init__.py +19 -0
- sklearnex/covariance/incremental_covariance.py +405 -0
- sklearnex/covariance/tests/test_incremental_covariance.py +287 -0
- sklearnex/decomposition/__init__.py +19 -0
- sklearnex/decomposition/pca.py +427 -0
- sklearnex/decomposition/tests/test_pca.py +58 -0
- sklearnex/dispatcher.py +534 -0
- sklearnex/doc/third-party-programs.txt +424 -0
- sklearnex/ensemble/__init__.py +29 -0
- sklearnex/ensemble/_forest.py +2029 -0
- sklearnex/ensemble/tests/test_forest.py +140 -0
- sklearnex/glob/__main__.py +72 -0
- sklearnex/glob/dispatcher.py +101 -0
- sklearnex/linear_model/__init__.py +32 -0
- sklearnex/linear_model/coordinate_descent.py +30 -0
- sklearnex/linear_model/incremental_linear.py +495 -0
- sklearnex/linear_model/incremental_ridge.py +432 -0
- sklearnex/linear_model/linear.py +346 -0
- sklearnex/linear_model/logistic_regression.py +415 -0
- sklearnex/linear_model/ridge.py +390 -0
- sklearnex/linear_model/tests/test_incremental_linear.py +267 -0
- sklearnex/linear_model/tests/test_incremental_ridge.py +214 -0
- sklearnex/linear_model/tests/test_linear.py +142 -0
- sklearnex/linear_model/tests/test_logreg.py +134 -0
- sklearnex/linear_model/tests/test_ridge.py +256 -0
- sklearnex/manifold/__init__.py +19 -0
- sklearnex/manifold/t_sne.py +26 -0
- sklearnex/manifold/tests/test_tsne.py +250 -0
- sklearnex/metrics/__init__.py +23 -0
- sklearnex/metrics/pairwise.py +22 -0
- sklearnex/metrics/ranking.py +20 -0
- sklearnex/metrics/tests/test_metrics.py +39 -0
- sklearnex/model_selection/__init__.py +21 -0
- sklearnex/model_selection/split.py +22 -0
- sklearnex/model_selection/tests/test_model_selection.py +34 -0
- sklearnex/neighbors/__init__.py +27 -0
- sklearnex/neighbors/_lof.py +236 -0
- sklearnex/neighbors/common.py +310 -0
- sklearnex/neighbors/knn_classification.py +231 -0
- sklearnex/neighbors/knn_regression.py +207 -0
- sklearnex/neighbors/knn_unsupervised.py +178 -0
- sklearnex/neighbors/tests/test_neighbors.py +82 -0
- sklearnex/preview/__init__.py +17 -0
- sklearnex/preview/covariance/__init__.py +19 -0
- sklearnex/preview/covariance/covariance.py +142 -0
- sklearnex/preview/covariance/tests/test_covariance.py +66 -0
- sklearnex/preview/decomposition/__init__.py +19 -0
- sklearnex/preview/decomposition/incremental_pca.py +244 -0
- sklearnex/preview/decomposition/tests/test_incremental_pca.py +336 -0
- sklearnex/spmd/__init__.py +25 -0
- sklearnex/spmd/basic_statistics/__init__.py +20 -0
- sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
- sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
- sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
- sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +306 -0
- sklearnex/spmd/cluster/__init__.py +30 -0
- sklearnex/spmd/cluster/dbscan.py +50 -0
- sklearnex/spmd/cluster/kmeans.py +21 -0
- sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
- sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +173 -0
- sklearnex/spmd/covariance/__init__.py +20 -0
- sklearnex/spmd/covariance/covariance.py +21 -0
- sklearnex/spmd/covariance/incremental_covariance.py +37 -0
- sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
- sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
- sklearnex/spmd/decomposition/__init__.py +20 -0
- sklearnex/spmd/decomposition/incremental_pca.py +30 -0
- sklearnex/spmd/decomposition/pca.py +21 -0
- sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
- sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
- sklearnex/spmd/ensemble/__init__.py +19 -0
- sklearnex/spmd/ensemble/forest.py +71 -0
- sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
- sklearnex/spmd/linear_model/__init__.py +21 -0
- sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
- sklearnex/spmd/linear_model/linear_model.py +21 -0
- sklearnex/spmd/linear_model/logistic_regression.py +21 -0
- sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +331 -0
- sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
- sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +162 -0
- sklearnex/spmd/neighbors/__init__.py +19 -0
- sklearnex/spmd/neighbors/neighbors.py +25 -0
- sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
- sklearnex/svm/__init__.py +29 -0
- sklearnex/svm/_common.py +339 -0
- sklearnex/svm/nusvc.py +371 -0
- sklearnex/svm/nusvr.py +170 -0
- sklearnex/svm/svc.py +399 -0
- sklearnex/svm/svr.py +167 -0
- sklearnex/svm/tests/test_svm.py +93 -0
- sklearnex/tests/test_common.py +491 -0
- sklearnex/tests/test_config.py +123 -0
- sklearnex/tests/test_hyperparameters.py +43 -0
- sklearnex/tests/test_memory_usage.py +347 -0
- sklearnex/tests/test_monkeypatch.py +269 -0
- sklearnex/tests/test_n_jobs_support.py +108 -0
- sklearnex/tests/test_parallel.py +48 -0
- sklearnex/tests/test_patching.py +377 -0
- sklearnex/tests/test_run_to_run_stability.py +326 -0
- sklearnex/tests/utils/__init__.py +48 -0
- sklearnex/tests/utils/base.py +436 -0
- sklearnex/tests/utils/spmd.py +198 -0
- sklearnex/utils/__init__.py +19 -0
- sklearnex/utils/_array_api.py +82 -0
- sklearnex/utils/parallel.py +59 -0
- sklearnex/utils/tests/test_validation.py +238 -0
- sklearnex/utils/validation.py +208 -0
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# ==============================================================================
|
|
2
|
+
# Copyright 2024 Intel Corporation
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# ==============================================================================
|
|
16
|
+
|
|
17
|
+
"""Tools to support array_api."""
|
|
18
|
+
|
|
19
|
+
import numpy as np
|
|
20
|
+
|
|
21
|
+
from daal4py.sklearn._utils import sklearn_check_version
|
|
22
|
+
from onedal.utils._array_api import _get_sycl_namespace
|
|
23
|
+
|
|
24
|
+
if sklearn_check_version("1.2"):
|
|
25
|
+
from sklearn.utils._array_api import get_namespace as sklearn_get_namespace
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def get_namespace(*arrays):
    """Get namespace of arrays.

    Introspect `arrays` arguments and return their common Array API
    compatible namespace object, if any. NumPy 1.22 and later can
    construct such containers using the `numpy.array_api` namespace
    for instance.

    This function will return the namespace of SYCL-related arrays
    which define the __sycl_usm_array_interface__ attribute
    regardless of array_api support, the configuration of
    array_api_dispatch, or scikit-learn version.

    See: https://numpy.org/neps/nep-0047-array-api-standard.html

    If `arrays` are regular numpy arrays, an instance of the
    `_NumPyApiWrapper` compatibility wrapper is returned instead.

    Namespace support is not enabled by default. To enable it
    call:

        sklearn.set_config(array_api_dispatch=True)

    or:

        with sklearn.config_context(array_api_dispatch=True):
            # your code here

    Otherwise an instance of the `_NumPyApiWrapper`
    compatibility wrapper is always returned irrespective of
    the fact that arrays implement the `__array_namespace__`
    protocol or not.

    Parameters
    ----------
    *arrays : array objects
        Array objects.

    Returns
    -------
    namespace : module
        Namespace shared by array objects.

    is_array_api : bool
        True if the arrays are containers that implement the Array API spec.
    """

    # SYCL USM arrays take precedence over generic array_api dispatching:
    # their namespace is honored regardless of the sklearn version.
    sycl_type, xp, is_array_api_compliant = _get_sycl_namespace(*arrays)

    if sycl_type:
        return xp, is_array_api_compliant
    elif sklearn_check_version("1.2"):
        # Delegate to sklearn's own introspection (array_api support
        # landed in sklearn 1.2).
        return sklearn_get_namespace(*arrays)
    else:
        # Older sklearn: fall back to plain numpy semantics.
        return np, False
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# ===============================================================================
|
|
2
|
+
# Copyright 2023 Intel Corporation
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# ===============================================================================
|
|
16
|
+
|
|
17
|
+
import warnings
|
|
18
|
+
from functools import update_wrapper
|
|
19
|
+
|
|
20
|
+
from .._config import config_context, get_config
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class _FuncWrapper:
    """Callable wrapper that replays a captured configuration around a call.

    The configuration is attached later via :meth:`with_config`; calling the
    wrapper without one emits a warning and proceeds with an empty config.
    """

    def __init__(self, function):
        self.function = function
        update_wrapper(self, self.function)

    def with_config(self, config):
        # Store the configuration to restore in the worker; return self to
        # allow fluent chaining at the call site.
        self.config = config
        return self

    def __call__(self, *args, **kwargs):
        active_config = getattr(self, "config", None)
        if active_config is None:
            # `with_config` was never invoked: warn that the scikit-learn
            # configuration cannot be propagated, then run with defaults.
            warnings.warn(
                "`sklearn.utils.parallel.delayed` should be used with "
                "`sklearn.utils.parallel.Parallel` to make it possible to propagate "
                "the scikit-learn configuration of the current thread to the "
                "joblib workers.",
                UserWarning,
            )
            active_config = {}
        with config_context(**active_config):
            return self.function(*args, **kwargs)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class _FuncWrapperOld:
    """Snapshot the global configuration at creation time and replay it on call."""

    def __init__(self, function):
        # Capture the configuration immediately so the later call runs under
        # the settings that were active when this wrapper was constructed.
        self.config = get_config()
        self.function = function
        update_wrapper(self, self.function)

    def __call__(self, *args, **kwargs):
        with config_context(**self.config):
            return self.function(*args, **kwargs)
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
# ==============================================================================
|
|
2
|
+
# Copyright contributors to the oneDAL project
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# ==============================================================================
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
import numpy.random as rand
|
|
19
|
+
import pytest
|
|
20
|
+
|
|
21
|
+
from daal4py.sklearn._utils import sklearn_check_version
|
|
22
|
+
from onedal.tests.utils._dataframes_support import (
|
|
23
|
+
_convert_to_dataframe,
|
|
24
|
+
get_dataframes_and_queues,
|
|
25
|
+
)
|
|
26
|
+
from sklearnex import config_context
|
|
27
|
+
from sklearnex.tests.utils import DummyEstimator, gen_dataset
|
|
28
|
+
from sklearnex.utils.validation import _check_sample_weight, validate_data
|
|
29
|
+
|
|
30
|
+
# array_api support starts in sklearn 1.2, and array_api_strict conformance
# starts in sklearn 1.3; gate the optional dataframe kinds accordingly.
_supported_df_kinds = ["numpy", "pandas"]
if sklearn_check_version("1.2"):
    _supported_df_kinds.append("dpctl")
if sklearn_check_version("1.3"):
    _supported_df_kinds.append("array_api")
_dataframes_supported = ",".join(_supported_df_kinds)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize(
    "shape",
    [
        [16, 2048],
        [2**16 + 3],
        [1000, 1000],
    ],
)
@pytest.mark.parametrize("ensure_all_finite", ["allow-nan", True])
def test_sum_infinite_actually_finite(dtype, shape, ensure_all_finite):
    """An array of maximal-but-finite values must pass validation unchanged in type."""
    est = DummyEstimator()
    X = np.atleast_2d(np.full(shape, np.finfo(dtype).max, dtype=dtype))
    X_array = validate_data(est, X, ensure_all_finite=ensure_all_finite)
    assert type(X_array) == type(X)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize(
    "shape",
    [
        [16, 2048],
        [2**16 + 3],
        [1000, 1000],
    ],
)
@pytest.mark.parametrize("ensure_all_finite", ["allow-nan", True])
@pytest.mark.parametrize("check", ["inf", "NaN", None])
@pytest.mark.parametrize("seed", [0, 123456])
@pytest.mark.parametrize(
    "dataframe, queue",
    get_dataframes_and_queues(_dataframes_supported),
)
def test_validate_data_random_location(
    dataframe, queue, dtype, shape, ensure_all_finite, check, seed
):
    """Plant one inf/NaN at a random index and verify validate_data raises
    (or passes) according to ``ensure_all_finite``."""
    est = DummyEstimator()
    # Seed first so both the data and the injected location are reproducible.
    rand.seed(seed)
    X = rand.uniform(high=np.finfo(dtype).max, size=shape).astype(dtype)

    if check:
        # NOTE(review): randint's upper bound is exclusive, so the very last
        # element is never selected — presumably acceptable for this test.
        loc = rand.randint(0, X.size - 1)
        X.reshape((-1,))[loc] = float(check)

    # column heavy pandas inputs are very slow in sklearn's check_array even without
    # the finite check, just transpose inputs to guarantee fast processing in tests
    X = _convert_to_dataframe(
        np.atleast_2d(X).T,
        target_df=dataframe,
        sycl_queue=queue,
    )

    dispatch = {}
    if sklearn_check_version("1.2") and dataframe != "pandas":
        # Enable sklearn's array_api dispatching for non-pandas containers.
        dispatch["array_api_dispatch"] = True

    with config_context(**dispatch):

        allow_nan = ensure_all_finite == "allow-nan"
        if check is None or (allow_nan and check == "NaN"):
            # No offending value (or NaN explicitly allowed): must not raise.
            validate_data(est, X, ensure_all_finite=ensure_all_finite)
        else:
            # pytest.raises treats `match` as a regex; the bracketed form
            # matches either wording of sklearn's error message.
            type_err = "infinity" if allow_nan else "[NaN|infinity]"
            msg_err = f"Input X contains {type_err}"
            with pytest.raises(ValueError, match=msg_err):
                validate_data(est, X, ensure_all_finite=ensure_all_finite)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("ensure_all_finite", ["allow-nan", True])
@pytest.mark.parametrize("check", ["inf", "NaN", None])
@pytest.mark.parametrize("seed", [0, 123456])
@pytest.mark.parametrize(
    "dataframe, queue",
    get_dataframes_and_queues(_dataframes_supported),
)
def test_validate_data_random_shape_and_location(
    dataframe, queue, dtype, ensure_all_finite, check, seed
):
    """Same as the random-location test, but with a randomized 1-D length."""
    est = DummyEstimator()
    lb, ub = 32768, 1048576  # lb is a patching condition, ub 2^20
    # Seed first so the shape, data, and injected location are reproducible.
    rand.seed(seed)
    X = rand.uniform(high=np.finfo(dtype).max, size=rand.randint(lb, ub)).astype(dtype)

    if check:
        # NOTE(review): randint's upper bound is exclusive, so the very last
        # element is never selected — presumably acceptable for this test.
        loc = rand.randint(0, X.size - 1)
        X[loc] = float(check)

    # Transpose into a single-column layout for fast pandas processing.
    X = _convert_to_dataframe(
        np.atleast_2d(X).T,
        target_df=dataframe,
        sycl_queue=queue,
    )

    dispatch = {}
    if sklearn_check_version("1.2") and dataframe != "pandas":
        # Enable sklearn's array_api dispatching for non-pandas containers.
        dispatch["array_api_dispatch"] = True

    with config_context(**dispatch):

        allow_nan = ensure_all_finite == "allow-nan"
        if check is None or (allow_nan and check == "NaN"):
            # No offending value (or NaN explicitly allowed): must not raise.
            validate_data(est, X, ensure_all_finite=ensure_all_finite)
        else:
            # pytest.raises treats `match` as a regex; the bracketed form
            # matches either wording of sklearn's error message.
            type_err = "infinity" if allow_nan else "[NaN|infinity]"
            msg_err = f"Input X contains {type_err}."
            with pytest.raises(ValueError, match=msg_err):
                validate_data(est, X, ensure_all_finite=ensure_all_finite)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("check", ["inf", "NaN", None])
@pytest.mark.parametrize("seed", [0, 123456])
@pytest.mark.parametrize(
    "dataframe, queue",
    get_dataframes_and_queues(_dataframes_supported),
)
def test__check_sample_weight_random_shape_and_location(
    dataframe, queue, dtype, check, seed
):
    """Verify _check_sample_weight raises on an injected inf/NaN in the weights
    and otherwise returns the expected container type."""
    # This testing assumes that array api inputs to validate_data will only occur
    # with sklearn array_api support which began in sklearn 1.2. This would assume
    # that somewhere upstream of the validate_data call, a data conversion of dpnp,
    # dpctl, or array_api inputs to numpy inputs would have occurred.

    lb, ub = 32768, 1048576  # lb is a patching condition, ub 2^20
    # Seed first so shape, data, weights, and injected location are reproducible.
    rand.seed(seed)
    shape = (rand.randint(lb, ub), 2)
    X = rand.uniform(high=np.finfo(dtype).max, size=shape).astype(dtype)
    sample_weight = rand.uniform(high=np.finfo(dtype).max, size=shape[0]).astype(dtype)

    if check:
        # Corrupt a single random weight; X itself stays finite.
        loc = rand.randint(0, shape[0] - 1)
        sample_weight[loc] = float(check)

    X = _convert_to_dataframe(
        X,
        target_df=dataframe,
        sycl_queue=queue,
    )
    sample_weight = _convert_to_dataframe(
        sample_weight,
        target_df=dataframe,
        sycl_queue=queue,
    )

    dispatch = {}
    if sklearn_check_version("1.2") and dataframe != "pandas":
        # Enable sklearn's array_api dispatching for non-pandas containers.
        dispatch["array_api_dispatch"] = True

    with config_context(**dispatch):

        if check is None:
            X_out = _check_sample_weight(sample_weight, X)
            if dispatch:
                # With array_api dispatch active, the container type must round-trip.
                assert type(X_out) == type(X)
            else:
                # Without dispatch, inputs are converted to numpy arrays.
                assert isinstance(X_out, np.ndarray)
        else:
            # pytest.raises treats `match` as a regex; the bracketed form
            # matches either wording of sklearn's error message.
            msg_err = "Input sample_weight contains [NaN|infinity]"
            with pytest.raises(ValueError, match=msg_err):
                X_out = _check_sample_weight(sample_weight, X)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize(
    "dataframe, queue",
    get_dataframes_and_queues(_dataframes_supported),
)
def test_validate_data_output(dtype, dataframe, queue):
    """Check that validate_data preserves the input container type when
    array_api dispatch is active, and yields numpy arrays otherwise."""
    # This testing assumes that array api inputs to validate_data will only occur
    # with sklearn array_api support which began in sklearn 1.2. This would assume
    # that somewhere upstream of the validate_data call, a data conversion of dpnp,
    # dpctl, or array_api inputs to numpy inputs would have occurred.
    est = DummyEstimator()
    X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)[0]

    dispatch = {}
    if sklearn_check_version("1.2") and dataframe != "pandas":
        # Enable sklearn's array_api dispatching for non-pandas containers.
        dispatch["array_api_dispatch"] = True

    with config_context(**dispatch):
        X_out, y_out = validate_data(est, X, y)
        # check sklearn validate_data operations work underneath
        X_array = validate_data(est, X, reset=False)

    # Compare each original input against its validated counterpart(s);
    # y has no `reset=False` pass, hence the None placeholder.
    for orig, first, second in ((X, X_out, X_array), (y, y_out, None)):
        if dispatch:
            assert type(orig) == type(
                first
            ), f"validate_data converted {type(orig)} to {type(first)}"
            if second is not None:
                assert type(orig) == type(
                    second
                ), f"from_array converted {type(orig)} to {type(second)}"
        else:
            # array_api_strict from sklearn < 1.2 and pandas will convert to numpy arrays
            assert isinstance(first, np.ndarray)
            assert second is None or isinstance(second, np.ndarray)
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# ===============================================================================
|
|
2
|
+
# Copyright 2022 Intel Corporation
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# ===============================================================================
|
|
16
|
+
|
|
17
|
+
import numbers
|
|
18
|
+
|
|
19
|
+
import scipy.sparse as sp
|
|
20
|
+
from sklearn.utils.validation import _assert_all_finite as _sklearn_assert_all_finite
|
|
21
|
+
from sklearn.utils.validation import _num_samples, check_array, check_non_negative
|
|
22
|
+
|
|
23
|
+
from daal4py.sklearn._utils import daal_check_version, sklearn_check_version
|
|
24
|
+
|
|
25
|
+
from ._array_api import get_namespace
|
|
26
|
+
|
|
27
|
+
if sklearn_check_version("1.6"):
    # sklearn >= 1.6 exposes validate_data as a free function and renamed
    # the finiteness-check keyword to `ensure_all_finite`.
    from sklearn.utils.validation import validate_data as _sklearn_validate_data

    _finite_keyword = "ensure_all_finite"

else:
    # Older sklearn: use the BaseEstimator method directly and the legacy
    # keyword name `force_all_finite`.
    from sklearn.base import BaseEstimator

    _sklearn_validate_data = BaseEstimator._validate_data
    _finite_keyword = "force_all_finite"
+
|
|
38
|
+
|
|
39
|
+
# Select the backend finiteness check and the predicate deciding which inputs
# it may receive.  oneDAL >= 2024.7 provides its own ``_assert_all_finite``;
# earlier releases fall back to the daal4py implementation, which is
# numpy-only.
if daal_check_version((2024, "P", 700)):
    from onedal.utils.validation import _assert_all_finite as _onedal_assert_all_finite

    def _onedal_supported_format(X, xp):
        # Return True when ``X`` can be handed to the oneDAL finiteness check.
        # array_api does not have a `strides` or `flags` attribute for testing memory
        # order. When dlpack support is brought in for oneDAL, general support for
        # array_api can be enabled and the hasattr check can be removed.
        # _onedal_supported_format is therefore conservative in verifying attributes and
        # does not support array_api. This will block onedal_assert_all_finite from being
        # used for array_api inputs but will allow dpnp ndarrays and dpctl tensors.
        # only check contiguous arrays to prevent unnecessary copying of data, even if
        # non-contiguous arrays can now be converted to oneDAL tables.
        return (
            X.dtype in [xp.float32, xp.float64]
            and hasattr(X, "flags")
            and (X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"])
        )

else:
    from daal4py.utils.validation import _assert_all_finite as _onedal_assert_all_finite
    from onedal.utils._array_api import _is_numpy_namespace

    def _onedal_supported_format(X, xp):
        # daal4py _assert_all_finite only supports numpy namespaces, use internally-
        # defined check to validate inputs, otherwise offload to sklearn
        return X.dtype in [xp.float32, xp.float64] and _is_numpy_namespace(xp)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _sklearnex_assert_all_finite(
    X,
    *,
    allow_nan=False,
    input_name="",
):
    """Check ``X`` for non-finite values, offloading to oneDAL when eligible.

    The 32768-element threshold mirrors daal4py: validating small arrays in
    sklearn is cheaper than dispatching them to the oneDAL backend.  The
    threshold may be tuned later.
    """
    xp, _ = get_namespace(X)
    offload = X.size >= 32768 and _onedal_supported_format(X, xp)
    if offload:
        _onedal_assert_all_finite(X, allow_nan=allow_nan, input_name=input_name)
    elif sklearn_check_version("1.1"):
        _sklearn_assert_all_finite(X, allow_nan=allow_nan, input_name=input_name)
    else:
        # sklearn < 1.1 does not accept the ``input_name`` keyword
        _sklearn_assert_all_finite(X, allow_nan=allow_nan)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def assert_all_finite(X, *, allow_nan=False, input_name=""):
    """Public finiteness check; for sparse inputs only stored values are tested."""
    values = X.data if sp.issparse(X) else X
    _sklearnex_assert_all_finite(values, allow_nan=allow_nan, input_name=input_name)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def validate_data(
    _estimator,
    /,
    X="no_validation",
    y="no_validation",
    **kwargs,
):
    """Validate ``X``/``y`` via sklearn, with the finiteness check done locally.

    sklearn's (expensive) finite check is disabled and replaced by
    ``assert_all_finite``, which can offload to oneDAL.  When a ``dtype``
    restriction is supplied and ``y`` is validated, ``y`` is additionally
    converted to the first requested dtype, because sklearn's ``check_X_y``
    does not fully convert ``y``.

    Returns whatever ``_sklearn_validate_data`` returns: ``X``, ``y``, or the
    ``(X, y)`` tuple, possibly with ``y`` re-typed as described above.
    """
    # force finite check to not occur in sklearn, default is True
    # `ensure_all_finite` is the most up-to-date keyword name in sklearn
    # _finite_keyword provides backward compatibility for `force_all_finite`
    ensure_all_finite = kwargs.pop("ensure_all_finite", True)
    kwargs[_finite_keyword] = False

    out = _sklearn_validate_data(
        _estimator,
        X=X,
        y=y,
        **kwargs,
    )

    check_x = not isinstance(X, str) or X != "no_validation"
    check_y = not (y is None or isinstance(y, str) and y == "no_validation")

    if ensure_all_finite:
        # run local finite check
        allow_nan = ensure_all_finite == "allow-nan"
        # the return object from validate_data can be a single element (either
        # x or y) or both (as a tuple). An iterator along with check_x and
        # check_y can go through the output properly without stacking layers
        # of if statements to make sure the proper input_name is used
        arg = iter(out if isinstance(out, tuple) else (out,))
        if check_x:
            assert_all_finite(next(arg), allow_nan=allow_nan, input_name="X")
        if check_y:
            assert_all_finite(next(arg), allow_nan=allow_nan, input_name="y")

    if check_y and "dtype" in kwargs:
        # validate_data does not do full dtype conversions, as it uses check_X_y
        # oneDAL can make tables from [int32, float32, float64], requiring
        # a dtype check and conversion. This will query the array_namespace and
        # convert y as necessary. This is important especially for regressors.
        dtype = kwargs["dtype"]
        if not isinstance(dtype, (tuple, list)):
            # BUG FIX: a single dtype (e.g. ``np.float64``) is not iterable, so
            # ``tuple(dtype)`` raised TypeError (and would split a string dtype
            # into characters). Wrap the scalar in a 1-tuple instead.
            dtype = (dtype,)

        outx, outy = out if check_x else (None, out)
        if outy.dtype not in dtype:
            yp, _ = get_namespace(outy)
            # use asarray rather than astype because of numpy support
            outy = yp.asarray(outy, dtype=dtype[0])
        out = (outx, outy) if check_x else outy

    return out
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _check_sample_weight(
    sample_weight, X, dtype=None, copy=False, ensure_non_negative=False
):
    """Validate ``sample_weight`` against ``X`` and return it as a 1D array.

    Mirrors sklearn's ``_check_sample_weight`` but places generated weight
    arrays on ``X``'s device (when the namespace exposes one) and routes the
    finiteness check through the sklearnex ``assert_all_finite``.
    """
    n_samples = _num_samples(X)
    xp, _ = get_namespace(X)

    # oneDAL consumes float32/float64 weights; coerce any other explicit dtype.
    if dtype is not None and dtype not in [xp.float32, xp.float64]:
        dtype = xp.float64

    # Generated arrays should live on the same device as X (dpnp/dpctl inputs).
    device_args = {"device": X.device} if hasattr(X, "device") else {}

    if sample_weight is None:
        sample_weight = xp.ones(n_samples, dtype=dtype, **device_args)
    elif isinstance(sample_weight, numbers.Number):
        sample_weight = xp.full(n_samples, sample_weight, dtype=dtype, **device_args)
    else:
        if dtype is None:
            dtype = [xp.float64, xp.float32]

        check_params = {
            "accept_sparse": False,
            "ensure_2d": False,
            "dtype": dtype,
            "order": "C",
            "copy": copy,
            _finite_keyword: False,
        }
        if sklearn_check_version("1.1"):
            # ``input_name`` only exists in sklearn >= 1.1
            check_params["input_name"] = "sample_weight"

        sample_weight = check_array(sample_weight, **check_params)
        # finiteness was disabled in check_array above; run the local check
        assert_all_finite(sample_weight, input_name="sample_weight")

    if sample_weight.ndim != 1:
        raise ValueError("Sample weights must be 1D array or scalar")

    if sample_weight.shape != (n_samples,):
        raise ValueError(
            "sample_weight.shape == {}, expected {}!".format(
                sample_weight.shape, (n_samples,)
            )
        )

    if ensure_non_negative:
        check_non_negative(sample_weight, "`sample_weight`")

    return sample_weight
|