scikit-learn-intelex 2024.7.0__py312-none-win_amd64.whl → 2025.0.1__py312-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-learn-intelex might be problematic. Click here for more details.
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/__init__.py +73 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/__main__.py +58 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/_daal4py.cp312-win_amd64.pyd +0 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/doc/third-party-programs.txt +424 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/mb/__init__.py +19 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/mb/model_builders.py +377 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/mpi_transceiver.cp312-win_amd64.pyd +0 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/__init__.py +40 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/_n_jobs_support.py +242 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/_utils.py +241 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/cluster/__init__.py +20 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/cluster/dbscan.py +165 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/cluster/k_means.py +597 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
- {scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/spmd → scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn}/decomposition/__init__.py +2 -2
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/decomposition/_pca.py +524 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/ensemble/AdaBoostClassifier.py +192 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/ensemble/GBTDAAL.py +318 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/ensemble/__init__.py +27 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/ensemble/_forest.py +1397 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/linear_model/__init__.py +29 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/linear_model/_linear.py +272 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/linear_model/_ridge.py +325 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/linear_model/linear.py +17 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/linear_model/logistic_loss.py +195 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/linear_model/logistic_path.py +1026 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/linear_model/ridge.py +17 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/linear_model/tests/test_linear.py +196 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/manifold/__init__.py +19 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/manifold/_t_sne.py +405 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/metrics/__init__.py +20 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/metrics/_pairwise.py +155 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/metrics/_ranking.py +210 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/model_selection/__init__.py +19 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/model_selection/_split.py +309 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/model_selection/tests/test_split.py +56 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/monkeypatch/__init__.py +0 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/monkeypatch/tests/test_patching.py +87 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +118 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/neighbors/__init__.py +21 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/neighbors/_base.py +503 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/neighbors/_classification.py +139 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/neighbors/_regression.py +74 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/neighbors/_unsupervised.py +55 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/svm/__init__.py +19 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/svm/svm.py +734 -0
- {scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/spmd/covariance → scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/utils}/__init__.py +5 -3
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/utils/base.py +75 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/utils/tests/test_utils.py +51 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/daal4py/sklearn/utils/validation.py +693 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/__init__.py +83 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/_config.py +53 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/_device_offload.py +229 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/_onedal_py_dpc.cp312-win_amd64.pyd +0 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/_onedal_py_host.cp312-win_amd64.pyd +0 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/basic_statistics/basic_statistics.py +107 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/basic_statistics/incremental_basic_statistics.py +160 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/basic_statistics/tests/test_basic_statistics.py +298 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/basic_statistics/tests/test_incremental_basic_statistics.py +196 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/cluster/__init__.py +27 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/cluster/dbscan.py +110 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/cluster/kmeans.py +560 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/cluster/kmeans_init.py +115 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/cluster/tests/test_dbscan.py +125 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/cluster/tests/test_kmeans.py +88 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/cluster/tests/test_kmeans_init.py +93 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/common/_base.py +38 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/common/_estimator_checks.py +47 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/common/_mixin.py +62 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/common/_policy.py +59 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/common/_spmd_policy.py +30 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/common/hyperparameters.py +116 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/common/tests/test_policy.py +75 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/covariance/__init__.py +20 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/covariance/covariance.py +125 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/covariance/incremental_covariance.py +146 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/covariance/tests/test_covariance.py +50 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/covariance/tests/test_incremental_covariance.py +122 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/datatypes/__init__.py +19 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/datatypes/_data_conversion.py +95 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/datatypes/tests/test_data.py +235 -0
- {scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/spmd/basic_statistics → scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/decomposition}/__init__.py +3 -2
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/decomposition/incremental_pca.py +204 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/decomposition/pca.py +186 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/decomposition/tests/test_incremental_pca.py +198 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/ensemble/__init__.py +29 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/ensemble/forest.py +720 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/ensemble/tests/test_random_forest.py +97 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/linear_model/__init__.py +27 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/linear_model/incremental_linear_model.py +258 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/linear_model/linear_model.py +329 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/linear_model/logistic_regression.py +249 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/linear_model/tests/test_incremental_linear_regression.py +168 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/linear_model/tests/test_incremental_ridge_regression.py +107 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/linear_model/tests/test_linear_regression.py +149 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/linear_model/tests/test_logistic_regression.py +95 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/linear_model/tests/test_ridge.py +95 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/neighbors/__init__.py +19 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/neighbors/neighbors.py +778 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/neighbors/tests/test_knn_classification.py +49 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/primitives/__init__.py +27 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/primitives/get_tree.py +25 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/primitives/kernel_functions.py +153 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/primitives/tests/test_kernel_functions.py +159 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/svm/__init__.py +19 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/svm/svm.py +556 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/svm/tests/test_csr_svm.py +351 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/svm/tests/test_nusvc.py +204 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/svm/tests/test_nusvr.py +210 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/svm/tests/test_svc.py +168 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/svm/tests/test_svr.py +243 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/tests/test_common.py +41 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/tests/utils/_dataframes_support.py +168 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/tests/utils/_device_selection.py +107 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/utils/__init__.py +49 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/utils/_array_api.py +91 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal/utils/validation.py +432 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/_device_offload.py +36 -13
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/sklearnex/basic_statistics/__init__.py +20 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/basic_statistics/basic_statistics.py +30 -8
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/basic_statistics/incremental_basic_statistics.py +49 -16
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/cluster/__init__.py +1 -1
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/sklearnex/cluster/k_means.py +383 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/sklearnex/cluster/tests/test_kmeans.py +153 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/covariance/incremental_covariance.py +28 -10
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/covariance/tests/test_incremental_covariance.py +11 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/decomposition/pca.py +1 -1
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/dispatcher.py +19 -9
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/ensemble/_forest.py +1 -1
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/__init__.py +2 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/coordinate_descent.py +7 -7
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/incremental_linear.py +45 -26
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/sklearnex/linear_model/incremental_ridge.py +418 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/ridge.py +4 -4
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/tests/test_incremental_linear.py +13 -10
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/sklearnex/linear_model/tests/test_incremental_ridge.py +153 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/tests/test_logreg.py +5 -4
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/manifold/t_sne.py +3 -3
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/metrics/pairwise.py +2 -2
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/metrics/ranking.py +2 -2
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/model_selection/split.py +4 -2
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/_lof.py +2 -1
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/common.py +1 -1
- {scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/preview/cluster → scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/sklearnex/preview}/__init__.py +1 -3
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/preview/decomposition/incremental_pca.py +8 -8
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/preview/decomposition/tests/test_incremental_pca.py +2 -2
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/preview/linear_model/ridge.py +4 -4
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/sklearnex/spmd/basic_statistics/__init__.py +20 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +307 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/sklearnex/spmd/covariance/__init__.py +20 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/sklearnex/spmd/covariance/incremental_covariance.py +37 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
- {scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/preview → scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/sklearnex/spmd/decomposition}/__init__.py +4 -1
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/sklearnex/spmd/decomposition/incremental_pca.py +30 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/linear_model/__init__.py +2 -1
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
- scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +329 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +4 -1
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/svm/nusvc.py +1 -1
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/svm/svc.py +1 -1
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/tests/_utils_spmd.py +18 -5
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_memory_usage.py +2 -1
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_patching.py +0 -1
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_run_to_run_stability.py +12 -11
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/utils/__init__.py +1 -2
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/utils/_namespace.py → scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/sklearnex/utils/_array_api.py +5 -20
- {scikit_learn_intelex-2024.7.0.dist-info → scikit_learn_intelex-2025.0.1.dist-info}/METADATA +3 -2
- scikit_learn_intelex-2025.0.1.dist-info/RECORD +255 -0
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/cluster/k_means.py +0 -25
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/cluster/tests/test_kmeans.py +0 -42
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/preview/cluster/_common.py +0 -84
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/preview/cluster/k_means.py +0 -303
- scikit_learn_intelex-2024.7.0.dist-info/RECORD +0 -122
- {scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex → scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/onedal}/basic_statistics/__init__.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/__init__.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/__main__.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/_config.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/_utils.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/basic_statistics/tests/test_basic_statistics.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/cluster/dbscan.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/cluster/tests/test_dbscan.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/conftest.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/covariance/__init__.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/decomposition/__init__.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/decomposition/tests/test_pca.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/doc/third-party-programs.txt +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/ensemble/__init__.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/ensemble/tests/test_forest.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/glob/__main__.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/glob/dispatcher.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/linear.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/logistic_path.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/logistic_regression.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/tests/test_linear.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/manifold/__init__.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/manifold/tests/test_tsne.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/metrics/__init__.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/metrics/tests/test_metrics.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/model_selection/__init__.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/model_selection/tests/test_model_selection.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/__init__.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/knn_classification.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/knn_regression.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/knn_unsupervised.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/tests/test_neighbors.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/preview/covariance/__init__.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/preview/covariance/covariance.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/preview/covariance/tests/test_covariance.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/preview/decomposition/__init__.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/preview/linear_model/__init__.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/preview/linear_model/tests/test_ridge.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/__init__.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/basic_statistics/basic_statistics.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/cluster/__init__.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/cluster/dbscan.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/cluster/kmeans.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/covariance/covariance.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/covariance/tests/test_covariance_spmd.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/decomposition/pca.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/decomposition/tests/test_pca_spmd.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/ensemble/__init__.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/ensemble/forest.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/ensemble/tests/test_forest_spmd.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/linear_model/linear_model.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/linear_model/logistic_regression.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/neighbors/__init__.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/neighbors/neighbors.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/svm/__init__.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/svm/_common.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/svm/nusvr.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/svm/svr.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/svm/tests/test_svm.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/tests/_utils.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_common.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_config.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_monkeypatch.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_n_jobs_support.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_parallel.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/utils/parallel.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/utils/tests/test_finite.py +0 -0
- {scikit_learn_intelex-2024.7.0.data → scikit_learn_intelex-2025.0.1.data}/data/Lib/site-packages/sklearnex/utils/validation.py +0 -0
- {scikit_learn_intelex-2024.7.0.dist-info → scikit_learn_intelex-2025.0.1.dist-info}/LICENSE.txt +0 -0
- {scikit_learn_intelex-2024.7.0.dist-info → scikit_learn_intelex-2025.0.1.dist-info}/WHEEL +0 -0
- {scikit_learn_intelex-2024.7.0.dist-info → scikit_learn_intelex-2025.0.1.dist-info}/top_level.txt +0 -0
|
@@ -37,8 +37,10 @@ import numbers
|
|
|
37
37
|
@control_n_jobs(decorated_methods=["partial_fit", "_onedal_finalize_fit"])
|
|
38
38
|
class IncrementalBasicStatistics(BaseEstimator):
|
|
39
39
|
"""
|
|
40
|
-
|
|
41
|
-
|
|
40
|
+
Calculates basic statistics on the given data and allows for computation when the data are split into
|
|
41
|
+
batches. The user can use ``partial_fit`` method to provide a single batch of data or use the ``fit`` method to provide
|
|
42
|
+
the entire dataset.
|
|
43
|
+
|
|
42
44
|
Parameters
|
|
43
45
|
----------
|
|
44
46
|
result_options: string or list, default='all'
|
|
@@ -47,10 +49,9 @@ class IncrementalBasicStatistics(BaseEstimator):
|
|
|
47
49
|
batch_size : int, default=None
|
|
48
50
|
The number of samples to use for each batch. Only used when calling
|
|
49
51
|
``fit``. If ``batch_size`` is ``None``, then ``batch_size``
|
|
50
|
-
is inferred from the data and set to ``5 * n_features
|
|
51
|
-
balance between approximation accuracy and memory consumption.
|
|
52
|
+
is inferred from the data and set to ``5 * n_features``.
|
|
52
53
|
|
|
53
|
-
Attributes
|
|
54
|
+
Attributes
|
|
54
55
|
----------
|
|
55
56
|
min : ndarray of shape (n_features,)
|
|
56
57
|
Minimum of each feature over all samples.
|
|
@@ -81,6 +82,38 @@ class IncrementalBasicStatistics(BaseEstimator):
|
|
|
81
82
|
|
|
82
83
|
second_order_raw_moment : ndarray of shape (n_features,)
|
|
83
84
|
Second order moment of each feature over all samples.
|
|
85
|
+
|
|
86
|
+
n_samples_seen_ : int
|
|
87
|
+
The number of samples processed by the estimator. Will be reset on
|
|
88
|
+
new calls to ``fit``, but increments across ``partial_fit`` calls.
|
|
89
|
+
|
|
90
|
+
batch_size_ : int
|
|
91
|
+
Inferred batch size from ``batch_size``.
|
|
92
|
+
|
|
93
|
+
n_features_in_ : int
|
|
94
|
+
Number of features seen during ``fit`` or ``partial_fit``.
|
|
95
|
+
|
|
96
|
+
Note
|
|
97
|
+
----
|
|
98
|
+
Attribute exists only if corresponding result option has been provided.
|
|
99
|
+
|
|
100
|
+
Examples
|
|
101
|
+
--------
|
|
102
|
+
>>> import numpy as np
|
|
103
|
+
>>> from sklearnex.basic_statistics import IncrementalBasicStatistics
|
|
104
|
+
>>> incbs = IncrementalBasicStatistics(batch_size=1)
|
|
105
|
+
>>> X = np.array([[1, 2], [3, 4]])
|
|
106
|
+
>>> incbs.partial_fit(X[:1])
|
|
107
|
+
>>> incbs.partial_fit(X[1:])
|
|
108
|
+
>>> incbs.sum_
|
|
109
|
+
np.array([4., 6.])
|
|
110
|
+
>>> incbs.min_
|
|
111
|
+
np.array([1., 2.])
|
|
112
|
+
>>> incbs.fit(X)
|
|
113
|
+
>>> incbs.sum_
|
|
114
|
+
np.array([4., 6.])
|
|
115
|
+
>>> incbs.max_
|
|
116
|
+
np.array([3., 4.])
|
|
84
117
|
"""
|
|
85
118
|
|
|
86
119
|
_onedal_incremental_basic_statistics = staticmethod(onedal_IncrementalBasicStatistics)
|
|
@@ -120,7 +153,7 @@ class IncrementalBasicStatistics(BaseEstimator):
|
|
|
120
153
|
|
|
121
154
|
def _onedal_supported(self, method_name, *data):
|
|
122
155
|
patching_status = PatchingConditionsChain(
|
|
123
|
-
f"sklearn.
|
|
156
|
+
f"sklearn.basic_statistics.{self.__class__.__name__}.{method_name}"
|
|
124
157
|
)
|
|
125
158
|
return patching_status
|
|
126
159
|
|
|
@@ -135,9 +168,9 @@ class IncrementalBasicStatistics(BaseEstimator):
|
|
|
135
168
|
assert isinstance(onedal_options, str)
|
|
136
169
|
return options
|
|
137
170
|
|
|
138
|
-
def _onedal_finalize_fit(self):
|
|
171
|
+
def _onedal_finalize_fit(self, queue=None):
|
|
139
172
|
assert hasattr(self, "_onedal_estimator")
|
|
140
|
-
self._onedal_estimator.finalize_fit()
|
|
173
|
+
self._onedal_estimator.finalize_fit(queue=queue)
|
|
141
174
|
self._need_to_finalize = False
|
|
142
175
|
|
|
143
176
|
def _onedal_partial_fit(self, X, sample_weight=None, queue=None):
|
|
@@ -171,7 +204,7 @@ class IncrementalBasicStatistics(BaseEstimator):
|
|
|
171
204
|
self._onedal_estimator = self._onedal_incremental_basic_statistics(
|
|
172
205
|
**onedal_params
|
|
173
206
|
)
|
|
174
|
-
self._onedal_estimator.partial_fit(X, sample_weight, queue)
|
|
207
|
+
self._onedal_estimator.partial_fit(X, weights=sample_weight, queue=queue)
|
|
175
208
|
self._need_to_finalize = True
|
|
176
209
|
|
|
177
210
|
def _onedal_fit(self, X, sample_weight=None, queue=None):
|
|
@@ -203,7 +236,7 @@ class IncrementalBasicStatistics(BaseEstimator):
|
|
|
203
236
|
|
|
204
237
|
self.n_features_in_ = X.shape[1]
|
|
205
238
|
|
|
206
|
-
self._onedal_finalize_fit()
|
|
239
|
+
self._onedal_finalize_fit(queue=queue)
|
|
207
240
|
|
|
208
241
|
return self
|
|
209
242
|
|
|
@@ -229,14 +262,14 @@ class IncrementalBasicStatistics(BaseEstimator):
|
|
|
229
262
|
Parameters
|
|
230
263
|
----------
|
|
231
264
|
X : array-like of shape (n_samples, n_features)
|
|
232
|
-
Data for compute, where
|
|
233
|
-
|
|
265
|
+
Data for compute, where ``n_samples`` is the number of samples and
|
|
266
|
+
``n_features`` is the number of features.
|
|
234
267
|
|
|
235
268
|
y : Ignored
|
|
236
269
|
Not used, present for API consistency by convention.
|
|
237
270
|
|
|
238
271
|
sample_weight : array-like of shape (n_samples,), default=None
|
|
239
|
-
Weights for compute weighted statistics, where
|
|
272
|
+
Weights used to compute weighted statistics, where ``n_samples`` is the number of samples.
|
|
240
273
|
|
|
241
274
|
Returns
|
|
242
275
|
-------
|
|
@@ -261,14 +294,14 @@ class IncrementalBasicStatistics(BaseEstimator):
|
|
|
261
294
|
Parameters
|
|
262
295
|
----------
|
|
263
296
|
X : array-like of shape (n_samples, n_features)
|
|
264
|
-
Data for compute, where
|
|
265
|
-
|
|
297
|
+
Data for compute, where ``n_samples`` is the number of samples and
|
|
298
|
+
``n_features`` is the number of features.
|
|
266
299
|
|
|
267
300
|
y : Ignored
|
|
268
301
|
Not used, present for API consistency by convention.
|
|
269
302
|
|
|
270
303
|
sample_weight : array-like of shape (n_samples,), default=None
|
|
271
|
-
Weights for compute weighted statistics, where
|
|
304
|
+
Weights used to compute weighted statistics, where ``n_samples`` is the number of samples.
|
|
272
305
|
|
|
273
306
|
Returns
|
|
274
307
|
-------
|
|
@@ -0,0 +1,383 @@
|
|
|
1
|
+
# ==============================================================================
|
|
2
|
+
# Copyright 2021 Intel Corporation
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# ==============================================================================
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
|
|
19
|
+
from daal4py.sklearn._utils import daal_check_version
|
|
20
|
+
|
|
21
|
+
if daal_check_version((2023, "P", 200)):
|
|
22
|
+
|
|
23
|
+
import numbers
|
|
24
|
+
import warnings
|
|
25
|
+
|
|
26
|
+
import numpy as np
|
|
27
|
+
from scipy.sparse import issparse
|
|
28
|
+
from sklearn.cluster import KMeans as sklearn_KMeans
|
|
29
|
+
from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
|
|
30
|
+
from sklearn.utils.validation import (
|
|
31
|
+
_check_sample_weight,
|
|
32
|
+
_num_samples,
|
|
33
|
+
check_is_fitted,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
from daal4py.sklearn._n_jobs_support import control_n_jobs
|
|
37
|
+
from daal4py.sklearn._utils import sklearn_check_version
|
|
38
|
+
from onedal.cluster import KMeans as onedal_KMeans
|
|
39
|
+
from onedal.utils import _is_csr
|
|
40
|
+
|
|
41
|
+
from .._device_offload import dispatch, wrap_output_data
|
|
42
|
+
from .._utils import PatchingConditionsChain
|
|
43
|
+
|
|
44
|
+
@control_n_jobs(decorated_methods=["fit", "predict", "transform", "fit_transform"])
class KMeans(sklearn_KMeans):
    # The class docstring is taken verbatim from scikit-learn's KMeans.
    __doc__ = sklearn_KMeans.__doc__

    if sklearn_check_version("1.2"):
        _parameter_constraints: dict = {**sklearn_KMeans._parameter_constraints}

    def __init__(
        self,
        n_clusters=8,
        *,
        init="k-means++",
        n_init=(
            "auto"
            if sklearn_check_version("1.4")
            else "warn" if sklearn_check_version("1.2") else 10
        ),
        max_iter=300,
        tol=1e-4,
        verbose=0,
        random_state=None,
        copy_x=True,
        algorithm="lloyd" if sklearn_check_version("1.1") else "auto",
    ):
        super().__init__(
            n_clusters=n_clusters,
            init=init,
            max_iter=max_iter,
            tol=tol,
            n_init=n_init,
            verbose=verbose,
            random_state=random_state,
            copy_x=copy_x,
            algorithm=algorithm,
        )

    def _initialize_onedal_estimator(self):
        """Create ``self._onedal_estimator`` from the current hyper-parameters.

        Only the parameters listed below are forwarded to the oneDAL
        estimator; ``copy_x`` and ``algorithm`` are not passed on.
        """
        onedal_params = {
            "n_clusters": self.n_clusters,
            "init": self.init,
            "max_iter": self.max_iter,
            "tol": self.tol,
            "n_init": self.n_init,
            "verbose": self.verbose,
            "random_state": self.random_state,
        }

        self._onedal_estimator = onedal_KMeans(**onedal_params)

    def _ensure_onedal_estimator(self):
        """Lazily build the oneDAL estimator for an already fitted model.

        When the instance has no ``_onedal_estimator`` attribute, a new
        backend estimator is created and seeded with ``cluster_centers_``.
        """
        # NOTE(review): both statements are assumed to belong to the ``if``
        # body (indentation was not recoverable) - confirm against upstream.
        if not hasattr(self, "_onedal_estimator"):
            self._initialize_onedal_estimator()
            self._onedal_estimator.cluster_centers_ = self.cluster_centers_

    @staticmethod
    def _is_supported_data_format(X):
        """Return a truthy value for dense input, or CSR with oneDAL >= 2024.7.0."""
        return (_is_csr(X) and daal_check_version((2024, "P", 700))) or not issparse(X)

    def _log_elkan_fallback(self):
        """Log that 'elkan' is replaced by 'lloyd' when it was requested."""
        if self.algorithm == "elkan":
            logging.getLogger("sklearnex").info(
                "oneDAL does not support 'elkan', using 'lloyd' algorithm instead."
            )

    def _onedal_fit_supported(self, method_name, X, y=None, sample_weight=None):
        """Build the patching-condition chain for ``fit``.

        Parameters
        ----------
        method_name : str
            Must be ``"fit"``.
        X : array-like or sparse matrix
            Training data.
        y : Ignored
        sample_weight : None, number or array-like
            Accepted only when it is None, a scalar, or all-equal.

        Returns
        -------
        PatchingConditionsChain
            Chain holding the oneDAL applicability conditions.
        """
        assert method_name == "fit"

        class_name = self.__class__.__name__
        patching_status = PatchingConditionsChain(f"sklearn.cluster.{class_name}.fit")

        sample_count = _num_samples(X)
        self._algorithm = self.algorithm
        supported_algs = ["auto", "full", "lloyd", "elkan"]
        self._log_elkan_fallback()
        correct_count = self.n_clusters < sample_count

        is_data_supported = self._is_supported_data_format(X)

        _acceptable_sample_weights = self._validate_sample_weight(sample_weight, X)

        patching_status.and_conditions(
            [
                (
                    self.algorithm in supported_algs,
                    "Only 'lloyd' algorithm is supported, 'elkan' is computed using lloyd",
                ),
                (correct_count, "n_clusters is smaller than number of samples"),
                (
                    _acceptable_sample_weights,
                    "oneDAL doesn't support sample_weight. Accepted options are None, constant, or equal weights.",
                ),
                (
                    is_data_supported,
                    "Supported data formats: Dense, CSR (oneDAL version >= 2024.7.0).",
                ),
            ]
        )

        return patching_status

    # Public methods carry comments instead of docstrings because their
    # ``__doc__`` is assigned from scikit-learn at the end of the class body.
    def fit(self, X, y=None, sample_weight=None):
        # Validate hyper-parameters first (sklearn >= 1.2), then let
        # ``dispatch`` choose between the oneDAL and scikit-learn branches.
        if sklearn_check_version("1.2"):
            self._validate_params()

        dispatch(
            self,
            "fit",
            {
                "onedal": self.__class__._onedal_fit,
                "sklearn": sklearn_KMeans.fit,
            },
            X,
            y,
            sample_weight,
        )

        return self

    def _onedal_fit(self, X, _, sample_weight, queue=None):
        """Fit with the oneDAL backend and copy the results onto ``self``.

        ``sample_weight`` is not forwarded to the backend; the ``fit``
        support check only admits None, scalar or all-equal weights.
        """
        X = self._validate_data(
            X,
            accept_sparse="csr",
            dtype=[np.float64, np.float32],
            order="C",
            copy=self.copy_x,
            accept_large_sparse=False,
        )

        if sklearn_check_version("1.2"):
            self._check_params_vs_input(X)
        else:
            self._check_params(X)

        self._n_features_out = self.n_clusters

        self._initialize_onedal_estimator()
        self._n_threads = _openmp_effective_n_threads()
        self._onedal_estimator.fit(X, queue=queue)

        self._save_attributes()

    def _validate_sample_weight(self, sample_weight, X):
        """Return True when ``sample_weight`` is None, a scalar or all-equal.

        Array-like weights are normalised with ``_check_sample_weight`` before
        being compared against their first element.
        """
        if sample_weight is None or isinstance(sample_weight, numbers.Number):
            return True

        sample_weight = _check_sample_weight(
            sample_weight,
            X,
            dtype=X.dtype if hasattr(X, "dtype") else None,
        )
        # Guard the ``[0]`` lookup: an empty weight vector is trivially uniform.
        if sample_weight.size == 0:
            return True
        return bool(np.all(sample_weight == sample_weight[0]))

    def _onedal_predict_supported(self, method_name, X, sample_weight=None):
        """Build the patching-condition chain for ``predict`` and ``score``.

        The chain is named after ``method_name`` so that a ``score`` call is
        not reported as ``predict``.
        """
        class_name = self.__class__.__name__
        is_data_supported = self._is_supported_data_format(X)
        patching_status = PatchingConditionsChain(
            f"sklearn.cluster.{class_name}.{method_name}"
        )

        # algorithm "auto" has been deprecated since 1.1,
        # algorithm "full" has been replaced by "lloyd"
        supported_algs = ["auto", "full", "lloyd", "elkan"]
        self._log_elkan_fallback()

        # Weights are only inspected for sklearn < 1.5.
        _acceptable_sample_weights = True
        if not sklearn_check_version("1.5"):
            _acceptable_sample_weights = self._validate_sample_weight(
                sample_weight, X
            )

        patching_status.and_conditions(
            [
                (
                    self.algorithm in supported_algs,
                    "Only 'lloyd' algorithm is supported, 'elkan' is computed using lloyd.",
                ),
                (
                    is_data_supported,
                    "Supported data formats: Dense, CSR (oneDAL version >= 2024.7.0).",
                ),
                (
                    _acceptable_sample_weights,
                    "oneDAL doesn't support sample_weight. Acceptable options are None, constant, or equal weights.",
                ),
            ]
        )

        return patching_status

    if sklearn_check_version("1.5"):

        @wrap_output_data
        def predict(self, X):
            self._validate_params()

            return dispatch(
                self,
                "predict",
                {
                    "onedal": self.__class__._onedal_predict,
                    "sklearn": sklearn_KMeans.predict,
                },
                X,
            )

    else:

        @wrap_output_data
        def predict(
            self,
            X,
            sample_weight="deprecated" if sklearn_check_version("1.3") else None,
        ):
            if sklearn_check_version("1.2"):
                self._validate_params()

            return dispatch(
                self,
                "predict",
                {
                    "onedal": self.__class__._onedal_predict,
                    "sklearn": sklearn_KMeans.predict,
                },
                X,
                sample_weight=sample_weight,
            )

    def _onedal_predict(self, X, sample_weight=None, queue=None):
        """Predict cluster labels with the oneDAL backend.

        ``sample_weight`` is not forwarded to the backend; on sklearn 1.3 and
        1.4 a non-None value triggers a ``FutureWarning``.
        """
        check_is_fitted(self)

        X = self._validate_data(
            X,
            accept_sparse="csr",
            reset=False,
            dtype=[np.float64, np.float32],
        )

        if not sklearn_check_version("1.5") and sklearn_check_version("1.3"):
            # "deprecated" is the sentinel default of ``predict`` on 1.3/1.4.
            if isinstance(sample_weight, str) and sample_weight == "deprecated":
                sample_weight = None

            if sample_weight is not None:
                warnings.warn(
                    "'sample_weight' was deprecated in version 1.3 and "
                    "will be removed in 1.5.",
                    FutureWarning,
                )

        self._ensure_onedal_estimator()

        return self._onedal_estimator.predict(X, queue=queue)

    def _onedal_supported(self, method_name, *data):
        """Route a support query to the checker for ``method_name``.

        Raises
        ------
        RuntimeError
            If ``method_name`` is not ``fit``, ``predict`` or ``score``.
        """
        if method_name == "fit":
            return self._onedal_fit_supported(method_name, *data)
        if method_name == "predict":
            return self._onedal_predict_supported(method_name, *data)
        if method_name == "score":
            # ``score`` dispatches ``(X, y)`` positionally. Forward only ``X``
            # so the labels ``y`` are never bound to the ``sample_weight``
            # parameter of the predict-style checker.
            return self._onedal_predict_supported(method_name, *data[:1])
        raise RuntimeError(
            f"Unknown method {method_name} in {self.__class__.__name__}"
        )

    _onedal_gpu_supported = _onedal_supported
    _onedal_cpu_supported = _onedal_supported

    @wrap_output_data
    def fit_transform(self, X, y=None, sample_weight=None):
        return self.fit(X, sample_weight=sample_weight)._transform(X)

    @wrap_output_data
    def transform(self, X):
        check_is_fitted(self)

        X = self._check_test_data(X)
        return self._transform(X)

    @wrap_output_data
    def score(self, X, y=None, sample_weight=None):
        return dispatch(
            self,
            "score",
            {
                "onedal": self.__class__._onedal_score,
                "sklearn": sklearn_KMeans.score,
            },
            X,
            y,
            sample_weight=sample_weight,
        )

    def _onedal_score(self, X, y, sample_weight=None, queue=None):
        """Score ``X`` with the oneDAL backend.

        ``y`` and ``sample_weight`` are not forwarded to the backend.
        """
        check_is_fitted(self)

        X = self._validate_data(
            X,
            accept_sparse="csr",
            reset=False,
            dtype=[np.float64, np.float32],
        )

        # NOTE(review): this block mirrors ``_onedal_predict``. The warning
        # text describes the ``predict`` deprecation - confirm it is intended
        # for ``score`` too, and that ignoring the weights here is acceptable.
        if not sklearn_check_version("1.5") and sklearn_check_version("1.3"):
            if isinstance(sample_weight, str) and sample_weight == "deprecated":
                sample_weight = None

            if sample_weight is not None:
                warnings.warn(
                    "'sample_weight' was deprecated in version 1.3 and "
                    "will be removed in 1.5.",
                    FutureWarning,
                )

        self._ensure_onedal_estimator()

        return self._onedal_estimator.score(X, queue=queue)

    def _save_attributes(self):
        """Copy the fitted attributes from the oneDAL estimator onto ``self``."""
        assert hasattr(self, "_onedal_estimator")
        self.cluster_centers_ = self._onedal_estimator.cluster_centers_
        self.labels_ = self._onedal_estimator.labels_
        self.inertia_ = self._onedal_estimator.inertia_
        self.n_iter_ = self._onedal_estimator.n_iter_
        self.n_features_in_ = self._onedal_estimator.n_features_in_

        self._n_init = self._onedal_estimator._n_init

    fit.__doc__ = sklearn_KMeans.fit.__doc__
    predict.__doc__ = sklearn_KMeans.predict.__doc__
    transform.__doc__ = sklearn_KMeans.transform.__doc__
    fit_transform.__doc__ = sklearn_KMeans.fit_transform.__doc__
    score.__doc__ = sklearn_KMeans.score.__doc__
|
|
377
|
+
|
|
378
|
+
else:
|
|
379
|
+
from daal4py.sklearn.cluster import KMeans
|
|
380
|
+
|
|
381
|
+
# Emit through the package logger, as the rest of this module does. The
# root-level ``logging.warning`` helper implicitly calls ``basicConfig()``
# when no handler is installed, reconfiguring the host application's logging
# as an import side effect.
logging.getLogger("sklearnex").warning(
    "Sklearnex KMeans requires oneDAL version >= 2023.2, falling back to daal4py."
)
|
scikit_learn_intelex-2025.0.1.data/data/Lib/site-packages/sklearnex/cluster/tests/test_kmeans.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
# ===============================================================================
|
|
2
|
+
# Copyright 2021 Intel Corporation
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# ===============================================================================
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
import pytest
|
|
19
|
+
from numpy.testing import assert_allclose
|
|
20
|
+
from scipy.sparse import csr_matrix
|
|
21
|
+
from sklearn.datasets import make_blobs
|
|
22
|
+
|
|
23
|
+
from daal4py.sklearn._utils import daal_check_version, sklearn_check_version
|
|
24
|
+
from onedal.tests.utils._dataframes_support import (
|
|
25
|
+
_as_numpy,
|
|
26
|
+
_convert_to_dataframe,
|
|
27
|
+
get_dataframes_and_queues,
|
|
28
|
+
get_queues,
|
|
29
|
+
)
|
|
30
|
+
from sklearnex import config_context
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def generate_dense_dataset(n_samples, n_features, density, n_clusters):
    """Return blob data of shape (n_samples, n_features) with entries zeroed out.

    Each entry is kept when a Bernoulli draw with probability ``density``
    comes up 1, and zeroed otherwise. The global NumPy RNG is seeded from the
    dimensions so the mask is reproducible for a given configuration.
    """
    np.random.seed(2024 + n_samples + n_features + n_clusters)
    blobs, _labels = make_blobs(
        n_samples=n_samples,
        n_features=n_features,
        centers=n_clusters,
        cluster_std=1.0,
        random_state=42,
    )
    keep_mask = np.random.binomial(1, density, (n_samples, n_features))
    return blobs * keep_mask
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
@pytest.mark.parametrize("init", ["k-means++", "random"])
def test_sklearnex_import_for_dense_data(dataframe, queue, algorithm, init):
    """Check which package provides the KMeans class fitted on dense data."""
    lloyd_unavailable = not sklearn_check_version("1.1")
    if lloyd_unavailable and algorithm == "lloyd":
        pytest.skip("lloyd requires sklearn>=1.1.")
    from sklearnex.cluster import KMeans

    data = generate_dense_dataset(1000, 10, 0.5, 3)
    data_df = _convert_to_dataframe(data, sycl_queue=queue, target_df=dataframe)

    estimator = KMeans(n_clusters=3, random_state=0, algorithm=algorithm, init=init)
    fitted = estimator.fit(data_df)

    # Older oneDAL versions fall back to the daal4py implementation.
    expected_package = (
        "sklearnex" if daal_check_version((2023, "P", 200)) else "daal4py"
    )
    assert expected_package in fitted.__module__
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@pytest.mark.skipif(
    not daal_check_version((2024, "P", 700)),
    reason="Sparse data requires oneDAL>=2024.7.0",
)
@pytest.mark.parametrize("queue", get_queues())
@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
@pytest.mark.parametrize("init", ["k-means++", "random"])
def test_sklearnex_import_for_sparse_data(queue, algorithm, init):
    """Check that KMeans fitted on CSR data comes from the sklearnex package."""
    from sklearnex.cluster import KMeans

    csr_data = csr_matrix(generate_dense_dataset(1000, 10, 0.5, 3))

    estimator = KMeans(n_clusters=3, random_state=0, algorithm=algorithm, init=init)
    fitted = estimator.fit(csr_data)

    assert "sklearnex" in fitted.__module__
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
def test_results_on_dense_gold_data(dataframe, queue, algorithm):
    """Compare labels, centers and inertia against known values on a toy set."""
    if not sklearn_check_version("1.1") and algorithm == "lloyd":
        pytest.skip("lloyd requires sklearn>=1.1.")

    from sklearnex.cluster import KMeans

    X_train = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])
    X_test = np.array([[0, 0], [12, 3]])
    X_train_df = _convert_to_dataframe(X_train, sycl_queue=queue, target_df=dataframe)
    X_test_df = _convert_to_dataframe(X_test, sycl_queue=queue, target_df=dataframe)

    kmeans = KMeans(n_clusters=2, random_state=0, algorithm=algorithm).fit(X_train_df)

    # The inertia does not depend on how the two clusters are numbered.
    expected_inertia = 16.0
    if queue and queue.sycl_device.is_gpu:
        # KMeans Init Dense GPU implementation is different from CPU
        expected_cluster_labels = np.array([0, 1], dtype=np.int32)
        expected_cluster_centers = np.array([[1.0, 2.0], [10.0, 2.0]], dtype=np.float32)
    else:
        expected_cluster_labels = np.array([1, 0], dtype=np.int32)
        expected_cluster_centers = np.array([[10.0, 2.0], [1.0, 2.0]], dtype=np.float32)

    assert_allclose(expected_cluster_labels, _as_numpy(kmeans.predict(X_test_df)))
    assert_allclose(expected_cluster_centers, _as_numpy(kmeans.cluster_centers_))
    # Use a tolerance: exact ``==`` on a floating-point reduction is brittle.
    assert_allclose(expected_inertia, kmeans.inertia_)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@pytest.mark.skipif(
    not daal_check_version((2024, "P", 700)),
    reason="Sparse data requires oneDAL>=2024.7.0",
)
@pytest.mark.parametrize("queue", get_queues())
@pytest.mark.parametrize("init", ["k-means++", "random", "arraylike"])
@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
@pytest.mark.parametrize(
    "dims", [(1000, 10, 0.95, 3), (50000, 100, 0.75, 10), (10000, 10, 0.8, 5)]
)
def test_dense_vs_sparse(queue, init, algorithm, dims):
    """Check that dense and CSR inputs yield the same cluster centers."""
    from sklearnex.cluster import KMeans

    if init == "random":
        pytest.skip("Random initialization in sparse K-means is buggy.")

    # For higher level of sparsity (smaller density) the test may fail
    n_samples, n_features, density, n_clusters = dims
    dense_data = generate_dense_dataset(n_samples, n_features, density, n_clusters)
    sparse_data = csr_matrix(dense_data)

    if init == "arraylike":
        # Use randomly chosen rows of the data as explicit initial centers.
        np.random.seed(2024 + n_samples + n_features + n_clusters)
        init = dense_data[np.random.choice(n_samples, size=n_clusters, replace=False)]

    shared_params = dict(
        n_clusters=n_clusters, random_state=0, init=init, algorithm=algorithm
    )
    dense_model = KMeans(**shared_params).fit(dense_data)
    sparse_model = KMeans(**shared_params).fit(sparse_data)

    assert_allclose(
        dense_model.cluster_centers_,
        sparse_model.cluster_centers_,
    )
|