scikit-learn-intelex 2024.5.0__py311-none-win_amd64.whl → 2024.7.0__py311-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-learn-intelex might be problematic. Click here for more details.
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/_config.py +3 -15
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/_device_offload.py +98 -0
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/basic_statistics/basic_statistics.py +143 -0
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/basic_statistics/tests/test_basic_statistics.py +251 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +1 -1
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/cluster/dbscan.py +3 -1
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/cluster/k_means.py +8 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/cluster/tests/test_dbscan.py +8 -6
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/cluster/tests/test_kmeans.py +15 -3
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/conftest.py +11 -1
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/covariance/incremental_covariance.py +64 -13
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/covariance/tests/test_incremental_covariance.py +35 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/decomposition/pca.py +25 -1
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/decomposition/tests/test_pca.py +4 -2
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/dispatcher.py +109 -1
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/ensemble/_forest.py +121 -57
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/ensemble/tests/test_forest.py +7 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/glob/dispatcher.py +16 -2
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/linear_model/coordinate_descent.py +13 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/linear_model/incremental_linear.py +102 -25
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/linear_model/linear.py +25 -39
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/linear_model/logistic_regression.py +92 -74
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/linear_model/ridge.py +7 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/linear_model/tests/test_incremental_linear.py +10 -10
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/linear_model/tests/test_linear.py +30 -5
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/linear_model/tests/test_logreg.py +45 -3
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/manifold/t_sne.py +21 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/metrics/pairwise.py +5 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/metrics/ranking.py +3 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/model_selection/split.py +3 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/neighbors/_lof.py +9 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/neighbors/common.py +45 -1
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/neighbors/knn_classification.py +1 -20
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/neighbors/knn_regression.py +25 -20
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/neighbors/knn_unsupervised.py +31 -7
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/preview/__init__.py +1 -1
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/preview/decomposition/__init__.py +19 -0
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/preview/decomposition/incremental_pca.py +228 -0
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
- scikit_learn_intelex-2024.5.0.data/data/Lib/site-packages/sklearnex/manifold/t_sne.py → scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/preview/linear_model/__init__.py +19 -17
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/preview/linear_model/ridge.py +419 -0
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +163 -0
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/svm/_common.py +328 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/svm/nusvc.py +40 -4
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/svm/nusvr.py +31 -2
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/svm/svc.py +40 -4
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/svm/svr.py +31 -2
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/svm/tests/test_svm.py +12 -20
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/tests/_utils.py +328 -0
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/tests/_utils_spmd.py +185 -0
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/tests/test_common.py +54 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/tests/test_config.py +4 -0
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/tests/test_memory_usage.py +290 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/tests/test_monkeypatch.py +12 -4
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/tests/test_patching.py +21 -25
- scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/tests/test_run_to_run_stability.py +295 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/utils/_namespace.py +1 -1
- {scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/METADATA +5 -2
- scikit_learn_intelex-2024.7.0.dist-info/RECORD +122 -0
- {scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/WHEEL +1 -1
- scikit_learn_intelex-2024.5.0.data/data/Lib/site-packages/sklearnex/_device_offload.py +0 -257
- scikit_learn_intelex-2024.5.0.data/data/Lib/site-packages/sklearnex/basic_statistics/basic_statistics.py +0 -17
- scikit_learn_intelex-2024.5.0.data/data/Lib/site-packages/sklearnex/svm/_common.py +0 -185
- scikit_learn_intelex-2024.5.0.data/data/Lib/site-packages/sklearnex/tests/_utils.py +0 -173
- scikit_learn_intelex-2024.5.0.data/data/Lib/site-packages/sklearnex/tests/test_memory_usage.py +0 -231
- scikit_learn_intelex-2024.5.0.data/data/Lib/site-packages/sklearnex/tests/test_run_to_run_stability_tests.py +0 -428
- scikit_learn_intelex-2024.5.0.dist-info/RECORD +0 -104
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/__main__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/_utils.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/basic_statistics/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/basic_statistics/incremental_basic_statistics.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/cluster/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/covariance/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/decomposition/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/doc/third-party-programs.txt +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/ensemble/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/glob/__main__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/linear_model/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/linear_model/logistic_path.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/manifold/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/manifold/tests/test_tsne.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/metrics/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/metrics/tests/test_metrics.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/model_selection/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/model_selection/tests/test_model_selection.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/neighbors/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/neighbors/tests/test_neighbors.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/preview/cluster/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/preview/cluster/_common.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/preview/cluster/k_means.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/preview/covariance/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/preview/covariance/covariance.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/preview/covariance/tests/test_covariance.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/spmd/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/spmd/basic_statistics/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/spmd/basic_statistics/basic_statistics.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/spmd/cluster/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/spmd/cluster/dbscan.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/spmd/cluster/kmeans.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/spmd/covariance/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/spmd/covariance/covariance.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/spmd/decomposition/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/spmd/decomposition/pca.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/spmd/ensemble/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/spmd/ensemble/forest.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/spmd/linear_model/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/spmd/linear_model/linear_model.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/spmd/linear_model/logistic_regression.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/spmd/neighbors/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/spmd/neighbors/neighbors.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/svm/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/tests/test_n_jobs_support.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/tests/test_parallel.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/utils/__init__.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/utils/parallel.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/utils/tests/test_finite.py +0 -0
- {scikit_learn_intelex-2024.5.0.data → scikit_learn_intelex-2024.7.0.data}/data/Lib/site-packages/sklearnex/utils/validation.py +0 -0
- {scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/LICENSE.txt +0 -0
- {scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# ==============================================================================
|
|
2
|
+
# Copyright 2024 Intel Corporation
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# ==============================================================================
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
from numpy.testing import assert_allclose
|
|
19
|
+
from sklearn.datasets import make_blobs, make_classification, make_regression
|
|
20
|
+
from sklearn.model_selection import train_test_split
|
|
21
|
+
|
|
22
|
+
from onedal.tests.utils._dataframes_support import _as_numpy
|
|
23
|
+
|
|
24
|
+
# Optional SPMD test dependencies: dpctl provides the SYCL queue and the GPU
# device query, mpi4py provides the communicator used by the helpers below.
try:
    import dpctl
    from dpctl import SyclQueue
    from mpi4py import MPI

    mpi_libs_available = True
    gpu_is_available = dpctl.has_gpu_devices()
except ImportError:  # ModuleNotFoundError is a subclass, so it is covered too
    mpi_libs_available = False
    # Bind the flag on the failure path as well, so it is always defined at
    # module level instead of only existing when every import succeeded.
    gpu_is_available = False

# True only when the libraries imported and dpctl reports a GPU device.
_mpi_libs_and_gpu_available = mpi_libs_available and gpu_is_available
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _get_local_tensor(full_data):
    """Return the part of ``full_data`` that belongs to the calling rank.

    Each rank calls this with the same full array and receives its own
    contiguous slice along the first axis.

    Args:
        full_data (numpy or dpctl array): The entire set of data

    Returns:
        local_data (numpy or dpctl array): The subset of data used by the rank
    """

    # a GPU queue is constructed here; it is not used further in this helper
    q = SyclQueue("gpu")

    # communicator details: index of this rank and total number of ranks
    world = MPI.COMM_WORLD
    rank = world.Get_rank()
    size = world.Get_size()

    # slice boundaries for this rank, computed with integer division so that
    # consecutive ranks receive adjacent, non-overlapping slices
    n_rows = full_data.shape[0]
    first = rank * n_rows // size
    last = (1 + rank) * n_rows // size

    return full_data[first:last]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _generate_regression_data(n_samples, n_features, dtype=np.float64, random_state=42):
    """Generate regression data and divide it between train and test.

    Args:
        n_samples: Number of samples requested from ``make_regression``
        n_features: Number of features requested from ``make_regression``
        dtype: Type that both the features and the targets are cast to
        random_state: Seed passed to the generator and to the split

    Returns:
        X_train, X_test, y_train, y_test
    """
    features, targets = make_regression(
        n_samples=n_samples, n_features=n_features, random_state=random_state
    )
    # cast before splitting so that all four returned pieces share ``dtype``
    features = features.astype(dtype)
    targets = targets.astype(dtype)
    train_X, test_X, train_y, test_y = train_test_split(
        features, targets, random_state=random_state
    )
    return train_X, test_X, train_y, test_y
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _generate_classification_data(
    n_samples, n_features, n_classes=2, dtype=np.float64, random_state=42
):
    """Generate classification data and divide it between train and test.

    Args:
        n_samples: Number of samples requested from ``make_classification``
        n_features: Number of features requested from ``make_classification``
        n_classes: Number of classes; also determines ``n_informative``
        dtype: Type that both the features and the labels are cast to
        random_state: Seed passed to the generator and to the split

    Returns:
        X_train, X_test, y_train, y_test
    """
    # the informative feature count is derived from the class count
    informative = int(0.5 * n_classes + 1)
    features, labels = make_classification(
        n_samples=n_samples,
        n_features=n_features,
        n_classes=n_classes,
        n_informative=informative,
        random_state=random_state,
    )
    # labels are cast to ``dtype`` as well, not only the features
    features = features.astype(dtype)
    labels = labels.astype(dtype)
    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, random_state=random_state
    )
    return train_X, test_X, train_y, test_y
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _generate_statistic_data(n_samples, n_features, dtype=np.float64, random_state=42):
    """Generate an ``(n_samples, n_features)`` array of uniform samples.

    Samples are drawn from a generator seeded with ``random_state`` using
    ``low=-0.3`` and ``high=+0.7``, then cast to ``dtype``.

    Returns:
        data (numpy array): The generated samples
    """
    rng = np.random.default_rng(random_state)
    shape = (n_samples, n_features)
    samples = rng.uniform(low=-0.3, high=+0.7, size=shape)
    return samples.astype(dtype)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _generate_clustering_data(
    n_samples, n_features, centers=None, dtype=np.float64, random_state=42
):
    """Generate clustering data and divide it between train and test.

    Args:
        n_samples: Number of samples requested from ``make_blobs``
        n_features: Number of features requested from ``make_blobs``
        centers: Forwarded unchanged to ``make_blobs``
        dtype: Type that the generated points are cast to
        random_state: Seed passed to the generator and to the split

    Returns:
        X_train, X_test
    """
    # the labels returned by make_blobs are not needed here
    points, _ = make_blobs(
        n_samples=n_samples,
        centers=centers,
        n_features=n_features,
        random_state=random_state,
    )
    points = points.astype(dtype)
    train_X, test_X = train_test_split(points, random_state=random_state)
    return train_X, test_X
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _spmd_assert_allclose(spmd_result, batch_result, **kwargs):
    """Compare a rank-local spmd result with the matching part of the batch result.

    Called on each rank: the batch result is first reduced to the chunk that
    corresponds to the calling rank, then both sides are converted to numpy
    and compared with ``assert_allclose``.

    Args:
        spmd_result (numpy or dpctl array): The result for the subset of data on the rank the function is called from, computed by the spmd estimator
        batch_result (numpy array): The result for all data, computed by the batch estimator
        **kwargs: Forwarded to ``assert_allclose``

    Raises:
        AssertionError: If all results are not adequately close.
    """

    # keep only the chunk of the batch result that matches this rank
    batch_chunk = _get_local_tensor(batch_result)

    actual = _as_numpy(spmd_result)
    expected = _as_numpy(batch_chunk)
    assert_allclose(actual, expected, **kwargs)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _assert_unordered_allclose(spmd_result, batch_result, localize=False, **kwargs):
    """Check that spmd and batch results hold the same rows, in any order.

    Useful for results such as KMeans centers or KNN neighbors, where order
    does not matter. Both inputs are reordered by the norm of their rows
    before the comparison. The batch result can optionally be localized first.

    Args:
        spmd_result (numpy or dpctl array): Result computed by the spmd estimator
        batch_result (numpy array): Result computed by batch estimator
        localize (bool): Whether or not spmd result is specific to the rank, in which case batch result needs to be localized
        **kwargs: Forwarded to ``assert_allclose``

    Raises:
        AssertionError: If results do not match.
    """

    def _ordered_by_norm(rows):
        # reorder by ascending norm taken along axis 1
        return rows[np.argsort(np.linalg.norm(rows, axis=1))]

    spmd_sorted = _ordered_by_norm(spmd_result)

    # when the spmd result is rank-specific, compare against this rank's chunk
    reference = _get_local_tensor(batch_result) if localize else batch_result
    batch_sorted = _ordered_by_norm(reference)

    assert_allclose(_as_numpy(spmd_sorted), batch_sorted, **kwargs)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _assert_kmeans_labels_allclose(
    spmd_labels, batch_labels, spmd_centers, batch_centers, **kwargs
):
    """Check that spmd and batch labels agree, even if cluster indices don't match.

    Raw labels may differ between spmd and batch, so each label is used to
    look up its cluster center and the looked-up centers are compared instead.

    Args:
        spmd_labels (numpy or dpctl array): The labels for the subset of data on the rank the function is called from, computed by the spmd estimator
        batch_labels (numpy array): The labels for all data, computed by the batch estimator
        spmd_centers (numpy or dpctl array): Centers computed by the spmd estimator
        batch_centers (numpy array): Centers computed by batch estimator
        **kwargs: Forwarded to ``assert_allclose``

    Raises:
        AssertionError: If clusters are not correctly assigned.
    """

    # batch labels cover all data; keep the part that matches this rank
    batch_labels_here = _get_local_tensor(batch_labels)

    # map every label to the center it refers to on each side
    spmd_assigned = spmd_centers[_as_numpy(spmd_labels)]
    batch_assigned = batch_centers[batch_labels_here]

    assert_allclose(spmd_assigned, batch_assigned, **kwargs)
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# ==============================================================================
|
|
2
|
+
# Copyright 2024 Intel Corporation
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# ==============================================================================
|
|
16
|
+
|
|
17
|
+
import os
|
|
18
|
+
from glob import glob
|
|
19
|
+
|
|
20
|
+
import pytest
|
|
21
|
+
|
|
22
|
+
# Path fragments exempt from the ``target_offload`` ban checked by
# ``test_target_offload_ban``: a file is removed from the list of offenders
# when its reported path contains any of these substrings.
ALLOWED_LOCATIONS = [
    "_config.py",
    "_device_offload.py",
    "test",
    "svc.py",
    "svm" + os.sep + "_common.py",
]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_target_offload_ban():
    """Block the use of target_offload in sklearnex files.

    Offloading computation to devices via target_offload should only occur
    externally, and not within the architecture of the sklearnex classes.
    This is for clarity, traceability and maintainability.

    Every ``*.py`` file under the installed sklearnex package is searched for
    the text ``target_offload``; files whose path contains an entry of
    ``ALLOWED_LOCATIONS`` are exempt.

    Raises:
        AssertionError: If a non-exempt file contains ``target_offload``.
    """
    from sklearnex import __file__ as loc

    path = loc.replace("__init__.py", "")
    files = [y for x in os.walk(path) for y in glob(os.path.join(x[0], "*.py"))]

    output = []

    for f in files:
        # The context manager closes each handle promptly. The explicit
        # encoding keeps the read independent of the platform locale.
        with open(f, "r", encoding="utf-8") as source:
            if "target_offload" in source.read():
                output.append(f.replace(path, "sklearnex" + os.sep))

    # drop the files that are allowed to mention target_offload
    for allowed in ALLOWED_LOCATIONS:
        output = [i for i in output if allowed not in i]

    output = "\n".join(output)
    assert output == "", f"target_offload is being used in: \n{output}"
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
import sklearn
|
|
18
18
|
|
|
19
|
+
import onedal
|
|
19
20
|
import sklearnex
|
|
20
21
|
|
|
21
22
|
|
|
@@ -33,7 +34,10 @@ def test_set_config_works():
|
|
|
33
34
|
)
|
|
34
35
|
|
|
35
36
|
config = sklearnex.get_config()
|
|
37
|
+
onedal_config = onedal._config._get_config()
|
|
36
38
|
assert config["target_offload"] == "cpu:0"
|
|
37
39
|
assert config["allow_fallback_to_host"]
|
|
38
40
|
assert config["assume_finite"]
|
|
41
|
+
assert onedal_config["target_offload"] == "cpu:0"
|
|
42
|
+
assert onedal_config["allow_fallback_to_host"]
|
|
39
43
|
sklearnex.set_config(**default_config)
|
scikit_learn_intelex-2024.7.0.data/data/Lib/site-packages/sklearnex/tests/test_memory_usage.py
ADDED
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
# ==============================================================================
|
|
2
|
+
# Copyright 2021 Intel Corporation
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# ==============================================================================
|
|
16
|
+
|
|
17
|
+
import gc
|
|
18
|
+
import logging
|
|
19
|
+
import os
|
|
20
|
+
import tracemalloc
|
|
21
|
+
import types
|
|
22
|
+
import warnings
|
|
23
|
+
from inspect import isclass
|
|
24
|
+
|
|
25
|
+
import numpy as np
|
|
26
|
+
import pandas as pd
|
|
27
|
+
import pytest
|
|
28
|
+
from scipy.stats import pearsonr
|
|
29
|
+
from sklearn.base import BaseEstimator, clone
|
|
30
|
+
from sklearn.datasets import make_classification
|
|
31
|
+
from sklearn.model_selection import KFold
|
|
32
|
+
|
|
33
|
+
from onedal import _is_dpc_backend
|
|
34
|
+
from onedal.tests.utils._dataframes_support import (
|
|
35
|
+
_convert_to_dataframe,
|
|
36
|
+
get_dataframes_and_queues,
|
|
37
|
+
)
|
|
38
|
+
from onedal.tests.utils._device_selection import get_queues, is_dpctl_available
|
|
39
|
+
from sklearnex import config_context
|
|
40
|
+
from sklearnex.tests._utils import PATCHED_FUNCTIONS, PATCHED_MODELS, SPECIAL_INSTANCES
|
|
41
|
+
from sklearnex.utils import get_namespace
|
|
42
|
+
|
|
43
|
+
if _is_dpc_backend:
|
|
44
|
+
from onedal import _backend
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Names left out of the CPU memory-leak test: CPU_ESTIMATORS is built from
# every patched model, special instance and function whose key is NOT listed
# here. The trailing comment on each entry records why it is excluded.
CPU_SKIP_LIST = (
    "TSNE",  # too slow for using in testing on common data size
    "config_context",  # does not malloc
    "get_config",  # does not malloc
    "set_config",  # does not malloc
    "SVC(probability=True)",  # memory leak fortran numpy (investigate _fit_proba)
    "NuSVC(probability=True)",  # memory leak fortran numpy (investigate _fit_proba)
    "IncrementalEmpiricalCovariance",  # dataframe_f issues
    "IncrementalLinearRegression",  # TODO fix memory leak issue in private CI for data_shape = (1000, 100), data_transform_function = dataframe_f
    "IncrementalPCA",  # TODO fix memory leak issue in private CI for data_shape = (1000, 100), data_transform_function = dataframe_f
    "LogisticRegression(solver='newton-cg')",  # memory leak fortran (1000, 100)
)
|
|
59
|
+
|
|
60
|
+
# Names left out of the GPU memory-leak test: GPU_ESTIMATORS is built from
# every patched model and special instance whose key is NOT listed here.
# The trailing comment on each entry records why it is excluded.
GPU_SKIP_LIST = (
    "TSNE",  # too slow for using in testing on common data size
    "RandomForestRegressor",  # too slow for using in testing on common data size
    "KMeans",  # does not support GPU offloading
    "config_context",  # does not malloc
    "get_config",  # does not malloc
    "set_config",  # does not malloc
    "Ridge",  # does not support GPU offloading (fails silently)
    "ElasticNet",  # does not support GPU offloading (fails silently)
    "Lasso",  # does not support GPU offloading (fails silently)
    "SVR",  # does not support GPU offloading (fails silently)
    "NuSVR",  # does not support GPU offloading (fails silently)
    "NuSVC",  # does not support GPU offloading (fails silently)
    "LogisticRegression",  # default parameters not supported, see solver=newton-cg
    "NuSVC(probability=True)",  # does not support GPU offloading (fails silently)
    "IncrementalLinearRegression",  # issue with potrf with the specific dataset
    "LinearRegression",  # issue with potrf with the specific dataset
)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def gen_functions(functions):
    """Return a copy of ``functions`` whose special entries take ``(x, y)``.

    The input mapping is left untouched. Three entries are popped from the
    copy and re-inserted (so they end up at the end, in this order):

    * ``"roc_auc_score"`` - wrapped to be called with the labels as both
      arguments.
    * ``"pairwise_distances"`` - replaced by two entries, one per metric
      (``cosine`` and ``correlation``), each called on ``x`` only.
    * ``"_assert_all_finite"`` - wrapped to check ``x`` and ``y`` in turn and
      return both results as a list.

    Raises ``KeyError`` if any of the three names is missing.
    """
    wrapped = functions.copy()

    auc = wrapped.pop("roc_auc_score")
    # Labels are passed twice: the 2-D data ``x`` is ignored by this wrapper.
    wrapped["roc_auc_score"] = lambda x, y: auc(y, y)

    distances = wrapped.pop("pairwise_distances")

    def with_metric(metric):
        # A lambda is returned on purpose so the callable keeps a
        # ``__name__`` attribute like the other wrappers.
        return lambda x, y: distances(x, metric=metric)

    for metric in ("cosine", "correlation"):
        wrapped[f"pairwise_distances(metric='{metric}')"] = with_metric(metric)

    finite_check = wrapped.pop("_assert_all_finite")
    wrapped["_assert_all_finite"] = lambda x, y: [
        finite_check(arr) for arr in (x, y)
    ]
    return wrapped
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# Patched functions, adapted by gen_functions so each is callable as f(x, y).
FUNCTIONS = gen_functions(PATCHED_FUNCTIONS)

# Everything exercised by the CPU memory-leak test: patched models, special
# instances and wrapped functions, minus the names in CPU_SKIP_LIST.
CPU_ESTIMATORS = {
    k: v
    for k, v in {**PATCHED_MODELS, **SPECIAL_INSTANCES, **FUNCTIONS}.items()
    if k not in CPU_SKIP_LIST
}

# Everything exercised by the GPU memory-leak test: patched models and special
# instances (no plain functions), minus the names in GPU_SKIP_LIST.
GPU_ESTIMATORS = {
    k: v
    for k, v in {**PATCHED_MODELS, **SPECIAL_INSTANCES}.items()
    if k not in GPU_SKIP_LIST
}
|
|
115
|
+
|
|
116
|
+
# (n_samples, n_features) pairs the memory tests are parametrized over.
data_shapes = [
    pytest.param((1000, 100), id="(1000, 100)"),
    pytest.param((2000, 50), id="(2000, 50)"),
]

# Allowed extra memory after a run, as a fraction of the input data size.
EXTRA_MEMORY_THRESHOLD = 0.15
# Number of folds passed to KFold, i.e. fit/inference iterations per test.
N_SPLITS = 10
# Maps the "order" test parameter to the numpy function applied to the data.
ORDER_DICT = {"F": np.asfortranarray, "C": np.ascontiguousarray}
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def gen_clsf_data(n_samples, n_features):
    """Generate a fixed-seed two-class dataset and report its size in bytes.

    Returns a ``(data, label, n_bytes)`` tuple where ``n_bytes`` is the
    combined buffer size of both arrays.
    """
    features, targets = make_classification(
        n_classes=2,
        n_samples=n_samples,
        n_features=n_features,
        random_state=777,
    )
    # ``nbytes`` is the element count multiplied by the element size.
    total_bytes = features.nbytes + targets.nbytes
    return features, targets, total_bytes
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def get_traced_memory(queue=None):
    """Return the amount of memory currently in use.

    When the DPC++ backend is present and ``queue`` targets a GPU device the
    value comes from ``_backend.get_used_memory(queue)``; in every other case
    it is the current size reported by ``tracemalloc``.
    """
    use_device_counter = _is_dpc_backend and queue and queue.sycl_device.is_gpu
    if not use_device_counter:
        # First element of the tracemalloc pair is the current traced size.
        return tracemalloc.get_traced_memory()[0]
    return _backend.get_used_memory(queue)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def take(x, index, axis=0, queue=None):
    """Select the entries of ``x`` at ``index`` along ``axis``.

    ``get_namespace`` decides the path: when it flags ``x`` as an array API
    object the index is first converted with ``xp.asarray(..., device=queue)``
    and ``xp.take`` is used; otherwise ``x.take`` is called directly.
    """
    xp, uses_array_api = get_namespace(x)
    if not uses_array_api:
        return x.take(index, axis=axis)
    return xp.take(x, xp.asarray(index, device=queue), axis=axis)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def split_train_inference(kf, x, y, estimator, queue=None):
    """Fit and run inference once per fold, recording memory after each fold.

    ``estimator`` may be a ``BaseEstimator`` subclass (instantiated with its
    defaults), a ``BaseEstimator`` instance (cloned), or any other callable,
    which is then invoked as ``estimator(x_train, y_train)``.

    For estimators, the first available of ``predict``, ``transform`` and
    ``kneighbors`` is called on the test split after fitting.

    Returns the list of ``get_traced_memory(queue)`` readings, one per fold,
    each taken after the fold's data and model have been deleted.
    """
    readings = []
    for train_index, test_index in kf.split(x):
        x_train = take(x, train_index, queue=queue)
        y_train = take(y, train_index, queue=queue)
        x_test = take(x, test_index, queue=queue)
        y_test = take(y, test_index, queue=queue)

        if isclass(estimator) and issubclass(estimator, BaseEstimator):
            model = estimator()
        elif isinstance(estimator, BaseEstimator):
            model = clone(estimator)
        else:
            model = None

        if model is None:
            # Plain function: called on the training split only.
            estimator(x_train, y_train)
        else:
            model.fit(x_train, y_train)
            for inference in ("predict", "transform", "kneighbors"):
                if hasattr(model, inference):
                    # Call without binding the method to a local, so nothing
                    # keeps the model alive past the ``del`` below.
                    getattr(model, inference)(x_test)
                    break

        # Drop every per-fold object before measuring.
        del model, x_train, x_test, y_train, y_test
        readings.append(get_traced_memory(queue))
    return readings
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _kfold_function_template(estimator, dataframe, data_shape, queue=None, func=None):
    """Run a K-fold fit/inference loop and assert that memory is released.

    Parameters
    ----------
    estimator : BaseEstimator subclass, BaseEstimator instance or callable
        Object under test; forwarded to ``split_train_inference``.
    dataframe : object
        Passed as ``target_df`` to ``_convert_to_dataframe`` for both X and y.
    data_shape : tuple
        ``(n_samples, n_features)`` of the generated dataset.
    queue : optional
        SYCL queue used for data conversion and memory readings.
    func : callable, optional
        Applied to ``X`` before conversion (e.g. to change the memory layout).

    Raises
    ------
    AssertionError
        When ``queue`` is None or a CPU device and the memory still held
        after garbage collection is at least ``EXTRA_MEMORY_THRESHOLD`` times
        the input data size.

    Notes
    -----
    Warnings are logged (not raised) when memory grows steadily across folds
    or when the pre-collection increase reaches the threshold.
    """
    tracemalloc.start()
    try:
        n_samples, n_features = data_shape
        X, y, data_memory_size = gen_clsf_data(n_samples, n_features)
        kf = KFold(n_splits=N_SPLITS)
        if func:
            X = func(X)

        X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
        y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)

        mem_before = get_traced_memory(queue)
        mem_tracks = split_train_inference(kf, X, y, estimator, queue=queue)
        # Per-fold change in memory between consecutive readings.
        mem_iter_diffs = np.array(mem_tracks[1:]) - np.array(mem_tracks[:-1])
        mem_incr_mean, mem_incr_std = mem_iter_diffs.mean(), mem_iter_diffs.std()
        mem_incr_mean, mem_incr_std = round(mem_incr_mean), round(mem_incr_std)
        with warnings.catch_warnings():
            # In the case that the memory usage is constant, this will raise
            # a ConstantInputWarning error in pearsonr from scipy, this can
            # be ignored.
            warnings.filterwarnings(
                "ignore",
                message="An input array is constant; the correlation coefficient is not defined",
            )
            mem_iter_corr, _ = pearsonr(mem_tracks, list(range(len(mem_tracks))))

        if mem_iter_corr > 0.95:
            logging.warning(
                "Memory usage is steadily increasing with iterations "
                "(Pearson correlation coefficient between "
                f"memory tracks and iterations is {mem_iter_corr})\n"
                "Memory usage increase per iteration: "
                f"{mem_incr_mean}±{mem_incr_std} bytes"
            )
        mem_before_gc = get_traced_memory(queue)
        mem_diff = mem_before_gc - mem_before
        if isinstance(estimator, BaseEstimator):
            name = str(estimator)
        else:
            name = estimator.__name__

        message = (
            "Size of extra allocated memory {} using garbage collector "
            f"is greater than {EXTRA_MEMORY_THRESHOLD * 100}% of input data"
            f"\n\tAlgorithm: {name}"
            f"\n\tInput data size: {data_memory_size} bytes"
            "\n\tExtra allocated memory size: {} bytes"
            " / {} %"
        )
        if mem_diff >= EXTRA_MEMORY_THRESHOLD * data_memory_size:
            logging.warning(
                message.format(
                    "before", mem_diff, round((mem_diff) / data_memory_size * 100, 2)
                )
            )
        gc.collect()
        mem_after = get_traced_memory(queue)
    finally:
        # Stop tracing even when data generation, fit or inference raises;
        # otherwise tracemalloc would stay enabled for every following test.
        tracemalloc.stop()
    mem_diff = mem_after - mem_before

    # GPU offloading with SYCL contains a program/kernel cache which should
    # be controllable via a KernelProgramCache object in the SYCL context.
    # The programs and kernels are stored on the GPU, but cannot be cleared
    # as this class is not available for access in all oneDAL DPC++ runtimes.
    # Therefore, until this is implemented this test must be skipped for gpu
    # as it looks like a memory leak (at least there is no way to discern a
    # leak on the first run).
    if queue is None or queue.sycl_device.is_cpu:
        assert mem_diff < EXTRA_MEMORY_THRESHOLD * data_memory_size, message.format(
            "after", mem_diff, round((mem_diff) / data_memory_size * 100, 2)
        )
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
@pytest.mark.parametrize("order", ["F", "C"])
@pytest.mark.parametrize(
    "dataframe,queue", get_dataframes_and_queues("numpy,pandas,dpctl", "cpu")
)
@pytest.mark.parametrize("estimator", CPU_ESTIMATORS.keys())
@pytest.mark.parametrize("data_shape", data_shapes)
def test_memory_leaks(estimator, dataframe, queue, order, data_shape):
    """Check every CPU estimator/function for leaks across layouts and inputs."""
    layout_func = ORDER_DICT[order]
    offloaded = queue is not None
    if offloaded and estimator == "_assert_all_finite":
        pytest.skip(f"{estimator} is not designed for device offloading")

    target = CPU_ESTIMATORS[estimator]
    _kfold_function_template(target, dataframe, data_shape, queue, layout_func)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
@pytest.mark.skipif(
    os.getenv("ZES_ENABLE_SYSMAN") is None or not is_dpctl_available("gpu"),
    reason="SYCL device memory leak check requires the level zero sysman",
)
@pytest.mark.parametrize("queue", get_queues("gpu"))
@pytest.mark.parametrize("estimator", GPU_ESTIMATORS.keys())
@pytest.mark.parametrize("order", ["F", "C"])
@pytest.mark.parametrize("data_shape", data_shapes)
def test_gpu_memory_leaks(estimator, queue, order, data_shape):
    """Run the K-fold memory check for each GPU estimator with offloading on."""
    layout_func = ORDER_DICT[order]
    is_extra_trees = "ExtraTrees" in estimator
    if is_extra_trees and data_shape == (2000, 50):
        pytest.skip("Avoid a segmentation fault in Extra Trees algorithms")

    target = GPU_ESTIMATORS[estimator]
    with config_context(target_offload=queue):
        _kfold_function_template(target, None, data_shape, queue, layout_func)
|
|
@@ -208,10 +208,11 @@ def test_preview_namespace():
|
|
|
208
208
|
from sklearn.cluster import DBSCAN
|
|
209
209
|
from sklearn.decomposition import PCA
|
|
210
210
|
from sklearn.ensemble import RandomForestClassifier
|
|
211
|
-
from sklearn.linear_model import LinearRegression
|
|
211
|
+
from sklearn.linear_model import LinearRegression, Ridge
|
|
212
212
|
from sklearn.svm import SVC
|
|
213
213
|
|
|
214
214
|
return (
|
|
215
|
+
Ridge(),
|
|
215
216
|
LinearRegression(),
|
|
216
217
|
PCA(),
|
|
217
218
|
DBSCAN(),
|
|
@@ -226,9 +227,12 @@ def test_preview_namespace():
|
|
|
226
227
|
|
|
227
228
|
assert _is_preview_enabled()
|
|
228
229
|
|
|
229
|
-
lr, pca, dbscan, svc, rfc = get_estimators()
|
|
230
|
+
ridge, lr, pca, dbscan, svc, rfc = get_estimators()
|
|
230
231
|
assert "sklearnex" in rfc.__module__
|
|
231
232
|
|
|
233
|
+
if daal_check_version((2024, "P", 600)):
|
|
234
|
+
assert "sklearnex.preview" in ridge.__module__
|
|
235
|
+
|
|
232
236
|
if daal_check_version((2023, "P", 100)):
|
|
233
237
|
assert "sklearnex" in lr.__module__
|
|
234
238
|
else:
|
|
@@ -242,7 +246,8 @@ def test_preview_namespace():
|
|
|
242
246
|
sklearnex.unpatch_sklearn()
|
|
243
247
|
|
|
244
248
|
# no patching behavior
|
|
245
|
-
lr, pca, dbscan, svc, rfc = get_estimators()
|
|
249
|
+
ridge, lr, pca, dbscan, svc, rfc = get_estimators()
|
|
250
|
+
assert "sklearn." in ridge.__module__ and "daal4py" not in ridge.__module__
|
|
246
251
|
assert "sklearn." in lr.__module__ and "daal4py" not in lr.__module__
|
|
247
252
|
assert "sklearn." in pca.__module__ and "daal4py" not in pca.__module__
|
|
248
253
|
assert "sklearn." in dbscan.__module__ and "daal4py" not in dbscan.__module__
|
|
@@ -254,7 +259,10 @@ def test_preview_namespace():
|
|
|
254
259
|
sklearnex.patch_sklearn()
|
|
255
260
|
assert not _is_preview_enabled()
|
|
256
261
|
|
|
257
|
-
lr, pca, dbscan, svc, rfc = get_estimators()
|
|
262
|
+
ridge, lr, pca, dbscan, svc, rfc = get_estimators()
|
|
263
|
+
|
|
264
|
+
assert "daal4py" in ridge.__module__
|
|
265
|
+
|
|
258
266
|
if daal_check_version((2023, "P", 100)):
|
|
259
267
|
assert "sklearnex" in lr.__module__
|
|
260
268
|
else:
|
|
@@ -43,6 +43,7 @@ from sklearnex.tests._utils import (
|
|
|
43
43
|
SPECIAL_INSTANCES,
|
|
44
44
|
UNPATCHED_FUNCTIONS,
|
|
45
45
|
UNPATCHED_MODELS,
|
|
46
|
+
call_method,
|
|
46
47
|
gen_dataset,
|
|
47
48
|
gen_models_info,
|
|
48
49
|
)
|
|
@@ -139,6 +140,9 @@ def test_standard_estimator_patching(caplog, dataframe, queue, dtype, estimator,
|
|
|
139
140
|
]:
|
|
140
141
|
pytest.skip(f"{estimator} does not support GPU queues")
|
|
141
142
|
|
|
143
|
+
if "NearestNeighbors" in estimator and "radius" in method:
|
|
144
|
+
pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex")
|
|
145
|
+
|
|
142
146
|
if estimator == "TSNE" and method == "fit_transform":
|
|
143
147
|
pytest.skip("TSNE.fit_transform is too slow for common testing")
|
|
144
148
|
elif (
|
|
@@ -148,30 +152,21 @@ def test_standard_estimator_patching(caplog, dataframe, queue, dtype, estimator,
|
|
|
148
152
|
and dtype in [np.uint32, np.uint64]
|
|
149
153
|
):
|
|
150
154
|
pytest.skip("Windows segmentation fault for Ridge.predict for unsigned ints")
|
|
151
|
-
elif estimator == "IncrementalLinearRegression" and
|
|
152
|
-
np.
|
|
153
|
-
|
|
154
|
-
np.int32,
|
|
155
|
-
np.int64,
|
|
156
|
-
np.uint8,
|
|
157
|
-
np.uint16,
|
|
158
|
-
np.uint32,
|
|
159
|
-
np.uint64,
|
|
160
|
-
]:
|
|
155
|
+
elif estimator == "IncrementalLinearRegression" and np.issubdtype(
|
|
156
|
+
dtype, np.integer
|
|
157
|
+
):
|
|
161
158
|
pytest.skip(
|
|
162
159
|
"IncrementalLinearRegression fails on oneDAL side with int types because dataset is filled by zeroes"
|
|
163
160
|
)
|
|
164
161
|
elif method and not hasattr(est, method):
|
|
165
162
|
pytest.skip(f"sklearn available_if prevents testing {estimator}.{method}")
|
|
166
163
|
|
|
167
|
-
X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)
|
|
164
|
+
X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)[0]
|
|
168
165
|
est.fit(X, y)
|
|
169
166
|
|
|
170
167
|
if method:
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
else:
|
|
174
|
-
est.score(X, y)
|
|
168
|
+
call_method(est, method, X, y)
|
|
169
|
+
|
|
175
170
|
assert all(
|
|
176
171
|
[
|
|
177
172
|
"running accelerated version" in i.message
|
|
@@ -190,23 +185,24 @@ def test_special_estimator_patching(caplog, dataframe, queue, dtype, estimator,
|
|
|
190
185
|
with caplog.at_level(logging.WARNING, logger="sklearnex"):
|
|
191
186
|
est = SPECIAL_INSTANCES[estimator]
|
|
192
187
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
188
|
+
if queue:
|
|
189
|
+
# Its not possible to get the dpnp/dpctl arrays to be in the proper dtype
|
|
190
|
+
if dtype == np.float16 and not queue.sycl_device.has_aspect_fp16:
|
|
191
|
+
pytest.skip("Hardware does not support fp16 SYCL testing")
|
|
192
|
+
elif dtype == np.float64 and not queue.sycl_device.has_aspect_fp64:
|
|
193
|
+
pytest.skip("Hardware does not support fp64 SYCL testing")
|
|
194
|
+
|
|
195
|
+
if "NearestNeighbors" in estimator and "radius" in method:
|
|
196
|
+
pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex")
|
|
198
197
|
|
|
199
|
-
X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)
|
|
198
|
+
X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)[0]
|
|
200
199
|
est.fit(X, y)
|
|
201
200
|
|
|
202
201
|
if method and not hasattr(est, method):
|
|
203
202
|
pytest.skip(f"sklearn available_if prevents testing {estimator}.{method}")
|
|
204
203
|
|
|
205
204
|
if method:
|
|
206
|
-
|
|
207
|
-
getattr(est, method)(X)
|
|
208
|
-
else:
|
|
209
|
-
est.score(X, y)
|
|
205
|
+
call_method(est, method, X, y)
|
|
210
206
|
|
|
211
207
|
assert all(
|
|
212
208
|
[
|