scikit-learn-intelex 2025.1.0__py310-none-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-learn-intelex might be problematic. Click here for more details.
- daal4py/__init__.py +73 -0
- daal4py/__main__.py +58 -0
- daal4py/_daal4py.cpython-310-x86_64-linux-gnu.so +0 -0
- daal4py/doc/third-party-programs.txt +424 -0
- daal4py/mb/__init__.py +19 -0
- daal4py/mb/model_builders.py +377 -0
- daal4py/mpi_transceiver.cpython-310-x86_64-linux-gnu.so +0 -0
- daal4py/sklearn/__init__.py +40 -0
- daal4py/sklearn/_n_jobs_support.py +248 -0
- daal4py/sklearn/_utils.py +245 -0
- daal4py/sklearn/cluster/__init__.py +20 -0
- daal4py/sklearn/cluster/dbscan.py +165 -0
- daal4py/sklearn/cluster/k_means.py +597 -0
- daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
- daal4py/sklearn/decomposition/__init__.py +19 -0
- daal4py/sklearn/decomposition/_pca.py +524 -0
- daal4py/sklearn/ensemble/AdaBoostClassifier.py +196 -0
- daal4py/sklearn/ensemble/GBTDAAL.py +337 -0
- daal4py/sklearn/ensemble/__init__.py +27 -0
- daal4py/sklearn/ensemble/_forest.py +1397 -0
- daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
- daal4py/sklearn/linear_model/__init__.py +29 -0
- daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
- daal4py/sklearn/linear_model/_linear.py +272 -0
- daal4py/sklearn/linear_model/_ridge.py +325 -0
- daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
- daal4py/sklearn/linear_model/linear.py +17 -0
- daal4py/sklearn/linear_model/logistic_loss.py +195 -0
- daal4py/sklearn/linear_model/logistic_path.py +1026 -0
- daal4py/sklearn/linear_model/ridge.py +17 -0
- daal4py/sklearn/linear_model/tests/test_linear.py +208 -0
- daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
- daal4py/sklearn/manifold/__init__.py +19 -0
- daal4py/sklearn/manifold/_t_sne.py +405 -0
- daal4py/sklearn/metrics/__init__.py +20 -0
- daal4py/sklearn/metrics/_pairwise.py +236 -0
- daal4py/sklearn/metrics/_ranking.py +210 -0
- daal4py/sklearn/model_selection/__init__.py +19 -0
- daal4py/sklearn/model_selection/_split.py +309 -0
- daal4py/sklearn/model_selection/tests/test_split.py +56 -0
- daal4py/sklearn/monkeypatch/__init__.py +0 -0
- daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
- daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
- daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
- daal4py/sklearn/monkeypatch/tests/test_patching.py +90 -0
- daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +117 -0
- daal4py/sklearn/neighbors/__init__.py +21 -0
- daal4py/sklearn/neighbors/_base.py +503 -0
- daal4py/sklearn/neighbors/_classification.py +139 -0
- daal4py/sklearn/neighbors/_regression.py +74 -0
- daal4py/sklearn/neighbors/_unsupervised.py +55 -0
- daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
- daal4py/sklearn/svm/__init__.py +19 -0
- daal4py/sklearn/svm/svm.py +734 -0
- daal4py/sklearn/utils/__init__.py +21 -0
- daal4py/sklearn/utils/base.py +75 -0
- daal4py/sklearn/utils/tests/test_utils.py +51 -0
- daal4py/sklearn/utils/validation.py +693 -0
- onedal/__init__.py +83 -0
- onedal/_config.py +54 -0
- onedal/_device_offload.py +222 -0
- onedal/_onedal_py_dpc.cpython-310-x86_64-linux-gnu.so +0 -0
- onedal/_onedal_py_host.cpython-310-x86_64-linux-gnu.so +0 -0
- onedal/_onedal_py_spmd_dpc.cpython-310-x86_64-linux-gnu.so +0 -0
- onedal/basic_statistics/__init__.py +20 -0
- onedal/basic_statistics/basic_statistics.py +107 -0
- onedal/basic_statistics/incremental_basic_statistics.py +160 -0
- onedal/basic_statistics/tests/test_basic_statistics.py +298 -0
- onedal/basic_statistics/tests/test_incremental_basic_statistics.py +196 -0
- onedal/cluster/__init__.py +27 -0
- onedal/cluster/dbscan.py +110 -0
- onedal/cluster/kmeans.py +564 -0
- onedal/cluster/kmeans_init.py +115 -0
- onedal/cluster/tests/test_dbscan.py +125 -0
- onedal/cluster/tests/test_kmeans.py +88 -0
- onedal/cluster/tests/test_kmeans_init.py +93 -0
- onedal/common/_base.py +38 -0
- onedal/common/_estimator_checks.py +47 -0
- onedal/common/_mixin.py +62 -0
- onedal/common/_policy.py +59 -0
- onedal/common/_spmd_policy.py +30 -0
- onedal/common/hyperparameters.py +125 -0
- onedal/common/tests/test_policy.py +76 -0
- onedal/covariance/__init__.py +20 -0
- onedal/covariance/covariance.py +125 -0
- onedal/covariance/incremental_covariance.py +146 -0
- onedal/covariance/tests/test_covariance.py +50 -0
- onedal/covariance/tests/test_incremental_covariance.py +122 -0
- onedal/datatypes/__init__.py +19 -0
- onedal/datatypes/_data_conversion.py +154 -0
- onedal/datatypes/tests/common.py +126 -0
- onedal/datatypes/tests/test_data.py +414 -0
- onedal/decomposition/__init__.py +20 -0
- onedal/decomposition/incremental_pca.py +204 -0
- onedal/decomposition/pca.py +186 -0
- onedal/decomposition/tests/test_incremental_pca.py +198 -0
- onedal/ensemble/__init__.py +29 -0
- onedal/ensemble/forest.py +727 -0
- onedal/ensemble/tests/test_random_forest.py +97 -0
- onedal/linear_model/__init__.py +27 -0
- onedal/linear_model/incremental_linear_model.py +258 -0
- onedal/linear_model/linear_model.py +329 -0
- onedal/linear_model/logistic_regression.py +249 -0
- onedal/linear_model/tests/test_incremental_linear_regression.py +168 -0
- onedal/linear_model/tests/test_incremental_ridge_regression.py +107 -0
- onedal/linear_model/tests/test_linear_regression.py +250 -0
- onedal/linear_model/tests/test_logistic_regression.py +95 -0
- onedal/linear_model/tests/test_ridge.py +95 -0
- onedal/neighbors/__init__.py +19 -0
- onedal/neighbors/neighbors.py +767 -0
- onedal/neighbors/tests/test_knn_classification.py +49 -0
- onedal/primitives/__init__.py +27 -0
- onedal/primitives/get_tree.py +25 -0
- onedal/primitives/kernel_functions.py +153 -0
- onedal/primitives/tests/test_kernel_functions.py +159 -0
- onedal/spmd/__init__.py +25 -0
- onedal/spmd/_base.py +30 -0
- onedal/spmd/basic_statistics/__init__.py +20 -0
- onedal/spmd/basic_statistics/basic_statistics.py +30 -0
- onedal/spmd/basic_statistics/incremental_basic_statistics.py +69 -0
- onedal/spmd/cluster/__init__.py +28 -0
- onedal/spmd/cluster/dbscan.py +23 -0
- onedal/spmd/cluster/kmeans.py +56 -0
- onedal/spmd/covariance/__init__.py +20 -0
- onedal/spmd/covariance/covariance.py +26 -0
- onedal/spmd/covariance/incremental_covariance.py +82 -0
- onedal/spmd/decomposition/__init__.py +20 -0
- onedal/spmd/decomposition/incremental_pca.py +117 -0
- onedal/spmd/decomposition/pca.py +26 -0
- onedal/spmd/ensemble/__init__.py +19 -0
- onedal/spmd/ensemble/forest.py +28 -0
- onedal/spmd/linear_model/__init__.py +21 -0
- onedal/spmd/linear_model/incremental_linear_model.py +97 -0
- onedal/spmd/linear_model/linear_model.py +30 -0
- onedal/spmd/linear_model/logistic_regression.py +38 -0
- onedal/spmd/neighbors/__init__.py +19 -0
- onedal/spmd/neighbors/neighbors.py +75 -0
- onedal/svm/__init__.py +19 -0
- onedal/svm/svm.py +556 -0
- onedal/svm/tests/test_csr_svm.py +351 -0
- onedal/svm/tests/test_nusvc.py +204 -0
- onedal/svm/tests/test_nusvr.py +210 -0
- onedal/svm/tests/test_svc.py +176 -0
- onedal/svm/tests/test_svr.py +243 -0
- onedal/tests/test_common.py +57 -0
- onedal/tests/utils/_dataframes_support.py +162 -0
- onedal/tests/utils/_device_selection.py +102 -0
- onedal/utils/__init__.py +49 -0
- onedal/utils/_array_api.py +81 -0
- onedal/utils/_dpep_helpers.py +56 -0
- onedal/utils/validation.py +440 -0
- scikit_learn_intelex-2025.1.0.dist-info/LICENSE.txt +202 -0
- scikit_learn_intelex-2025.1.0.dist-info/METADATA +231 -0
- scikit_learn_intelex-2025.1.0.dist-info/RECORD +280 -0
- scikit_learn_intelex-2025.1.0.dist-info/WHEEL +5 -0
- scikit_learn_intelex-2025.1.0.dist-info/top_level.txt +3 -0
- sklearnex/__init__.py +66 -0
- sklearnex/__main__.py +58 -0
- sklearnex/_config.py +116 -0
- sklearnex/_device_offload.py +126 -0
- sklearnex/_utils.py +132 -0
- sklearnex/basic_statistics/__init__.py +20 -0
- sklearnex/basic_statistics/basic_statistics.py +230 -0
- sklearnex/basic_statistics/incremental_basic_statistics.py +345 -0
- sklearnex/basic_statistics/tests/test_basic_statistics.py +270 -0
- sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +404 -0
- sklearnex/cluster/__init__.py +20 -0
- sklearnex/cluster/dbscan.py +197 -0
- sklearnex/cluster/k_means.py +395 -0
- sklearnex/cluster/tests/test_dbscan.py +38 -0
- sklearnex/cluster/tests/test_kmeans.py +159 -0
- sklearnex/conftest.py +82 -0
- sklearnex/covariance/__init__.py +19 -0
- sklearnex/covariance/incremental_covariance.py +398 -0
- sklearnex/covariance/tests/test_incremental_covariance.py +237 -0
- sklearnex/decomposition/__init__.py +19 -0
- sklearnex/decomposition/pca.py +425 -0
- sklearnex/decomposition/tests/test_pca.py +58 -0
- sklearnex/dispatcher.py +543 -0
- sklearnex/doc/third-party-programs.txt +424 -0
- sklearnex/ensemble/__init__.py +29 -0
- sklearnex/ensemble/_forest.py +2029 -0
- sklearnex/ensemble/tests/test_forest.py +135 -0
- sklearnex/glob/__main__.py +72 -0
- sklearnex/glob/dispatcher.py +101 -0
- sklearnex/linear_model/__init__.py +32 -0
- sklearnex/linear_model/coordinate_descent.py +30 -0
- sklearnex/linear_model/incremental_linear.py +482 -0
- sklearnex/linear_model/incremental_ridge.py +425 -0
- sklearnex/linear_model/linear.py +341 -0
- sklearnex/linear_model/logistic_regression.py +413 -0
- sklearnex/linear_model/ridge.py +24 -0
- sklearnex/linear_model/tests/test_incremental_linear.py +207 -0
- sklearnex/linear_model/tests/test_incremental_ridge.py +153 -0
- sklearnex/linear_model/tests/test_linear.py +167 -0
- sklearnex/linear_model/tests/test_logreg.py +134 -0
- sklearnex/manifold/__init__.py +19 -0
- sklearnex/manifold/t_sne.py +21 -0
- sklearnex/manifold/tests/test_tsne.py +26 -0
- sklearnex/metrics/__init__.py +23 -0
- sklearnex/metrics/pairwise.py +22 -0
- sklearnex/metrics/ranking.py +20 -0
- sklearnex/metrics/tests/test_metrics.py +39 -0
- sklearnex/model_selection/__init__.py +21 -0
- sklearnex/model_selection/split.py +22 -0
- sklearnex/model_selection/tests/test_model_selection.py +34 -0
- sklearnex/neighbors/__init__.py +27 -0
- sklearnex/neighbors/_lof.py +236 -0
- sklearnex/neighbors/common.py +310 -0
- sklearnex/neighbors/knn_classification.py +231 -0
- sklearnex/neighbors/knn_regression.py +207 -0
- sklearnex/neighbors/knn_unsupervised.py +178 -0
- sklearnex/neighbors/tests/test_neighbors.py +82 -0
- sklearnex/preview/__init__.py +17 -0
- sklearnex/preview/covariance/__init__.py +19 -0
- sklearnex/preview/covariance/covariance.py +138 -0
- sklearnex/preview/covariance/tests/test_covariance.py +66 -0
- sklearnex/preview/decomposition/__init__.py +19 -0
- sklearnex/preview/decomposition/incremental_pca.py +233 -0
- sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
- sklearnex/preview/linear_model/__init__.py +19 -0
- sklearnex/preview/linear_model/ridge.py +424 -0
- sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
- sklearnex/spmd/__init__.py +25 -0
- sklearnex/spmd/basic_statistics/__init__.py +20 -0
- sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
- sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
- sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
- sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +307 -0
- sklearnex/spmd/cluster/__init__.py +30 -0
- sklearnex/spmd/cluster/dbscan.py +50 -0
- sklearnex/spmd/cluster/kmeans.py +21 -0
- sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
- sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
- sklearnex/spmd/covariance/__init__.py +20 -0
- sklearnex/spmd/covariance/covariance.py +21 -0
- sklearnex/spmd/covariance/incremental_covariance.py +37 -0
- sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
- sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
- sklearnex/spmd/decomposition/__init__.py +20 -0
- sklearnex/spmd/decomposition/incremental_pca.py +30 -0
- sklearnex/spmd/decomposition/pca.py +21 -0
- sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
- sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
- sklearnex/spmd/ensemble/__init__.py +19 -0
- sklearnex/spmd/ensemble/forest.py +71 -0
- sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
- sklearnex/spmd/linear_model/__init__.py +21 -0
- sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
- sklearnex/spmd/linear_model/linear_model.py +21 -0
- sklearnex/spmd/linear_model/logistic_regression.py +21 -0
- sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +329 -0
- sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
- sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +162 -0
- sklearnex/spmd/neighbors/__init__.py +19 -0
- sklearnex/spmd/neighbors/neighbors.py +25 -0
- sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
- sklearnex/svm/__init__.py +29 -0
- sklearnex/svm/_common.py +339 -0
- sklearnex/svm/nusvc.py +371 -0
- sklearnex/svm/nusvr.py +170 -0
- sklearnex/svm/svc.py +399 -0
- sklearnex/svm/svr.py +167 -0
- sklearnex/svm/tests/test_svm.py +93 -0
- sklearnex/tests/test_common.py +390 -0
- sklearnex/tests/test_config.py +123 -0
- sklearnex/tests/test_memory_usage.py +379 -0
- sklearnex/tests/test_monkeypatch.py +276 -0
- sklearnex/tests/test_n_jobs_support.py +108 -0
- sklearnex/tests/test_parallel.py +48 -0
- sklearnex/tests/test_patching.py +385 -0
- sklearnex/tests/test_run_to_run_stability.py +321 -0
- sklearnex/tests/utils/__init__.py +44 -0
- sklearnex/tests/utils/base.py +371 -0
- sklearnex/tests/utils/spmd.py +198 -0
- sklearnex/utils/__init__.py +19 -0
- sklearnex/utils/_array_api.py +82 -0
- sklearnex/utils/parallel.py +59 -0
- sklearnex/utils/tests/test_finite.py +89 -0
- sklearnex/utils/validation.py +17 -0
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# ==============================================================================
|
|
2
|
+
# Copyright 2021 Intel Corporation
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# ==============================================================================
|
|
16
|
+
|
|
17
|
+
import sklearn
|
|
18
|
+
|
|
19
|
+
import onedal
|
|
20
|
+
import sklearnex
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_get_config_contains_sklearn_params():
    """Check that every scikit-learn config key is also present in sklearnex."""
    skex_keys = set(sklearnex.get_config())
    sk_keys = set(sklearn.get_config())

    # The sklearnex configuration must be a superset of the sklearn one.
    assert sk_keys.issubset(skex_keys)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_set_config_works():
    """Validate that the settings passed to ``set_config`` are applied.

    The values are checked both in the dictionary returned by
    ``sklearnex.get_config`` and in the onedal-level configuration returned
    by ``onedal._config._get_config``.
    """
    # Snapshot the current sklearnex configuration so it can be restored.
    default_config = sklearnex.get_config()

    # New configuration values under test.
    assume_finite = True
    target_offload = "cpu:0"
    allow_fallback_to_host = True
    allow_sklearn_after_onedal = False

    # Settings applied by set_config stay in place after the test unless they
    # are explicitly restored. Everything from the first mutation onwards is
    # therefore inside try/finally, so a failing assertion *or* an exception
    # raised while applying/reading the config still triggers the restore.
    try:
        sklearnex.set_config(
            assume_finite=assume_finite,
            target_offload=target_offload,
            allow_fallback_to_host=allow_fallback_to_host,
            allow_sklearn_after_onedal=allow_sklearn_after_onedal,
        )

        config = sklearnex.get_config()
        onedal_config = onedal._config._get_config()

        # sklearnex-level view of the configuration.
        assert config["target_offload"] == target_offload
        assert config["allow_fallback_to_host"] == allow_fallback_to_host
        assert config["allow_sklearn_after_onedal"] == allow_sklearn_after_onedal
        assert config["assume_finite"] == assume_finite
        # onedal-level view of the configuration.
        assert onedal_config["target_offload"] == target_offload
        assert onedal_config["allow_fallback_to_host"] == allow_fallback_to_host
    finally:
        # Restore the original configuration whether or not the checks passed.
        sklearnex.set_config(**default_config)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def test_config_context_works():
    """Validate that nested ``config_context`` managers apply their settings
    and that the previous settings are back in place once they exit.
    """
    from sklearnex import config_context, get_config

    # Parameters checked on the sklearnex config and on the onedal config.
    sklearnex_params = (
        "target_offload",
        "allow_fallback_to_host",
        "allow_sklearn_after_onedal",
        "assume_finite",
    )
    onedal_params = ("target_offload", "allow_fallback_to_host")

    config_before = get_config()
    onedal_config_before = onedal._config._get_config()

    # Values applied inside the nested context managers.
    expected = {
        "assume_finite": True,
        "target_offload": "cpu:0",
        "allow_fallback_to_host": True,
        "allow_sklearn_after_onedal": False,
    }

    # Each setting is applied by its own context manager, one nested in the
    # other, and all of them are verified in the innermost scope.
    with config_context(assume_finite=expected["assume_finite"]):
        with config_context(target_offload=expected["target_offload"]):
            with config_context(
                allow_fallback_to_host=expected["allow_fallback_to_host"]
            ):
                with config_context(
                    allow_sklearn_after_onedal=expected["allow_sklearn_after_onedal"]
                ):
                    config_inside = sklearnex.get_config()
                    onedal_config_inside = onedal._config._get_config()

                    for name in sklearnex_params:
                        assert config_inside[name] == expected[name]
                    for name in onedal_params:
                        assert onedal_config_inside[name] == expected[name]

    # Outside of the context managers the initial settings must be unchanged.
    config_after = get_config()
    onedal_config_after = onedal._config._get_config()

    for name in sklearnex_params:
        assert config_after[name] == config_before[name]

    for name in onedal_params:
        assert onedal_config_after[name] == onedal_config_before[name]
|
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
# ==============================================================================
|
|
2
|
+
# Copyright 2021 Intel Corporation
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# ==============================================================================
|
|
16
|
+
|
|
17
|
+
import gc
|
|
18
|
+
import logging
|
|
19
|
+
import os
|
|
20
|
+
import tracemalloc
|
|
21
|
+
import types
|
|
22
|
+
import warnings
|
|
23
|
+
from inspect import isclass
|
|
24
|
+
|
|
25
|
+
import numpy as np
|
|
26
|
+
import pytest
|
|
27
|
+
from scipy.stats import pearsonr
|
|
28
|
+
from sklearn.base import BaseEstimator, clone
|
|
29
|
+
from sklearn.datasets import make_classification
|
|
30
|
+
from sklearn.model_selection import KFold
|
|
31
|
+
|
|
32
|
+
from onedal import _is_dpc_backend
|
|
33
|
+
from onedal.tests.utils._dataframes_support import (
|
|
34
|
+
_convert_to_dataframe,
|
|
35
|
+
get_dataframes_and_queues,
|
|
36
|
+
)
|
|
37
|
+
from onedal.tests.utils._device_selection import get_queues, is_dpctl_device_available
|
|
38
|
+
from onedal.utils._array_api import _get_sycl_namespace
|
|
39
|
+
from onedal.utils._dpep_helpers import dpctl_available, dpnp_available
|
|
40
|
+
from sklearnex import config_context
|
|
41
|
+
from sklearnex.tests.utils import PATCHED_FUNCTIONS, PATCHED_MODELS, SPECIAL_INSTANCES
|
|
42
|
+
from sklearnex.utils._array_api import get_namespace
|
|
43
|
+
|
|
44
|
+
if dpctl_available:
|
|
45
|
+
from dpctl.tensor import usm_ndarray
|
|
46
|
+
|
|
47
|
+
if dpnp_available:
|
|
48
|
+
import dpnp
|
|
49
|
+
|
|
50
|
+
if _is_dpc_backend:
|
|
51
|
+
from onedal import _backend
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# Keys that are filtered out when CPU_ESTIMATORS is built.
CPU_SKIP_LIST = (
    # too slow for using in testing on common data size
    "TSNE",
    # does not malloc
    "config_context",
    "get_config",
    "set_config",
    # memory leak fortran numpy (investigate _fit_proba)
    "SVC(probability=True)",
    "NuSVC(probability=True)",
    # dataframe_f issues
    "IncrementalEmpiricalCovariance",
    # TODO fix memory leak issue in private CI for
    # data_shape = (1000, 100), data_transform_function = dataframe_f
    "IncrementalLinearRegression",
    "IncrementalPCA",
    "IncrementalRidge",
    # memory leak fortran (1000, 100)
    "LogisticRegression(solver='newton-cg')",
)
|
|
67
|
+
|
|
68
|
+
# Keys that are filtered out when GPU_ESTIMATORS is built.
GPU_SKIP_LIST = (
    # too slow for using in testing on common data size
    "TSNE",
    "RandomForestRegressor",
    # does not support GPU offloading
    "KMeans",
    # does not malloc
    "config_context",
    "get_config",
    "set_config",
    # does not support GPU offloading (fails silently)
    "Ridge",
    "ElasticNet",
    "Lasso",
    "SVR",
    "NuSVR",
    "NuSVC",
    # default parameters not supported, see solver=newton-cg
    "LogisticRegression",
    # does not support GPU offloading (fails silently)
    "NuSVC(probability=True)",
    # issue with potrf with the specific dataset
    "IncrementalLinearRegression",
    "LinearRegression",
)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def gen_functions(functions):
    """Return a copy of ``functions`` whose special entries are wrapped so
    that each of them can be called as ``func(x, y)``.

    The input mapping is left untouched. ``pairwise_distances`` is replaced
    by two entries, one per metric, keyed by a descriptive name.
    """
    wrapped = functions.copy()

    roc_auc = wrapped.pop("roc_auc_score")

    def _roc_auc_on_y(x, y):
        # Only ``y`` is forwarded (as both arguments); ``x`` is ignored.
        return roc_auc(y, y)

    wrapped["roc_auc_score"] = _roc_auc_on_y

    pairwise = wrapped.pop("pairwise_distances")

    def _pairwise_with(metric):
        # Build a two-argument wrapper bound to a single metric.
        def _call(x, y):
            return pairwise(x, metric=metric)

        return _call

    wrapped["pairwise_distances(metric='cosine')"] = _pairwise_with("cosine")
    wrapped["pairwise_distances(metric='correlation')"] = _pairwise_with(
        "correlation"
    )

    check_finite = wrapped.pop("_assert_all_finite")

    def _check_both(x, y):
        # Run the check on each input and collect both results.
        return [check_finite(x), check_finite(y)]

    wrapped["_assert_all_finite"] = _check_both
    return wrapped
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# Patched functions wrapped by gen_functions.
FUNCTIONS = gen_functions(PATCHED_FUNCTIONS)

# Patched models, special instances and wrapped functions that are not in the
# CPU skip list.
CPU_ESTIMATORS = {
    name: candidate
    for name, candidate in {
        **PATCHED_MODELS,
        **SPECIAL_INSTANCES,
        **FUNCTIONS,
    }.items()
    if name not in CPU_SKIP_LIST
}

# Patched models and special instances (no functions) that are not in the GPU
# skip list.
GPU_ESTIMATORS = {
    name: candidate
    for name, candidate in {**PATCHED_MODELS, **SPECIAL_INSTANCES}.items()
    if name not in GPU_SKIP_LIST
}

# pytest parameters for the data shapes; the id is the shape's text form.
data_shapes = [
    pytest.param(shape, id=str(shape)) for shape in ((1000, 100), (2000, 50))
]

# NOTE(review): the two thresholds and N_SPLITS are consumed further down the
# file; their exact meaning should be confirmed against those usages.
EXTRA_MEMORY_THRESHOLD = 0.15
EXTRA_MEMORY_THRESHOLD_PANDAS = 0.25
N_SPLITS = 10
# Memory-order name -> numpy conversion function.
ORDER_DICT = dict(F=np.asfortranarray, C=np.ascontiguousarray)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
if _is_dpc_backend:

    from sklearn.utils.validation import check_is_fitted

    from onedal.datatypes import from_table, to_table

    class DummyEstimatorWithTableConversions(BaseEstimator):
        """Estimator whose ``fit`` and ``predict`` only convert their inputs
        with ``to_table`` and convert the tables back with ``from_table``.
        """

        def fit(self, X, y=None):
            """Convert ``X`` and ``y`` to tables and back, store the results
            as fitted attributes and return ``self``.
            """
            sua_iface, xp, _ = _get_sycl_namespace(X)
            X_table = to_table(X)
            y_table = to_table(y)
            # Keyword arguments shared by both back-conversions.
            back_conversion = {
                "sua_iface": sua_iface,
                "sycl_queue": X.sycl_queue,
                "xp": xp,
            }
            # The presence of the fitted attributes (ending with a trailing
            # underscore) is required for the correct check. The cleanup of
            # the memory will occur at the estimator instance deletion.
            self.x_attr_ = from_table(X_table, **back_conversion)
            self.y_attr_ = from_table(y_table, **back_conversion)
            return self

        def predict(self, X):
            """Convert ``X`` to a table and return it converted back."""
            # Checks if the estimator is fitted by verifying the presence of
            # fitted attributes (ending with a trailing underscore).
            check_is_fitted(self)
            sua_iface, xp, _ = _get_sycl_namespace(X)
            table = to_table(X)
            return from_table(
                table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp
            )
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def gen_clsf_data(n_samples, n_features, dtype=None):
    """Generate a two-class classification dataset and report its size.

    The data comes from ``make_classification`` with a fixed ``random_state``.
    When ``dtype`` is given, both the data and the labels are cast to it.

    Returns a tuple ``(data, label, n_bytes)`` where ``n_bytes`` is the
    combined size in bytes of ``data`` and ``label``.
    """
    data, label = make_classification(
        n_classes=2, n_samples=n_samples, n_features=n_features, random_state=777
    )
    # Compare against None explicitly rather than relying on truthiness: a
    # dtype object can evaluate as falsy (a plain ``np.dtype`` instance has
    # ``len() == 0``), which would silently skip the requested cast.
    if dtype is not None:
        data, label = data.astype(dtype), label.astype(dtype)
    n_bytes = data.size * data.dtype.itemsize + label.size * label.dtype.itemsize
    return data, label, n_bytes
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def get_traced_memory(queue=None):
    """Return the currently used memory.

    With the DPC backend and a queue whose device is a GPU, the value comes
    from ``_backend.get_used_memory``; otherwise it is the current size
    reported by ``tracemalloc``.
    """
    measure_on_gpu = _is_dpc_backend and queue and queue.sycl_device.is_gpu
    if not measure_on_gpu:
        # First element of the tracemalloc pair is the current traced size.
        return tracemalloc.get_traced_memory()[0]
    return _backend.get_used_memory(queue)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def take(x, index, axis=0, queue=None):
    """Select the entries of ``x`` at ``index`` along ``axis``.

    ``queue`` is accepted but not used here: for dpnp / usm_ndarray inputs
    the index array is created on the input's own ``sycl_queue``.
    """
    xp, array_api = get_namespace(x)

    # The availability flags are tested first so that the dpnp / dpctl names
    # are only touched when the corresponding package was imported.
    is_sycl_array = (dpnp_available and isinstance(x, dpnp.ndarray)) or (
        dpctl_available and isinstance(x, usm_ndarray)
    )

    if is_sycl_array:
        # Using the same sycl queue for dpnp.ndarray or usm_ndarray.
        device_index = xp.asarray(index, usm_type="device", sycl_queue=x.sycl_queue)
        return xp.take(x, device_index, axis=axis)

    if array_api:
        return xp.take(x, xp.asarray(index, device=x.device), axis=axis)

    return x.take(index, axis=axis)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def split_train_inference(kf, x, y, estimator, queue=None):
    """Run ``estimator`` on every split of ``kf`` and record the memory in
    use after each split.

    ``estimator`` may be a ``BaseEstimator`` subclass (instantiated with
    defaults), a ``BaseEstimator`` instance (cloned), or any other callable,
    which is then called as ``estimator(x_train, y_train)``.

    Returns the list of ``get_traced_memory(queue)`` readings, one per split.
    """
    memory_readings = []
    for train_index, test_index in kf.split(x):
        x_train = take(x, train_index, queue=queue)
        y_train = take(y, train_index, queue=queue)
        x_test = take(x, test_index, queue=queue)
        y_test = take(y, test_index, queue=queue)

        # ``model`` stays None when ``estimator`` is a plain callable.
        if isclass(estimator) and issubclass(estimator, BaseEstimator):
            model = estimator()
        elif isinstance(estimator, BaseEstimator):
            model = clone(estimator)
        else:
            model = None

        if model is None:
            estimator(x_train, y_train)
        else:
            model.fit(x_train, y_train)
            # Call the first inference method the model provides.
            for method_name in ("predict", "transform", "kneighbors"):
                if hasattr(model, method_name):
                    getattr(model, method_name)(x_test)
                    break

        # Drop every per-split reference before taking the reading.
        del model
        del x_train, x_test, y_train, y_test
        memory_readings.append(get_traced_memory(queue))
    return memory_readings
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _kfold_function_template(
    estimator, dataframe, data_shape, queue=None, func=None, dtype=None
):
    """Check that repeated fit/inference over K folds does not leak memory.

    Generates a classification dataset, converts it to the requested
    container, runs ``split_train_inference`` and compares memory readings
    from ``get_traced_memory`` taken before and after the run.

    Parameters
    ----------
    estimator : BaseEstimator subclass, BaseEstimator instance, or callable
        Object under test; passed through to ``split_train_inference``.
    dataframe : str or None
        Target container passed to ``_convert_to_dataframe``. The value
        ``"pandas"`` selects ``EXTRA_MEMORY_THRESHOLD_PANDAS``; anything else
        selects ``EXTRA_MEMORY_THRESHOLD``.
    data_shape : tuple
        ``(n_samples, n_features)`` of the generated dataset.
    queue : optional
        SYCL queue used for data placement and memory readings.
    func : callable, optional
        Applied to ``X`` before the container conversion when truthy.
    dtype : optional
        Forwarded to ``gen_clsf_data``.

    Raises
    ------
    AssertionError
        If, after garbage collection, the extra memory is not below
        ``threshold * data_memory_size``. Only checked when ``queue`` is
        ``None`` or targets a CPU device.

    Notes
    -----
    Two conditions are reported with ``logging.warning`` rather than failing
    the test: a Pearson correlation above 0.95 between the per-fold memory
    readings and the fold number, and extra memory at or above the threshold
    before garbage collection.
    """
    # Constant template filled in with keyword fields only, so braces in the
    # estimator's repr (e.g. a dict-valued parameter) are never interpreted
    # as replacement fields.
    message = (
        "Size of extra allocated memory {stage} using garbage collector "
        "is greater than {threshold_pct}% of input data"
        "\n\tAlgorithm: {name}"
        "\n\tInput data size: {data_size} bytes"
        "\n\tExtra allocated memory size: {extra} bytes"
        " / {extra_pct} %"
    )

    tracemalloc.start()
    # try/finally guarantees tracing is switched off even when data
    # generation, conversion or the estimator itself raises; otherwise the
    # tracer would stay active for every following test.
    try:
        n_samples, n_features = data_shape
        X, y, data_memory_size = gen_clsf_data(n_samples, n_features, dtype=dtype)
        kf = KFold(n_splits=N_SPLITS)
        if func:
            X = func(X)

        X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
        y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)

        mem_before = get_traced_memory(queue)
        mem_tracks = split_train_inference(kf, X, y, estimator, queue=queue)

        # Fold-to-fold growth statistics, used only for the diagnostic below.
        mem_iter_diffs = np.array(mem_tracks[1:]) - np.array(mem_tracks[:-1])
        mem_incr_mean, mem_incr_std = mem_iter_diffs.mean(), mem_iter_diffs.std()
        mem_incr_mean, mem_incr_std = round(mem_incr_mean), round(mem_incr_std)
        with warnings.catch_warnings():
            # In the case that the memory usage is constant, this will raise
            # a ConstantInputWarning error in pearsonr from scipy, this can
            # be ignored.
            warnings.filterwarnings(
                "ignore",
                message="An input array is constant; the correlation coefficient is not defined",
            )
            mem_iter_corr, _ = pearsonr(mem_tracks, list(range(len(mem_tracks))))

        if mem_iter_corr > 0.95:
            logging.warning(
                "Memory usage is steadily increasing with iterations "
                "(Pearson correlation coefficient between "
                f"memory tracks and iterations is {mem_iter_corr})\n"
                "Memory usage increase per iteration: "
                f"{mem_incr_mean}±{mem_incr_std} bytes"
            )

        mem_before_gc = get_traced_memory(queue)
        mem_diff = mem_before_gc - mem_before
        if isinstance(estimator, BaseEstimator):
            name = str(estimator)
        else:
            name = estimator.__name__

        threshold = (
            EXTRA_MEMORY_THRESHOLD_PANDAS
            if dataframe == "pandas"
            else EXTRA_MEMORY_THRESHOLD
        )

        # Exceeding the limit before garbage collection is only reported.
        if mem_diff >= threshold * data_memory_size:
            logging.warning(
                message.format(
                    stage="before",
                    threshold_pct=threshold * 100,
                    name=name,
                    data_size=data_memory_size,
                    extra=mem_diff,
                    extra_pct=round((mem_diff) / data_memory_size * 100, 2),
                )
            )
        gc.collect()
        mem_after = get_traced_memory(queue)
    finally:
        tracemalloc.stop()
    mem_diff = mem_after - mem_before

    # GPU offloading with SYCL contains a program/kernel cache which should
    # be controllable via a KernelProgramCache object in the SYCL context.
    # The programs and kernels are stored on the GPU, but cannot be cleared
    # as this class is not available for access in all oneDAL DPC++ runtimes.
    # Therefore, until this is implemented this test must be skipped for gpu
    # as it looks like a memory leak (at least there is no way to discern a
    # leak on the first run).
    if queue is None or queue.sycl_device.is_cpu:
        assert mem_diff < threshold * data_memory_size, message.format(
            stage="after",
            threshold_pct=threshold * 100,
            name=name,
            data_size=data_memory_size,
            extra=mem_diff,
            extra_pct=round((mem_diff) / data_memory_size * 100, 2),
        )
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
@pytest.mark.parametrize("order", ["F", "C"])
@pytest.mark.parametrize(
    "dataframe,queue", get_dataframes_and_queues("numpy,pandas,dpctl", "cpu")
)
@pytest.mark.parametrize("estimator", CPU_ESTIMATORS.keys())
@pytest.mark.parametrize("data_shape", data_shapes)
def test_memory_leaks(estimator, dataframe, queue, order, data_shape):
    """Run the k-fold memory check for every entry of ``CPU_ESTIMATORS``.

    Parametrized over memory order, dataframe/queue pair and data shape.
    The ``"_assert_all_finite"`` entry is skipped whenever a queue is given.
    """
    layout_func = ORDER_DICT[order]

    offloaded = queue is not None
    if estimator == "_assert_all_finite" and offloaded:
        pytest.skip(f"{estimator} is not designed for device offloading")

    _kfold_function_template(
        CPU_ESTIMATORS[estimator],
        dataframe,
        data_shape,
        queue=queue,
        func=layout_func,
    )
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
@pytest.mark.skipif(
    os.getenv("ZES_ENABLE_SYSMAN") is None or not is_dpctl_device_available("gpu"),
    reason="SYCL device memory leak check requires the level zero sysman",
)
@pytest.mark.parametrize("queue", get_queues("gpu"))
@pytest.mark.parametrize("estimator", GPU_ESTIMATORS.keys())
@pytest.mark.parametrize("order", ["F", "C"])
@pytest.mark.parametrize("data_shape", data_shapes)
def test_gpu_memory_leaks(estimator, queue, order, data_shape):
    """Run the k-fold memory check for every entry of ``GPU_ESTIMATORS``.

    The estimator runs inside ``config_context(target_offload=queue)`` and no
    dataframe conversion target is requested (``dataframe=None``).
    """
    layout_func = ORDER_DICT[order]

    # Known crash: this estimator/shape combination is excluded.
    if "ExtraTrees" in estimator and data_shape == (2000, 50):
        pytest.skip("Avoid a segmentation fault in Extra Trees algorithms")

    gpu_estimator = GPU_ESTIMATORS[estimator]
    with config_context(target_offload=queue):
        _kfold_function_template(
            gpu_estimator,
            None,
            data_shape,
            queue=queue,
            func=layout_func,
        )
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
@pytest.mark.skipif(
    not _is_dpc_backend,
    reason="__sycl_usm_array_interface__ support requires DPC backend.",
)
@pytest.mark.parametrize(
    "dataframe,queue", get_dataframes_and_queues("dpctl,dpnp", "cpu,gpu")
)
@pytest.mark.parametrize("order", ["F", "C"])
@pytest.mark.parametrize("data_shape", data_shapes)
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_table_conversions_memory_leaks(dataframe, queue, order, data_shape, dtype):
    """Run the k-fold memory check on ``DummyEstimatorWithTableConversions``.

    Parametrized over dpctl/dpnp containers on CPU and GPU queues, memory
    order, data shape and floating-point dtype. GPU runs are skipped unless
    ``ZES_ENABLE_SYSMAN`` is set and a GPU device is available.
    """
    layout_func = ORDER_DICT[order]

    if queue.sycl_device.is_gpu:
        # Same evaluation order as ``unset or not available``: the device
        # query only runs when the environment variable is present.
        gpu_memory_readable = os.getenv(
            "ZES_ENABLE_SYSMAN"
        ) is not None and is_dpctl_device_available("gpu")
        if not gpu_memory_readable:
            pytest.skip("SYCL device memory leak check requires the level zero sysman")

    _kfold_function_template(
        DummyEstimatorWithTableConversions,
        dataframe,
        data_shape,
        queue=queue,
        func=layout_func,
        dtype=dtype,
    )
|