scikit-learn-intelex 2024.0.1__py312-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (90):
  1. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/__init__.py +61 -0
  2. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/__main__.py +59 -0
  3. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/_config.py +110 -0
  4. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/_device_offload.py +223 -0
  5. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/_utils.py +95 -0
  6. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/basic_statistics/__init__.py +20 -0
  7. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/basic_statistics/basic_statistics.py +17 -0
  8. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/cluster/__init__.py +21 -0
  9. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/cluster/dbscan.py +187 -0
  10. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/cluster/k_means.py +18 -0
  11. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/cluster/tests/test_dbscan.py +37 -0
  12. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/cluster/tests/test_kmeans.py +31 -0
  13. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/decomposition/__init__.py +20 -0
  14. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/decomposition/pca.py +18 -0
  15. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/decomposition/tests/test_pca.py +28 -0
  16. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/dispatcher.py +329 -0
  17. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/doc/third-party-programs.txt +424 -0
  18. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/ensemble/__init__.py +30 -0
  19. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/ensemble/_forest.py +1947 -0
  20. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/ensemble/tests/test_forest.py +118 -0
  21. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/glob/__main__.py +73 -0
  22. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/glob/dispatcher.py +88 -0
  23. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/linear_model/__init__.py +30 -0
  24. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/linear_model/coordinate_descent.py +18 -0
  25. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/linear_model/linear.py +373 -0
  26. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/linear_model/logistic_path.py +18 -0
  27. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/linear_model/ridge.py +18 -0
  28. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/linear_model/tests/test_linear.py +77 -0
  29. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/linear_model/tests/test_logreg.py +29 -0
  30. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/manifold/__init__.py +20 -0
  31. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/manifold/t_sne.py +18 -0
  32. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/manifold/tests/test_tsne.py +27 -0
  33. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/metrics/__init__.py +24 -0
  34. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/metrics/pairwise.py +18 -0
  35. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/metrics/ranking.py +18 -0
  36. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/metrics/tests/test_metrics.py +40 -0
  37. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/model_selection/__init__.py +22 -0
  38. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/model_selection/split.py +18 -0
  39. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/model_selection/tests/test_model_selection.py +35 -0
  40. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/__init__.py +28 -0
  41. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/common.py +264 -0
  42. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/knn_classification.py +331 -0
  43. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/knn_regression.py +307 -0
  44. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/knn_unsupervised.py +220 -0
  45. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/lof.py +437 -0
  46. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/tests/test_neighbors.py +85 -0
  47. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/preview/__init__.py +18 -0
  48. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/preview/cluster/__init__.py +20 -0
  49. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/preview/cluster/_common.py +84 -0
  50. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/preview/cluster/k_means.py +370 -0
  51. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/preview/decomposition/__init__.py +20 -0
  52. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/preview/decomposition/pca.py +376 -0
  53. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/preview/decomposition/tests/test_preview_pca.py +38 -0
  54. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/__init__.py +24 -0
  55. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/basic_statistics/__init__.py +19 -0
  56. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
  57. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/cluster/__init__.py +30 -0
  58. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/cluster/dbscan.py +50 -0
  59. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/cluster/kmeans.py +21 -0
  60. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/decomposition/__init__.py +19 -0
  61. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/decomposition/pca.py +21 -0
  62. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/ensemble/__init__.py +19 -0
  63. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/ensemble/forest.py +79 -0
  64. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/linear_model/__init__.py +19 -0
  65. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/linear_model/linear_model.py +21 -0
  66. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/neighbors/__init__.py +19 -0
  67. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/neighbors/neighbors.py +25 -0
  68. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/svm/__init__.py +30 -0
  69. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/svm/_common.py +188 -0
  70. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/svm/nusvc.py +272 -0
  71. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/svm/nusvr.py +163 -0
  72. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/svm/svc.py +301 -0
  73. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/svm/svr.py +164 -0
  74. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/svm/tests/test_svm.py +102 -0
  75. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/_models_info.py +170 -0
  76. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/test_config.py +39 -0
  77. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/test_memory_usage.py +225 -0
  78. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/test_monkeypatch.py +210 -0
  79. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/test_parallel.py +50 -0
  80. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/test_patching.py +122 -0
  81. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/test_run_to_run_stability_tests.py +428 -0
  82. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/utils/_launch_algorithms.py +118 -0
  83. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/utils/__init__.py +19 -0
  84. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/utils/parallel.py +59 -0
  85. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/utils/validation.py +18 -0
  86. scikit_learn_intelex-2024.0.1.dist-info/LICENSE.txt +202 -0
  87. scikit_learn_intelex-2024.0.1.dist-info/METADATA +230 -0
  88. scikit_learn_intelex-2024.0.1.dist-info/RECORD +90 -0
  89. scikit_learn_intelex-2024.0.1.dist-info/WHEEL +5 -0
  90. scikit_learn_intelex-2024.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,102 @@
1
+ #!/usr/bin/env python
2
+ # ===============================================================================
3
+ # Copyright 2021 Intel Corporation
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ # ===============================================================================
17
+
18
+ import numpy as np
19
+ import pytest
20
+ from numpy.testing import assert_allclose
21
+
22
+ from onedal.tests.utils._dataframes_support import (
23
+ _as_numpy,
24
+ _convert_to_dataframe,
25
+ get_dataframes_and_queues,
26
+ )
27
+
28
+
29
# TODO:
# investigate failure for `dpnp.ndarrays` and `dpctl.tensors` on `GPU`
@pytest.mark.parametrize(
    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
)
def test_sklearnex_import_svc(dataframe, queue):
    """Fit a linear-kernel ``SVC`` on a six-sample toy set and check the result.

    Verifies that the fitted estimator's module name mentions ``daal4py`` or
    ``sklearnex`` and that ``dual_coef_`` / ``support_`` match reference values.
    """
    from sklearnex.svm import SVC

    features = np.array([[-2, -1], [-1, -1], [-1, -2], [+1, +1], [+1, +2], [+2, +1]])
    labels = np.array([1, 1, 1, 2, 2, 2])
    # Convert both inputs to the parametrized container type, X first, then y.
    features, labels = (
        _convert_to_dataframe(array, sycl_queue=queue, target_df=dataframe)
        for array in (features, labels)
    )

    model = SVC(kernel="linear").fit(features, labels)

    origin = model.__module__
    assert "daal4py" in origin or "sklearnex" in origin

    expected_dual_coef = [[-0.25, 0.25]]
    expected_support = [1, 3]
    assert_allclose(_as_numpy(model.dual_coef_), expected_dual_coef)
    assert_allclose(_as_numpy(model.support_), expected_support)
45
+
46
+
47
# TODO:
# investigate failure for `dpnp.ndarrays` and `dpctl.tensors` on `GPU`
@pytest.mark.parametrize(
    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
)
def test_sklearnex_import_nusvc(dataframe, queue):
    """Fit a linear-kernel ``NuSVC`` on a six-sample toy set and check the result.

    Verifies that the fitted estimator's module name mentions ``daal4py`` or
    ``sklearnex`` and that ``dual_coef_`` / ``support_`` match reference values.
    """
    from sklearnex.svm import NuSVC

    features = np.array([[-2, -1], [-1, -1], [-1, -2], [+1, +1], [+1, +2], [+2, +1]])
    labels = np.array([1, 1, 1, 2, 2, 2])
    # Convert both inputs to the parametrized container type, X first, then y.
    features, labels = (
        _convert_to_dataframe(array, sycl_queue=queue, target_df=dataframe)
        for array in (features, labels)
    )

    model = NuSVC(kernel="linear").fit(features, labels)

    origin = model.__module__
    assert "daal4py" in origin or "sklearnex" in origin

    expected_dual_coef = [[-0.04761905, -0.0952381, 0.0952381, 0.04761905]]
    expected_support = [0, 1, 3, 4]
    assert_allclose(_as_numpy(model.dual_coef_), expected_dual_coef)
    assert_allclose(_as_numpy(model.support_), expected_support)
65
+
66
+
67
# TODO:
# investigate failure for `dpnp.ndarrays` and `dpctl.tensors` on `GPU`
@pytest.mark.parametrize(
    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
)
def test_sklearnex_import_svr(dataframe, queue):
    """Fit a linear-kernel ``SVR`` on a six-sample toy set and check the result.

    Verifies that the fitted estimator's module name mentions ``daal4py`` or
    ``sklearnex`` and that ``dual_coef_`` / ``support_`` match reference values.
    """
    from sklearnex.svm import SVR

    features = np.array([[-2, -1], [-1, -1], [-1, -2], [+1, +1], [+1, +2], [+2, +1]])
    targets = np.array([1, 1, 1, 2, 2, 2])
    # Convert both inputs to the parametrized container type, X first, then y.
    features, targets = (
        _convert_to_dataframe(array, sycl_queue=queue, target_df=dataframe)
        for array in (features, targets)
    )

    regressor = SVR(kernel="linear").fit(features, targets)

    origin = regressor.__module__
    assert "daal4py" in origin or "sklearnex" in origin

    expected_dual_coef = [[-0.1, 0.1]]
    expected_support = [1, 3]
    assert_allclose(_as_numpy(regressor.dual_coef_), expected_dual_coef)
    assert_allclose(_as_numpy(regressor.support_), expected_support)
83
+
84
+
85
# TODO:
# investigate failure for `dpnp.ndarrays` and `dpctl.tensors` on `GPU`
@pytest.mark.parametrize(
    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
)
def test_sklearnex_import_nusvr(dataframe, queue):
    """Fit a linear-kernel ``NuSVR`` (``nu=0.9``) on a toy set and check the result.

    Verifies that the fitted estimator's module name mentions ``daal4py`` or
    ``sklearnex`` and that ``dual_coef_`` (within ``rtol=1e-3``) and
    ``support_`` match reference values.
    """
    from sklearnex.svm import NuSVR

    features = np.array([[-2, -1], [-1, -1], [-1, -2], [+1, +1], [+1, +2], [+2, +1]])
    targets = np.array([1, 1, 1, 2, 2, 2])
    # Convert both inputs to the parametrized container type, X first, then y.
    features, targets = (
        _convert_to_dataframe(array, sycl_queue=queue, target_df=dataframe)
        for array in (features, targets)
    )

    regressor = NuSVR(kernel="linear", nu=0.9).fit(features, targets)

    origin = regressor.__module__
    assert "daal4py" in origin or "sklearnex" in origin

    expected_dual_coef = [[-1.0, 0.611111, 1.0, -0.611111]]
    expected_support = [1, 2, 3, 5]
    assert_allclose(_as_numpy(regressor.dual_coef_), expected_dual_coef, rtol=1e-3)
    assert_allclose(_as_numpy(regressor.support_), expected_support)
@@ -0,0 +1,170 @@
1
+ # ==============================================================================
2
+ # Copyright 2021 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import numpy as np
18
+ from sklearn.cluster import DBSCAN, KMeans
19
+ from sklearn.decomposition import PCA
20
+ from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
21
+ from sklearn.linear_model import (
22
+ ElasticNet,
23
+ Lasso,
24
+ LinearRegression,
25
+ LogisticRegression,
26
+ LogisticRegressionCV,
27
+ Ridge,
28
+ )
29
+ from sklearn.manifold import TSNE
30
+ from sklearn.neighbors import (
31
+ KNeighborsClassifier,
32
+ KNeighborsRegressor,
33
+ LocalOutlierFactor,
34
+ NearestNeighbors,
35
+ )
36
+ from sklearn.svm import SVC
37
+
38
# Table of estimator configurations. Each entry is a dict with three keys:
#   "model"   - an unfitted scikit-learn estimator instance,
#   "methods" - names of estimator methods associated with that model,
#   "dataset" - a string label: "classifier", "regression" or "blobs".
# NOTE(review): the consumer of this table is not visible in this file;
# presumably each listed method is exercised on the named dataset kind - confirm.
MODELS_INFO = [
    {
        "model": KNeighborsClassifier(algorithm="brute"),
        "methods": ["kneighbors", "predict", "predict_proba", "score"],
        "dataset": "classifier",
    },
    {
        "model": KNeighborsRegressor(algorithm="brute"),
        "methods": ["kneighbors", "predict", "score"],
        "dataset": "regression",
    },
    {
        "model": NearestNeighbors(algorithm="brute"),
        "methods": ["kneighbors"],
        "dataset": "blobs",
    },
    {
        "model": LocalOutlierFactor(novelty=False),
        "methods": ["fit_predict"],
        "dataset": "blobs",
    },
    {
        "model": LocalOutlierFactor(novelty=True),
        "methods": ["predict"],
        "dataset": "blobs",
    },
    {
        "model": DBSCAN(),
        "methods": ["fit_predict"],
        "dataset": "blobs",
    },
    {
        "model": SVC(probability=True),
        "methods": ["decision_function", "predict", "predict_proba", "score"],
        "dataset": "classifier",
    },
    {
        "model": KMeans(),
        "methods": ["fit_predict", "fit_transform", "transform", "predict", "score"],
        "dataset": "blobs",
    },
    {
        "model": ElasticNet(),
        "methods": ["predict", "score"],
        "dataset": "regression",
    },
    {
        "model": Lasso(),
        "methods": ["predict", "score"],
        "dataset": "regression",
    },
    {
        "model": PCA(),
        "methods": ["fit_transform", "transform", "score"],
        "dataset": "classifier",
    },
    {
        "model": LogisticRegression(max_iter=100, multi_class="multinomial"),
        "methods": [
            "decision_function",
            "predict",
            "predict_proba",
            "predict_log_proba",
            "score",
        ],
        "dataset": "classifier",
    },
    {
        "model": LogisticRegressionCV(max_iter=100),
        "methods": [
            "decision_function",
            "predict",
            "predict_proba",
            "predict_log_proba",
            "score",
        ],
        "dataset": "classifier",
    },
    {
        "model": RandomForestClassifier(n_estimators=10),
        "methods": ["predict", "predict_proba", "predict_log_proba", "score"],
        "dataset": "classifier",
    },
    {
        "model": RandomForestRegressor(n_estimators=10),
        "methods": ["predict", "score"],
        "dataset": "regression",
    },
    {
        "model": LinearRegression(),
        "methods": ["predict", "score"],
        "dataset": "regression",
    },
    {
        "model": Ridge(),
        "methods": ["predict", "score"],
        "dataset": "regression",
    },
]
137
+
138
# NumPy scalar types listed in order: signed integers, floats, unsigned integers.
# NOTE(review): presumably the input dtypes the tests iterate over - the
# consumer is not visible here; confirm.
TYPES = [
    np.int8,
    np.int16,
    np.int32,
    np.int64,
    np.float16,
    np.float32,
    np.float64,
    np.uint8,
    np.uint16,
    np.uint32,
    np.uint64,
]
151
+
152
# Regular-expression patterns (raw strings) naming cases to skip.
# NOTE(review): the patterns look like "<name> <dtype> <method>" (see
# "Ridge float16 predict"); the code that matches against them is not visible
# in this file - confirm the exact format with the consumer.
TO_SKIP = [
    # --------------- NO INFO ---------------
    r"KMeans .*transform",
    r"KMeans .*score",
    r"PCA .*score",
    r"LogisticRegression .*decision_function",
    r"LogisticRegressionCV .*decision_function",
    r"LogisticRegressionCV .*predict",
    r"LogisticRegressionCV .*predict_proba",
    r"LogisticRegressionCV .*predict_log_proba",
    r"LogisticRegressionCV .*score",
    # --------------- Scikit ---------------
    r"Ridge float16 predict",
    r"Ridge float16 score",
    r"RandomForestClassifier .*predict_proba",
    r"RandomForestClassifier .*predict_log_proba",
    r"pairwise_distances .*pairwise_distances",  # except float64
    r"roc_auc_score .*roc_auc_score",
]
@@ -0,0 +1,39 @@
1
+ # ==============================================================================
2
+ # Copyright 2021 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import sklearn
18
+
19
+ import sklearnex
20
+
21
+
22
def test_get_config_contains_sklearn_params():
    """Check that every key of ``sklearn.get_config()`` is in ``sklearnex.get_config()``."""
    sklearnex_keys = sklearnex.get_config().keys()
    sklearn_keys = sklearn.get_config().keys()

    absent = [key for key in sklearn_keys if key not in sklearnex_keys]
    assert not absent
27
+
28
+
29
def test_set_config_works():
    """Check that ``sklearnex.set_config`` values are reported by ``get_config``.

    The configuration captured before the test is restored in a ``finally``
    block, so a failing assertion cannot leak the modified global settings
    into tests that run afterwards.
    """
    default_config = sklearnex.get_config()
    try:
        sklearnex.set_config(
            assume_finite=True, target_offload="cpu:0", allow_fallback_to_host=True
        )

        config = sklearnex.get_config()
        assert config["target_offload"] == "cpu:0"
        assert config["allow_fallback_to_host"]
        assert config["assume_finite"]
    finally:
        # Always put the global configuration back, even on failure.
        sklearnex.set_config(**default_config)
@@ -0,0 +1,225 @@
1
+ # ==============================================================================
2
+ # Copyright 2021 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import gc
18
+ import logging
19
+ import tracemalloc
20
+ import types
21
+
22
+ import numpy as np
23
+ import pandas as pd
24
+ import pytest
25
+ from scipy.stats import pearsonr
26
+ from sklearn.base import BaseEstimator
27
+ from sklearn.datasets import make_classification
28
+ from sklearn.model_selection import KFold
29
+
30
+ from sklearnex import get_patch_map
31
+ from sklearnex.metrics import pairwise_distances, roc_auc_score
32
+ from sklearnex.model_selection import train_test_split
33
+ from sklearnex.preview.decomposition import PCA as PreviewPCA
34
+ from sklearnex.utils import _assert_all_finite
35
+
36
+
37
class TrainTestSplitEstimator:
    """Estimator-like wrapper whose ``fit`` only runs ``train_test_split``."""

    def __init__(self):
        """Take no parameters and keep no state."""

    def fit(self, x, y):
        """Call ``train_test_split`` on ``x`` and ``y``; the result is discarded."""
        train_test_split(x, y)
43
+
44
+
45
class FiniteCheckEstimator:
    """Estimator-like wrapper whose ``fit`` only runs ``_assert_all_finite``."""

    def __init__(self):
        """Take no parameters and keep no state."""

    def fit(self, x, y):
        """Run ``_assert_all_finite`` on ``x`` first, then on ``y``."""
        for array in (x, y):
            _assert_all_finite(array)
52
+
53
+
54
class PairwiseDistancesEstimator:
    """Base for estimator-like wrappers around ``pairwise_distances``.

    ``fit`` reads ``self.metric``, which this class does not set itself;
    subclasses are expected to provide it.
    """

    def fit(self, x, y):
        """Call ``pairwise_distances`` on ``x`` with ``self.metric``; ``y`` is unused."""
        chosen_metric = self.metric
        pairwise_distances(x, metric=chosen_metric)
57
+
58
+
59
class CosineDistancesEstimator(PairwiseDistancesEstimator):
    """``PairwiseDistancesEstimator`` with ``metric`` set to ``"cosine"``."""

    def __init__(self):
        """Store the metric name on the instance."""
        self.metric = "cosine"
62
+
63
+
64
class CorrelationDistancesEstimator(PairwiseDistancesEstimator):
    """``PairwiseDistancesEstimator`` with ``metric`` set to ``"correlation"``."""

    def __init__(self):
        """Store the metric name on the instance."""
        self.metric = "correlation"
67
+
68
+
69
class RocAucEstimator:
    """Estimator-like wrapper whose ``fit`` only evaluates ``roc_auc_score``."""

    def __init__(self):
        """Take no parameters and keep no state."""

    def fit(self, x, y):
        """Print ``roc_auc_score`` of ``y`` against all-zero int32 scores; ``x`` is unused."""
        zero_scores = np.zeros(shape=y.shape, dtype=np.int32)
        print(roc_auc_score(y, zero_scores))
75
+
76
+
77
# add all daal4py estimators enabled in patching (except banned)
def get_patched_estimators(ban_list, output_list):
    """Append patched estimator classes from ``get_patch_map()`` to ``output_list``.

    For every value in the patch map, the object at ``[0][0][2]`` is taken as
    the estimator and the one at ``[0][0][1]`` as its name. An estimator is
    appended when it is not a plain function, its name is not in ``ban_list``,
    it subclasses ``BaseEstimator`` and it has a ``fit`` attribute.

    ``output_list`` is modified in place; nothing is returned.
    """
    for listing in get_patch_map().values():
        descriptor = listing[0][0]
        estimator, name = descriptor[2], descriptor[1]
        # Guard clauses in the same order as the checks they replace.
        if isinstance(estimator, types.FunctionType):
            continue
        if name in ban_list:
            continue
        if issubclass(estimator, BaseEstimator) and hasattr(estimator, "fit"):
            output_list.append(estimator)
87
+
88
+
89
def remove_duplicated_estimators(estimators_list):
    """Return the estimators with duplicates by ``"<module>.<name>"`` collapsed.

    Entries are keyed by ``f"{__module__}.{__name__}"``. For a repeated key the
    last object seen is kept, at the position where that key first appeared.
    The result is a ``dict`` values view, not a list.
    """
    return {
        f"{candidate.__module__}.{candidate.__name__}": candidate
        for candidate in estimators_list
    }.values()
95
+
96
+
97
# Names of patched estimators that are excluded from the memory checks.
BANNED_ESTIMATORS = (
    "LocalOutlierFactor",  # fails on ndarray_c for sklearn > 1.0
    "TSNE",  # too slow for using in testing on common data size
)
# Seed list: the preview PCA plus the local function-wrapping estimators.
estimators = [
    PreviewPCA,
    TrainTestSplitEstimator,
    FiniteCheckEstimator,
    CosineDistancesEstimator,
    CorrelationDistancesEstimator,
    RocAucEstimator,
]
# Extends `estimators` in place with the non-banned patched estimators.
get_patched_estimators(BANNED_ESTIMATORS, estimators)
# Rebinds `estimators` to a dict values view with duplicate
# "<module>.<name>" entries collapsed.
estimators = remove_duplicated_estimators(estimators)
111
+
112
+
113
def ndarray_c(x, y):
    """Return ``x`` as a C-contiguous ndarray together with ``y`` unchanged."""
    contiguous_x = np.ascontiguousarray(x)
    return contiguous_x, y
115
+
116
+
117
def ndarray_f(x, y):
    """Return ``x`` as a Fortran-ordered ndarray together with ``y`` unchanged."""
    fortran_x = np.asfortranarray(x)
    return fortran_x, y
119
+
120
+
121
def dataframe_c(x, y):
    """Return ``x`` as a DataFrame built from a C-contiguous array, and ``y`` as a Series."""
    frame = pd.DataFrame(np.ascontiguousarray(x))
    series = pd.Series(y)
    return frame, series
123
+
124
+
125
def dataframe_f(x, y):
    """Return ``x`` as a DataFrame built from a Fortran-ordered array, and ``y`` as a Series."""
    frame = pd.DataFrame(np.asfortranarray(x))
    series = pd.Series(y)
    return frame, series
127
+
128
+
129
# Input-layout converters used to parametrize `test_memory_leaks`.
data_transforms = [ndarray_c, ndarray_f, dataframe_c, dataframe_f]

# (n_samples, n_features) pairs passed to `gen_clsf_data`.
data_shapes = [(1000, 100), (2000, 50)]

# Allowed extra traced memory, as a fraction of the input data size in bytes.
EXTRA_MEMORY_THRESHOLD = 0.15
# Number of KFold splits, i.e. fit/inference iterations per test case.
N_SPLITS = 10
136
+
137
def gen_clsf_data(n_samples, n_features):
    """Generate a two-class dataset and report its size in bytes.

    Data comes from ``make_classification`` with ``random_state=777``.

    Returns
    -------
    tuple
        ``(data, label, n_bytes)`` where ``n_bytes`` is the combined byte
        size of the ``data`` and ``label`` arrays.
    """
    data, label = make_classification(
        n_classes=2, n_samples=n_samples, n_features=n_features, random_state=777
    )
    # ndarray.nbytes equals size * dtype.itemsize.
    total_bytes = data.nbytes + label.nbytes
    return data, label, total_bytes
146
+
147
+
148
def split_train_inference(kf, x, y, estimator):
    """Fit and run ``estimator`` on every split, recording traced memory.

    Parameters
    ----------
    kf : object
        Splitter whose ``split(x)`` yields ``(train_index, test_index)`` pairs.
    x, y : numpy.ndarray or pandas.DataFrame / pandas.Series
        Features and targets. ``x`` selects the indexing style: plain
        indexing for an ndarray, ``.iloc`` for a DataFrame (then ``y`` is
        indexed with ``.iloc`` as well).
    estimator : callable
        Called with no arguments to create a fresh object for each split.

    Returns
    -------
    list of int
        First element of ``tracemalloc.get_traced_memory()`` after each
        split, taken once the per-split objects have been deleted.

    Raises
    ------
    TypeError
        If ``x`` is neither an ndarray nor a DataFrame (previously this
        surfaced as an UnboundLocalError on ``x_train``).
    """
    mem_tracks = []
    for train_index, test_index in kf.split(x):
        if isinstance(x, np.ndarray):
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
        elif isinstance(x, pd.DataFrame):
            x_train, x_test = x.iloc[train_index], x.iloc[test_index]
            y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        else:
            raise TypeError(
                "x must be a numpy.ndarray or pandas.DataFrame, "
                f"got {type(x).__name__}"
            )
        # TODO: add parameters for all estimators to prevent
        # fallback to stock scikit-learn with default parameters
        alg = estimator()
        alg.fit(x_train, y_train)
        # Run exactly one inference-style method, in this order of preference.
        if hasattr(alg, "predict"):
            alg.predict(x_test)
        elif hasattr(alg, "transform"):
            alg.transform(x_test)
        elif hasattr(alg, "kneighbors"):
            alg.kneighbors(x_test)
        # Drop per-split objects before sampling so they are not counted.
        del alg, x_train, x_test, y_train, y_test
        mem_tracks.append(tracemalloc.get_traced_memory()[0])

    return mem_tracks
171
+
172
+
173
def _kfold_function_template(estimator, data_transform_function, data_shape):
    """Run a KFold fit/inference loop and assert that traced memory stays bounded.

    Parameters
    ----------
    estimator : class
        Instantiated with no arguments once per split; its ``__name__`` is
        used in the failure message.
    data_transform_function : callable
        Takes ``(x, y)`` and returns the pair in the layout under test.
    data_shape : tuple
        ``(n_samples, n_features)`` forwarded to ``gen_clsf_data``.

    Logs a warning when traced memory correlates strongly (Pearson > 0.95)
    with the iteration number, and when the extra memory before garbage
    collection reaches the threshold. Asserts that the extra memory after
    ``gc.collect()`` is below ``EXTRA_MEMORY_THRESHOLD`` times the input size.

    ``tracemalloc`` is stopped in a ``finally`` block so that an exception
    anywhere in the run cannot leave tracing enabled for later tests.
    """
    tracemalloc.start()
    try:
        n_samples, n_features = data_shape
        x, y, data_memory_size = gen_clsf_data(n_samples, n_features)
        kf = KFold(n_splits=N_SPLITS)
        x, y = data_transform_function(x, y)

        mem_before, _ = tracemalloc.get_traced_memory()
        mem_tracks = split_train_inference(kf, x, y, estimator)
        # Per-iteration growth: difference between consecutive samples.
        mem_iter_diffs = np.array(mem_tracks[1:]) - np.array(mem_tracks[:-1])
        mem_incr_mean, mem_incr_std = mem_iter_diffs.mean(), mem_iter_diffs.std()
        mem_incr_mean, mem_incr_std = round(mem_incr_mean), round(mem_incr_std)
        mem_iter_corr, _ = pearsonr(mem_tracks, list(range(len(mem_tracks))))
        if mem_iter_corr > 0.95:
            logging.warning(
                "Memory usage is steadily increasing with iterations "
                "(Pearson correlation coefficient between "
                f"memory tracks and iterations is {mem_iter_corr})\n"
                "Memory usage increase per iteration: "
                f"{mem_incr_mean}±{mem_incr_std} bytes"
            )
        mem_before_gc, _ = tracemalloc.get_traced_memory()
        mem_diff = mem_before_gc - mem_before
        # The f-string parts are resolved here; the remaining "{}" slots are
        # filled later through str.format.
        message = (
            "Size of extra allocated memory {} using garbage collector "
            f"is greater than {EXTRA_MEMORY_THRESHOLD * 100}% of input data"
            f"\n\tAlgorithm: {estimator.__name__}"
            f"\n\tInput data size: {data_memory_size} bytes"
            "\n\tExtra allocated memory size: {} bytes"
            " / {} %"
        )
        if mem_diff >= EXTRA_MEMORY_THRESHOLD * data_memory_size:
            logging.warning(
                message.format(
                    "before", mem_diff, round((mem_diff) / data_memory_size * 100, 2)
                )
            )
        gc.collect()
        mem_after, _ = tracemalloc.get_traced_memory()
    finally:
        # Always stop tracing, even if data generation or fitting raised.
        tracemalloc.stop()
    mem_diff = mem_after - mem_before

    assert mem_diff < EXTRA_MEMORY_THRESHOLD * data_memory_size, message.format(
        "after", mem_diff, round((mem_diff) / data_memory_size * 100, 2)
    )
219
+
220
+
221
@pytest.mark.parametrize("data_transform_function", data_transforms)
@pytest.mark.parametrize("estimator", estimators)
@pytest.mark.parametrize("data_shape", data_shapes)
def test_memory_leaks(estimator, data_transform_function, data_shape):
    """Run the KFold memory check for one estimator / layout / shape combination."""
    _kfold_function_template(
        estimator=estimator,
        data_transform_function=data_transform_function,
        data_shape=data_shape,
    )