scikit-learn-intelex 2024.4.0__py39-none-manylinux1_x86_64.whl → 2024.6.0__py39-none-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-learn-intelex might be problematic. Click here for more details.
- {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/METADATA +2 -2
- {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/RECORD +43 -36
- sklearnex/_device_offload.py +8 -1
- sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +2 -4
- sklearnex/cluster/dbscan.py +3 -0
- sklearnex/cluster/tests/test_dbscan.py +8 -6
- sklearnex/conftest.py +11 -1
- sklearnex/covariance/incremental_covariance.py +217 -30
- sklearnex/covariance/tests/test_incremental_covariance.py +54 -17
- sklearnex/decomposition/pca.py +68 -13
- sklearnex/decomposition/tests/test_pca.py +6 -4
- sklearnex/dispatcher.py +46 -1
- sklearnex/ensemble/_forest.py +114 -22
- sklearnex/ensemble/tests/test_forest.py +13 -3
- sklearnex/glob/dispatcher.py +16 -2
- sklearnex/linear_model/__init__.py +5 -3
- sklearnex/linear_model/incremental_linear.py +464 -0
- sklearnex/linear_model/linear.py +27 -9
- sklearnex/linear_model/logistic_regression.py +13 -15
- sklearnex/linear_model/tests/test_incremental_linear.py +200 -0
- sklearnex/linear_model/tests/test_linear.py +2 -2
- sklearnex/neighbors/knn_regression.py +24 -0
- sklearnex/neighbors/tests/test_neighbors.py +2 -2
- sklearnex/preview/__init__.py +1 -1
- sklearnex/preview/decomposition/__init__.py +19 -0
- sklearnex/preview/decomposition/incremental_pca.py +228 -0
- sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
- sklearnex/svm/_common.py +165 -20
- sklearnex/svm/nusvc.py +40 -4
- sklearnex/svm/nusvr.py +31 -2
- sklearnex/svm/svc.py +40 -4
- sklearnex/svm/svr.py +31 -2
- sklearnex/tests/_utils.py +70 -29
- sklearnex/tests/test_common.py +54 -0
- sklearnex/tests/test_memory_usage.py +195 -132
- sklearnex/tests/test_n_jobs_support.py +4 -0
- sklearnex/tests/test_patching.py +22 -10
- sklearnex/tests/test_run_to_run_stability.py +283 -0
- sklearnex/utils/_namespace.py +1 -1
- sklearnex/utils/tests/test_finite.py +89 -0
- sklearnex/tests/test_run_to_run_stability_tests.py +0 -428
- {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/LICENSE.txt +0 -0
- {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/WHEEL +0 -0
- {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/top_level.txt +0 -0
sklearnex/ensemble/_forest.py
CHANGED
|
@@ -29,7 +29,7 @@ from sklearn.ensemble._forest import ForestClassifier as sklearn_ForestClassifie
|
|
|
29
29
|
from sklearn.ensemble._forest import ForestRegressor as sklearn_ForestRegressor
|
|
30
30
|
from sklearn.ensemble._forest import _get_n_samples_bootstrap
|
|
31
31
|
from sklearn.exceptions import DataConversionWarning
|
|
32
|
-
from sklearn.metrics import accuracy_score
|
|
32
|
+
from sklearn.metrics import accuracy_score, r2_score
|
|
33
33
|
from sklearn.tree import (
|
|
34
34
|
DecisionTreeClassifier,
|
|
35
35
|
DecisionTreeRegressor,
|
|
@@ -38,7 +38,7 @@ from sklearn.tree import (
|
|
|
38
38
|
)
|
|
39
39
|
from sklearn.tree._tree import Tree
|
|
40
40
|
from sklearn.utils import check_random_state, deprecated
|
|
41
|
-
from sklearn.utils.validation import check_array, check_is_fitted
|
|
41
|
+
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
|
|
42
42
|
|
|
43
43
|
from daal4py.sklearn._n_jobs_support import control_n_jobs
|
|
44
44
|
from daal4py.sklearn._utils import (
|
|
@@ -74,6 +74,7 @@ class BaseForest(ABC):
|
|
|
74
74
|
accept_sparse=False,
|
|
75
75
|
dtype=[np.float64, np.float32],
|
|
76
76
|
force_all_finite=False,
|
|
77
|
+
ensure_2d=True,
|
|
77
78
|
)
|
|
78
79
|
|
|
79
80
|
if sample_weight is not None:
|
|
@@ -97,8 +98,6 @@ class BaseForest(ABC):
|
|
|
97
98
|
|
|
98
99
|
y, expanded_class_weight = self._validate_y_class_weight(y)
|
|
99
100
|
|
|
100
|
-
self.n_features_in_ = X.shape[1]
|
|
101
|
-
|
|
102
101
|
if expanded_class_weight is not None:
|
|
103
102
|
if sample_weight is not None:
|
|
104
103
|
sample_weight = sample_weight * expanded_class_weight
|
|
@@ -114,7 +113,9 @@ class BaseForest(ABC):
|
|
|
114
113
|
"min_samples_split": self.min_samples_split,
|
|
115
114
|
"min_samples_leaf": self.min_samples_leaf,
|
|
116
115
|
"min_weight_fraction_leaf": self.min_weight_fraction_leaf,
|
|
117
|
-
"max_features": self.
|
|
116
|
+
"max_features": self._to_absolute_max_features(
|
|
117
|
+
self.max_features, self.n_features_in_
|
|
118
|
+
),
|
|
118
119
|
"max_leaf_nodes": self.max_leaf_nodes,
|
|
119
120
|
"min_impurity_decrease": self.min_impurity_decrease,
|
|
120
121
|
"bootstrap": self.bootstrap,
|
|
@@ -174,6 +175,45 @@ class BaseForest(ABC):
|
|
|
174
175
|
self._validate_estimator()
|
|
175
176
|
return self
|
|
176
177
|
|
|
178
|
+
def _to_absolute_max_features(self, max_features, n_features):
|
|
179
|
+
if max_features is None:
|
|
180
|
+
return n_features
|
|
181
|
+
if isinstance(max_features, str):
|
|
182
|
+
if max_features == "auto":
|
|
183
|
+
if not sklearn_check_version("1.3"):
|
|
184
|
+
if sklearn_check_version("1.1"):
|
|
185
|
+
warnings.warn(
|
|
186
|
+
"`max_features='auto'` has been deprecated in 1.1 "
|
|
187
|
+
"and will be removed in 1.3. To keep the past behaviour, "
|
|
188
|
+
"explicitly set `max_features=1.0` or remove this "
|
|
189
|
+
"parameter as it is also the default value for "
|
|
190
|
+
"RandomForestRegressors and ExtraTreesRegressors.",
|
|
191
|
+
FutureWarning,
|
|
192
|
+
)
|
|
193
|
+
return (
|
|
194
|
+
max(1, int(np.sqrt(n_features)))
|
|
195
|
+
if isinstance(self, ForestClassifier)
|
|
196
|
+
else n_features
|
|
197
|
+
)
|
|
198
|
+
if max_features == "sqrt":
|
|
199
|
+
return max(1, int(np.sqrt(n_features)))
|
|
200
|
+
if max_features == "log2":
|
|
201
|
+
return max(1, int(np.log2(n_features)))
|
|
202
|
+
allowed_string_values = (
|
|
203
|
+
'"sqrt" or "log2"'
|
|
204
|
+
if sklearn_check_version("1.3")
|
|
205
|
+
else '"auto", "sqrt" or "log2"'
|
|
206
|
+
)
|
|
207
|
+
raise ValueError(
|
|
208
|
+
"Invalid value for max_features. Allowed string "
|
|
209
|
+
f"values are {allowed_string_values}."
|
|
210
|
+
)
|
|
211
|
+
if isinstance(max_features, (numbers.Integral, np.integer)):
|
|
212
|
+
return max_features
|
|
213
|
+
if max_features > 0.0:
|
|
214
|
+
return max(1, int(max_features * n_features))
|
|
215
|
+
return 0
|
|
216
|
+
|
|
177
217
|
def _check_parameters(self):
|
|
178
218
|
if isinstance(self.min_samples_leaf, numbers.Integral):
|
|
179
219
|
if not 1 <= self.min_samples_leaf:
|
|
@@ -518,7 +558,7 @@ class ForestClassifier(sklearn_ForestClassifier, BaseForest):
|
|
|
518
558
|
)
|
|
519
559
|
|
|
520
560
|
if patching_status.get_status():
|
|
521
|
-
X, y =
|
|
561
|
+
X, y = check_X_y(
|
|
522
562
|
X,
|
|
523
563
|
y,
|
|
524
564
|
multi_output=True,
|
|
@@ -738,6 +778,10 @@ class ForestClassifier(sklearn_ForestClassifier, BaseForest):
|
|
|
738
778
|
or self.estimator.__class__ == DecisionTreeClassifier,
|
|
739
779
|
"ExtraTrees only supported starting from oneDAL version 2023.1",
|
|
740
780
|
),
|
|
781
|
+
(
|
|
782
|
+
not self.oob_score,
|
|
783
|
+
"oob_scores using r2 or accuracy not implemented.",
|
|
784
|
+
),
|
|
741
785
|
(sample_weight is None, "sample_weight is not supported."),
|
|
742
786
|
]
|
|
743
787
|
)
|
|
@@ -777,26 +821,46 @@ class ForestClassifier(sklearn_ForestClassifier, BaseForest):
|
|
|
777
821
|
return patching_status
|
|
778
822
|
|
|
779
823
|
def _onedal_predict(self, X, queue=None):
|
|
780
|
-
X = check_array(
|
|
781
|
-
X,
|
|
782
|
-
dtype=[np.float64, np.float32],
|
|
783
|
-
force_all_finite=False,
|
|
784
|
-
) # Warning, order of dtype matters
|
|
785
824
|
check_is_fitted(self, "_onedal_estimator")
|
|
786
825
|
|
|
787
826
|
if sklearn_check_version("1.0"):
|
|
788
|
-
self.
|
|
827
|
+
X = self._validate_data(
|
|
828
|
+
X,
|
|
829
|
+
dtype=[np.float64, np.float32],
|
|
830
|
+
force_all_finite=False,
|
|
831
|
+
reset=False,
|
|
832
|
+
ensure_2d=True,
|
|
833
|
+
)
|
|
834
|
+
else:
|
|
835
|
+
X = check_array(
|
|
836
|
+
X,
|
|
837
|
+
dtype=[np.float64, np.float32],
|
|
838
|
+
force_all_finite=False,
|
|
839
|
+
) # Warning, order of dtype matters
|
|
840
|
+
self._check_n_features(X, reset=False)
|
|
789
841
|
|
|
790
842
|
res = self._onedal_estimator.predict(X, queue=queue)
|
|
791
843
|
return np.take(self.classes_, res.ravel().astype(np.int64, casting="unsafe"))
|
|
792
844
|
|
|
793
845
|
def _onedal_predict_proba(self, X, queue=None):
|
|
794
|
-
X = check_array(X, dtype=[np.float64, np.float32], force_all_finite=False)
|
|
795
846
|
check_is_fitted(self, "_onedal_estimator")
|
|
796
847
|
|
|
797
|
-
self._check_n_features(X, reset=False)
|
|
798
848
|
if sklearn_check_version("1.0"):
|
|
799
|
-
self.
|
|
849
|
+
X = self._validate_data(
|
|
850
|
+
X,
|
|
851
|
+
dtype=[np.float64, np.float32],
|
|
852
|
+
force_all_finite=False,
|
|
853
|
+
reset=False,
|
|
854
|
+
ensure_2d=True,
|
|
855
|
+
)
|
|
856
|
+
else:
|
|
857
|
+
X = check_array(
|
|
858
|
+
X,
|
|
859
|
+
dtype=[np.float64, np.float32],
|
|
860
|
+
force_all_finite=False,
|
|
861
|
+
) # Warning, order of dtype matters
|
|
862
|
+
self._check_n_features(X, reset=False)
|
|
863
|
+
|
|
800
864
|
return self._onedal_estimator.predict_proba(X, queue=queue)
|
|
801
865
|
|
|
802
866
|
def _onedal_score(self, X, y, sample_weight=None, queue=None):
|
|
@@ -913,7 +977,7 @@ class ForestRegressor(sklearn_ForestRegressor, BaseForest):
|
|
|
913
977
|
)
|
|
914
978
|
|
|
915
979
|
if patching_status.get_status():
|
|
916
|
-
X, y =
|
|
980
|
+
X, y = check_X_y(
|
|
917
981
|
X,
|
|
918
982
|
y,
|
|
919
983
|
multi_output=True,
|
|
@@ -995,7 +1059,7 @@ class ForestRegressor(sklearn_ForestRegressor, BaseForest):
|
|
|
995
1059
|
]
|
|
996
1060
|
)
|
|
997
1061
|
|
|
998
|
-
elif method_name
|
|
1062
|
+
elif method_name in ["predict", "score"]:
|
|
999
1063
|
X = data[0]
|
|
1000
1064
|
|
|
1001
1065
|
patching_status.and_conditions(
|
|
@@ -1045,11 +1109,12 @@ class ForestRegressor(sklearn_ForestRegressor, BaseForest):
|
|
|
1045
1109
|
or self.estimator.__class__ == DecisionTreeClassifier,
|
|
1046
1110
|
"ExtraTrees only supported starting from oneDAL version 2023.1",
|
|
1047
1111
|
),
|
|
1112
|
+
(not self.oob_score, "oob_score value is not sklearn conformant."),
|
|
1048
1113
|
(sample_weight is None, "sample_weight is not supported."),
|
|
1049
1114
|
]
|
|
1050
1115
|
)
|
|
1051
1116
|
|
|
1052
|
-
elif method_name
|
|
1117
|
+
elif method_name in ["predict", "score"]:
|
|
1053
1118
|
X = data[0]
|
|
1054
1119
|
|
|
1055
1120
|
patching_status.and_conditions(
|
|
@@ -1082,16 +1147,28 @@ class ForestRegressor(sklearn_ForestRegressor, BaseForest):
|
|
|
1082
1147
|
return patching_status
|
|
1083
1148
|
|
|
1084
1149
|
def _onedal_predict(self, X, queue=None):
|
|
1085
|
-
X = check_array(
|
|
1086
|
-
X, dtype=[np.float64, np.float32], force_all_finite=False
|
|
1087
|
-
) # Warning, order of dtype matters
|
|
1088
1150
|
check_is_fitted(self, "_onedal_estimator")
|
|
1089
1151
|
|
|
1090
1152
|
if sklearn_check_version("1.0"):
|
|
1091
|
-
self.
|
|
1153
|
+
X = self._validate_data(
|
|
1154
|
+
X,
|
|
1155
|
+
dtype=[np.float64, np.float32],
|
|
1156
|
+
force_all_finite=False,
|
|
1157
|
+
reset=False,
|
|
1158
|
+
ensure_2d=True,
|
|
1159
|
+
) # Warning, order of dtype matters
|
|
1160
|
+
else:
|
|
1161
|
+
X = check_array(
|
|
1162
|
+
X, dtype=[np.float64, np.float32], force_all_finite=False
|
|
1163
|
+
) # Warning, order of dtype matters
|
|
1092
1164
|
|
|
1093
1165
|
return self._onedal_estimator.predict(X, queue=queue)
|
|
1094
1166
|
|
|
1167
|
+
def _onedal_score(self, X, y, sample_weight=None, queue=None):
|
|
1168
|
+
return r2_score(
|
|
1169
|
+
y, self._onedal_predict(X, queue=queue), sample_weight=sample_weight
|
|
1170
|
+
)
|
|
1171
|
+
|
|
1095
1172
|
def fit(self, X, y, sample_weight=None):
|
|
1096
1173
|
dispatch(
|
|
1097
1174
|
self,
|
|
@@ -1118,8 +1195,23 @@ class ForestRegressor(sklearn_ForestRegressor, BaseForest):
|
|
|
1118
1195
|
X,
|
|
1119
1196
|
)
|
|
1120
1197
|
|
|
1198
|
+
@wrap_output_data
|
|
1199
|
+
def score(self, X, y, sample_weight=None):
|
|
1200
|
+
return dispatch(
|
|
1201
|
+
self,
|
|
1202
|
+
"score",
|
|
1203
|
+
{
|
|
1204
|
+
"onedal": self.__class__._onedal_score,
|
|
1205
|
+
"sklearn": sklearn_ForestRegressor.score,
|
|
1206
|
+
},
|
|
1207
|
+
X,
|
|
1208
|
+
y,
|
|
1209
|
+
sample_weight=sample_weight,
|
|
1210
|
+
)
|
|
1211
|
+
|
|
1121
1212
|
fit.__doc__ = sklearn_ForestRegressor.fit.__doc__
|
|
1122
1213
|
predict.__doc__ = sklearn_ForestRegressor.predict.__doc__
|
|
1214
|
+
score.__doc__ = sklearn_ForestRegressor.score.__doc__
|
|
1123
1215
|
|
|
1124
1216
|
|
|
1125
1217
|
@control_n_jobs(decorated_methods=["fit", "predict", "predict_proba", "score"])
|
|
sklearnex/ensemble/tests/test_forest.py
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
# ===============================================================================
|
|
16
16
|
|
|
17
|
+
import numpy as np
|
|
17
18
|
import pytest
|
|
18
19
|
from numpy.testing import assert_allclose
|
|
19
20
|
from sklearn.datasets import make_classification, make_regression
|
|
@@ -45,7 +46,10 @@ def test_sklearnex_import_rf_classifier(dataframe, queue):
|
|
|
45
46
|
assert_allclose([1], _as_numpy(rf.predict([[0, 0, 0, 0]])))
|
|
46
47
|
|
|
47
48
|
|
|
48
|
-
|
|
49
|
+
# TODO: fix RF regressor predict for the GPU sycl_queue.
|
|
50
|
+
@pytest.mark.parametrize(
|
|
51
|
+
"dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
|
|
52
|
+
)
|
|
49
53
|
def test_sklearnex_import_rf_regression(dataframe, queue):
|
|
50
54
|
from sklearnex.ensemble import RandomForestRegressor
|
|
51
55
|
|
|
@@ -65,7 +69,10 @@ def test_sklearnex_import_rf_regression(dataframe, queue):
|
|
|
65
69
|
assert_allclose([-6.839], pred, atol=1e-2)
|
|
66
70
|
|
|
67
71
|
|
|
68
|
-
|
|
72
|
+
# TODO: fix ET classifier predict for the GPU sycl_queue.
|
|
73
|
+
@pytest.mark.parametrize(
|
|
74
|
+
"dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
|
|
75
|
+
)
|
|
69
76
|
def test_sklearnex_import_et_classifier(dataframe, queue):
|
|
70
77
|
from sklearnex.ensemble import ExtraTreesClassifier
|
|
71
78
|
|
|
@@ -86,7 +93,10 @@ def test_sklearnex_import_et_classifier(dataframe, queue):
|
|
|
86
93
|
assert_allclose([1], _as_numpy(rf.predict([[0, 0, 0, 0]])))
|
|
87
94
|
|
|
88
95
|
|
|
89
|
-
|
|
96
|
+
# TODO: fix ET regressor predict for the GPU sycl_queue.
|
|
97
|
+
@pytest.mark.parametrize(
|
|
98
|
+
"dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
|
|
99
|
+
)
|
|
90
100
|
def test_sklearnex_import_et_regression(dataframe, queue):
|
|
91
101
|
from sklearnex.ensemble import ExtraTreesRegressor
|
|
92
102
|
|
sklearnex/glob/dispatcher.py
CHANGED
|
@@ -17,18 +17,32 @@
|
|
|
17
17
|
|
|
18
18
|
def get_patch_str(name=None, verbose=True):
|
|
19
19
|
return f"""try:
|
|
20
|
+
# TEMP. FIX: sklearnex.patch_sklearn imports sklearn beforehand
|
|
21
|
+
# when it didn't initialized _threadpool_controller required for
|
|
22
|
+
# pairwise distances dispatching during imports.
|
|
23
|
+
# Manually setting and deleting _threadpool_controller during patch fixes it.
|
|
24
|
+
import sklearn
|
|
25
|
+
from threadpoolctl import ThreadpoolController
|
|
26
|
+
sklearn._threadpool_controller = ThreadpoolController()
|
|
20
27
|
from sklearnex import patch_sklearn
|
|
21
28
|
patch_sklearn(name={str(name)}, verbose={str(verbose)})
|
|
22
|
-
del patch_sklearn
|
|
29
|
+
del patch_sklearn, sklearn._threadpool_controller
|
|
23
30
|
except ImportError:
|
|
24
31
|
pass"""
|
|
25
32
|
|
|
26
33
|
|
|
27
34
|
def get_patch_str_re():
|
|
28
35
|
return r"""\ntry:
|
|
36
|
+
\# TEMP. FIX: sklearnex.patch_sklearn imports sklearn beforehand
|
|
37
|
+
\# when it didn't initialized _threadpool_controller required for
|
|
38
|
+
\# pairwise distances dispatching during imports.
|
|
39
|
+
\# Manually setting and deleting _threadpool_controller during patch fixes it.
|
|
40
|
+
import sklearn
|
|
41
|
+
from threadpoolctl import ThreadpoolController
|
|
42
|
+
sklearn._threadpool_controller = ThreadpoolController\(\)
|
|
29
43
|
from sklearnex import patch_sklearn
|
|
30
44
|
patch_sklearn\(name=.*, verbose=.*\)
|
|
31
|
-
del patch_sklearn
|
|
45
|
+
del patch_sklearn, sklearn._threadpool_controller
|
|
32
46
|
except ImportError:
|
|
33
47
|
pass\n"""
|
|
34
48
|
|
|
sklearnex/linear_model/__init__.py
CHANGED
|
@@ -15,14 +15,16 @@
|
|
|
15
15
|
# ===============================================================================
|
|
16
16
|
|
|
17
17
|
from .coordinate_descent import ElasticNet, Lasso
|
|
18
|
+
from .incremental_linear import IncrementalLinearRegression
|
|
18
19
|
from .linear import LinearRegression
|
|
19
20
|
from .logistic_regression import LogisticRegression
|
|
20
21
|
from .ridge import Ridge
|
|
21
22
|
|
|
22
23
|
__all__ = [
|
|
23
|
-
"Ridge",
|
|
24
|
-
"LinearRegression",
|
|
25
|
-
"LogisticRegression",
|
|
26
24
|
"ElasticNet",
|
|
25
|
+
"IncrementalLinearRegression",
|
|
27
26
|
"Lasso",
|
|
27
|
+
"LinearRegression",
|
|
28
|
+
"LogisticRegression",
|
|
29
|
+
"Ridge",
|
|
28
30
|
]
|