scikit-learn-intelex 2024.4.0__py39-none-manylinux1_x86_64.whl → 2024.6.0__py39-none-manylinux1_x86_64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (44)
  1. {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/METADATA +2 -2
  2. {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/RECORD +43 -36
  3. sklearnex/_device_offload.py +8 -1
  4. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +2 -4
  5. sklearnex/cluster/dbscan.py +3 -0
  6. sklearnex/cluster/tests/test_dbscan.py +8 -6
  7. sklearnex/conftest.py +11 -1
  8. sklearnex/covariance/incremental_covariance.py +217 -30
  9. sklearnex/covariance/tests/test_incremental_covariance.py +54 -17
  10. sklearnex/decomposition/pca.py +68 -13
  11. sklearnex/decomposition/tests/test_pca.py +6 -4
  12. sklearnex/dispatcher.py +46 -1
  13. sklearnex/ensemble/_forest.py +114 -22
  14. sklearnex/ensemble/tests/test_forest.py +13 -3
  15. sklearnex/glob/dispatcher.py +16 -2
  16. sklearnex/linear_model/__init__.py +5 -3
  17. sklearnex/linear_model/incremental_linear.py +464 -0
  18. sklearnex/linear_model/linear.py +27 -9
  19. sklearnex/linear_model/logistic_regression.py +13 -15
  20. sklearnex/linear_model/tests/test_incremental_linear.py +200 -0
  21. sklearnex/linear_model/tests/test_linear.py +2 -2
  22. sklearnex/neighbors/knn_regression.py +24 -0
  23. sklearnex/neighbors/tests/test_neighbors.py +2 -2
  24. sklearnex/preview/__init__.py +1 -1
  25. sklearnex/preview/decomposition/__init__.py +19 -0
  26. sklearnex/preview/decomposition/incremental_pca.py +228 -0
  27. sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
  28. sklearnex/svm/_common.py +165 -20
  29. sklearnex/svm/nusvc.py +40 -4
  30. sklearnex/svm/nusvr.py +31 -2
  31. sklearnex/svm/svc.py +40 -4
  32. sklearnex/svm/svr.py +31 -2
  33. sklearnex/tests/_utils.py +70 -29
  34. sklearnex/tests/test_common.py +54 -0
  35. sklearnex/tests/test_memory_usage.py +195 -132
  36. sklearnex/tests/test_n_jobs_support.py +4 -0
  37. sklearnex/tests/test_patching.py +22 -10
  38. sklearnex/tests/test_run_to_run_stability.py +283 -0
  39. sklearnex/utils/_namespace.py +1 -1
  40. sklearnex/utils/tests/test_finite.py +89 -0
  41. sklearnex/tests/test_run_to_run_stability_tests.py +0 -428
  42. {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/LICENSE.txt +0 -0
  43. {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/WHEEL +0 -0
  44. {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/top_level.txt +0 -0
@@ -29,7 +29,7 @@ from sklearn.ensemble._forest import ForestClassifier as sklearn_ForestClassifie
29
29
  from sklearn.ensemble._forest import ForestRegressor as sklearn_ForestRegressor
30
30
  from sklearn.ensemble._forest import _get_n_samples_bootstrap
31
31
  from sklearn.exceptions import DataConversionWarning
32
- from sklearn.metrics import accuracy_score
32
+ from sklearn.metrics import accuracy_score, r2_score
33
33
  from sklearn.tree import (
34
34
  DecisionTreeClassifier,
35
35
  DecisionTreeRegressor,
@@ -38,7 +38,7 @@ from sklearn.tree import (
38
38
  )
39
39
  from sklearn.tree._tree import Tree
40
40
  from sklearn.utils import check_random_state, deprecated
41
- from sklearn.utils.validation import check_array, check_is_fitted
41
+ from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
42
42
 
43
43
  from daal4py.sklearn._n_jobs_support import control_n_jobs
44
44
  from daal4py.sklearn._utils import (
@@ -74,6 +74,7 @@ class BaseForest(ABC):
74
74
  accept_sparse=False,
75
75
  dtype=[np.float64, np.float32],
76
76
  force_all_finite=False,
77
+ ensure_2d=True,
77
78
  )
78
79
 
79
80
  if sample_weight is not None:
@@ -97,8 +98,6 @@ class BaseForest(ABC):
97
98
 
98
99
  y, expanded_class_weight = self._validate_y_class_weight(y)
99
100
 
100
- self.n_features_in_ = X.shape[1]
101
-
102
101
  if expanded_class_weight is not None:
103
102
  if sample_weight is not None:
104
103
  sample_weight = sample_weight * expanded_class_weight
@@ -114,7 +113,9 @@ class BaseForest(ABC):
114
113
  "min_samples_split": self.min_samples_split,
115
114
  "min_samples_leaf": self.min_samples_leaf,
116
115
  "min_weight_fraction_leaf": self.min_weight_fraction_leaf,
117
- "max_features": self.max_features,
116
+ "max_features": self._to_absolute_max_features(
117
+ self.max_features, self.n_features_in_
118
+ ),
118
119
  "max_leaf_nodes": self.max_leaf_nodes,
119
120
  "min_impurity_decrease": self.min_impurity_decrease,
120
121
  "bootstrap": self.bootstrap,
@@ -174,6 +175,45 @@ class BaseForest(ABC):
174
175
  self._validate_estimator()
175
176
  return self
176
177
 
178
+ def _to_absolute_max_features(self, max_features, n_features):
179
+ if max_features is None:
180
+ return n_features
181
+ if isinstance(max_features, str):
182
+ if max_features == "auto":
183
+ if not sklearn_check_version("1.3"):
184
+ if sklearn_check_version("1.1"):
185
+ warnings.warn(
186
+ "`max_features='auto'` has been deprecated in 1.1 "
187
+ "and will be removed in 1.3. To keep the past behaviour, "
188
+ "explicitly set `max_features=1.0` or remove this "
189
+ "parameter as it is also the default value for "
190
+ "RandomForestRegressors and ExtraTreesRegressors.",
191
+ FutureWarning,
192
+ )
193
+ return (
194
+ max(1, int(np.sqrt(n_features)))
195
+ if isinstance(self, ForestClassifier)
196
+ else n_features
197
+ )
198
+ if max_features == "sqrt":
199
+ return max(1, int(np.sqrt(n_features)))
200
+ if max_features == "log2":
201
+ return max(1, int(np.log2(n_features)))
202
+ allowed_string_values = (
203
+ '"sqrt" or "log2"'
204
+ if sklearn_check_version("1.3")
205
+ else '"auto", "sqrt" or "log2"'
206
+ )
207
+ raise ValueError(
208
+ "Invalid value for max_features. Allowed string "
209
+ f"values are {allowed_string_values}."
210
+ )
211
+ if isinstance(max_features, (numbers.Integral, np.integer)):
212
+ return max_features
213
+ if max_features > 0.0:
214
+ return max(1, int(max_features * n_features))
215
+ return 0
216
+
177
217
  def _check_parameters(self):
178
218
  if isinstance(self.min_samples_leaf, numbers.Integral):
179
219
  if not 1 <= self.min_samples_leaf:
@@ -518,7 +558,7 @@ class ForestClassifier(sklearn_ForestClassifier, BaseForest):
518
558
  )
519
559
 
520
560
  if patching_status.get_status():
521
- X, y = self._validate_data(
561
+ X, y = check_X_y(
522
562
  X,
523
563
  y,
524
564
  multi_output=True,
@@ -738,6 +778,10 @@ class ForestClassifier(sklearn_ForestClassifier, BaseForest):
738
778
  or self.estimator.__class__ == DecisionTreeClassifier,
739
779
  "ExtraTrees only supported starting from oneDAL version 2023.1",
740
780
  ),
781
+ (
782
+ not self.oob_score,
783
+ "oob_scores using r2 or accuracy not implemented.",
784
+ ),
741
785
  (sample_weight is None, "sample_weight is not supported."),
742
786
  ]
743
787
  )
@@ -777,26 +821,46 @@ class ForestClassifier(sklearn_ForestClassifier, BaseForest):
777
821
  return patching_status
778
822
 
779
823
  def _onedal_predict(self, X, queue=None):
780
- X = check_array(
781
- X,
782
- dtype=[np.float64, np.float32],
783
- force_all_finite=False,
784
- ) # Warning, order of dtype matters
785
824
  check_is_fitted(self, "_onedal_estimator")
786
825
 
787
826
  if sklearn_check_version("1.0"):
788
- self._check_feature_names(X, reset=False)
827
+ X = self._validate_data(
828
+ X,
829
+ dtype=[np.float64, np.float32],
830
+ force_all_finite=False,
831
+ reset=False,
832
+ ensure_2d=True,
833
+ )
834
+ else:
835
+ X = check_array(
836
+ X,
837
+ dtype=[np.float64, np.float32],
838
+ force_all_finite=False,
839
+ ) # Warning, order of dtype matters
840
+ self._check_n_features(X, reset=False)
789
841
 
790
842
  res = self._onedal_estimator.predict(X, queue=queue)
791
843
  return np.take(self.classes_, res.ravel().astype(np.int64, casting="unsafe"))
792
844
 
793
845
  def _onedal_predict_proba(self, X, queue=None):
794
- X = check_array(X, dtype=[np.float64, np.float32], force_all_finite=False)
795
846
  check_is_fitted(self, "_onedal_estimator")
796
847
 
797
- self._check_n_features(X, reset=False)
798
848
  if sklearn_check_version("1.0"):
799
- self._check_feature_names(X, reset=False)
849
+ X = self._validate_data(
850
+ X,
851
+ dtype=[np.float64, np.float32],
852
+ force_all_finite=False,
853
+ reset=False,
854
+ ensure_2d=True,
855
+ )
856
+ else:
857
+ X = check_array(
858
+ X,
859
+ dtype=[np.float64, np.float32],
860
+ force_all_finite=False,
861
+ ) # Warning, order of dtype matters
862
+ self._check_n_features(X, reset=False)
863
+
800
864
  return self._onedal_estimator.predict_proba(X, queue=queue)
801
865
 
802
866
  def _onedal_score(self, X, y, sample_weight=None, queue=None):
@@ -913,7 +977,7 @@ class ForestRegressor(sklearn_ForestRegressor, BaseForest):
913
977
  )
914
978
 
915
979
  if patching_status.get_status():
916
- X, y = self._validate_data(
980
+ X, y = check_X_y(
917
981
  X,
918
982
  y,
919
983
  multi_output=True,
@@ -995,7 +1059,7 @@ class ForestRegressor(sklearn_ForestRegressor, BaseForest):
995
1059
  ]
996
1060
  )
997
1061
 
998
- elif method_name == "predict":
1062
+ elif method_name in ["predict", "score"]:
999
1063
  X = data[0]
1000
1064
 
1001
1065
  patching_status.and_conditions(
@@ -1045,11 +1109,12 @@ class ForestRegressor(sklearn_ForestRegressor, BaseForest):
1045
1109
  or self.estimator.__class__ == DecisionTreeClassifier,
1046
1110
  "ExtraTrees only supported starting from oneDAL version 2023.1",
1047
1111
  ),
1112
+ (not self.oob_score, "oob_score value is not sklearn conformant."),
1048
1113
  (sample_weight is None, "sample_weight is not supported."),
1049
1114
  ]
1050
1115
  )
1051
1116
 
1052
- elif method_name == "predict":
1117
+ elif method_name in ["predict", "score"]:
1053
1118
  X = data[0]
1054
1119
 
1055
1120
  patching_status.and_conditions(
@@ -1082,16 +1147,28 @@ class ForestRegressor(sklearn_ForestRegressor, BaseForest):
1082
1147
  return patching_status
1083
1148
 
1084
1149
  def _onedal_predict(self, X, queue=None):
1085
- X = check_array(
1086
- X, dtype=[np.float64, np.float32], force_all_finite=False
1087
- ) # Warning, order of dtype matters
1088
1150
  check_is_fitted(self, "_onedal_estimator")
1089
1151
 
1090
1152
  if sklearn_check_version("1.0"):
1091
- self._check_feature_names(X, reset=False)
1153
+ X = self._validate_data(
1154
+ X,
1155
+ dtype=[np.float64, np.float32],
1156
+ force_all_finite=False,
1157
+ reset=False,
1158
+ ensure_2d=True,
1159
+ ) # Warning, order of dtype matters
1160
+ else:
1161
+ X = check_array(
1162
+ X, dtype=[np.float64, np.float32], force_all_finite=False
1163
+ ) # Warning, order of dtype matters
1092
1164
 
1093
1165
  return self._onedal_estimator.predict(X, queue=queue)
1094
1166
 
1167
+ def _onedal_score(self, X, y, sample_weight=None, queue=None):
1168
+ return r2_score(
1169
+ y, self._onedal_predict(X, queue=queue), sample_weight=sample_weight
1170
+ )
1171
+
1095
1172
  def fit(self, X, y, sample_weight=None):
1096
1173
  dispatch(
1097
1174
  self,
@@ -1118,8 +1195,23 @@ class ForestRegressor(sklearn_ForestRegressor, BaseForest):
1118
1195
  X,
1119
1196
  )
1120
1197
 
1198
+ @wrap_output_data
1199
+ def score(self, X, y, sample_weight=None):
1200
+ return dispatch(
1201
+ self,
1202
+ "score",
1203
+ {
1204
+ "onedal": self.__class__._onedal_score,
1205
+ "sklearn": sklearn_ForestRegressor.score,
1206
+ },
1207
+ X,
1208
+ y,
1209
+ sample_weight=sample_weight,
1210
+ )
1211
+
1121
1212
  fit.__doc__ = sklearn_ForestRegressor.fit.__doc__
1122
1213
  predict.__doc__ = sklearn_ForestRegressor.predict.__doc__
1214
+ score.__doc__ = sklearn_ForestRegressor.score.__doc__
1123
1215
 
1124
1216
 
1125
1217
  @control_n_jobs(decorated_methods=["fit", "predict", "predict_proba", "score"])
@@ -14,6 +14,7 @@
14
14
  # limitations under the License.
15
15
  # ===============================================================================
16
16
 
17
+ import numpy as np
17
18
  import pytest
18
19
  from numpy.testing import assert_allclose
19
20
  from sklearn.datasets import make_classification, make_regression
@@ -45,7 +46,10 @@ def test_sklearnex_import_rf_classifier(dataframe, queue):
45
46
  assert_allclose([1], _as_numpy(rf.predict([[0, 0, 0, 0]])))
46
47
 
47
48
 
48
- @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
49
+ # TODO: fix RF regressor predict for the GPU sycl_queue.
50
+ @pytest.mark.parametrize(
51
+ "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
52
+ )
49
53
  def test_sklearnex_import_rf_regression(dataframe, queue):
50
54
  from sklearnex.ensemble import RandomForestRegressor
51
55
 
@@ -65,7 +69,10 @@ def test_sklearnex_import_rf_regression(dataframe, queue):
65
69
  assert_allclose([-6.839], pred, atol=1e-2)
66
70
 
67
71
 
68
- @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
72
+ # TODO: fix ET classifier predict for the GPU sycl_queue.
73
+ @pytest.mark.parametrize(
74
+ "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
75
+ )
69
76
  def test_sklearnex_import_et_classifier(dataframe, queue):
70
77
  from sklearnex.ensemble import ExtraTreesClassifier
71
78
 
@@ -86,7 +93,10 @@ def test_sklearnex_import_et_classifier(dataframe, queue):
86
93
  assert_allclose([1], _as_numpy(rf.predict([[0, 0, 0, 0]])))
87
94
 
88
95
 
89
- @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
96
+ # TODO: fix ET regressor predict for the GPU sycl_queue.
97
+ @pytest.mark.parametrize(
98
+ "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
99
+ )
90
100
  def test_sklearnex_import_et_regression(dataframe, queue):
91
101
  from sklearnex.ensemble import ExtraTreesRegressor
92
102
 
@@ -17,18 +17,32 @@
17
17
 
18
18
  def get_patch_str(name=None, verbose=True):
19
19
  return f"""try:
20
+ # TEMP. FIX: sklearnex.patch_sklearn imports sklearn beforehand
21
+ # when it didn't initialized _threadpool_controller required for
22
+ # pairwise distances dispatching during imports.
23
+ # Manually setting and deleting _threadpool_controller during patch fixes it.
24
+ import sklearn
25
+ from threadpoolctl import ThreadpoolController
26
+ sklearn._threadpool_controller = ThreadpoolController()
20
27
  from sklearnex import patch_sklearn
21
28
  patch_sklearn(name={str(name)}, verbose={str(verbose)})
22
- del patch_sklearn
29
+ del patch_sklearn, sklearn._threadpool_controller
23
30
  except ImportError:
24
31
  pass"""
25
32
 
26
33
 
27
34
  def get_patch_str_re():
28
35
  return r"""\ntry:
36
+ \# TEMP. FIX: sklearnex.patch_sklearn imports sklearn beforehand
37
+ \# when it didn't initialized _threadpool_controller required for
38
+ \# pairwise distances dispatching during imports.
39
+ \# Manually setting and deleting _threadpool_controller during patch fixes it.
40
+ import sklearn
41
+ from threadpoolctl import ThreadpoolController
42
+ sklearn._threadpool_controller = ThreadpoolController\(\)
29
43
  from sklearnex import patch_sklearn
30
44
  patch_sklearn\(name=.*, verbose=.*\)
31
- del patch_sklearn
45
+ del patch_sklearn, sklearn._threadpool_controller
32
46
  except ImportError:
33
47
  pass\n"""
34
48
 
@@ -15,14 +15,16 @@
15
15
  # ===============================================================================
16
16
 
17
17
  from .coordinate_descent import ElasticNet, Lasso
18
+ from .incremental_linear import IncrementalLinearRegression
18
19
  from .linear import LinearRegression
19
20
  from .logistic_regression import LogisticRegression
20
21
  from .ridge import Ridge
21
22
 
22
23
  __all__ = [
23
- "Ridge",
24
- "LinearRegression",
25
- "LogisticRegression",
26
24
  "ElasticNet",
25
+ "IncrementalLinearRegression",
27
26
  "Lasso",
27
+ "LinearRegression",
28
+ "LogisticRegression",
29
+ "Ridge",
28
30
  ]