PyPI - scikit-learn-intelex - Versions diffs - 2024.4.0__py39-none-manylinux1_x86_64.whl → 2024.6.0__py39-none-manylinux1_x86_64.whl - Mend

scikit-learn-intelex 2024.4.0__py39-none-manylinux1_x86_64.whl → 2024.6.0__py39-none-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (44)

{scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/METADATA +2 -2
{scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/RECORD +43 -36
sklearnex/_device_offload.py +8 -1
sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +2 -4
sklearnex/cluster/dbscan.py +3 -0
sklearnex/cluster/tests/test_dbscan.py +8 -6
sklearnex/conftest.py +11 -1
sklearnex/covariance/incremental_covariance.py +217 -30
sklearnex/covariance/tests/test_incremental_covariance.py +54 -17
sklearnex/decomposition/pca.py +68 -13
sklearnex/decomposition/tests/test_pca.py +6 -4
sklearnex/dispatcher.py +46 -1
sklearnex/ensemble/_forest.py +114 -22
sklearnex/ensemble/tests/test_forest.py +13 -3
sklearnex/glob/dispatcher.py +16 -2
sklearnex/linear_model/__init__.py +5 -3
sklearnex/linear_model/incremental_linear.py +464 -0
sklearnex/linear_model/linear.py +27 -9
sklearnex/linear_model/logistic_regression.py +13 -15
sklearnex/linear_model/tests/test_incremental_linear.py +200 -0
sklearnex/linear_model/tests/test_linear.py +2 -2
sklearnex/neighbors/knn_regression.py +24 -0
sklearnex/neighbors/tests/test_neighbors.py +2 -2
sklearnex/preview/__init__.py +1 -1
sklearnex/preview/decomposition/__init__.py +19 -0
sklearnex/preview/decomposition/incremental_pca.py +228 -0
sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
sklearnex/svm/_common.py +165 -20
sklearnex/svm/nusvc.py +40 -4
sklearnex/svm/nusvr.py +31 -2
sklearnex/svm/svc.py +40 -4
sklearnex/svm/svr.py +31 -2
sklearnex/tests/_utils.py +70 -29
sklearnex/tests/test_common.py +54 -0
sklearnex/tests/test_memory_usage.py +195 -132
sklearnex/tests/test_n_jobs_support.py +4 -0
sklearnex/tests/test_patching.py +22 -10
sklearnex/tests/test_run_to_run_stability.py +283 -0
sklearnex/utils/_namespace.py +1 -1
sklearnex/utils/tests/test_finite.py +89 -0
sklearnex/tests/test_run_to_run_stability_tests.py +0 -428
{scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/LICENSE.txt +0 -0
{scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/WHEEL +0 -0
{scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/top_level.txt +0 -0

sklearnex/ensemble/_forest.py CHANGED Viewed

@@ -29,7 +29,7 @@ from sklearn.ensemble._forest import ForestClassifier as sklearn_ForestClassifie
 from sklearn.ensemble._forest import ForestRegressor as sklearn_ForestRegressor
 from sklearn.ensemble._forest import _get_n_samples_bootstrap
 from sklearn.exceptions import DataConversionWarning
-from sklearn.metrics import accuracy_score
+from sklearn.metrics import accuracy_score, r2_score
 from sklearn.tree import (
     DecisionTreeClassifier,
     DecisionTreeRegressor,
@@ -38,7 +38,7 @@ from sklearn.tree import (
 )
 from sklearn.tree._tree import Tree
 from sklearn.utils import check_random_state, deprecated
-from sklearn.utils.validation import check_array, check_is_fitted
+from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
 from daal4py.sklearn._n_jobs_support import control_n_jobs
 from daal4py.sklearn._utils import (
@@ -74,6 +74,7 @@ class BaseForest(ABC):
             accept_sparse=False,
             dtype=[np.float64, np.float32],
             force_all_finite=False,
+            ensure_2d=True,
         )
         if sample_weight is not None:
@@ -97,8 +98,6 @@ class BaseForest(ABC):
         y, expanded_class_weight = self._validate_y_class_weight(y)
-        self.n_features_in_ = X.shape[1]
         if expanded_class_weight is not None:
             if sample_weight is not None:
                 sample_weight = sample_weight * expanded_class_weight
@@ -114,7 +113,9 @@ class BaseForest(ABC):
             "min_samples_split": self.min_samples_split,
             "min_samples_leaf": self.min_samples_leaf,
             "min_weight_fraction_leaf": self.min_weight_fraction_leaf,
-            "max_features": self.max_features,
+            "max_features": self._to_absolute_max_features(
+                self.max_features, self.n_features_in_
+            ),
             "max_leaf_nodes": self.max_leaf_nodes,
             "min_impurity_decrease": self.min_impurity_decrease,
             "bootstrap": self.bootstrap,
@@ -174,6 +175,45 @@ class BaseForest(ABC):
         self._validate_estimator()
         return self
+    def _to_absolute_max_features(self, max_features, n_features):
+        if max_features is None:
+            return n_features
+        if isinstance(max_features, str):
+            if max_features == "auto":
+                if not sklearn_check_version("1.3"):
+                    if sklearn_check_version("1.1"):
+                        warnings.warn(
+                            "`max_features='auto'` has been deprecated in 1.1 "
+                            "and will be removed in 1.3. To keep the past behaviour, "
+                            "explicitly set `max_features=1.0` or remove this "
+                            "parameter as it is also the default value for "
+                            "RandomForestRegressors and ExtraTreesRegressors.",
+                            FutureWarning,
+                        )
+                    return (
+                        max(1, int(np.sqrt(n_features)))
+                        if isinstance(self, ForestClassifier)
+                        else n_features
+                    )
+            if max_features == "sqrt":
+                return max(1, int(np.sqrt(n_features)))
+            if max_features == "log2":
+                return max(1, int(np.log2(n_features)))
+            allowed_string_values = (
+                '"sqrt" or "log2"'
+                if sklearn_check_version("1.3")
+                else '"auto", "sqrt" or "log2"'
+            )
+            raise ValueError(
+                "Invalid value for max_features. Allowed string "
+                f"values are {allowed_string_values}."
+            )
+        if isinstance(max_features, (numbers.Integral, np.integer)):
+            return max_features
+        if max_features > 0.0:
+            return max(1, int(max_features * n_features))
+        return 0
     def _check_parameters(self):
         if isinstance(self.min_samples_leaf, numbers.Integral):
             if not 1 <= self.min_samples_leaf:
@@ -518,7 +558,7 @@ class ForestClassifier(sklearn_ForestClassifier, BaseForest):
             )
         if patching_status.get_status():
-            X, y = self._validate_data(
+            X, y = check_X_y(
                 X,
                 y,
                 multi_output=True,
@@ -738,6 +778,10 @@ class ForestClassifier(sklearn_ForestClassifier, BaseForest):
                         or self.estimator.__class__ == DecisionTreeClassifier,
                         "ExtraTrees only supported starting from oneDAL version 2023.1",
                     ),
+                    (
+                        not self.oob_score,
+                        "oob_scores using r2 or accuracy not implemented.",
+                    ),
                     (sample_weight is None, "sample_weight is not supported."),
                 ]
             )
@@ -777,26 +821,46 @@ class ForestClassifier(sklearn_ForestClassifier, BaseForest):
         return patching_status
     def _onedal_predict(self, X, queue=None):
-        X = check_array(
-            X,
-            dtype=[np.float64, np.float32],
-            force_all_finite=False,
-        )  # Warning, order of dtype matters
         check_is_fitted(self, "_onedal_estimator")
         if sklearn_check_version("1.0"):
-            self._check_feature_names(X, reset=False)
+            X = self._validate_data(
+                X,
+                dtype=[np.float64, np.float32],
+                force_all_finite=False,
+                reset=False,
+                ensure_2d=True,
+            )
+        else:
+            X = check_array(
+                X,
+                dtype=[np.float64, np.float32],
+                force_all_finite=False,
+            )  # Warning, order of dtype matters
+            self._check_n_features(X, reset=False)
         res = self._onedal_estimator.predict(X, queue=queue)
         return np.take(self.classes_, res.ravel().astype(np.int64, casting="unsafe"))
     def _onedal_predict_proba(self, X, queue=None):
-        X = check_array(X, dtype=[np.float64, np.float32], force_all_finite=False)
         check_is_fitted(self, "_onedal_estimator")
-        self._check_n_features(X, reset=False)
         if sklearn_check_version("1.0"):
-            self._check_feature_names(X, reset=False)
+            X = self._validate_data(
+                X,
+                dtype=[np.float64, np.float32],
+                force_all_finite=False,
+                reset=False,
+                ensure_2d=True,
+            )
+        else:
+            X = check_array(
+                X,
+                dtype=[np.float64, np.float32],
+                force_all_finite=False,
+            )  # Warning, order of dtype matters
+            self._check_n_features(X, reset=False)
         return self._onedal_estimator.predict_proba(X, queue=queue)
     def _onedal_score(self, X, y, sample_weight=None, queue=None):
@@ -913,7 +977,7 @@ class ForestRegressor(sklearn_ForestRegressor, BaseForest):
             )
         if patching_status.get_status():
-            X, y = self._validate_data(
+            X, y = check_X_y(
                 X,
                 y,
                 multi_output=True,
@@ -995,7 +1059,7 @@ class ForestRegressor(sklearn_ForestRegressor, BaseForest):
                 ]
             )
-        elif method_name == "predict":
+        elif method_name in ["predict", "score"]:
             X = data[0]
             patching_status.and_conditions(
@@ -1045,11 +1109,12 @@ class ForestRegressor(sklearn_ForestRegressor, BaseForest):
                         or self.estimator.__class__ == DecisionTreeClassifier,
                         "ExtraTrees only supported starting from oneDAL version 2023.1",
                     ),
+                    (not self.oob_score, "oob_score value is not sklearn conformant."),
                     (sample_weight is None, "sample_weight is not supported."),
                 ]
             )
-        elif method_name == "predict":
+        elif method_name in ["predict", "score"]:
             X = data[0]
             patching_status.and_conditions(
@@ -1082,16 +1147,28 @@ class ForestRegressor(sklearn_ForestRegressor, BaseForest):
         return patching_status
     def _onedal_predict(self, X, queue=None):
-        X = check_array(
-            X, dtype=[np.float64, np.float32], force_all_finite=False
-        )  # Warning, order of dtype matters
         check_is_fitted(self, "_onedal_estimator")
         if sklearn_check_version("1.0"):
-            self._check_feature_names(X, reset=False)
+            X = self._validate_data(
+                X,
+                dtype=[np.float64, np.float32],
+                force_all_finite=False,
+                reset=False,
+                ensure_2d=True,
+            )  # Warning, order of dtype matters
+        else:
+            X = check_array(
+                X, dtype=[np.float64, np.float32], force_all_finite=False
+            )  # Warning, order of dtype matters
         return self._onedal_estimator.predict(X, queue=queue)
+    def _onedal_score(self, X, y, sample_weight=None, queue=None):
+        return r2_score(
+            y, self._onedal_predict(X, queue=queue), sample_weight=sample_weight
+        )
     def fit(self, X, y, sample_weight=None):
         dispatch(
             self,
@@ -1118,8 +1195,23 @@ class ForestRegressor(sklearn_ForestRegressor, BaseForest):
             X,
         )
+    @wrap_output_data
+    def score(self, X, y, sample_weight=None):
+        return dispatch(
+            self,
+            "score",
+            {
+                "onedal": self.__class__._onedal_score,
+                "sklearn": sklearn_ForestRegressor.score,
+            },
+            X,
+            y,
+            sample_weight=sample_weight,
+        )
     fit.__doc__ = sklearn_ForestRegressor.fit.__doc__
     predict.__doc__ = sklearn_ForestRegressor.predict.__doc__
+    score.__doc__ = sklearn_ForestRegressor.score.__doc__
 @control_n_jobs(decorated_methods=["fit", "predict", "predict_proba", "score"])

sklearnex/ensemble/tests/test_forest.py CHANGED Viewed

@@ -14,6 +14,7 @@
 # limitations under the License.
 # ===============================================================================
+import numpy as np
 import pytest
 from numpy.testing import assert_allclose
 from sklearn.datasets import make_classification, make_regression
@@ -45,7 +46,10 @@ def test_sklearnex_import_rf_classifier(dataframe, queue):
     assert_allclose([1], _as_numpy(rf.predict([[0, 0, 0, 0]])))
-@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+# TODO: fix RF regressor predict for the GPU sycl_queue.
+@pytest.mark.parametrize(
+    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
+)
 def test_sklearnex_import_rf_regression(dataframe, queue):
     from sklearnex.ensemble import RandomForestRegressor
@@ -65,7 +69,10 @@ def test_sklearnex_import_rf_regression(dataframe, queue):
             assert_allclose([-6.839], pred, atol=1e-2)
-@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+# TODO: fix ET classifier predict for the GPU sycl_queue.
+@pytest.mark.parametrize(
+    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
+)
 def test_sklearnex_import_et_classifier(dataframe, queue):
     from sklearnex.ensemble import ExtraTreesClassifier
@@ -86,7 +93,10 @@ def test_sklearnex_import_et_classifier(dataframe, queue):
     assert_allclose([1], _as_numpy(rf.predict([[0, 0, 0, 0]])))
-@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+# TODO: fix ET regressor predict for the GPU sycl_queue.
+@pytest.mark.parametrize(
+    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
+)
 def test_sklearnex_import_et_regression(dataframe, queue):
     from sklearnex.ensemble import ExtraTreesRegressor

sklearnex/glob/dispatcher.py CHANGED Viewed

@@ -17,18 +17,32 @@
 def get_patch_str(name=None, verbose=True):
     return f"""try:
+    # TEMP. FIX: sklearnex.patch_sklearn imports sklearn beforehand
+    # when it didn't initialized _threadpool_controller required for
+    # pairwise distances dispatching during imports.
+    # Manually setting and deleting _threadpool_controller during patch fixes it.
+    import sklearn
+    from threadpoolctl import ThreadpoolController
+    sklearn._threadpool_controller = ThreadpoolController()
     from sklearnex import patch_sklearn
     patch_sklearn(name={str(name)}, verbose={str(verbose)})
-    del patch_sklearn
+    del patch_sklearn, sklearn._threadpool_controller
 except ImportError:
     pass"""
 def get_patch_str_re():
     return r"""\ntry:
+    \# TEMP. FIX: sklearnex.patch_sklearn imports sklearn beforehand
+    \# when it didn't initialized _threadpool_controller required for
+    \# pairwise distances dispatching during imports.
+    \# Manually setting and deleting _threadpool_controller during patch fixes it.
+    import sklearn
+    from threadpoolctl import ThreadpoolController
+    sklearn._threadpool_controller = ThreadpoolController\(\)
     from sklearnex import patch_sklearn
     patch_sklearn\(name=.*, verbose=.*\)
-    del patch_sklearn
+    del patch_sklearn, sklearn._threadpool_controller
 except ImportError:
     pass\n"""

sklearnex/linear_model/__init__.py CHANGED Viewed

@@ -15,14 +15,16 @@
 # ===============================================================================
 from .coordinate_descent import ElasticNet, Lasso
+from .incremental_linear import IncrementalLinearRegression
 from .linear import LinearRegression
 from .logistic_regression import LogisticRegression
 from .ridge import Ridge
 __all__ = [
-    "Ridge",
-    "LinearRegression",
-    "LogisticRegression",
     "ElasticNet",
+    "IncrementalLinearRegression",
     "Lasso",
+    "LinearRegression",
+    "LogisticRegression",
+    "Ridge",
 ]