PyPI - scikit-learn-intelex - Versions diffs - 2024.1.0__py38-none-manylinux1_x86_64.whl → 2024.2.0__py38-none-manylinux1_x86_64.whl - Mend

scikit-learn-intelex 2024.1.0__py38-none-manylinux1_x86_64.whl → 2024.2.0__py38-none-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (40)

{scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.2.0.dist-info}/METADATA +2 -2
{scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.2.0.dist-info}/RECORD +38 -34
sklearnex/cluster/dbscan.py +3 -3
sklearnex/{preview/linear_model → covariance}/__init__.py +3 -3
sklearnex/covariance/incremental_covariance.py +130 -0
sklearnex/covariance/tests/test_incremental_covariance.py +143 -0
sklearnex/dispatcher.py +19 -18
sklearnex/ensemble/_forest.py +5 -10
sklearnex/linear_model/__init__.py +1 -2
sklearnex/linear_model/linear.py +3 -10
sklearnex/{preview/linear_model → linear_model}/logistic_regression.py +19 -38
sklearnex/linear_model/tests/test_logreg.py +70 -5
sklearnex/neighbors/__init__.py +1 -1
sklearnex/neighbors/_lof.py +167 -0
sklearnex/neighbors/knn_classification.py +6 -9
sklearnex/neighbors/knn_regression.py +6 -8
sklearnex/neighbors/knn_unsupervised.py +5 -7
sklearnex/neighbors/tests/test_neighbors.py +12 -11
sklearnex/preview/__init__.py +1 -1
sklearnex/preview/cluster/k_means.py +3 -8
sklearnex/preview/covariance/covariance.py +46 -12
sklearnex/preview/decomposition/pca.py +3 -5
sklearnex/spmd/__init__.py +1 -0
sklearnex/spmd/covariance/__init__.py +19 -0
sklearnex/spmd/covariance/covariance.py +21 -0
sklearnex/spmd/linear_model/__init__.py +2 -1
sklearnex/spmd/linear_model/logistic_regression.py +21 -0
sklearnex/svm/nusvc.py +5 -6
sklearnex/svm/nusvr.py +3 -4
sklearnex/svm/svc.py +5 -6
sklearnex/svm/svr.py +3 -4
sklearnex/tests/test_memory_usage.py +1 -4
sklearnex/tests/test_monkeypatch.py +33 -20
sklearnex/tests/test_n_jobs_support.py +71 -9
sklearnex/tests/test_patching.py +19 -5
sklearnex/neighbors/lof.py +0 -436
sklearnex/preview/linear_model/tests/test_preview_logistic_regression.py +0 -59
{scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.2.0.dist-info}/LICENSE.txt +0 -0
{scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.2.0.dist-info}/WHEEL +0 -0
{scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.2.0.dist-info}/top_level.txt +0 -0

sklearnex/tests/test_patching.py CHANGED Viewed

@@ -14,6 +14,7 @@
 # limitations under the License.
 # ==============================================================================
+import inspect
 import os
 import pathlib
 import re
@@ -96,15 +97,15 @@ def _load_all_models(patched):
     if patched:
         patch_sklearn()
-    models = []
+    models = {}
     for patch_infos in get_patch_map().values():
-        maybe_class = getattr(patch_infos[0][0][0], patch_infos[0][0][1])
+        maybe_class = getattr(patch_infos[0][0][0], patch_infos[0][0][1], None)
         if (
             maybe_class is not None
             and isclass(maybe_class)
             and issubclass(maybe_class, BaseEstimator)
         ):
-            models.append(maybe_class())
+            models[patch_infos[0][0][1]] = maybe_class
     if patched:
         unpatch_sklearn()
@@ -116,7 +117,20 @@ PATCHED_MODELS = _load_all_models(patched=True)
 UNPATCHED_MODELS = _load_all_models(patched=False)
-@pytest.mark.parametrize(("patched", "unpatched"), zip(PATCHED_MODELS, UNPATCHED_MODELS))
-def test_is_patched_instance(patched, unpatched):
+@pytest.mark.parametrize("estimator", UNPATCHED_MODELS.keys())
+def test_is_patched_instance(estimator):
+    patched = PATCHED_MODELS[estimator]
+    unpatched = UNPATCHED_MODELS[estimator]
     assert is_patched_instance(patched), f"{patched} is a patched instance"
     assert not is_patched_instance(unpatched), f"{unpatched} is an unpatched instance"
+@pytest.mark.parametrize("member", ["_onedal_cpu_supported", "_onedal_gpu_supported"])
+@pytest.mark.parametrize(
+    "name",
+    [i for i in PATCHED_MODELS.keys() if "sklearnex" in PATCHED_MODELS[i].__module__],
+)
+def test_onedal_supported_member(name, member):
+    patched = PATCHED_MODELS[name]
+    sig = str(inspect.signature(getattr(patched, member)))
+    assert "(self, method_name, *data)" == sig

sklearnex/neighbors/lof.py DELETED Viewed

@@ -1,436 +0,0 @@
-# ===============================================================================
-# Copyright 2023 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ===============================================================================
-import warnings
-import numpy as np
-from sklearn.neighbors._lof import LocalOutlierFactor as sklearn_LocalOutlierFactor
-from .knn_unsupervised import NearestNeighbors
-try:
-    from sklearn.utils.metaestimators import available_if
-except ImportError:
-    pass
-from sklearn.utils import check_array
-from sklearn.utils.validation import check_is_fitted
-from daal4py.sklearn._utils import sklearn_check_version
-from .._config import config_context
-from .._device_offload import dispatch, wrap_output_data
-from .._utils import PatchingConditionsChain
-if sklearn_check_version("1.0"):
-    class LocalOutlierFactor(sklearn_LocalOutlierFactor):
-        if sklearn_check_version("1.2"):
-            _parameter_constraints: dict = {
-                **sklearn_LocalOutlierFactor._parameter_constraints
-            }
-        def __init__(
-            self,
-            n_neighbors=20,
-            *,
-            algorithm="auto",
-            leaf_size=30,
-            metric="minkowski",
-            p=2,
-            metric_params=None,
-            contamination="auto",
-            novelty=False,
-            n_jobs=None,
-        ):
-            super().__init__(
-                n_neighbors=n_neighbors,
-                algorithm=algorithm,
-                leaf_size=leaf_size,
-                metric=metric,
-                p=p,
-                metric_params=metric_params,
-                n_jobs=n_jobs,
-                contamination=contamination,
-                novelty=novelty,
-            )
-        def _fit(self, X, y, queue=None):
-            with config_context(target_offload=queue):
-                if sklearn_check_version("1.2"):
-                    self._validate_params()
-                self._knn = NearestNeighbors(
-                    n_neighbors=self.n_neighbors,
-                    algorithm=self.algorithm,
-                    leaf_size=self.leaf_size,
-                    metric=self.metric,
-                    p=self.p,
-                    metric_params=self.metric_params,
-                    n_jobs=self.n_jobs,
-                )
-                self._knn.fit(X)
-                if self.contamination != "auto":
-                    if not (0.0 < self.contamination <= 0.5):
-                        raise ValueError(
-                            "contamination must be in (0, 0.5], "
-                            "got: %f" % self.contamination
-                        )
-                n_samples = self._knn.n_samples_fit_
-                if self.n_neighbors > n_samples:
-                    warnings.warn(
-                        "n_neighbors (%s) is greater than the "
-                        "total number of samples (%s). n_neighbors "
-                        "will be set to (n_samples - 1) for estimation."
-                        % (self.n_neighbors, n_samples)
-                    )
-                self.n_neighbors_ = max(1, min(self.n_neighbors, n_samples - 1))
-                self._distances_fit_X_, _neighbors_indices_fit_X_ = self._knn.kneighbors(
-                    n_neighbors=self.n_neighbors_
-                )
-                self._lrd = self._local_reachability_density(
-                    self._distances_fit_X_, _neighbors_indices_fit_X_
-                )
-                # Compute lof score over training samples to define offset_:
-                lrd_ratios_array = (
-                    self._lrd[_neighbors_indices_fit_X_] / self._lrd[:, np.newaxis]
-                )
-                self.negative_outlier_factor_ = -np.mean(lrd_ratios_array, axis=1)
-                if self.contamination == "auto":
-                    # inliers score around -1 (the higher, the less abnormal).
-                    self.offset_ = -1.5
-                else:
-                    self.offset_ = np.percentile(
-                        self.negative_outlier_factor_, 100.0 * self.contamination
-                    )
-                for knn_prop_name in self._knn.__dict__.keys():
-                    if knn_prop_name not in self.__dict__.keys():
-                        setattr(self, knn_prop_name, self._knn.__dict__[knn_prop_name])
-                return self
-        def fit(self, X, y=None):
-            return dispatch(
-                self,
-                "neighbors.LocalOutlierFactor.fit",
-                {
-                    "onedal": self.__class__._fit,
-                    "sklearn": None,
-                },
-                X,
-                y,
-            )
-        def _onedal_predict(self, X, queue=None):
-            with config_context(target_offload=queue):
-                check_is_fitted(self)
-                if X is not None:
-                    X = check_array(X, accept_sparse="csr")
-                    is_inlier = np.ones(X.shape[0], dtype=int)
-                    is_inlier[self.decision_function(X) < 0] = -1
-                else:
-                    is_inlier = np.ones(self._knn.n_samples_fit_, dtype=int)
-                    is_inlier[self.negative_outlier_factor_ < self.offset_] = -1
-                return is_inlier
-        @wrap_output_data
-        def _predict(self, X=None):
-            return dispatch(
-                self,
-                "neighbors.LocalOutlierFactor.predict",
-                {
-                    "onedal": self.__class__._onedal_predict,
-                    "sklearn": None,
-                },
-                X,
-            )
-        def _score_samples(self, X, queue=None):
-            with config_context(target_offload=queue):
-                check_is_fitted(self)
-                X = check_array(X, accept_sparse="csr")
-                distances_X, neighbors_indices_X = self._knn.kneighbors(
-                    X, n_neighbors=self.n_neighbors_
-                )
-                X_lrd = self._local_reachability_density(distances_X, neighbors_indices_X)
-                lrd_ratios_array = self._lrd[neighbors_indices_X] / X_lrd[:, np.newaxis]
-                # as bigger is better:
-                return -np.mean(lrd_ratios_array, axis=1)
-        def _check_novelty_score_samples(self):
-            if not self.novelty:
-                msg = (
-                    "score_samples is not available when novelty=False. The "
-                    "scores of the training samples are always available "
-                    "through the negative_outlier_factor_ attribute. Use "
-                    "novelty=True if you want to use LOF for novelty detection "
-                    "and compute score_samples for new unseen data."
-                )
-                raise AttributeError(msg)
-            return True
-        @available_if(_check_novelty_score_samples)
-        @wrap_output_data
-        def score_samples(self, X):
-            return dispatch(
-                self,
-                "neighbors.LocalOutlierFactor.score_samples",
-                {
-                    "onedal": self.__class__._score_samples,
-                    "sklearn": None,
-                },
-                X,
-            )
-        def _check_novelty_fit_predict(self):
-            if self.novelty:
-                msg = (
-                    "fit_predict is not available when novelty=True. Use "
-                    "novelty=False if you want to predict on the training set."
-                )
-                raise AttributeError(msg)
-            return True
-        def _fit_predict(self, X, y, queue=None):
-            with config_context(target_offload=queue):
-                return self.fit(X)._predict()
-        @available_if(_check_novelty_fit_predict)
-        @wrap_output_data
-        def fit_predict(self, X, y=None):
-            return dispatch(
-                self,
-                "neighbors.LocalOutlierFactor.fit_predict",
-                {
-                    "onedal": self.__class__._fit_predict,
-                    "sklearn": None,
-                },
-                X,
-                y,
-            )
-        def _onedal_gpu_supported(self, method_name, *data):
-            class_name = self.__class__.__name__
-            patching_status = PatchingConditionsChain(
-                f"sklearn.neighbors.{class_name}.{method_name}"
-            )
-            return patching_status
-        def _onedal_cpu_supported(self, method_name, *data):
-            class_name = self.__class__.__name__
-            patching_status = PatchingConditionsChain(
-                f"sklearn.neighbors.{class_name}.{method_name}"
-            )
-            return patching_status
-else:
-    class LocalOutlierFactor(sklearn_LocalOutlierFactor):
-        def __init__(
-            self,
-            n_neighbors=20,
-            *,
-            algorithm="auto",
-            leaf_size=30,
-            metric="minkowski",
-            p=2,
-            metric_params=None,
-            contamination="auto",
-            novelty=False,
-            n_jobs=None,
-        ):
-            super().__init__(
-                n_neighbors=n_neighbors,
-                algorithm=algorithm,
-                leaf_size=leaf_size,
-                metric=metric,
-                p=p,
-                metric_params=metric_params,
-                n_jobs=n_jobs,
-                contamination=contamination,
-                novelty=novelty,
-            )
-        def _fit(self, X, y=None, queue=None):
-            with config_context(target_offload=queue):
-                self._knn = NearestNeighbors(
-                    n_neighbors=self.n_neighbors,
-                    algorithm=self.algorithm,
-                    leaf_size=self.leaf_size,
-                    metric=self.metric,
-                    p=self.p,
-                    metric_params=self.metric_params,
-                    n_jobs=self.n_jobs,
-                )
-                self._knn.fit(X)
-                if self.contamination != "auto":
-                    if not (0.0 < self.contamination <= 0.5):
-                        raise ValueError(
-                            "contamination must be in (0, 0.5], "
-                            "got: %f" % self.contamination
-                        )
-                n_samples = self._knn.n_samples_fit_
-                if self.n_neighbors > n_samples:
-                    warnings.warn(
-                        "n_neighbors (%s) is greater than the "
-                        "total number of samples (%s). n_neighbors "
-                        "will be set to (n_samples - 1) for estimation."
-                        % (self.n_neighbors, n_samples)
-                    )
-                self.n_neighbors_ = max(1, min(self.n_neighbors, n_samples - 1))
-                self._distances_fit_X_, _neighbors_indices_fit_X_ = self._knn.kneighbors(
-                    n_neighbors=self.n_neighbors_
-                )
-                self._lrd = self._local_reachability_density(
-                    self._distances_fit_X_, _neighbors_indices_fit_X_
-                )
-                # Compute lof score over training samples to define offset_:
-                lrd_ratios_array = (
-                    self._lrd[_neighbors_indices_fit_X_] / self._lrd[:, np.newaxis]
-                )
-                self.negative_outlier_factor_ = -np.mean(lrd_ratios_array, axis=1)
-                if self.contamination == "auto":
-                    # inliers score around -1 (the higher, the less abnormal).
-                    self.offset_ = -1.5
-                else:
-                    self.offset_ = np.percentile(
-                        self.negative_outlier_factor_, 100.0 * self.contamination
-                    )
-                for knn_prop_name in self._knn.__dict__.keys():
-                    if knn_prop_name not in self.__dict__.keys():
-                        setattr(self, knn_prop_name, self._knn.__dict__[knn_prop_name])
-                return self
-        def fit(self, X, y=None):
-            return dispatch(
-                self,
-                "neighbors.LocalOutlierFactor.fit",
-                {
-                    "onedal": self.__class__._fit,
-                    "sklearn": None,
-                },
-                X,
-                y,
-            )
-        def _onedal_predict(self, X, queue=None):
-            with config_context(target_offload=queue):
-                check_is_fitted(self)
-                if X is not None:
-                    X = check_array(X, accept_sparse="csr")
-                    is_inlier = np.ones(X.shape[0], dtype=int)
-                    is_inlier[self.decision_function(X) < 0] = -1
-                else:
-                    is_inlier = np.ones(self._knn.n_samples_fit_, dtype=int)
-                    is_inlier[self.negative_outlier_factor_ < self.offset_] = -1
-                return is_inlier
-        @wrap_output_data
-        def _predict(self, X=None):
-            return dispatch(
-                self,
-                "neighbors.LocalOutlierFactor.predict",
-                {
-                    "onedal": self.__class__._onedal_predict,
-                    "sklearn": None,
-                },
-                X,
-            )
-        def _onedal_score_samples(self, X, queue=None):
-            with config_context(target_offload=queue):
-                check_is_fitted(self)
-                X = check_array(X, accept_sparse="csr")
-                distances_X, neighbors_indices_X = self._knn.kneighbors(
-                    X, n_neighbors=self.n_neighbors_
-                )
-                X_lrd = self._local_reachability_density(distances_X, neighbors_indices_X)
-                lrd_ratios_array = self._lrd[neighbors_indices_X] / X_lrd[:, np.newaxis]
-                # as bigger is better:
-                return -np.mean(lrd_ratios_array, axis=1)
-        @wrap_output_data
-        def _score_samples(self, X):
-            if not self.novelty:
-                msg = (
-                    "score_samples is not available when novelty=False. The "
-                    "scores of the training samples are always available "
-                    "through the negative_outlier_factor_ attribute. Use "
-                    "novelty=True if you want to use LOF for novelty detection "
-                    "and compute score_samples for new unseen data."
-                )
-                raise AttributeError(msg)
-            return dispatch(
-                self,
-                "neighbors.LocalOutlierFactor.score_samples",
-                {
-                    "onedal": self.__class__._onedal_score_samples,
-                    "sklearn": None,
-                },
-                X,
-            )
-        def _onedal_fit_predict(self, X, y, queue=None):
-            with config_context(target_offload=queue):
-                return self.fit(X)._predict()
-        @wrap_output_data
-        def _fit_predict(self, X, y=None):
-            return dispatch(
-                self,
-                "neighbors.LocalOutlierFactor._onedal_fit_predict",
-                {
-                    "onedal": self.__class__._onedal_fit_predict,
-                    "sklearn": None,
-                },
-                X,
-                y,
-            )
-        def _onedal_gpu_supported(self, method_name, *data):
-            return True
-        def _onedal_cpu_supported(self, method_name, *data):
-            return True

sklearnex/preview/linear_model/tests/test_preview_logistic_regression.py DELETED Viewed

@@ -1,59 +0,0 @@
-# ===============================================================================
-# Copyright 2023 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ===============================================================================
-import numpy as np
-import pytest
-from numpy.testing import assert_allclose
-from sklearn.datasets import load_breast_cancer
-from sklearn.metrics import accuracy_score
-from sklearn.model_selection import train_test_split
-from daal4py.sklearn._utils import daal_check_version
-from onedal.tests.utils._dataframes_support import (
-    _as_numpy,
-    _convert_to_dataframe,
-    get_dataframes_and_queues,
-)
-from sklearnex import config_context
-@pytest.mark.parametrize(
-    "dataframe,queue",
-    get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
-)
-def test_sklearnex_import(dataframe, queue):
-    from sklearnex.preview.linear_model import LogisticRegression
-    X, y = load_breast_cancer(return_X_y=True)
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, train_size=0.8, random_state=42
-    )
-    X_train = _convert_to_dataframe(X_train, sycl_queue=queue, target_df=dataframe)
-    y_train = _convert_to_dataframe(y_train, sycl_queue=queue, target_df=dataframe)
-    X_test = _convert_to_dataframe(X_test, sycl_queue=queue, target_df=dataframe)
-    model = LogisticRegression(fit_intercept=True, solver="newton-cg")
-    model.fit(X_train, y_train)
-    y_pred = _as_numpy(model.predict(X_test))
-    if daal_check_version((2024, "P", 1)):
-        assert "sklearnex" in model.__module__
-    else:
-        assert "daal4py" in model.__module__
-    # in case dataframe='numpy' algorithm should fallback to sklearn
-    # as cpu method is not implemented in onedal
-    if dataframe != "numpy" and daal_check_version((2024, "P", 1)):
-        assert hasattr(model, "_onedal_estimator")
-    assert accuracy_score(y_test, y_pred) > 0.95

{scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.2.0.dist-info}/LICENSE.txt RENAMED Viewed

File without changes

{scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.2.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.2.0.dist-info}/top_level.txt RENAMED Viewed

File without changes