scikit-learn-intelex 2025.6.0__py39-none-manylinux_2_28_x86_64.whl → 2025.7.0__py39-none-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- daal4py/_daal4py.cpython-39-x86_64-linux-gnu.so +0 -0
- daal4py/mb/__init__.py +2 -2
- daal4py/mb/gbt_convertors.py +258 -2
- daal4py/mb/tree_based_builders.py +30 -5
- daal4py/mpi_transceiver.cpython-39-x86_64-linux-gnu.so +0 -0
- daal4py/sklearn/cluster/dbscan.py +2 -2
- daal4py/sklearn/ensemble/_forest.py +2 -2
- daal4py/sklearn/linear_model/logistic_path.py +21 -7
- daal4py/sklearn/manifold/_t_sne.py +8 -2
- daal4py/sklearn/metrics/_pairwise.py +1 -1
- daal4py/sklearn/svm/svm.py +1 -1
- daal4py/sklearn/utils/validation.py +15 -16
- onedal/__init__.py +26 -1
- onedal/_config.py +5 -4
- onedal/_device_offload.py +72 -83
- onedal/_onedal_py_dpc.cpython-39-x86_64-linux-gnu.so +0 -0
- onedal/_onedal_py_host.cpython-39-x86_64-linux-gnu.so +0 -0
- onedal/_onedal_py_spmd_dpc.cpython-39-x86_64-linux-gnu.so +0 -0
- onedal/basic_statistics/basic_statistics.py +69 -5
- onedal/basic_statistics/incremental_basic_statistics.py +19 -19
- onedal/cluster/kmeans.py +17 -1
- onedal/common/_backend.py +62 -37
- onedal/common/hyperparameters.py +3 -0
- onedal/common/tests/test_sycl.py +1 -1
- onedal/covariance/covariance.py +2 -2
- onedal/covariance/incremental_covariance.py +8 -16
- onedal/datatypes/__init__.py +8 -1
- onedal/datatypes/_data_conversion.py +25 -32
- onedal/datatypes/_sycl_usm.py +78 -0
- onedal/datatypes/tests/common.py +8 -3
- onedal/datatypes/tests/test_data.py +45 -2
- onedal/decomposition/incremental_pca.py +8 -17
- onedal/decomposition/pca.py +6 -4
- onedal/ensemble/forest.py +13 -5
- onedal/linear_model/incremental_linear_model.py +34 -32
- onedal/linear_model/linear_model.py +22 -30
- onedal/linear_model/logistic_regression.py +9 -5
- onedal/primitives/kernel_functions.py +64 -17
- onedal/spmd/decomposition/incremental_pca.py +0 -6
- onedal/svm/svm.py +0 -12
- onedal/tests/test_common.py +1 -1
- onedal/tests/utils/_dataframes_support.py +23 -6
- onedal/tests/utils/_device_selection.py +1 -1
- onedal/utils/_array_api.py +28 -26
- onedal/utils/_sycl_queue_manager.py +57 -31
- onedal/utils/_third_party.py +170 -0
- onedal/utils/validation.py +11 -3
- {scikit_learn_intelex-2025.6.0.dist-info → scikit_learn_intelex-2025.7.0.dist-info}/METADATA +2 -2
- {scikit_learn_intelex-2025.6.0.dist-info → scikit_learn_intelex-2025.7.0.dist-info}/RECORD +80 -79
- sklearnex/_config.py +17 -8
- sklearnex/_device_offload.py +33 -23
- sklearnex/_utils.py +23 -1
- sklearnex/base.py +1 -1
- sklearnex/basic_statistics/basic_statistics.py +5 -8
- sklearnex/basic_statistics/incremental_basic_statistics.py +17 -13
- sklearnex/covariance/incremental_covariance.py +12 -5
- sklearnex/decomposition/pca.py +16 -9
- sklearnex/decomposition/tests/test_pca.py +58 -1
- sklearnex/dispatcher.py +12 -1
- sklearnex/ensemble/_forest.py +9 -3
- sklearnex/linear_model/incremental_linear.py +14 -5
- sklearnex/linear_model/incremental_ridge.py +14 -7
- sklearnex/linear_model/logistic_regression.py +3 -4
- sklearnex/linear_model/ridge.py +9 -0
- sklearnex/manifold/tests/test_tsne.py +1 -1
- sklearnex/neighbors/_lof.py +1 -1
- sklearnex/preview/covariance/tests/test_covariance.py +59 -6
- sklearnex/spmd/covariance/incremental_covariance.py +0 -8
- sklearnex/spmd/decomposition/incremental_pca.py +0 -7
- sklearnex/spmd/linear_model/incremental_linear_model.py +0 -7
- sklearnex/svm/_common.py +1 -1
- sklearnex/tests/test_common.py +41 -2
- sklearnex/tests/test_config.py +22 -0
- sklearnex/tests/test_memory_usage.py +0 -8
- sklearnex/tests/test_n_jobs_support.py +1 -1
- sklearnex/tests/utils/base.py +1 -1
- sklearnex/utils/validation.py +10 -10
- onedal/utils/_dpep_helpers.py +0 -71
- {scikit_learn_intelex-2025.6.0.dist-info → scikit_learn_intelex-2025.7.0.dist-info}/LICENSE.txt +0 -0
- {scikit_learn_intelex-2025.6.0.dist-info → scikit_learn_intelex-2025.7.0.dist-info}/WHEEL +0 -0
- {scikit_learn_intelex-2025.6.0.dist-info → scikit_learn_intelex-2025.7.0.dist-info}/top_level.txt +0 -0
Binary file
daal4py/mb/__init__.py
CHANGED

@@ -29,8 +29,8 @@ def convert_model(model) -> "GBTDAALModel | LogisticDAALModel":
     prediction methods.

     It supports gradient-boosted decision tree ensembles (GBT) from the libraries
-    ``xgboost``, ``lightgbm``, and ``
-    and multinomial) models from scikit-learn.
+    ``xgboost``, ``lightgbm``, ``catboost``, and ``treelite``; and logistic regression
+    (binary and multinomial) models from scikit-learn.

     See the documentation of the classes :obj:`daal4py.mb.GBTDAALModel` and
     :obj:`daal4py.mb.LogisticDAALModel` for more details.
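The docstring above now lists ``catboost`` and ``treelite`` among the sources accepted by ``convert_model``. A rough, hedged sketch of a typical call follows; the toy data, estimator parameters, and the ``predict`` call are illustrative assumptions, not taken from this diff.

import numpy as np
import xgboost as xgb

import daal4py.mb as mb

# Toy data, illustration only.
X = np.random.rand(100, 5)
y = np.random.rand(100)

booster = xgb.XGBRegressor(n_estimators=10, max_depth=3).fit(X, y)

# convert_model dispatches on the input type; a GBT booster is expected to
# come back as a GBTDAALModel exposing prediction methods.
daal_model = mb.convert_model(booster)
print(daal_model.predict(X[:5]))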
daal4py/mb/gbt_convertors.py
CHANGED

@@ -15,6 +15,7 @@
 # ===============================================================================

 import json
+import warnings
 from collections import deque
 from copy import deepcopy
 from tempfile import NamedTemporaryFile

@@ -197,6 +198,52 @@ class Node:
             right_child=right_child,
         )

+    @staticmethod
+    def from_treelite_dict(dict_all_nodes: list[dict[str, Any]], node_id: int) -> "Node":
+        this_node = dict_all_nodes[node_id]
+        is_leaf = "leaf_value" in this_node
+        default_left = this_node.get("default_left", False)
+
+        n_children = 0
+        if "left_child" in this_node:
+            left_child = Node.from_treelite_dict(dict_all_nodes, this_node["left_child"])
+            n_children += 1 + left_child.n_children
+        else:
+            left_child = None
+        if "right_child" in this_node:
+            right_child = Node.from_treelite_dict(
+                dict_all_nodes, this_node["right_child"]
+            )
+            n_children += 1 + right_child.n_children
+        else:
+            right_child = None
+
+        value = this_node["leaf_value"] if is_leaf else this_node["threshold"]
+        if not is_leaf:
+            comp = this_node["comparison_op"]
+            if comp == "<=":
+                value = float(np.nextafter(value, np.inf))
+            elif comp in [">", ">="]:
+                left_child, right_child = right_child, left_child
+                default_left = not default_left
+                if comp == ">":
+                    value = float(np.nextafter(value, -np.inf))
+            elif comp != "<":
+                raise TypeError(
+                    f"Model to convert contains unsupported split type: {comp}."
+                )
+
+        return Node(
+            cover=this_node.get("sum_hess", 0.0),
+            is_leaf=is_leaf,
+            default_left=default_left,
+            feature=this_node.get("split_feature_id"),
+            value=value,
+            n_children=n_children,
+            left_child=left_child,
+            right_child=right_child,
+        )
+
     def get_value_closest_float_downward(self) -> np.float64:
         """Get the closest exact fp value smaller than self.value"""
         return np.nextafter(np.single(self.value), np.single(-np.inf))

@@ -310,6 +357,14 @@ class TreeList(list):

         return tl

+    @staticmethod
+    def from_treelite_dict(tl_json: Dict[str, Any]) -> "TreeList":
+        tl = TreeList()
+        for tree_id, tree_dict in enumerate(tl_json["trees"]):
+            root_node = Node.from_treelite_dict(tree_dict["nodes"], 0)
+            tl.append(TreeView(tree_id=tree_id, root_node=root_node))
+        return tl
+
     def __setitem__(self):
         raise NotImplementedError(
             "Use TreeList.from_*() methods to initialize a TreeList"

@@ -421,7 +476,9 @@ def get_gbt_model_from_lightgbm(model: Any, booster=None) -> Any:
     if "is_linear=1" in model_str:
         raise TypeError("Linear trees are not supported.")
     if "[boosting: dart]" in model_str:
-        raise TypeError(
+        raise TypeError(
+            "'Dart' booster is not supported. Try converting to 'treelite' first."
+        )
     if "[boosting: rf]" in model_str:
         raise TypeError("Random forest boosters are not supported.")
     if ("[objective: lambdarank]" in model_str) or (

@@ -476,7 +533,9 @@ def get_gbt_model_from_xgboost(booster: Any, xgb_config=None) -> Any:
         xgb_config = get_xgboost_params(booster)

     if xgb_config["learner"]["learner_train_param"]["booster"] != "gbtree":
-        raise TypeError(
+        raise TypeError(
+            "Only 'gbtree' booster type is supported. For DART, try converting to 'treelite' first."
+        )

     n_targets = xgb_config["learner"]["learner_model_param"].get("num_target")
     if n_targets is not None and int(n_targets) > 1:

@@ -920,3 +979,200 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
     if not add_intercept_to_each_node:
         intercept = booster.get_scale_and_bias()[1]
     return mb.model(base_score=intercept), shap_ready
+
+
+def get_gbt_model_from_treelite(
+    tl_model: "treelite.model.Model",
+) -> tuple[Any, int, int, bool]:
+    model_json = json.loads(tl_model.dump_as_json())
+    task_type = model_json["task_type"]
+    if task_type not in ["kBinaryClf", "kRegressor", "kMultiClf", "kIsolationForest"]:
+        raise TypeError(f"Model to convert is of unsupported type: {task_type}")
+    if model_json["num_target"] > 1:
+        raise TypeError("Multi-target models are not supported.")
+    if model_json["postprocessor"] == "multiclass_ova":
+        raise TypeError(
+            "Multi-class classification models that use One-Vs-All are not supported."
+        )
+    for tree in model_json["trees"]:
+        if tree["has_categorical_split"]:
+            raise TypeError("Models with categorical features are not supported.")
+    num_trees = tl_model.num_tree
+    if not num_trees:
+        raise TypeError("Model to convert contains no trees.")
+
+    # Note: the daal4py module always adds up the scores, but some models
+    # might average them instead. In such case, this turns the trees into
+    # additive ones by dividing the predictions by the number of nodes beforehand.
+    if model_json["average_tree_output"]:
+        divide_treelite_leaf_values_by_const(model_json, num_trees)
+
+    base_score = model_json["base_scores"]
+    num_class = model_json["num_class"][0]
+    num_feature = model_json["num_feature"]
+
+    if task_type == "kBinaryClf":
+        num_class = 2
+        if base_score:
+            base_score = list(1 / (1 + np.exp(-np.array(base_score))))
+
+    if num_class > 2:
+        shap_ready = False
+    else:
+        shap_ready = True
+        for tree in model_json["trees"]:
+            if not tree["nodes"][0].get("sum_hess", False):
+                shap_ready = False
+                break
+
+    # In the case of random forests for classification, it might work
+    # by averaging predictions without any link function, whereas
+    # daal4py assumes a logit link. In such case, it's not possible to
+    # convert them to daal4py's logic, but the model can still be used
+    # as a regressor that always outputs something between 0 and 1.
+    is_regression = "Clf" not in task_type
+    if not is_regression and model_json["postprocessor"] == "identity_multiclass":
+        is_regression = True
+        warnings.warn(
+            "Attempting to convert classification model which is not"
+            " based on gradient boosting. Will output a regression"
+            " model instead."
+        )
+
+    looks_like_random_forest = (
+        model_json["postprocessor"] == "identity_multiclass"
+        and len(model_json["base_scores"]) > 1
+        and task_type == "kMultiClf"
+    )
+    if looks_like_random_forest:
+        if num_class > 2 or len(base_score) > 2:
+            raise TypeError("Multi-class random forests are not supported.")
+        if len(model_json["num_class"]) > 1:
+            raise TypeError("Multi-output random forests are not supported.")
+        if len(base_score) == 2 and base_score[0]:
+            raise TypeError("Random forests with base scores are not supported.")
+
+    # In the case of binary random forests, it will always have leaf values
+    # for 2 classes, which is redundant as they sum to 1. daal4py requires
+    # only values for the positive class, so they need to be converted.
+    if looks_like_random_forest:
+        leave_only_last_treelite_leaf_value(model_json)
+        base_score = base_score[-1]
+
+    # In the case of multi-class classification models, if converted
+    # from xgboost, the order of the trees will be the same - i.e.
+    # sequences of one tree of each class, followed by another such
+    # sequence. But treelite could in theory also support building
+    # models where the trees are in a different order, in which case
+    # they will need to be reordered to match xgboost, since that's
+    # how daal4py handles them. And if there is an uneven number of
+    # trees per class, then will need to make up extra trees with
+    # zeros to accommodate it.
+    if task_type == "kMultiClf" and not looks_like_random_forest:
+        num_trees = len(model_json["trees"])
+        if (num_trees % num_class) != 0:
+            shap_ready = False
+            class_ids, num_trees_per_class = np.unique(
+                model_json["class_id"], return_counts=True
+            )
+            max_tree_per_class = num_trees_per_class.max()
+            num_tree_add_per_class = max_tree_per_class - num_trees_per_class
+            for class_ind in range(num_class):
+                for tree in range(num_tree_add_per_class[class_ind]):
+                    add_empty_tree_to_treelite_json(model_json, class_ind)

+        tree_class_orders = model_json["class_id"]
+        sequential_ids = np.arange(num_class)
+        num_trees = len(model_json["trees"])
+        assert (num_trees % num_class) == 0
+        if not np.array_equal(
+            tree_class_orders, np.tile(sequential_ids, int(num_trees / num_class))
+        ):
+            argsorted_class_indices = np.argsort(tree_class_orders)
+            per_class_indices = np.split(argsorted_class_indices, num_class)
+            correct_order = np.vstack(per_class_indices).reshape(-1, order="F")
+            model_json["trees"] = [model_json["trees"][ix] for ix in correct_order]
+            model_json["class_id"] = [model_json["class_id"][ix] for ix in correct_order]
+
+    # In the case of multi-class classification with base scores,
+    # since daal4py only supports scalar intercepts, this follows the
+    # same strategy as in catboost of dividing the intercepts equally
+    # among the number of trees
+    if task_type == "kMultiClf" and not looks_like_random_forest:
+        add_intercept_to_treelite_leafs(model_json, base_score)
+        base_score = None
+
+    if isinstance(base_score, list):
+        if len(base_score) == 1:
+            base_score = base_score[0]
+        else:
+            raise TypeError("Model to convert is malformed.")
+
+    tree_list = TreeList.from_treelite_dict(model_json)
+    return (
+        get_gbt_model_from_tree_list(
+            tree_list,
+            n_iterations=num_trees
+            / (
+                num_class
+                if task_type == "kMultiClf" and not looks_like_random_forest
+                else 1
+            ),
+            is_regression=is_regression,
+            n_features=num_feature,
+            n_classes=num_class,
+            base_score=base_score,
+        ),
+        num_class,
+        num_feature,
+        shap_ready,
+    )
+
+
+def divide_treelite_leaf_values_by_const(
+    tl_json: dict[str, Any], divisor: "int | float"
+) -> None:
+    for tree in tl_json["trees"]:
+        for node in tree["nodes"]:
+            if "leaf_value" in node:
+                if isinstance(node["leaf_value"], (list, tuple)):
+                    node["leaf_value"] = list(np.array(node["leaf_value"]) / divisor)
+                else:
+                    node["leaf_value"] /= divisor
+
+
+def leave_only_last_treelite_leaf_value(tl_json: dict[str, Any]) -> None:
+    for tree in tl_json["trees"]:
+        for node in tree["nodes"]:
+            if "leaf_value" in node:
+                assert len(node["leaf_value"]) == 2
+                node["leaf_value"] = node["leaf_value"][-1]
+
+
+def add_intercept_to_treelite_leafs(
+    tl_json: dict[str, Any], base_score: list[float]
+) -> None:
+    num_trees_per_class = len(tl_json["trees"]) / tl_json["num_class"][0]
+    for tree_index, tree in enumerate(tl_json["trees"]):
+        leaf_add = base_score[tl_json["class_id"][tree_index]] / num_trees_per_class
+        for node in tree["nodes"]:
+            if "leaf_value" in node:
+                node["leaf_value"] += leaf_add
+
+
+def add_empty_tree_to_treelite_json(tl_json: dict[str, Any], class_add: int) -> None:
+    tl_json["class_id"].append(class_add)
+    tl_json["trees"].append(
+        {
+            "num_nodes": 1,
+            "has_categorical_split": False,
+            "nodes": [
+                {
+                    "node_id": 0,
+                    "leaf_value": 0.0,
+                    "data_count": 0,
+                    "sum_hess": 0.0,
+                },
+            ],
+        }
+    )
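The comment inside ``get_gbt_model_from_treelite`` notes that daal4py always sums tree outputs, while some treelite models average them; ``divide_treelite_leaf_values_by_const`` therefore pre-divides every leaf value by the number of trees. A minimal numpy sketch of why that rescaling is equivalent (made-up leaf values, illustration only):

import numpy as np

# Illustrative leaf outputs of three trees for one sample.
leaf_values = np.array([0.9, 0.3, 0.6])

averaged = leaf_values.mean()                      # what an averaging model reports
additive = (leaf_values / len(leaf_values)).sum()  # what daal4py computes after rescaling

assert np.isclose(averaged, additive)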
daal4py/mb/tree_based_builders.py
CHANGED

@@ -37,6 +37,7 @@ from .gbt_convertors import (
     get_catboost_params,
     get_gbt_model_from_catboost,
     get_gbt_model_from_lightgbm,
+    get_gbt_model_from_treelite,
     get_gbt_model_from_xgboost,
     get_lightgbm_params,
     get_xgboost_params,

@@ -63,7 +64,9 @@ def getFPType(X):

 class GBTDAALBaseModel:
     def __init__(self):
-        self.model_type: Optional[
+        self.model_type: Optional[
+            Literal["xgboost", "catboost", "lightgbm", "treelite"]
+        ] = None

     @property
     def _is_regression(self):

@@ -86,6 +89,8 @@ class GBTDAALBaseModel:
         if self.n_classes_ <= 2:
             if objective_fun in ["binary:logistic", "binary:logitraw"]:
                 self.n_classes_ = 2
+            elif self.n_classes_ == 0:
+                self.n_classes_ = 1

         self.n_features_in_ = int(params["learner"]["learner_model_param"]["num_feature"])

@@ -113,6 +118,11 @@ class GBTDAALBaseModel:
         self.daal_model_, self.supports_shap_ = get_gbt_model_from_catboost(booster)
         self._get_params_from_catboost(catboost_params)

+    def _convert_model_from_treelite(self, tl_model):
+        self.daal_model_, self.n_classes_, self.n_features_in_, self.supports_shap_ = (
+            get_gbt_model_from_treelite(tl_model)
+        )
+
     def _convert_model(self, model):
         (submodule_name, class_name) = (
             model.__class__.__module__,

@@ -147,6 +157,14 @@ class GBTDAALBaseModel:
         # Build GBTDAALModel from CatBoost
         elif (submodule_name, class_name) == ("catboost.core", "CatBoost"):
             self._convert_model_from_catboost(model)
+        elif (submodule_name, class_name) == ("treelite.model", "Model"):
+            self._convert_model_from_treelite(model)
+        elif submodule_name.startswith("sklearn.ensemble"):
+            raise TypeError(
+                "Cannot convert scikit-learn models. Try converting to treelite "
+                "with 'treelite.sklearn.import_model' and then converting the "
+                "resulting TreeLite object."
+            )
         else:
             raise TypeError(f"Unknown model format {submodule_name}.{class_name}")

@@ -303,14 +321,21 @@ class GBTDAALModel(GBTDAALBaseModel):

     Can be created from model objects that meet all of the following criteria:

-    - Were produced from one of the following libraries: ``xgboost``, ``lightgbm``,
-      It can work with either the base booster classes
-      scikit-learn-compatible classes.
+    - Were produced from one of the following libraries: ``xgboost``, ``lightgbm``, ``catboost``,
+      or ``treelite`` (with some limitations). It can work with either the base booster classes
+      of those libraries or with their scikit-learn-compatible classes.
     - Do not use categorical features.
     - Are for regression or classification (e.g. no ranking). In the case of XGBoost objective
       ``binary:logitraw``, it will create a classification model out of it, and in the case of
       objective ``reg:logistic``, will create a regression model.
     - Are not multi-output models. Note that multi-class classification **is** supported.
+    - Are not multi-class random forests (multi-class gradient boosters are supported).
+
+    Note that while models from packages such as scikit-learn are not supported directly,
+    they can still be converted to this class by first converting them to TreeLite and
+    then converting to :obj:`GBTDAALModel` from that TreeLite model. In such case, note that
+    models corresponding to random forest binary classifiers will be treated as regressors
+    that predict probabilities.

     Parameters
     ----------

@@ -330,7 +355,7 @@ class GBTDAALModel(GBTDAALBaseModel):

     def __init__(self, model):
         self._convert_model(model)
-        for type_str in ("xgboost", "lightgbm", "catboost"):
+        for type_str in ("xgboost", "lightgbm", "catboost", "treelite"):
             if type_str in str(type(model)):
                 self.model_type = type_str
                 break
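The new ``sklearn.ensemble`` branch above points users at treelite as the conversion path for scikit-learn forests. A hedged sketch of that two-step route follows; the estimator, data, and the ``predict`` call are illustrative, and it assumes ``treelite.sklearn.import_model`` accepts the fitted estimator, as the added error message suggests.

import numpy as np
import treelite.sklearn
from sklearn.ensemble import RandomForestRegressor

import daal4py.mb as mb

# Toy data, illustration only.
X = np.random.rand(200, 4)
y = np.random.rand(200)

rf = RandomForestRegressor(n_estimators=20, max_depth=4).fit(X, y)

# Step 1: scikit-learn -> treelite, using the helper named in the new TypeError message.
tl_model = treelite.sklearn.import_model(rf)

# Step 2: treelite -> daal4py model builder object.
daal_model = mb.convert_model(tl_model)
print(daal_model.predict(X[:5]))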
Binary file
daal4py/sklearn/cluster/dbscan.py
CHANGED

@@ -36,10 +36,10 @@ def _daal_dbscan(X, eps=0.5, min_samples=5, sample_weight=None):
     ww = make2d(sample_weight) if sample_weight is not None else None
     XX = make2d(X)

-    fpt = getFPType(XX)
+    fpt = getFPType(XX)  # codespell:ignore fpt
     alg = daal4py.dbscan(
         method="defaultDense",
-        fptype=fpt,
+        fptype=fpt,  # codespell:ignore fpt
         epsilon=float(eps),
         minObservations=int(min_samples),
         memorySavingMode=False,
daal4py/sklearn/ensemble/_forest.py
CHANGED

@@ -679,8 +679,8 @@ class RandomForestClassifier(RandomForestClassifier_original, RandomForestBase):
         dfc_predictionResult = dfc_algorithm.compute(X, self.daal_model_)

         pred = dfc_predictionResult.probabilities
-
-        return pred
+        # TODO: fix probabilities out of [0, 1] interval on oneDAL side
+        return pred.clip(0.0, 1.0)

     def _daal_fit_classifier(self, X, y, sample_weight=None):
         y = check_array(y, ensure_2d=False, dtype=None)
daal4py/sklearn/linear_model/logistic_path.py
CHANGED

@@ -359,14 +359,16 @@ def __logistic_regression_path(
         y_bin = np.ones(y.shape, dtype=X.dtype)
         # for compute_class_weight

-        if solver
+        if solver == "liblinear" or (
+            not sklearn_check_version("1.6") and solver not in ["lbfgs", "newton-cg"]
+        ):
+            mask_classes = np.array([-1, 1])
+            y_bin[~mask] = -1.0
+        else:
             # HalfBinomialLoss, used for those solvers, represents y in [0, 1] instead
             # of in [-1, 1].
             mask_classes = np.array([0, 1])
             y_bin[~mask] = 0.0
-        else:
-            mask_classes = np.array([-1, 1])
-            y_bin[~mask] = -1.0
     else:
         mask_classes = np.array([-1, 1])
         mask = y == pos_class

@@ -388,7 +390,11 @@ def __logistic_regression_path(

     else:
         if sklearn_check_version("1.1"):
-            if
+            if sklearn_check_version("1.6"):
+                solver_list = ["sag", "saga", "lbfgs", "newton-cg", "newton-cholesky"]
+            else:
+                solver_list = ["sag", "saga", "lbfgs", "newton-cg"]
+            if solver in solver_list:
                 # SAG, lbfgs and newton-cg multinomial solvers need LabelEncoder,
                 # not LabelBinarizer, i.e. y as a 1d-array of integers.
                 # LabelEncoder also saves memory compared to LabelBinarizer, especially

@@ -488,7 +494,11 @@ def __logistic_regression_path(

     if multi_class == "multinomial":
         # fmin_l_bfgs_b and newton-cg accepts only ravelled parameters.
-        if
+        if sklearn_check_version("1.6"):
+            solver_list = ["lbfgs", "newton-cg", "newton-cholesky"]
+        else:
+            solver_list = ["lbfgs", "newton-cg"]
+        if solver in solver_list:
             if _dal_ready and classes.size == 2:
                 w0 = w0[-1:, :]
             if sklearn_check_version("1.1"):

@@ -753,7 +763,11 @@ def __logistic_regression_path(
         else:
             n_classes = max(2, classes.size)
             if sklearn_check_version("1.1"):
-                if
+                if sklearn_check_version("1.6"):
+                    solver_list = ["lbfgs", "newton-cg", "newton-cholesky"]
+                else:
+                    solver_list = ["lbfgs", "newton-cg"]
+                if solver in solver_list:
                     multi_w0 = np.reshape(w0, (n_classes, -1), order="F")
                 else:
                     multi_w0 = w0
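The reshuffled branch in the first hunk decides how binary targets are encoded per solver: liblinear (and, before scikit-learn 1.6, anything other than lbfgs/newton-cg) keeps labels in {-1, +1}, while the HalfBinomialLoss-based solvers expect {0, 1}. A small standalone sketch of the two encodings (made-up labels, not code from the patch):

import numpy as np

y = np.array([0, 2, 2, 0, 2])
pos_class = 2
mask = y == pos_class

# Encoding expected by the HalfBinomialLoss-based solvers (lbfgs, newton-cg, ...): {0, 1}
y_bin_01 = np.ones(y.shape, dtype=np.float64)
y_bin_01[~mask] = 0.0

# Encoding expected by liblinear: {-1, +1}
y_bin_pm1 = np.ones(y.shape, dtype=np.float64)
y_bin_pm1[~mask] = -1.0

print(y_bin_01)   # [0. 1. 1. 0. 1.]
print(y_bin_pm1)  # [-1.  1.  1. -1.  1.]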
daal4py/sklearn/manifold/_t_sne.py
CHANGED

@@ -66,7 +66,13 @@ class TSNE(BaseTSNE):
             [n_samples],
             [P.nnz],
             [self.n_iter_without_progress],
-            [
+            [
+                (
+                    self.max_iter
+                    if sklearn_check_version("1.7")
+                    else (self._max_iter if sklearn_check_version("1.5") else self.n_iter)
+                )
+            ],
         ]

         # Pass params to daal4py backend

@@ -130,7 +136,7 @@ class TSNE(BaseTSNE):

         if isinstance(self._init, str) and self._init == "pca" and issparse(X):
             raise TypeError(
-                "PCA initialization is currently not
+                "PCA initialization is currently not supported "
                 "with the sparse input matrix. Use "
                 'init="random" instead.'
             )
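The first hunk picks the iteration-count attribute based on the installed scikit-learn: ``max_iter`` on 1.7+, the private ``_max_iter`` on 1.5/1.6, and the legacy ``n_iter`` before that. A hedged helper sketch expressing the same fallback chain (the helper name is invented for illustration; ``sklearn_check_version`` is passed in so the snippet stays self-contained):

def resolve_tsne_iterations(tsne, sklearn_check_version):
    # Prefer the public attribute on newer scikit-learn releases, then fall
    # back to the older spellings used by earlier versions.
    if sklearn_check_version("1.7"):
        return tsne.max_iter
    if sklearn_check_version("1.5"):
        return tsne._max_iter
    return tsne.n_iter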
daal4py/sklearn/metrics/_pairwise.py
CHANGED

@@ -18,6 +18,7 @@ import warnings
 from functools import partial

 import numpy as np
+from joblib import effective_n_jobs
 from sklearn.exceptions import DataConversionWarning
 from sklearn.metrics import pairwise_distances as pairwise_distances_original
 from sklearn.metrics.pairwise import (

@@ -28,7 +29,6 @@ from sklearn.metrics.pairwise import (
     _parallel_pairwise,
     check_pairwise_arrays,
 )
-from sklearn.utils._joblib import effective_n_jobs
 from sklearn.utils.validation import check_non_negative

 try:
daal4py/sklearn/svm/svm.py
CHANGED

@@ -158,7 +158,7 @@ def _daal4py_kf(kernel, X_fptype, gamma=1.0, is_sparse=False):
         kf = daal4py.kernel_function_linear(fptype=X_fptype, method=method)
     else:
         raise ValueError(
-            "_daal4py_fit received unexpected kernel
+            "_daal4py_fit received unexpected kernel specification {}.".format(kernel)
         )

     return kf
daal4py/sklearn/utils/validation.py
CHANGED

@@ -72,25 +72,24 @@ def _assert_all_finite(

     # Data with small size has too big relative overhead
     # TODO: tune threshold size
-    if hasattr(X, "size"):
-        if X.size < 32768:
-            if sklearn_check_version("1.1"):
-                _sklearn_assert_all_finite(
-                    X,
-                    allow_nan=allow_nan,
-                    msg_dtype=msg_dtype,
-                    estimator_name=estimator_name,
-                    input_name=input_name,
-                )
-            else:
-                _sklearn_assert_all_finite(X, allow_nan=allow_nan, msg_dtype=msg_dtype)
-            return
-
     is_df = is_DataFrame(X)
+    if not (is_df or isinstance(X, np.ndarray)) or X.size < 32768:
+        if sklearn_check_version("1.1"):
+            _sklearn_assert_all_finite(
+                X,
+                allow_nan=allow_nan,
+                msg_dtype=msg_dtype,
+                estimator_name=estimator_name,
+                input_name=input_name,
+            )
+        else:
+            _sklearn_assert_all_finite(X, allow_nan=allow_nan, msg_dtype=msg_dtype)
+        return
+
     num_of_types = get_number_of_types(X)

     # if X is heterogeneous pandas.DataFrame then
-    #
+    # convert it to a list of arrays
     if is_df and num_of_types > 1:
         lst = []
         for idx in X:

@@ -330,7 +329,7 @@ def _daal_check_array(
     has_pd_integer_array = False
     if hasattr(array, "dtypes") and hasattr(array.dtypes, "__array__"):
         # throw warning if columns are sparse. If all columns are sparse, then
-        # array.sparse exists and sparsity will be
+        # array.sparse exists and sparsity will be preserved (later).
        with suppress(ImportError):
            from pandas import SparseDtype

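The rewritten guard in the first hunk routes any non-ndarray/non-DataFrame input, or anything smaller than 32768 elements, to stock scikit-learn's finiteness check, since the accelerated path only pays off on larger inputs. A simplified, hedged sketch of that dispatch condition (the helper and constant names are illustrative; the real code calls scikit-learn's private ``_assert_all_finite``):

import numpy as np
import pandas as pd

SMALL_INPUT_THRESHOLD = 32768  # element count used by the patched guard

def use_sklearn_fallback(X) -> bool:
    # Fall back for unsupported container types, or for small inputs where
    # the accelerated finiteness check has too much relative overhead.
    is_df = isinstance(X, pd.DataFrame)
    if not (is_df or isinstance(X, np.ndarray)):
        return True
    return X.size < SMALL_INPUT_THRESHOLD

print(use_sklearn_fallback(np.zeros((10, 10))))     # True: only 100 elements
print(use_sklearn_fallback(np.zeros((1000, 100))))  # False: 100000 elements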
onedal/__init__.py
CHANGED

@@ -21,9 +21,34 @@ from daal4py.sklearn._utils import daal_check_version


 class Backend:
-    """Encapsulates the oneDAL pybind11 modules and provides a unified interface to it together with additional properties about dpc/spmd policies"""

     def __init__(self, backend_module, is_dpc, is_spmd):
+        """A unified interface to an available oneDAL pybind11 module.
+
+        This class encapsulates a oneDAL pybind11 module allowing for
+        dynamic access of module objects. This simplifies method and
+        attribute access in sklearnex without aliasing in sys.modules.
+        It contains additional attributes for inspection of the pybind11
+        module type (i.e. dpc or spmd) for use in policy creation.
+
+        Parameters
+        ----------
+        backend_module : oneDAL pybind11 module
+            Pybind11 module to be encapsulated.
+
+        is_dpc : bool
+            Flag describing if the module is Data Parallel C++-enabled.
+
+        is_spmd : bool
+            Flag describing if the module is single program, multiple
+            data enabled.
+
+        Returns
+        -------
+        self : Backend
+            Encapsulated oneDAL pybind11 module.
+        """
+
         self.backend = backend_module
         self.is_dpc = is_dpc
         self.is_spmd = is_spmd
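The docstring added above describes ``Backend`` as a thin wrapper around whichever oneDAL pybind11 module was imported, plus ``is_dpc``/``is_spmd`` flags used for policy creation. A minimal, hedged sketch of that wrapping pattern follows; attribute forwarding via ``__getattr__`` is an illustrative assumption about the mechanism, not copied from the package.

import math  # stands in for the compiled pybind11 extension


class ModuleBackend:
    """Illustrative stand-in for onedal.Backend: wrap an imported module."""

    def __init__(self, backend_module, is_dpc: bool, is_spmd: bool):
        self.backend = backend_module
        self.is_dpc = is_dpc
        self.is_spmd = is_spmd

    def __getattr__(self, name):
        # Forward anything not defined on the wrapper to the wrapped module,
        # so callers can use the wrapper as if it were the module itself.
        return getattr(self.backend, name)


backend = ModuleBackend(math, is_dpc=False, is_spmd=False)
print(backend.sqrt(16.0), backend.is_dpc)  # 4.0 False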
onedal/_config.py
CHANGED

@@ -59,13 +59,14 @@ def _get_onedal_threadlocal_config():


 def _get_config(copy=True):
-    """Retrieve current
-
+    """Retrieve current configuration set by :func:`sklearnex.set_config`
+
     Parameters
     ----------
     copy : bool, default=True
-        If False, a mutable view of the configuration is returned. Each
-        has a separate copy of the configuration.
+        If 'False', a mutable view of the configuration is returned. Each
+        thread has a separate copy of the configuration.
+
     Returns
     -------
     config : dict