scikit-learn-intelex 2025.6.1__py39-none-manylinux_2_28_x86_64.whl → 2025.8.0__py39-none-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-learn-intelex might be problematic. Click here for more details.
- daal4py/_daal4py.cpython-39-x86_64-linux-gnu.so +0 -0
- daal4py/mb/__init__.py +2 -2
- daal4py/mb/gbt_convertors.py +258 -2
- daal4py/mb/tree_based_builders.py +30 -5
- daal4py/mpi_transceiver.cpython-39-x86_64-linux-gnu.so +0 -0
- daal4py/sklearn/cluster/dbscan.py +2 -2
- daal4py/sklearn/linear_model/logistic_loss.py +4 -4
- daal4py/sklearn/linear_model/logistic_path.py +132 -541
- daal4py/sklearn/manifold/_t_sne.py +1 -1
- daal4py/sklearn/svm/svm.py +1 -1
- daal4py/sklearn/utils/validation.py +15 -16
- onedal/__init__.py +26 -1
- onedal/_config.py +5 -4
- onedal/_device_offload.py +84 -94
- onedal/_onedal_py_dpc.cpython-39-x86_64-linux-gnu.so +0 -0
- onedal/_onedal_py_host.cpython-39-x86_64-linux-gnu.so +0 -0
- onedal/_onedal_py_spmd_dpc.cpython-39-x86_64-linux-gnu.so +0 -0
- onedal/basic_statistics/basic_statistics.py +96 -56
- onedal/basic_statistics/incremental_basic_statistics.py +42 -57
- onedal/basic_statistics/tests/test_basic_statistics.py +6 -7
- onedal/basic_statistics/tests/test_incremental_basic_statistics.py +11 -11
- onedal/cluster/dbscan.py +7 -25
- onedal/cluster/kmeans.py +18 -2
- onedal/common/_backend.py +62 -37
- onedal/common/hyperparameters.py +32 -9
- onedal/common/tests/test_sycl.py +6 -1
- onedal/covariance/covariance.py +10 -12
- onedal/covariance/incremental_covariance.py +8 -16
- onedal/datatypes/__init__.py +12 -2
- onedal/datatypes/_data_conversion.py +109 -70
- onedal/datatypes/_dlpack.py +61 -0
- onedal/datatypes/_sycl_usm.py +63 -0
- onedal/datatypes/tests/common.py +8 -3
- onedal/datatypes/tests/test_data.py +61 -19
- onedal/decomposition/incremental_pca.py +8 -17
- onedal/decomposition/pca.py +6 -4
- onedal/ensemble/forest.py +15 -9
- onedal/linear_model/incremental_linear_model.py +65 -175
- onedal/linear_model/linear_model.py +87 -208
- onedal/linear_model/logistic_regression.py +14 -15
- onedal/linear_model/tests/test_linear_regression.py +10 -17
- onedal/primitives/kernel_functions.py +64 -17
- onedal/spmd/decomposition/incremental_pca.py +0 -6
- onedal/spmd/ensemble/forest.py +18 -0
- onedal/svm/svm.py +0 -12
- onedal/tests/test_common.py +15 -1
- onedal/tests/utils/_dataframes_support.py +23 -6
- onedal/tests/utils/_device_selection.py +1 -1
- onedal/utils/_array_api.py +25 -25
- onedal/utils/_sycl_queue_manager.py +106 -54
- onedal/utils/_third_party.py +220 -0
- onedal/utils/validation.py +11 -3
- {scikit_learn_intelex-2025.6.1.dist-info → scikit_learn_intelex-2025.8.0.dist-info}/METADATA +2 -2
- {scikit_learn_intelex-2025.6.1.dist-info → scikit_learn_intelex-2025.8.0.dist-info}/RECORD +110 -108
- sklearnex/__init__.py +2 -1
- sklearnex/_config.py +17 -8
- sklearnex/_device_offload.py +45 -34
- sklearnex/_utils.py +52 -3
- sklearnex/base.py +1 -1
- sklearnex/basic_statistics/basic_statistics.py +31 -45
- sklearnex/basic_statistics/incremental_basic_statistics.py +50 -55
- sklearnex/cluster/dbscan.py +30 -27
- sklearnex/cluster/k_means.py +1 -0
- sklearnex/covariance/incremental_covariance.py +14 -5
- sklearnex/decomposition/pca.py +21 -9
- sklearnex/decomposition/tests/test_pca.py +54 -2
- sklearnex/dispatcher.py +13 -7
- sklearnex/ensemble/_forest.py +17 -5
- sklearnex/ensemble/tests/test_forest.py +22 -7
- sklearnex/linear_model/coordinate_descent.py +2 -0
- sklearnex/linear_model/incremental_linear.py +90 -73
- sklearnex/linear_model/incremental_ridge.py +83 -60
- sklearnex/linear_model/linear.py +53 -41
- sklearnex/linear_model/logistic_regression.py +11 -4
- sklearnex/linear_model/ridge.py +47 -27
- sklearnex/linear_model/tests/test_linear.py +27 -61
- sklearnex/linear_model/tests/test_logreg.py +448 -5
- sklearnex/manifold/tests/test_tsne.py +1 -1
- sklearnex/neighbors/_lof.py +1 -1
- sklearnex/neighbors/common.py +8 -6
- sklearnex/preview/covariance/covariance.py +3 -4
- sklearnex/preview/covariance/tests/test_covariance.py +54 -8
- sklearnex/spmd/basic_statistics/basic_statistics.py +7 -4
- sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +6 -3
- sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +12 -8
- sklearnex/spmd/cluster/dbscan.py +2 -26
- sklearnex/spmd/covariance/incremental_covariance.py +0 -8
- sklearnex/spmd/covariance/tests/test_covariance_spmd.py +11 -4
- sklearnex/spmd/decomposition/incremental_pca.py +0 -7
- sklearnex/spmd/ensemble/forest.py +230 -2
- sklearnex/spmd/ensemble/tests/test_forest_spmd.py +21 -4
- sklearnex/spmd/linear_model/incremental_linear_model.py +0 -7
- sklearnex/spmd/linear_model/linear_model.py +7 -4
- sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +16 -8
- sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +12 -4
- sklearnex/svm/_common.py +1 -1
- sklearnex/svm/tests/test_svm.py +31 -0
- sklearnex/tests/test_common.py +41 -2
- sklearnex/tests/test_config.py +77 -6
- sklearnex/tests/test_hyperparameters.py +85 -17
- sklearnex/tests/test_memory_usage.py +0 -8
- sklearnex/tests/test_n_jobs_support.py +1 -1
- sklearnex/tests/test_patching.py +13 -13
- sklearnex/tests/utils/base.py +4 -22
- sklearnex/utils/_array_api.py +71 -0
- sklearnex/utils/parallel.py +68 -30
- sklearnex/utils/validation.py +29 -11
- onedal/utils/_dpep_helpers.py +0 -71
- {scikit_learn_intelex-2025.6.1.dist-info → scikit_learn_intelex-2025.8.0.dist-info}/LICENSE.txt +0 -0
- {scikit_learn_intelex-2025.6.1.dist-info → scikit_learn_intelex-2025.8.0.dist-info}/WHEEL +0 -0
- {scikit_learn_intelex-2025.6.1.dist-info → scikit_learn_intelex-2025.8.0.dist-info}/top_level.txt +0 -0
|
daal4py/_daal4py.cpython-39-x86_64-linux-gnu.so
Binary file
|
daal4py/mb/__init__.py
CHANGED
|
@@ -29,8 +29,8 @@ def convert_model(model) -> "GBTDAALModel | LogisticDAALModel":
|
|
|
29
29
|
prediction methods.
|
|
30
30
|
|
|
31
31
|
It supports gradient-boosted decision tree ensembles (GBT) from the libraries
|
|
32
|
-
``xgboost``, ``lightgbm``, and ``
|
|
33
|
-
and multinomial) models from scikit-learn.
|
|
32
|
+
``xgboost``, ``lightgbm``, ``catboost``, and ``treelite``; and logistic regression
|
|
33
|
+
(binary and multinomial) models from scikit-learn.
|
|
34
34
|
|
|
35
35
|
See the documentation of the classes :obj:`daal4py.mb.GBTDAALModel` and
|
|
36
36
|
:obj:`daal4py.mb.LogisticDAALModel` for more details.
|
daal4py/mb/gbt_convertors.py
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
# ===============================================================================
|
|
16
16
|
|
|
17
17
|
import json
|
|
18
|
+
import warnings
|
|
18
19
|
from collections import deque
|
|
19
20
|
from copy import deepcopy
|
|
20
21
|
from tempfile import NamedTemporaryFile
|
|
@@ -197,6 +198,52 @@ class Node:
|
|
|
197
198
|
right_child=right_child,
|
|
198
199
|
)
|
|
199
200
|
|
|
201
|
+
@staticmethod
|
|
202
|
+
def from_treelite_dict(dict_all_nodes: list[dict[str, Any]], node_id: int) -> "Node":
|
|
203
|
+
this_node = dict_all_nodes[node_id]
|
|
204
|
+
is_leaf = "leaf_value" in this_node
|
|
205
|
+
default_left = this_node.get("default_left", False)
|
|
206
|
+
|
|
207
|
+
n_children = 0
|
|
208
|
+
if "left_child" in this_node:
|
|
209
|
+
left_child = Node.from_treelite_dict(dict_all_nodes, this_node["left_child"])
|
|
210
|
+
n_children += 1 + left_child.n_children
|
|
211
|
+
else:
|
|
212
|
+
left_child = None
|
|
213
|
+
if "right_child" in this_node:
|
|
214
|
+
right_child = Node.from_treelite_dict(
|
|
215
|
+
dict_all_nodes, this_node["right_child"]
|
|
216
|
+
)
|
|
217
|
+
n_children += 1 + right_child.n_children
|
|
218
|
+
else:
|
|
219
|
+
right_child = None
|
|
220
|
+
|
|
221
|
+
value = this_node["leaf_value"] if is_leaf else this_node["threshold"]
|
|
222
|
+
if not is_leaf:
|
|
223
|
+
comp = this_node["comparison_op"]
|
|
224
|
+
if comp == "<=":
|
|
225
|
+
value = float(np.nextafter(value, np.inf))
|
|
226
|
+
elif comp in [">", ">="]:
|
|
227
|
+
left_child, right_child = right_child, left_child
|
|
228
|
+
default_left = not default_left
|
|
229
|
+
if comp == ">":
|
|
230
|
+
value = float(np.nextafter(value, -np.inf))
|
|
231
|
+
elif comp != "<":
|
|
232
|
+
raise TypeError(
|
|
233
|
+
f"Model to convert contains unsupported split type: {comp}."
|
|
234
|
+
)
|
|
235
|
+
|
|
236
|
+
return Node(
|
|
237
|
+
cover=this_node.get("sum_hess", 0.0),
|
|
238
|
+
is_leaf=is_leaf,
|
|
239
|
+
default_left=default_left,
|
|
240
|
+
feature=this_node.get("split_feature_id"),
|
|
241
|
+
value=value,
|
|
242
|
+
n_children=n_children,
|
|
243
|
+
left_child=left_child,
|
|
244
|
+
right_child=right_child,
|
|
245
|
+
)
|
|
246
|
+
|
|
200
247
|
def get_value_closest_float_downward(self) -> np.float64:
|
|
201
248
|
"""Get the closest exact fp value smaller than self.value"""
|
|
202
249
|
return np.nextafter(np.single(self.value), np.single(-np.inf))
|
|
@@ -310,6 +357,14 @@ class TreeList(list):
|
|
|
310
357
|
|
|
311
358
|
return tl
|
|
312
359
|
|
|
360
|
+
@staticmethod
|
|
361
|
+
def from_treelite_dict(tl_json: Dict[str, Any]) -> "TreeList":
|
|
362
|
+
tl = TreeList()
|
|
363
|
+
for tree_id, tree_dict in enumerate(tl_json["trees"]):
|
|
364
|
+
root_node = Node.from_treelite_dict(tree_dict["nodes"], 0)
|
|
365
|
+
tl.append(TreeView(tree_id=tree_id, root_node=root_node))
|
|
366
|
+
return tl
|
|
367
|
+
|
|
313
368
|
def __setitem__(self):
|
|
314
369
|
raise NotImplementedError(
|
|
315
370
|
"Use TreeList.from_*() methods to initialize a TreeList"
|
|
@@ -421,7 +476,9 @@ def get_gbt_model_from_lightgbm(model: Any, booster=None) -> Any:
|
|
|
421
476
|
if "is_linear=1" in model_str:
|
|
422
477
|
raise TypeError("Linear trees are not supported.")
|
|
423
478
|
if "[boosting: dart]" in model_str:
|
|
424
|
-
raise TypeError(
|
|
479
|
+
raise TypeError(
|
|
480
|
+
"'Dart' booster is not supported. Try converting to 'treelite' first."
|
|
481
|
+
)
|
|
425
482
|
if "[boosting: rf]" in model_str:
|
|
426
483
|
raise TypeError("Random forest boosters are not supported.")
|
|
427
484
|
if ("[objective: lambdarank]" in model_str) or (
|
|
@@ -476,7 +533,9 @@ def get_gbt_model_from_xgboost(booster: Any, xgb_config=None) -> Any:
|
|
|
476
533
|
xgb_config = get_xgboost_params(booster)
|
|
477
534
|
|
|
478
535
|
if xgb_config["learner"]["learner_train_param"]["booster"] != "gbtree":
|
|
479
|
-
raise TypeError(
|
|
536
|
+
raise TypeError(
|
|
537
|
+
"Only 'gbtree' booster type is supported. For DART, try converting to 'treelite' first."
|
|
538
|
+
)
|
|
480
539
|
|
|
481
540
|
n_targets = xgb_config["learner"]["learner_model_param"].get("num_target")
|
|
482
541
|
if n_targets is not None and int(n_targets) > 1:
|
|
@@ -920,3 +979,200 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
|
|
|
920
979
|
if not add_intercept_to_each_node:
|
|
921
980
|
intercept = booster.get_scale_and_bias()[1]
|
|
922
981
|
return mb.model(base_score=intercept), shap_ready
|
|
982
|
+
|
|
983
|
+
|
|
984
|
+
def get_gbt_model_from_treelite(
|
|
985
|
+
tl_model: "treelite.model.Model",
|
|
986
|
+
) -> tuple[Any, int, int, bool]:
|
|
987
|
+
model_json = json.loads(tl_model.dump_as_json())
|
|
988
|
+
task_type = model_json["task_type"]
|
|
989
|
+
if task_type not in ["kBinaryClf", "kRegressor", "kMultiClf", "kIsolationForest"]:
|
|
990
|
+
raise TypeError(f"Model to convert is of unsupported type: {task_type}")
|
|
991
|
+
if model_json["num_target"] > 1:
|
|
992
|
+
raise TypeError("Multi-target models are not supported.")
|
|
993
|
+
if model_json["postprocessor"] == "multiclass_ova":
|
|
994
|
+
raise TypeError(
|
|
995
|
+
"Multi-class classification models that use One-Vs-All are not supported."
|
|
996
|
+
)
|
|
997
|
+
for tree in model_json["trees"]:
|
|
998
|
+
if tree["has_categorical_split"]:
|
|
999
|
+
raise TypeError("Models with categorical features are not supported.")
|
|
1000
|
+
num_trees = tl_model.num_tree
|
|
1001
|
+
if not num_trees:
|
|
1002
|
+
raise TypeError("Model to convert contains no trees.")
|
|
1003
|
+
|
|
1004
|
+
# Note: the daal4py module always adds up the scores, but some models
|
|
1005
|
+
# might average them instead. In such case, this turns the trees into
|
|
1006
|
+
# additive ones by dividing the predictions by the number of trees beforehand.
|
|
1007
|
+
if model_json["average_tree_output"]:
|
|
1008
|
+
divide_treelite_leaf_values_by_const(model_json, num_trees)
|
|
1009
|
+
|
|
1010
|
+
base_score = model_json["base_scores"]
|
|
1011
|
+
num_class = model_json["num_class"][0]
|
|
1012
|
+
num_feature = model_json["num_feature"]
|
|
1013
|
+
|
|
1014
|
+
if task_type == "kBinaryClf":
|
|
1015
|
+
num_class = 2
|
|
1016
|
+
if base_score:
|
|
1017
|
+
base_score = list(1 / (1 + np.exp(-np.array(base_score))))
|
|
1018
|
+
|
|
1019
|
+
if num_class > 2:
|
|
1020
|
+
shap_ready = False
|
|
1021
|
+
else:
|
|
1022
|
+
shap_ready = True
|
|
1023
|
+
for tree in model_json["trees"]:
|
|
1024
|
+
if not tree["nodes"][0].get("sum_hess", False):
|
|
1025
|
+
shap_ready = False
|
|
1026
|
+
break
|
|
1027
|
+
|
|
1028
|
+
# In the case of random forests for classification, it might work
|
|
1029
|
+
# by averaging predictions without any link function, whereas
|
|
1030
|
+
# daal4py assumes a logit link. In such case, it's not possible to
|
|
1031
|
+
# convert them to daal4py's logic, but the model can still be used
|
|
1032
|
+
# as a regressor that always outputs something between 0 and 1.
|
|
1033
|
+
is_regression = "Clf" not in task_type
|
|
1034
|
+
if not is_regression and model_json["postprocessor"] == "identity_multiclass":
|
|
1035
|
+
is_regression = True
|
|
1036
|
+
warnings.warn(
|
|
1037
|
+
"Attempting to convert classification model which is not"
|
|
1038
|
+
" based on gradient boosting. Will output a regression"
|
|
1039
|
+
" model instead."
|
|
1040
|
+
)
|
|
1041
|
+
|
|
1042
|
+
looks_like_random_forest = (
|
|
1043
|
+
model_json["postprocessor"] == "identity_multiclass"
|
|
1044
|
+
and len(model_json["base_scores"]) > 1
|
|
1045
|
+
and task_type == "kMultiClf"
|
|
1046
|
+
)
|
|
1047
|
+
if looks_like_random_forest:
|
|
1048
|
+
if num_class > 2 or len(base_score) > 2:
|
|
1049
|
+
raise TypeError("Multi-class random forests are not supported.")
|
|
1050
|
+
if len(model_json["num_class"]) > 1:
|
|
1051
|
+
raise TypeError("Multi-output random forests are not supported.")
|
|
1052
|
+
if len(base_score) == 2 and base_score[0]:
|
|
1053
|
+
raise TypeError("Random forests with base scores are not supported.")
|
|
1054
|
+
|
|
1055
|
+
# In the case of binary random forests, it will always have leaf values
|
|
1056
|
+
# for 2 classes, which is redundant as they sum to 1. daal4py requires
|
|
1057
|
+
# only values for the positive class, so they need to be converted.
|
|
1058
|
+
if looks_like_random_forest:
|
|
1059
|
+
leave_only_last_treelite_leaf_value(model_json)
|
|
1060
|
+
base_score = base_score[-1]
|
|
1061
|
+
|
|
1062
|
+
# In the case of multi-class classification models, if converted
|
|
1063
|
+
# from xgboost, the order of the trees will be the same - i.e.
|
|
1064
|
+
# sequences of one tree of each class, followed by another such
|
|
1065
|
+
# sequence. But treelite could in theory also support building
|
|
1066
|
+
# models where the trees are in a different order, in which case
|
|
1067
|
+
# they will need to be reordered to match xgboost, since that's
|
|
1068
|
+
# how daal4py handles them. And if there is an uneven number of
|
|
1069
|
+
# trees per class, then will need to make up extra trees with
|
|
1070
|
+
# zeros to accommodate it.
|
|
1071
|
+
if task_type == "kMultiClf" and not looks_like_random_forest:
|
|
1072
|
+
num_trees = len(model_json["trees"])
|
|
1073
|
+
if (num_trees % num_class) != 0:
|
|
1074
|
+
shap_ready = False
|
|
1075
|
+
class_ids, num_trees_per_class = np.unique(
|
|
1076
|
+
model_json["class_id"], return_counts=True
|
|
1077
|
+
)
|
|
1078
|
+
max_tree_per_class = num_trees_per_class.max()
|
|
1079
|
+
num_tree_add_per_class = max_tree_per_class - num_trees_per_class
|
|
1080
|
+
for class_ind in range(num_class):
|
|
1081
|
+
for tree in range(num_tree_add_per_class[class_ind]):
|
|
1082
|
+
add_empty_tree_to_treelite_json(model_json, class_ind)
|
|
1083
|
+
|
|
1084
|
+
tree_class_orders = model_json["class_id"]
|
|
1085
|
+
sequential_ids = np.arange(num_class)
|
|
1086
|
+
num_trees = len(model_json["trees"])
|
|
1087
|
+
assert (num_trees % num_class) == 0
|
|
1088
|
+
if not np.array_equal(
|
|
1089
|
+
tree_class_orders, np.tile(sequential_ids, int(num_trees / num_class))
|
|
1090
|
+
):
|
|
1091
|
+
argsorted_class_indices = np.argsort(tree_class_orders)
|
|
1092
|
+
per_class_indices = np.split(argsorted_class_indices, num_class)
|
|
1093
|
+
correct_order = np.vstack(per_class_indices).reshape(-1, order="F")
|
|
1094
|
+
model_json["trees"] = [model_json["trees"][ix] for ix in correct_order]
|
|
1095
|
+
model_json["class_id"] = [model_json["class_id"][ix] for ix in correct_order]
|
|
1096
|
+
|
|
1097
|
+
# In the case of multi-class classification with base scores,
|
|
1098
|
+
# since daal4py only supports scalar intercepts, this follows the
|
|
1099
|
+
# same strategy as in catboost of dividing the intercepts equally
|
|
1100
|
+
# among the number of trees
|
|
1101
|
+
if task_type == "kMultiClf" and not looks_like_random_forest:
|
|
1102
|
+
add_intercept_to_treelite_leafs(model_json, base_score)
|
|
1103
|
+
base_score = None
|
|
1104
|
+
|
|
1105
|
+
if isinstance(base_score, list):
|
|
1106
|
+
if len(base_score) == 1:
|
|
1107
|
+
base_score = base_score[0]
|
|
1108
|
+
else:
|
|
1109
|
+
raise TypeError("Model to convert is malformed.")
|
|
1110
|
+
|
|
1111
|
+
tree_list = TreeList.from_treelite_dict(model_json)
|
|
1112
|
+
return (
|
|
1113
|
+
get_gbt_model_from_tree_list(
|
|
1114
|
+
tree_list,
|
|
1115
|
+
n_iterations=num_trees
|
|
1116
|
+
/ (
|
|
1117
|
+
num_class
|
|
1118
|
+
if task_type == "kMultiClf" and not looks_like_random_forest
|
|
1119
|
+
else 1
|
|
1120
|
+
),
|
|
1121
|
+
is_regression=is_regression,
|
|
1122
|
+
n_features=num_feature,
|
|
1123
|
+
n_classes=num_class,
|
|
1124
|
+
base_score=base_score,
|
|
1125
|
+
),
|
|
1126
|
+
num_class,
|
|
1127
|
+
num_feature,
|
|
1128
|
+
shap_ready,
|
|
1129
|
+
)
|
|
1130
|
+
|
|
1131
|
+
|
|
1132
|
+
def divide_treelite_leaf_values_by_const(
|
|
1133
|
+
tl_json: dict[str, Any], divisor: "int | float"
|
|
1134
|
+
) -> None:
|
|
1135
|
+
for tree in tl_json["trees"]:
|
|
1136
|
+
for node in tree["nodes"]:
|
|
1137
|
+
if "leaf_value" in node:
|
|
1138
|
+
if isinstance(node["leaf_value"], (list, tuple)):
|
|
1139
|
+
node["leaf_value"] = list(np.array(node["leaf_value"]) / divisor)
|
|
1140
|
+
else:
|
|
1141
|
+
node["leaf_value"] /= divisor
|
|
1142
|
+
|
|
1143
|
+
|
|
1144
|
+
def leave_only_last_treelite_leaf_value(tl_json: dict[str, Any]) -> None:
|
|
1145
|
+
for tree in tl_json["trees"]:
|
|
1146
|
+
for node in tree["nodes"]:
|
|
1147
|
+
if "leaf_value" in node:
|
|
1148
|
+
assert len(node["leaf_value"]) == 2
|
|
1149
|
+
node["leaf_value"] = node["leaf_value"][-1]
|
|
1150
|
+
|
|
1151
|
+
|
|
1152
|
+
def add_intercept_to_treelite_leafs(
|
|
1153
|
+
tl_json: dict[str, Any], base_score: list[float]
|
|
1154
|
+
) -> None:
|
|
1155
|
+
num_trees_per_class = len(tl_json["trees"]) / tl_json["num_class"][0]
|
|
1156
|
+
for tree_index, tree in enumerate(tl_json["trees"]):
|
|
1157
|
+
leaf_add = base_score[tl_json["class_id"][tree_index]] / num_trees_per_class
|
|
1158
|
+
for node in tree["nodes"]:
|
|
1159
|
+
if "leaf_value" in node:
|
|
1160
|
+
node["leaf_value"] += leaf_add
|
|
1161
|
+
|
|
1162
|
+
|
|
1163
|
+
def add_empty_tree_to_treelite_json(tl_json: dict[str, Any], class_add: int) -> None:
|
|
1164
|
+
tl_json["class_id"].append(class_add)
|
|
1165
|
+
tl_json["trees"].append(
|
|
1166
|
+
{
|
|
1167
|
+
"num_nodes": 1,
|
|
1168
|
+
"has_categorical_split": False,
|
|
1169
|
+
"nodes": [
|
|
1170
|
+
{
|
|
1171
|
+
"node_id": 0,
|
|
1172
|
+
"leaf_value": 0.0,
|
|
1173
|
+
"data_count": 0,
|
|
1174
|
+
"sum_hess": 0.0,
|
|
1175
|
+
},
|
|
1176
|
+
],
|
|
1177
|
+
}
|
|
1178
|
+
)
|
|
daal4py/mb/tree_based_builders.py
CHANGED
|
@@ -37,6 +37,7 @@ from .gbt_convertors import (
|
|
|
37
37
|
get_catboost_params,
|
|
38
38
|
get_gbt_model_from_catboost,
|
|
39
39
|
get_gbt_model_from_lightgbm,
|
|
40
|
+
get_gbt_model_from_treelite,
|
|
40
41
|
get_gbt_model_from_xgboost,
|
|
41
42
|
get_lightgbm_params,
|
|
42
43
|
get_xgboost_params,
|
|
@@ -63,7 +64,9 @@ def getFPType(X):
|
|
|
63
64
|
|
|
64
65
|
class GBTDAALBaseModel:
|
|
65
66
|
def __init__(self):
|
|
66
|
-
self.model_type: Optional[
|
|
67
|
+
self.model_type: Optional[
|
|
68
|
+
Literal["xgboost", "catboost", "lightgbm", "treelite"]
|
|
69
|
+
] = None
|
|
67
70
|
|
|
68
71
|
@property
|
|
69
72
|
def _is_regression(self):
|
|
@@ -86,6 +89,8 @@ class GBTDAALBaseModel:
|
|
|
86
89
|
if self.n_classes_ <= 2:
|
|
87
90
|
if objective_fun in ["binary:logistic", "binary:logitraw"]:
|
|
88
91
|
self.n_classes_ = 2
|
|
92
|
+
elif self.n_classes_ == 0:
|
|
93
|
+
self.n_classes_ = 1
|
|
89
94
|
|
|
90
95
|
self.n_features_in_ = int(params["learner"]["learner_model_param"]["num_feature"])
|
|
91
96
|
|
|
@@ -113,6 +118,11 @@ class GBTDAALBaseModel:
|
|
|
113
118
|
self.daal_model_, self.supports_shap_ = get_gbt_model_from_catboost(booster)
|
|
114
119
|
self._get_params_from_catboost(catboost_params)
|
|
115
120
|
|
|
121
|
+
def _convert_model_from_treelite(self, tl_model):
|
|
122
|
+
self.daal_model_, self.n_classes_, self.n_features_in_, self.supports_shap_ = (
|
|
123
|
+
get_gbt_model_from_treelite(tl_model)
|
|
124
|
+
)
|
|
125
|
+
|
|
116
126
|
def _convert_model(self, model):
|
|
117
127
|
(submodule_name, class_name) = (
|
|
118
128
|
model.__class__.__module__,
|
|
@@ -147,6 +157,14 @@ class GBTDAALBaseModel:
|
|
|
147
157
|
# Build GBTDAALModel from CatBoost
|
|
148
158
|
elif (submodule_name, class_name) == ("catboost.core", "CatBoost"):
|
|
149
159
|
self._convert_model_from_catboost(model)
|
|
160
|
+
elif (submodule_name, class_name) == ("treelite.model", "Model"):
|
|
161
|
+
self._convert_model_from_treelite(model)
|
|
162
|
+
elif submodule_name.startswith("sklearn.ensemble"):
|
|
163
|
+
raise TypeError(
|
|
164
|
+
"Cannot convert scikit-learn models. Try converting to treelite "
|
|
165
|
+
"with 'treelite.sklearn.import_model' and then converting the "
|
|
166
|
+
"resulting TreeLite object."
|
|
167
|
+
)
|
|
150
168
|
else:
|
|
151
169
|
raise TypeError(f"Unknown model format {submodule_name}.{class_name}")
|
|
152
170
|
|
|
@@ -303,14 +321,21 @@ class GBTDAALModel(GBTDAALBaseModel):
|
|
|
303
321
|
|
|
304
322
|
Can be created from model objects that meet all of the following criteria:
|
|
305
323
|
|
|
306
|
-
- Were produced from one of the following libraries: ``xgboost``, ``lightgbm``,
|
|
307
|
-
It can work with either the base booster classes
|
|
308
|
-
scikit-learn-compatible classes.
|
|
324
|
+
- Were produced from one of the following libraries: ``xgboost``, ``lightgbm``, ``catboost``,
|
|
325
|
+
or ``treelite`` (with some limitations). It can work with either the base booster classes
|
|
326
|
+
of those libraries or with their scikit-learn-compatible classes.
|
|
309
327
|
- Do not use categorical features.
|
|
310
328
|
- Are for regression or classification (e.g. no ranking). In the case of XGBoost objective
|
|
311
329
|
``binary:logitraw``, it will create a classification model out of it, and in the case of
|
|
312
330
|
objective ``reg:logistic``, will create a regression model.
|
|
313
331
|
- Are not multi-output models. Note that multi-class classification **is** supported.
|
|
332
|
+
- Are not multi-class random forests (multi-class gradient boosters are supported).
|
|
333
|
+
|
|
334
|
+
Note that while models from packages such as scikit-learn are not supported directly,
|
|
335
|
+
they can still be converted to this class by first converting them to TreeLite and
|
|
336
|
+
then converting to :obj:`GBTDAALModel` from that TreeLite model. In such case, note that
|
|
337
|
+
models corresponding to random forest binary classifiers will be treated as regressors
|
|
338
|
+
that predict probabilities.
|
|
314
339
|
|
|
315
340
|
Parameters
|
|
316
341
|
----------
|
|
@@ -330,7 +355,7 @@ class GBTDAALModel(GBTDAALBaseModel):
|
|
|
330
355
|
|
|
331
356
|
def __init__(self, model):
|
|
332
357
|
self._convert_model(model)
|
|
333
|
-
for type_str in ("xgboost", "lightgbm", "catboost"):
|
|
358
|
+
for type_str in ("xgboost", "lightgbm", "catboost", "treelite"):
|
|
334
359
|
if type_str in str(type(model)):
|
|
335
360
|
self.model_type = type_str
|
|
336
361
|
break
|
|
daal4py/mpi_transceiver.cpython-39-x86_64-linux-gnu.so
Binary file
|
|
daal4py/sklearn/cluster/dbscan.py
CHANGED
|
@@ -36,10 +36,10 @@ def _daal_dbscan(X, eps=0.5, min_samples=5, sample_weight=None):
|
|
|
36
36
|
ww = make2d(sample_weight) if sample_weight is not None else None
|
|
37
37
|
XX = make2d(X)
|
|
38
38
|
|
|
39
|
-
fpt = getFPType(XX)
|
|
39
|
+
fpt = getFPType(XX) # codespell:ignore fpt
|
|
40
40
|
alg = daal4py.dbscan(
|
|
41
41
|
method="defaultDense",
|
|
42
|
-
fptype=fpt,
|
|
42
|
+
fptype=fpt, # codespell:ignore fpt
|
|
43
43
|
epsilon=float(eps),
|
|
44
44
|
minObservations=int(min_samples),
|
|
45
45
|
memorySavingMode=False,
|
|
daal4py/sklearn/linear_model/logistic_loss.py
CHANGED
|
@@ -61,8 +61,8 @@ def _daal4py_logistic_loss_extra_args(
|
|
|
61
61
|
fptype=getFPType(X),
|
|
62
62
|
method="defaultDense",
|
|
63
63
|
interceptFlag=fit_intercept,
|
|
64
|
-
penaltyL1=l1
|
|
65
|
-
penaltyL2=l2
|
|
64
|
+
penaltyL1=l1,
|
|
65
|
+
penaltyL2=l2,
|
|
66
66
|
resultsToCompute=results_to_compute,
|
|
67
67
|
)
|
|
68
68
|
objective_function_algorithm_instance.setup(X, y, beta)
|
|
@@ -99,8 +99,8 @@ def _daal4py_cross_entropy_loss_extra_args(
|
|
|
99
99
|
fptype=getFPType(X),
|
|
100
100
|
method="defaultDense",
|
|
101
101
|
interceptFlag=fit_intercept,
|
|
102
|
-
penaltyL1=l1
|
|
103
|
-
penaltyL2=l2
|
|
102
|
+
penaltyL1=l1,
|
|
103
|
+
penaltyL2=l2,
|
|
104
104
|
resultsToCompute=results_to_compute,
|
|
105
105
|
)
|
|
106
106
|
)
|