scikit-learn-intelex 2025.6.1__py312-none-manylinux_2_28_x86_64.whl → 2025.8.0__py312-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (111)
  1. daal4py/_daal4py.cpython-312-x86_64-linux-gnu.so +0 -0
  2. daal4py/mb/__init__.py +2 -2
  3. daal4py/mb/gbt_convertors.py +258 -2
  4. daal4py/mb/tree_based_builders.py +30 -5
  5. daal4py/mpi_transceiver.cpython-312-x86_64-linux-gnu.so +0 -0
  6. daal4py/sklearn/cluster/dbscan.py +2 -2
  7. daal4py/sklearn/linear_model/logistic_loss.py +4 -4
  8. daal4py/sklearn/linear_model/logistic_path.py +132 -541
  9. daal4py/sklearn/manifold/_t_sne.py +1 -1
  10. daal4py/sklearn/svm/svm.py +1 -1
  11. daal4py/sklearn/utils/validation.py +15 -16
  12. onedal/__init__.py +26 -1
  13. onedal/_config.py +5 -4
  14. onedal/_device_offload.py +84 -94
  15. onedal/_onedal_py_dpc.cpython-312-x86_64-linux-gnu.so +0 -0
  16. onedal/_onedal_py_host.cpython-312-x86_64-linux-gnu.so +0 -0
  17. onedal/_onedal_py_spmd_dpc.cpython-312-x86_64-linux-gnu.so +0 -0
  18. onedal/basic_statistics/basic_statistics.py +96 -56
  19. onedal/basic_statistics/incremental_basic_statistics.py +42 -57
  20. onedal/basic_statistics/tests/test_basic_statistics.py +6 -7
  21. onedal/basic_statistics/tests/test_incremental_basic_statistics.py +11 -11
  22. onedal/cluster/dbscan.py +7 -25
  23. onedal/cluster/kmeans.py +18 -2
  24. onedal/common/_backend.py +62 -37
  25. onedal/common/hyperparameters.py +32 -9
  26. onedal/common/tests/test_sycl.py +6 -1
  27. onedal/covariance/covariance.py +10 -12
  28. onedal/covariance/incremental_covariance.py +8 -16
  29. onedal/datatypes/__init__.py +12 -2
  30. onedal/datatypes/_data_conversion.py +109 -70
  31. onedal/datatypes/_dlpack.py +61 -0
  32. onedal/datatypes/_sycl_usm.py +63 -0
  33. onedal/datatypes/tests/common.py +8 -3
  34. onedal/datatypes/tests/test_data.py +61 -19
  35. onedal/decomposition/incremental_pca.py +8 -17
  36. onedal/decomposition/pca.py +6 -4
  37. onedal/ensemble/forest.py +15 -9
  38. onedal/linear_model/incremental_linear_model.py +65 -175
  39. onedal/linear_model/linear_model.py +87 -208
  40. onedal/linear_model/logistic_regression.py +14 -15
  41. onedal/linear_model/tests/test_linear_regression.py +10 -17
  42. onedal/primitives/kernel_functions.py +64 -17
  43. onedal/spmd/decomposition/incremental_pca.py +0 -6
  44. onedal/spmd/ensemble/forest.py +18 -0
  45. onedal/svm/svm.py +0 -12
  46. onedal/tests/test_common.py +15 -1
  47. onedal/tests/utils/_dataframes_support.py +23 -6
  48. onedal/tests/utils/_device_selection.py +1 -1
  49. onedal/utils/_array_api.py +25 -25
  50. onedal/utils/_sycl_queue_manager.py +106 -54
  51. onedal/utils/_third_party.py +220 -0
  52. onedal/utils/validation.py +11 -3
  53. {scikit_learn_intelex-2025.6.1.dist-info → scikit_learn_intelex-2025.8.0.dist-info}/METADATA +2 -2
  54. {scikit_learn_intelex-2025.6.1.dist-info → scikit_learn_intelex-2025.8.0.dist-info}/RECORD +110 -108
  55. sklearnex/__init__.py +2 -1
  56. sklearnex/_config.py +17 -8
  57. sklearnex/_device_offload.py +45 -34
  58. sklearnex/_utils.py +52 -3
  59. sklearnex/base.py +1 -1
  60. sklearnex/basic_statistics/basic_statistics.py +31 -45
  61. sklearnex/basic_statistics/incremental_basic_statistics.py +50 -55
  62. sklearnex/cluster/dbscan.py +30 -27
  63. sklearnex/cluster/k_means.py +1 -0
  64. sklearnex/covariance/incremental_covariance.py +14 -5
  65. sklearnex/decomposition/pca.py +21 -9
  66. sklearnex/decomposition/tests/test_pca.py +54 -2
  67. sklearnex/dispatcher.py +13 -7
  68. sklearnex/ensemble/_forest.py +17 -5
  69. sklearnex/ensemble/tests/test_forest.py +22 -7
  70. sklearnex/linear_model/coordinate_descent.py +2 -0
  71. sklearnex/linear_model/incremental_linear.py +90 -73
  72. sklearnex/linear_model/incremental_ridge.py +83 -60
  73. sklearnex/linear_model/linear.py +53 -41
  74. sklearnex/linear_model/logistic_regression.py +11 -4
  75. sklearnex/linear_model/ridge.py +47 -27
  76. sklearnex/linear_model/tests/test_linear.py +27 -61
  77. sklearnex/linear_model/tests/test_logreg.py +448 -5
  78. sklearnex/manifold/tests/test_tsne.py +1 -1
  79. sklearnex/neighbors/_lof.py +1 -1
  80. sklearnex/neighbors/common.py +8 -6
  81. sklearnex/preview/covariance/covariance.py +3 -4
  82. sklearnex/preview/covariance/tests/test_covariance.py +54 -8
  83. sklearnex/spmd/basic_statistics/basic_statistics.py +7 -4
  84. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +6 -3
  85. sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +12 -8
  86. sklearnex/spmd/cluster/dbscan.py +2 -26
  87. sklearnex/spmd/covariance/incremental_covariance.py +0 -8
  88. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +11 -4
  89. sklearnex/spmd/decomposition/incremental_pca.py +0 -7
  90. sklearnex/spmd/ensemble/forest.py +230 -2
  91. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +21 -4
  92. sklearnex/spmd/linear_model/incremental_linear_model.py +0 -7
  93. sklearnex/spmd/linear_model/linear_model.py +7 -4
  94. sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +16 -8
  95. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +12 -4
  96. sklearnex/svm/_common.py +1 -1
  97. sklearnex/svm/tests/test_svm.py +31 -0
  98. sklearnex/tests/test_common.py +41 -2
  99. sklearnex/tests/test_config.py +77 -6
  100. sklearnex/tests/test_hyperparameters.py +85 -17
  101. sklearnex/tests/test_memory_usage.py +0 -8
  102. sklearnex/tests/test_n_jobs_support.py +1 -1
  103. sklearnex/tests/test_patching.py +13 -13
  104. sklearnex/tests/utils/base.py +4 -22
  105. sklearnex/utils/_array_api.py +71 -0
  106. sklearnex/utils/parallel.py +68 -30
  107. sklearnex/utils/validation.py +29 -11
  108. onedal/utils/_dpep_helpers.py +0 -71
  109. {scikit_learn_intelex-2025.6.1.dist-info → scikit_learn_intelex-2025.8.0.dist-info}/LICENSE.txt +0 -0
  110. {scikit_learn_intelex-2025.6.1.dist-info → scikit_learn_intelex-2025.8.0.dist-info}/WHEEL +0 -0
  111. {scikit_learn_intelex-2025.6.1.dist-info → scikit_learn_intelex-2025.8.0.dist-info}/top_level.txt +0 -0
daal4py/mb/__init__.py CHANGED
@@ -29,8 +29,8 @@ def convert_model(model) -> "GBTDAALModel | LogisticDAALModel":
29
29
  prediction methods.
30
30
 
31
31
  It supports gradient-boosted decision tree ensembles (GBT) from the libraries
32
- ``xgboost``, ``lightgbm``, and ``catboost``; and logistic regression (binary
33
- and multinomial) models from scikit-learn.
32
+ ``xgboost``, ``lightgbm``, ``catboost``, and ``treelite``; and logistic regression
33
+ (binary and multinomial) models from scikit-learn.
34
34
 
35
35
  See the documentation of the classes :obj:`daal4py.mb.GBTDAALModel` and
36
36
  :obj:`daal4py.mb.LogisticDAALModel` for more details.
daal4py/mb/gbt_convertors.py CHANGED
@@ -15,6 +15,7 @@
15
15
  # ===============================================================================
16
16
 
17
17
  import json
18
+ import warnings
18
19
  from collections import deque
19
20
  from copy import deepcopy
20
21
  from tempfile import NamedTemporaryFile
@@ -197,6 +198,52 @@ class Node:
197
198
  right_child=right_child,
198
199
  )
199
200
 
201
+ @staticmethod
202
+ def from_treelite_dict(dict_all_nodes: list[dict[str, Any]], node_id: int) -> "Node":
203
+ this_node = dict_all_nodes[node_id]
204
+ is_leaf = "leaf_value" in this_node
205
+ default_left = this_node.get("default_left", False)
206
+
207
+ n_children = 0
208
+ if "left_child" in this_node:
209
+ left_child = Node.from_treelite_dict(dict_all_nodes, this_node["left_child"])
210
+ n_children += 1 + left_child.n_children
211
+ else:
212
+ left_child = None
213
+ if "right_child" in this_node:
214
+ right_child = Node.from_treelite_dict(
215
+ dict_all_nodes, this_node["right_child"]
216
+ )
217
+ n_children += 1 + right_child.n_children
218
+ else:
219
+ right_child = None
220
+
221
+ value = this_node["leaf_value"] if is_leaf else this_node["threshold"]
222
+ if not is_leaf:
223
+ comp = this_node["comparison_op"]
224
+ if comp == "<=":
225
+ value = float(np.nextafter(value, np.inf))
226
+ elif comp in [">", ">="]:
227
+ left_child, right_child = right_child, left_child
228
+ default_left = not default_left
229
+ if comp == ">":
230
+ value = float(np.nextafter(value, -np.inf))
231
+ elif comp != "<":
232
+ raise TypeError(
233
+ f"Model to convert contains unsupported split type: {comp}."
234
+ )
235
+
236
+ return Node(
237
+ cover=this_node.get("sum_hess", 0.0),
238
+ is_leaf=is_leaf,
239
+ default_left=default_left,
240
+ feature=this_node.get("split_feature_id"),
241
+ value=value,
242
+ n_children=n_children,
243
+ left_child=left_child,
244
+ right_child=right_child,
245
+ )
246
+
200
247
  def get_value_closest_float_downward(self) -> np.float64:
201
248
  """Get the closest exact fp value smaller than self.value"""
202
249
  return np.nextafter(np.single(self.value), np.single(-np.inf))
@@ -310,6 +357,14 @@ class TreeList(list):
310
357
 
311
358
  return tl
312
359
 
360
+ @staticmethod
361
+ def from_treelite_dict(tl_json: Dict[str, Any]) -> "TreeList":
362
+ tl = TreeList()
363
+ for tree_id, tree_dict in enumerate(tl_json["trees"]):
364
+ root_node = Node.from_treelite_dict(tree_dict["nodes"], 0)
365
+ tl.append(TreeView(tree_id=tree_id, root_node=root_node))
366
+ return tl
367
+
313
368
  def __setitem__(self):
314
369
  raise NotImplementedError(
315
370
  "Use TreeList.from_*() methods to initialize a TreeList"
@@ -421,7 +476,9 @@ def get_gbt_model_from_lightgbm(model: Any, booster=None) -> Any:
421
476
  if "is_linear=1" in model_str:
422
477
  raise TypeError("Linear trees are not supported.")
423
478
  if "[boosting: dart]" in model_str:
424
- raise TypeError("'Dart' booster is not supported.")
479
+ raise TypeError(
480
+ "'Dart' booster is not supported. Try converting to 'treelite' first."
481
+ )
425
482
  if "[boosting: rf]" in model_str:
426
483
  raise TypeError("Random forest boosters are not supported.")
427
484
  if ("[objective: lambdarank]" in model_str) or (
@@ -476,7 +533,9 @@ def get_gbt_model_from_xgboost(booster: Any, xgb_config=None) -> Any:
476
533
  xgb_config = get_xgboost_params(booster)
477
534
 
478
535
  if xgb_config["learner"]["learner_train_param"]["booster"] != "gbtree":
479
- raise TypeError("Only 'gbtree' booster type is supported.")
536
+ raise TypeError(
537
+ "Only 'gbtree' booster type is supported. For DART, try converting to 'treelite' first."
538
+ )
480
539
 
481
540
  n_targets = xgb_config["learner"]["learner_model_param"].get("num_target")
482
541
  if n_targets is not None and int(n_targets) > 1:
@@ -920,3 +979,200 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
920
979
  if not add_intercept_to_each_node:
921
980
  intercept = booster.get_scale_and_bias()[1]
922
981
  return mb.model(base_score=intercept), shap_ready
982
+
983
+
984
+ def get_gbt_model_from_treelite(
985
+ tl_model: "treelite.model.Model",
986
+ ) -> tuple[Any, int, int, bool]:
987
+ model_json = json.loads(tl_model.dump_as_json())
988
+ task_type = model_json["task_type"]
989
+ if task_type not in ["kBinaryClf", "kRegressor", "kMultiClf", "kIsolationForest"]:
990
+ raise TypeError(f"Model to convert is of unsupported type: {task_type}")
991
+ if model_json["num_target"] > 1:
992
+ raise TypeError("Multi-target models are not supported.")
993
+ if model_json["postprocessor"] == "multiclass_ova":
994
+ raise TypeError(
995
+ "Multi-class classification models that use One-Vs-All are not supported."
996
+ )
997
+ for tree in model_json["trees"]:
998
+ if tree["has_categorical_split"]:
999
+ raise TypeError("Models with categorical features are not supported.")
1000
+ num_trees = tl_model.num_tree
1001
+ if not num_trees:
1002
+ raise TypeError("Model to convert contains no trees.")
1003
+
1004
+ # Note: the daal4py module always adds up the scores, but some models
1005
+ # might average them instead. In such case, this turns the trees into
1006
+ # additive ones by dividing the predictions by the number of trees beforehand.
1007
+ if model_json["average_tree_output"]:
1008
+ divide_treelite_leaf_values_by_const(model_json, num_trees)
1009
+
1010
+ base_score = model_json["base_scores"]
1011
+ num_class = model_json["num_class"][0]
1012
+ num_feature = model_json["num_feature"]
1013
+
1014
+ if task_type == "kBinaryClf":
1015
+ num_class = 2
1016
+ if base_score:
1017
+ base_score = list(1 / (1 + np.exp(-np.array(base_score))))
1018
+
1019
+ if num_class > 2:
1020
+ shap_ready = False
1021
+ else:
1022
+ shap_ready = True
1023
+ for tree in model_json["trees"]:
1024
+ if not tree["nodes"][0].get("sum_hess", False):
1025
+ shap_ready = False
1026
+ break
1027
+
1028
+ # In the case of random forests for classification, it might work
1029
+ # by averaging predictions without any link function, whereas
1030
+ # daal4py assumes a logit link. In such case, it's not possible to
1031
+ # convert them to daal4py's logic, but the model can still be used
1032
+ # as a regressor that always outputs something between 0 and 1.
1033
+ is_regression = "Clf" not in task_type
1034
+ if not is_regression and model_json["postprocessor"] == "identity_multiclass":
1035
+ is_regression = True
1036
+ warnings.warn(
1037
+ "Attempting to convert classification model which is not"
1038
+ " based on gradient boosting. Will output a regression"
1039
+ " model instead."
1040
+ )
1041
+
1042
+ looks_like_random_forest = (
1043
+ model_json["postprocessor"] == "identity_multiclass"
1044
+ and len(model_json["base_scores"]) > 1
1045
+ and task_type == "kMultiClf"
1046
+ )
1047
+ if looks_like_random_forest:
1048
+ if num_class > 2 or len(base_score) > 2:
1049
+ raise TypeError("Multi-class random forests are not supported.")
1050
+ if len(model_json["num_class"]) > 1:
1051
+ raise TypeError("Multi-output random forests are not supported.")
1052
+ if len(base_score) == 2 and base_score[0]:
1053
+ raise TypeError("Random forests with base scores are not supported.")
1054
+
1055
+ # In the case of binary random forests, it will always have leaf values
1056
+ # for 2 classes, which is redundant as they sum to 1. daal4py requires
1057
+ # only values for the positive class, so they need to be converted.
1058
+ if looks_like_random_forest:
1059
+ leave_only_last_treelite_leaf_value(model_json)
1060
+ base_score = base_score[-1]
1061
+
1062
+ # In the case of multi-class classification models, if converted
1063
+ # from xgboost, the order of the trees will be the same - i.e.
1064
+ # sequences of one tree of each class, followed by another such
1065
+ # sequence. But treelite could in theory also support building
1066
+ # models where the trees are in a different order, in which case
1067
+ # they will need to be reordered to match xgboost, since that's
1068
+ # how daal4py handles them. And if there is an uneven number of
1069
+ # trees per class, then will need to make up extra trees with
1070
+ # zeros to accommodate it.
1071
+ if task_type == "kMultiClf" and not looks_like_random_forest:
1072
+ num_trees = len(model_json["trees"])
1073
+ if (num_trees % num_class) != 0:
1074
+ shap_ready = False
1075
+ class_ids, num_trees_per_class = np.unique(
1076
+ model_json["class_id"], return_counts=True
1077
+ )
1078
+ max_tree_per_class = num_trees_per_class.max()
1079
+ num_tree_add_per_class = max_tree_per_class - num_trees_per_class
1080
+ for class_ind in range(num_class):
1081
+ for tree in range(num_tree_add_per_class[class_ind]):
1082
+ add_empty_tree_to_treelite_json(model_json, class_ind)
1083
+
1084
+ tree_class_orders = model_json["class_id"]
1085
+ sequential_ids = np.arange(num_class)
1086
+ num_trees = len(model_json["trees"])
1087
+ assert (num_trees % num_class) == 0
1088
+ if not np.array_equal(
1089
+ tree_class_orders, np.tile(sequential_ids, int(num_trees / num_class))
1090
+ ):
1091
+ argsorted_class_indices = np.argsort(tree_class_orders)
1092
+ per_class_indices = np.split(argsorted_class_indices, num_class)
1093
+ correct_order = np.vstack(per_class_indices).reshape(-1, order="F")
1094
+ model_json["trees"] = [model_json["trees"][ix] for ix in correct_order]
1095
+ model_json["class_id"] = [model_json["class_id"][ix] for ix in correct_order]
1096
+
1097
+ # In the case of multi-class classification with base scores,
1098
+ # since daal4py only supports scalar intercepts, this follows the
1099
+ # same strategy as in catboost of dividing the intercepts equally
1100
+ # among the number of trees
1101
+ if task_type == "kMultiClf" and not looks_like_random_forest:
1102
+ add_intercept_to_treelite_leafs(model_json, base_score)
1103
+ base_score = None
1104
+
1105
+ if isinstance(base_score, list):
1106
+ if len(base_score) == 1:
1107
+ base_score = base_score[0]
1108
+ else:
1109
+ raise TypeError("Model to convert is malformed.")
1110
+
1111
+ tree_list = TreeList.from_treelite_dict(model_json)
1112
+ return (
1113
+ get_gbt_model_from_tree_list(
1114
+ tree_list,
1115
+ n_iterations=num_trees
1116
+ / (
1117
+ num_class
1118
+ if task_type == "kMultiClf" and not looks_like_random_forest
1119
+ else 1
1120
+ ),
1121
+ is_regression=is_regression,
1122
+ n_features=num_feature,
1123
+ n_classes=num_class,
1124
+ base_score=base_score,
1125
+ ),
1126
+ num_class,
1127
+ num_feature,
1128
+ shap_ready,
1129
+ )
1130
+
1131
+
1132
+ def divide_treelite_leaf_values_by_const(
1133
+ tl_json: dict[str, Any], divisor: "int | float"
1134
+ ) -> None:
1135
+ for tree in tl_json["trees"]:
1136
+ for node in tree["nodes"]:
1137
+ if "leaf_value" in node:
1138
+ if isinstance(node["leaf_value"], (list, tuple)):
1139
+ node["leaf_value"] = list(np.array(node["leaf_value"]) / divisor)
1140
+ else:
1141
+ node["leaf_value"] /= divisor
1142
+
1143
+
1144
+ def leave_only_last_treelite_leaf_value(tl_json: dict[str, Any]) -> None:
1145
+ for tree in tl_json["trees"]:
1146
+ for node in tree["nodes"]:
1147
+ if "leaf_value" in node:
1148
+ assert len(node["leaf_value"]) == 2
1149
+ node["leaf_value"] = node["leaf_value"][-1]
1150
+
1151
+
1152
+ def add_intercept_to_treelite_leafs(
1153
+ tl_json: dict[str, Any], base_score: list[float]
1154
+ ) -> None:
1155
+ num_trees_per_class = len(tl_json["trees"]) / tl_json["num_class"][0]
1156
+ for tree_index, tree in enumerate(tl_json["trees"]):
1157
+ leaf_add = base_score[tl_json["class_id"][tree_index]] / num_trees_per_class
1158
+ for node in tree["nodes"]:
1159
+ if "leaf_value" in node:
1160
+ node["leaf_value"] += leaf_add
1161
+
1162
+
1163
+ def add_empty_tree_to_treelite_json(tl_json: dict[str, Any], class_add: int) -> None:
1164
+ tl_json["class_id"].append(class_add)
1165
+ tl_json["trees"].append(
1166
+ {
1167
+ "num_nodes": 1,
1168
+ "has_categorical_split": False,
1169
+ "nodes": [
1170
+ {
1171
+ "node_id": 0,
1172
+ "leaf_value": 0.0,
1173
+ "data_count": 0,
1174
+ "sum_hess": 0.0,
1175
+ },
1176
+ ],
1177
+ }
1178
+ )
daal4py/mb/tree_based_builders.py CHANGED
@@ -37,6 +37,7 @@ from .gbt_convertors import (
37
37
  get_catboost_params,
38
38
  get_gbt_model_from_catboost,
39
39
  get_gbt_model_from_lightgbm,
40
+ get_gbt_model_from_treelite,
40
41
  get_gbt_model_from_xgboost,
41
42
  get_lightgbm_params,
42
43
  get_xgboost_params,
@@ -63,7 +64,9 @@ def getFPType(X):
63
64
 
64
65
  class GBTDAALBaseModel:
65
66
  def __init__(self):
66
- self.model_type: Optional[Literal["xgboost", "catboost", "lightgbm"]] = None
67
+ self.model_type: Optional[
68
+ Literal["xgboost", "catboost", "lightgbm", "treelite"]
69
+ ] = None
67
70
 
68
71
  @property
69
72
  def _is_regression(self):
@@ -86,6 +89,8 @@ class GBTDAALBaseModel:
86
89
  if self.n_classes_ <= 2:
87
90
  if objective_fun in ["binary:logistic", "binary:logitraw"]:
88
91
  self.n_classes_ = 2
92
+ elif self.n_classes_ == 0:
93
+ self.n_classes_ = 1
89
94
 
90
95
  self.n_features_in_ = int(params["learner"]["learner_model_param"]["num_feature"])
91
96
 
@@ -113,6 +118,11 @@ class GBTDAALBaseModel:
113
118
  self.daal_model_, self.supports_shap_ = get_gbt_model_from_catboost(booster)
114
119
  self._get_params_from_catboost(catboost_params)
115
120
 
121
+ def _convert_model_from_treelite(self, tl_model):
122
+ self.daal_model_, self.n_classes_, self.n_features_in_, self.supports_shap_ = (
123
+ get_gbt_model_from_treelite(tl_model)
124
+ )
125
+
116
126
  def _convert_model(self, model):
117
127
  (submodule_name, class_name) = (
118
128
  model.__class__.__module__,
@@ -147,6 +157,14 @@ class GBTDAALBaseModel:
147
157
  # Build GBTDAALModel from CatBoost
148
158
  elif (submodule_name, class_name) == ("catboost.core", "CatBoost"):
149
159
  self._convert_model_from_catboost(model)
160
+ elif (submodule_name, class_name) == ("treelite.model", "Model"):
161
+ self._convert_model_from_treelite(model)
162
+ elif submodule_name.startswith("sklearn.ensemble"):
163
+ raise TypeError(
164
+ "Cannot convert scikit-learn models. Try converting to treelite "
165
+ "with 'treelite.sklearn.import_model' and then converting the "
166
+ "resulting TreeLite object."
167
+ )
150
168
  else:
151
169
  raise TypeError(f"Unknown model format {submodule_name}.{class_name}")
152
170
 
@@ -303,14 +321,21 @@ class GBTDAALModel(GBTDAALBaseModel):
303
321
 
304
322
  Can be created from model objects that meet all of the following criteria:
305
323
 
306
- - Were produced from one of the following libraries: ``xgboost``, ``lightgbm``, or ``catboost``.
307
- It can work with either the base booster classes of those libraries or with their
308
- scikit-learn-compatible classes.
324
+ - Were produced from one of the following libraries: ``xgboost``, ``lightgbm``, ``catboost``,
325
+ or ``treelite`` (with some limitations). It can work with either the base booster classes
326
+ of those libraries or with their scikit-learn-compatible classes.
309
327
  - Do not use categorical features.
310
328
  - Are for regression or classification (e.g. no ranking). In the case of XGBoost objective
311
329
  ``binary:logitraw``, it will create a classification model out of it, and in the case of
312
330
  objective ``reg:logistic``, will create a regression model.
313
331
  - Are not multi-output models. Note that multi-class classification **is** supported.
332
+ - Are not multi-class random forests (multi-class gradient boosters are supported).
333
+
334
+ Note that while models from packages such as scikit-learn are not supported directly,
335
+ they can still be converted to this class by first converting them to TreeLite and
336
+ then converting to :obj:`GBTDAALModel` from that TreeLite model. In such case, note that
337
+ models corresponding to random forest binary classifiers will be treated as regressors
338
+ that predict probabilities.
314
339
 
315
340
  Parameters
316
341
  ----------
@@ -330,7 +355,7 @@ class GBTDAALModel(GBTDAALBaseModel):
330
355
 
331
356
  def __init__(self, model):
332
357
  self._convert_model(model)
333
- for type_str in ("xgboost", "lightgbm", "catboost"):
358
+ for type_str in ("xgboost", "lightgbm", "catboost", "treelite"):
334
359
  if type_str in str(type(model)):
335
360
  self.model_type = type_str
336
361
  break
daal4py/sklearn/cluster/dbscan.py CHANGED
@@ -36,10 +36,10 @@ def _daal_dbscan(X, eps=0.5, min_samples=5, sample_weight=None):
36
36
  ww = make2d(sample_weight) if sample_weight is not None else None
37
37
  XX = make2d(X)
38
38
 
39
- fpt = getFPType(XX)
39
+ fpt = getFPType(XX) # codespell:ignore fpt
40
40
  alg = daal4py.dbscan(
41
41
  method="defaultDense",
42
- fptype=fpt,
42
+ fptype=fpt, # codespell:ignore fpt
43
43
  epsilon=float(eps),
44
44
  minObservations=int(min_samples),
45
45
  memorySavingMode=False,
daal4py/sklearn/linear_model/logistic_loss.py CHANGED
@@ -61,8 +61,8 @@ def _daal4py_logistic_loss_extra_args(
61
61
  fptype=getFPType(X),
62
62
  method="defaultDense",
63
63
  interceptFlag=fit_intercept,
64
- penaltyL1=l1 / n,
65
- penaltyL2=l2 / n,
64
+ penaltyL1=l1,
65
+ penaltyL2=l2,
66
66
  resultsToCompute=results_to_compute,
67
67
  )
68
68
  objective_function_algorithm_instance.setup(X, y, beta)
@@ -99,8 +99,8 @@ def _daal4py_cross_entropy_loss_extra_args(
99
99
  fptype=getFPType(X),
100
100
  method="defaultDense",
101
101
  interceptFlag=fit_intercept,
102
- penaltyL1=l1 / n,
103
- penaltyL2=l2 / n,
102
+ penaltyL1=l1,
103
+ penaltyL2=l2,
104
104
  resultsToCompute=results_to_compute,
105
105
  )
106
106
  )