scikit-learn-intelex 2025.5.0__py313-none-manylinux_2_28_x86_64.whl → 2025.6.0__py313-none-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-learn-intelex might be problematic. Click here for more details.
- daal4py/_daal4py.cpython-313-x86_64-linux-gnu.so +0 -0
- daal4py/mb/gbt_convertors.py +119 -52
- daal4py/mb/tree_based_builders.py +31 -16
- daal4py/mpi_transceiver.cpython-313-x86_64-linux-gnu.so +0 -0
- daal4py/sklearn/monkeypatch/dispatcher.py +1 -1
- onedal/_device_offload.py +11 -3
- onedal/_onedal_py_dpc.cpython-313-x86_64-linux-gnu.so +0 -0
- onedal/_onedal_py_host.cpython-313-x86_64-linux-gnu.so +0 -0
- onedal/_onedal_py_spmd_dpc.cpython-313-x86_64-linux-gnu.so +0 -0
- onedal/tests/utils/_device_selection.py +2 -0
- onedal/utils/_array_api.py +11 -1
- onedal/utils/_sycl_queue_manager.py +16 -2
- {scikit_learn_intelex-2025.5.0.dist-info → scikit_learn_intelex-2025.6.0.dist-info}/METADATA +2 -3
- {scikit_learn_intelex-2025.5.0.dist-info → scikit_learn_intelex-2025.6.0.dist-info}/RECORD +47 -47
- sklearnex/_device_offload.py +140 -89
- sklearnex/_utils.py +12 -45
- sklearnex/base.py +109 -0
- sklearnex/basic_statistics/basic_statistics.py +3 -2
- sklearnex/basic_statistics/incremental_basic_statistics.py +3 -6
- sklearnex/cluster/dbscan.py +4 -3
- sklearnex/cluster/k_means.py +18 -13
- sklearnex/covariance/incremental_covariance.py +10 -5
- sklearnex/decomposition/pca.py +2 -3
- sklearnex/ensemble/_forest.py +5 -4
- sklearnex/linear_model/coordinate_descent.py +12 -0
- sklearnex/linear_model/incremental_linear.py +2 -2
- sklearnex/linear_model/incremental_ridge.py +3 -8
- sklearnex/linear_model/linear.py +3 -7
- sklearnex/linear_model/logistic_regression.py +11 -13
- sklearnex/linear_model/ridge.py +3 -2
- sklearnex/manifold/t_sne.py +6 -3
- sklearnex/neighbors/common.py +3 -2
- sklearnex/preview/covariance/covariance.py +3 -6
- sklearnex/preview/decomposition/incremental_pca.py +3 -6
- sklearnex/svm/_common.py +3 -3
- sklearnex/svm/nusvc.py +1 -1
- sklearnex/svm/nusvr.py +1 -1
- sklearnex/svm/svc.py +1 -1
- sklearnex/svm/svr.py +1 -1
- sklearnex/tests/test_common.py +25 -0
- sklearnex/tests/test_config.py +62 -0
- sklearnex/tests/test_memory_usage.py +3 -2
- sklearnex/tests/test_patching.py +89 -60
- sklearnex/tests/test_run_to_run_stability.py +7 -0
- daal4py/doc/third-party-programs.txt +0 -424
- {scikit_learn_intelex-2025.5.0.dist-info → scikit_learn_intelex-2025.6.0.dist-info}/LICENSE.txt +0 -0
- {scikit_learn_intelex-2025.5.0.dist-info → scikit_learn_intelex-2025.6.0.dist-info}/WHEEL +0 -0
- {scikit_learn_intelex-2025.5.0.dist-info → scikit_learn_intelex-2025.6.0.dist-info}/top_level.txt +0 -0
|
Binary file
|
daal4py/mb/gbt_convertors.py
CHANGED
|
@@ -19,7 +19,6 @@ from collections import deque
|
|
|
19
19
|
from copy import deepcopy
|
|
20
20
|
from tempfile import NamedTemporaryFile
|
|
21
21
|
from typing import Any, Deque, Dict, List, Optional, Tuple
|
|
22
|
-
from warnings import warn
|
|
23
22
|
|
|
24
23
|
import numpy as np
|
|
25
24
|
|
|
@@ -94,18 +93,9 @@ class CatBoostModelData:
|
|
|
94
93
|
else:
|
|
95
94
|
return len(self.trees)
|
|
96
95
|
|
|
97
|
-
@property
|
|
98
|
-
def bias(self):
|
|
99
|
-
if self.is_classification:
|
|
100
|
-
return 0
|
|
101
|
-
return self.__data["scale_and_bias"][1][0] / self.n_iterations
|
|
102
|
-
|
|
103
96
|
@property
|
|
104
97
|
def scale(self):
|
|
105
|
-
|
|
106
|
-
return 1
|
|
107
|
-
else:
|
|
108
|
-
return self.__data["scale_and_bias"][0]
|
|
98
|
+
return self.__data["scale_and_bias"][0]
|
|
109
99
|
|
|
110
100
|
@property
|
|
111
101
|
def default_left(self):
|
|
@@ -223,7 +213,7 @@ class Node:
|
|
|
223
213
|
return self.__feature
|
|
224
214
|
if isinstance(self.__feature, str) and self.__feature.isnumeric():
|
|
225
215
|
return int(self.__feature)
|
|
226
|
-
raise
|
|
216
|
+
raise AttributeError(
|
|
227
217
|
f"Feature names must be integers (got ({type(self.__feature)}){self.__feature})"
|
|
228
218
|
)
|
|
229
219
|
|
|
@@ -242,15 +232,15 @@ class TreeView:
|
|
|
242
232
|
@property
|
|
243
233
|
def value(self) -> float:
|
|
244
234
|
if not self.is_leaf:
|
|
245
|
-
raise
|
|
235
|
+
raise AttributeError("Tree is not a leaf-only tree")
|
|
246
236
|
if self.root_node.value is None:
|
|
247
|
-
raise
|
|
237
|
+
raise AttributeError("Tree is leaf-only but leaf node has no value")
|
|
248
238
|
return self.root_node.value
|
|
249
239
|
|
|
250
240
|
@property
|
|
251
241
|
def cover(self) -> float:
|
|
252
242
|
if not self.is_leaf:
|
|
253
|
-
raise
|
|
243
|
+
raise AttributeError("Tree is not a leaf-only tree")
|
|
254
244
|
return self.root_node.cover
|
|
255
245
|
|
|
256
246
|
@property
|
|
@@ -427,6 +417,18 @@ def get_gbt_model_from_tree_list(
|
|
|
427
417
|
|
|
428
418
|
|
|
429
419
|
def get_gbt_model_from_lightgbm(model: Any, booster=None) -> Any:
|
|
420
|
+
model_str = model.model_to_string()
|
|
421
|
+
if "is_linear=1" in model_str:
|
|
422
|
+
raise TypeError("Linear trees are not supported.")
|
|
423
|
+
if "[boosting: dart]" in model_str:
|
|
424
|
+
raise TypeError("'Dart' booster is not supported.")
|
|
425
|
+
if "[boosting: rf]" in model_str:
|
|
426
|
+
raise TypeError("Random forest boosters are not supported.")
|
|
427
|
+
if ("[objective: lambdarank]" in model_str) or (
|
|
428
|
+
"[objective: rank_xendcg]" in model_str
|
|
429
|
+
):
|
|
430
|
+
raise TypeError("Ranking objectives are not supported.")
|
|
431
|
+
|
|
430
432
|
if booster is None:
|
|
431
433
|
booster = model.dump_model()
|
|
432
434
|
|
|
@@ -437,9 +439,9 @@ def get_gbt_model_from_lightgbm(model: Any, booster=None) -> Any:
|
|
|
437
439
|
is_regression = False
|
|
438
440
|
objective_fun = booster["objective"]
|
|
439
441
|
if n_classes > 2:
|
|
440
|
-
if "
|
|
442
|
+
if ("ova" in objective_fun) or ("ovr" in objective_fun):
|
|
441
443
|
raise TypeError(
|
|
442
|
-
"multiclass (softmax) objective is
|
|
444
|
+
"Only multiclass (softmax) objective is supported for multiclass classification"
|
|
443
445
|
)
|
|
444
446
|
elif "binary" in objective_fun: # nClasses == 1
|
|
445
447
|
n_classes = 2
|
|
@@ -473,6 +475,13 @@ def get_gbt_model_from_xgboost(booster: Any, xgb_config=None) -> Any:
|
|
|
473
475
|
if xgb_config is None:
|
|
474
476
|
xgb_config = get_xgboost_params(booster)
|
|
475
477
|
|
|
478
|
+
if xgb_config["learner"]["learner_train_param"]["booster"] != "gbtree":
|
|
479
|
+
raise TypeError("Only 'gbtree' booster type is supported.")
|
|
480
|
+
|
|
481
|
+
n_targets = xgb_config["learner"]["learner_model_param"].get("num_target")
|
|
482
|
+
if n_targets is not None and int(n_targets) > 1:
|
|
483
|
+
raise TypeError("Multi-target boosters are not supported.")
|
|
484
|
+
|
|
476
485
|
n_features = int(xgb_config["learner"]["learner_model_param"]["num_feature"])
|
|
477
486
|
n_classes = int(xgb_config["learner"]["learner_model_param"]["num_class"])
|
|
478
487
|
base_score = float(xgb_config["learner"]["learner_model_param"]["base_score"])
|
|
@@ -504,11 +513,6 @@ def get_gbt_model_from_xgboost(booster: Any, xgb_config=None) -> Any:
|
|
|
504
513
|
if objective_fun == "binary:logitraw":
|
|
505
514
|
# daal4py always applies a sigmoid for pred_proba, wheres XGBoost
|
|
506
515
|
# returns raw predictions with logitraw
|
|
507
|
-
warn(
|
|
508
|
-
"objective='binary:logitraw' selected\n"
|
|
509
|
-
"XGBoost returns raw class scores when calling pred_proba()\n"
|
|
510
|
-
"whilst scikit-learn-intelex always uses binary:logistic\n"
|
|
511
|
-
)
|
|
512
516
|
base_score = float(1 / (1 + np.exp(-base_score)))
|
|
513
517
|
else:
|
|
514
518
|
is_regression = True
|
|
@@ -567,6 +571,22 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
|
|
|
567
571
|
"Categorical features are not supported in daal4py Gradient Boosting Trees"
|
|
568
572
|
)
|
|
569
573
|
|
|
574
|
+
objective = booster.get_params().get("objective", "")
|
|
575
|
+
if (
|
|
576
|
+
"Rank" in objective
|
|
577
|
+
or "Query" in objective
|
|
578
|
+
or "Pair" in objective
|
|
579
|
+
or objective in ["LambdaMart", "StochasticFilter", "GroupQuantile"]
|
|
580
|
+
):
|
|
581
|
+
raise TypeError("Ranking objectives are not supported.")
|
|
582
|
+
if "Multi" in objective and objective != "MultiClass":
|
|
583
|
+
if model.is_classification:
|
|
584
|
+
raise TypeError(
|
|
585
|
+
"Only 'MultiClass' loss is supported for multi-class classification."
|
|
586
|
+
)
|
|
587
|
+
else:
|
|
588
|
+
raise TypeError("Multi-output models are not supported.")
|
|
589
|
+
|
|
570
590
|
if model.is_classification:
|
|
571
591
|
mb = gbt_clf_model_builder(
|
|
572
592
|
n_features=model.n_features,
|
|
@@ -587,21 +607,37 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
|
|
|
587
607
|
{"feature_index": feature["feature_index"], "value": feature_border}
|
|
588
608
|
)
|
|
589
609
|
|
|
610
|
+
# Note: catboost models might have a 'bias' (intercept) which gets added
|
|
611
|
+
# to all predictions. In the case of single-output models, this is a scalar,
|
|
612
|
+
# but in the case of multi-output models such as multinomial logistic, it
|
|
613
|
+
# is a vector. Since daal4py doesn't support vector-valued intercepts, this
|
|
614
|
+
# adds the intercept to every terminal node instead, by dividing it equally
|
|
615
|
+
# among all trees. Usually, catboost would anyway set them to zero, but it
|
|
616
|
+
# still allows setting custom intercepts.
|
|
617
|
+
cb_bias = booster.get_scale_and_bias()[1]
|
|
618
|
+
add_intercept_to_each_node = isinstance(cb_bias, list)
|
|
619
|
+
if add_intercept_to_each_node:
|
|
620
|
+
cb_bias = np.array(cb_bias) / model.n_iterations
|
|
621
|
+
if not model.is_classification:
|
|
622
|
+
raise TypeError("Multi-output regression models are not supported.")
|
|
623
|
+
|
|
624
|
+
def add_vector_bias(values: list[float]) -> list[float]:
|
|
625
|
+
return list(np.array(values) + cb_bias)
|
|
626
|
+
|
|
590
627
|
trees_explicit = []
|
|
591
628
|
tree_symmetric = []
|
|
592
629
|
|
|
630
|
+
all_trees_are_empty = True
|
|
631
|
+
|
|
593
632
|
if model.is_symmetric_tree:
|
|
594
633
|
for tree in model.oblivious_trees:
|
|
595
|
-
|
|
634
|
+
tree_splits = tree.get("splits", [])
|
|
635
|
+
cur_tree_depth = len(tree_splits) if tree_splits is not None else 0
|
|
596
636
|
tree_symmetric.append((tree, cur_tree_depth))
|
|
597
637
|
else:
|
|
598
638
|
for tree in model.trees:
|
|
599
639
|
n_nodes = 1
|
|
600
|
-
|
|
601
|
-
# handle leaf node
|
|
602
|
-
values = __get_value_as_list(tree)
|
|
603
|
-
root_node = CatBoostNode(value=[value * model.scale for value in values])
|
|
604
|
-
continue
|
|
640
|
+
|
|
605
641
|
# Check if node is a leaf (in case of stump)
|
|
606
642
|
if "split" in tree:
|
|
607
643
|
# Get number of trees and splits info via BFS
|
|
@@ -622,12 +658,15 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
|
|
|
622
658
|
nodes_queue.append((cur_node_data["left"], left_node))
|
|
623
659
|
nodes_queue.append((cur_node_data["right"], right_node))
|
|
624
660
|
n_nodes += 2
|
|
661
|
+
all_trees_are_empty = False
|
|
625
662
|
else:
|
|
626
663
|
root_node = CatBoostNode()
|
|
627
664
|
if model.is_classification and model.n_classes > 2:
|
|
628
665
|
root_node.value = [value * model.scale for value in tree["value"]]
|
|
666
|
+
if add_intercept_to_each_node:
|
|
667
|
+
root_node.value = add_vector_bias(root_node.value)
|
|
629
668
|
else:
|
|
630
|
-
root_node.value = [tree["value"] * model.scale
|
|
669
|
+
root_node.value = [tree["value"] * model.scale]
|
|
631
670
|
trees_explicit.append((root_node, n_nodes))
|
|
632
671
|
|
|
633
672
|
tree_id = []
|
|
@@ -646,9 +685,15 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
|
|
|
646
685
|
for i in range(model.n_iterations):
|
|
647
686
|
for _ in range(n_tree_each_iter):
|
|
648
687
|
if model.is_symmetric_tree:
|
|
649
|
-
|
|
688
|
+
if not len(tree_symmetric):
|
|
689
|
+
n_nodes = 1
|
|
690
|
+
else:
|
|
691
|
+
n_nodes = 2 ** (tree_symmetric[i][1] + 1) - 1
|
|
650
692
|
else:
|
|
651
|
-
|
|
693
|
+
if not len(trees_explicit):
|
|
694
|
+
n_nodes = 1
|
|
695
|
+
else:
|
|
696
|
+
n_nodes = trees_explicit[i][1]
|
|
652
697
|
|
|
653
698
|
if model.is_classification and model.n_classes > 2:
|
|
654
699
|
tree_id.append(mb.create_tree(n_nodes, class_label))
|
|
@@ -663,9 +708,9 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
|
|
|
663
708
|
tree_id.append(mb.create_tree(n_nodes))
|
|
664
709
|
|
|
665
710
|
if model.is_symmetric_tree:
|
|
711
|
+
shap_ready = True # this code branch provides all info for SHAP values
|
|
666
712
|
for class_label in range(n_tree_each_iter):
|
|
667
713
|
for i in range(model.n_iterations):
|
|
668
|
-
shap_ready = True # this code branch provides all info for SHAP values
|
|
669
714
|
cur_tree_info = tree_symmetric[i][0]
|
|
670
715
|
cur_tree_id = tree_id[i * n_tree_each_iter + class_label]
|
|
671
716
|
cur_tree_leaf_val = cur_tree_info["leaf_values"]
|
|
@@ -674,7 +719,8 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
|
|
|
674
719
|
if cur_tree_depth == 0:
|
|
675
720
|
mb.add_leaf(
|
|
676
721
|
tree_id=cur_tree_id,
|
|
677
|
-
response=cur_tree_leaf_val[
|
|
722
|
+
response=cur_tree_leaf_val[class_label] * model.scale
|
|
723
|
+
+ (cb_bias[class_label] if add_intercept_to_each_node else 0),
|
|
678
724
|
cover=cur_tree_leaf_weights[0],
|
|
679
725
|
)
|
|
680
726
|
else:
|
|
@@ -686,6 +732,7 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
|
|
|
686
732
|
cur_tree_leaf_weights
|
|
687
733
|
)
|
|
688
734
|
root_weight = cur_tree_weights_per_level[0][0]
|
|
735
|
+
|
|
689
736
|
root_id = mb.add_split(
|
|
690
737
|
tree_id=cur_tree_id,
|
|
691
738
|
feature_index=cur_level_split["feature_index"],
|
|
@@ -704,6 +751,9 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
|
|
|
704
751
|
cur_level_split = splits[
|
|
705
752
|
cur_tree_info["splits"][cur_level]["split_index"]
|
|
706
753
|
]
|
|
754
|
+
cover_nodes = next_level_weights[cur_level_node_index]
|
|
755
|
+
if cover_nodes == 0:
|
|
756
|
+
shap_ready = False
|
|
707
757
|
cur_left_node = mb.add_split(
|
|
708
758
|
tree_id=cur_tree_id,
|
|
709
759
|
parent_id=cur_parent,
|
|
@@ -711,7 +761,7 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
|
|
|
711
761
|
feature_index=cur_level_split["feature_index"],
|
|
712
762
|
feature_value=cur_level_split["value"],
|
|
713
763
|
default_left=model.default_left,
|
|
714
|
-
cover=
|
|
764
|
+
cover=cover_nodes,
|
|
715
765
|
)
|
|
716
766
|
# cur_level_node_index += 1
|
|
717
767
|
cur_right_node = mb.add_split(
|
|
@@ -721,7 +771,7 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
|
|
|
721
771
|
feature_index=cur_level_split["feature_index"],
|
|
722
772
|
feature_value=cur_level_split["value"],
|
|
723
773
|
default_left=model.default_left,
|
|
724
|
-
cover=
|
|
774
|
+
cover=cover_nodes,
|
|
725
775
|
)
|
|
726
776
|
# cur_level_node_index += 1
|
|
727
777
|
cur_level_nodes.append(cur_left_node)
|
|
@@ -734,8 +784,7 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
|
|
|
734
784
|
mb.add_leaf(
|
|
735
785
|
tree_id=cur_tree_id,
|
|
736
786
|
response=cur_tree_leaf_val[2 * last_level_node_num]
|
|
737
|
-
* model.scale
|
|
738
|
-
+ model.bias,
|
|
787
|
+
* model.scale,
|
|
739
788
|
parent_id=prev_level_nodes[last_level_node_num],
|
|
740
789
|
position=0,
|
|
741
790
|
cover=cur_tree_leaf_weights[2 * last_level_node_num],
|
|
@@ -743,8 +792,7 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
|
|
|
743
792
|
mb.add_leaf(
|
|
744
793
|
tree_id=cur_tree_id,
|
|
745
794
|
response=cur_tree_leaf_val[2 * last_level_node_num + 1]
|
|
746
|
-
* model.scale
|
|
747
|
-
+ model.bias,
|
|
795
|
+
* model.scale,
|
|
748
796
|
parent_id=prev_level_nodes[last_level_node_num],
|
|
749
797
|
position=1,
|
|
750
798
|
cover=cur_tree_leaf_weights[2 * last_level_node_num + 1],
|
|
@@ -761,7 +809,11 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
|
|
|
761
809
|
mb.add_leaf(
|
|
762
810
|
tree_id=cur_tree_id,
|
|
763
811
|
response=cur_tree_leaf_val[left_index] * model.scale
|
|
764
|
-
+
|
|
812
|
+
+ (
|
|
813
|
+
cb_bias[class_label]
|
|
814
|
+
if add_intercept_to_each_node
|
|
815
|
+
else 0
|
|
816
|
+
),
|
|
765
817
|
parent_id=prev_level_nodes[last_level_node_num],
|
|
766
818
|
position=0,
|
|
767
819
|
cover=0.0,
|
|
@@ -769,13 +821,18 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
|
|
|
769
821
|
mb.add_leaf(
|
|
770
822
|
tree_id=cur_tree_id,
|
|
771
823
|
response=cur_tree_leaf_val[right_index] * model.scale
|
|
772
|
-
+
|
|
824
|
+
+ (
|
|
825
|
+
cb_bias[class_label]
|
|
826
|
+
if add_intercept_to_each_node
|
|
827
|
+
else 0
|
|
828
|
+
),
|
|
773
829
|
parent_id=prev_level_nodes[last_level_node_num],
|
|
774
830
|
position=1,
|
|
775
831
|
cover=0.0,
|
|
776
832
|
)
|
|
777
833
|
else:
|
|
778
834
|
shap_ready = False
|
|
835
|
+
scale = booster.get_scale_and_bias()[0]
|
|
779
836
|
for class_label in range(n_tree_each_iter):
|
|
780
837
|
for i in range(model.n_iterations):
|
|
781
838
|
root_node = trees_explicit[i][0]
|
|
@@ -809,7 +866,12 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
|
|
|
809
866
|
else:
|
|
810
867
|
mb.add_leaf(
|
|
811
868
|
tree_id=cur_tree_id,
|
|
812
|
-
response=left_node.value[class_label]
|
|
869
|
+
response=scale * left_node.value[class_label]
|
|
870
|
+
+ (
|
|
871
|
+
cb_bias[class_label]
|
|
872
|
+
if add_intercept_to_each_node
|
|
873
|
+
else 0
|
|
874
|
+
),
|
|
813
875
|
parent_id=cur_node_id,
|
|
814
876
|
position=0,
|
|
815
877
|
cover=0.0,
|
|
@@ -830,7 +892,12 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
|
|
|
830
892
|
else:
|
|
831
893
|
mb.add_leaf(
|
|
832
894
|
tree_id=cur_tree_id,
|
|
833
|
-
response=cur_node.right.value[class_label]
|
|
895
|
+
response=scale * cur_node.right.value[class_label]
|
|
896
|
+
+ (
|
|
897
|
+
cb_bias[class_label]
|
|
898
|
+
if add_intercept_to_each_node
|
|
899
|
+
else 0
|
|
900
|
+
),
|
|
834
901
|
parent_id=cur_node_id,
|
|
835
902
|
position=1,
|
|
836
903
|
cover=0.0,
|
|
@@ -838,18 +905,18 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
|
|
|
838
905
|
|
|
839
906
|
else:
|
|
840
907
|
# Tree has only one node
|
|
908
|
+
# Note: the root node already has scale and bias added to it,
|
|
909
|
+
# so no need to add them again here like it is done for the leafs.
|
|
841
910
|
mb.add_leaf(
|
|
842
911
|
tree_id=cur_tree_id,
|
|
843
912
|
response=root_node.value[class_label],
|
|
844
913
|
cover=0.0,
|
|
845
914
|
)
|
|
846
915
|
|
|
847
|
-
if not
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
)
|
|
855
|
-
return mb.model(base_score=0.0)
|
|
916
|
+
if all_trees_are_empty and not model.is_symmetric_tree:
|
|
917
|
+
shap_ready = True
|
|
918
|
+
|
|
919
|
+
intercept = 0.0
|
|
920
|
+
if not add_intercept_to_each_node:
|
|
921
|
+
intercept = booster.get_scale_and_bias()[1]
|
|
922
|
+
return mb.model(base_score=intercept), shap_ready
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
# daal4py Model builders API
|
|
18
18
|
|
|
19
|
+
import warnings
|
|
19
20
|
from typing import Literal, Optional
|
|
20
21
|
|
|
21
22
|
import numpy as np
|
|
@@ -91,21 +92,25 @@ class GBTDAALBaseModel:
|
|
|
91
92
|
def _get_params_from_catboost(self, params):
|
|
92
93
|
if "class_params" in params["model_info"]:
|
|
93
94
|
self.n_classes_ = len(params["model_info"]["class_params"]["class_to_label"])
|
|
95
|
+
else:
|
|
96
|
+
self.n_classes_ = 1
|
|
94
97
|
self.n_features_in_ = len(params["features_info"]["float_features"])
|
|
95
98
|
|
|
96
99
|
def _convert_model_from_lightgbm(self, booster):
|
|
97
100
|
lgbm_params = get_lightgbm_params(booster)
|
|
98
101
|
self.daal_model_ = get_gbt_model_from_lightgbm(booster, lgbm_params)
|
|
99
102
|
self._get_params_from_lightgbm(lgbm_params)
|
|
103
|
+
self.supports_shap_ = self.n_classes_ < 3
|
|
100
104
|
|
|
101
105
|
def _convert_model_from_xgboost(self, booster):
|
|
102
106
|
xgb_params = get_xgboost_params(booster)
|
|
103
107
|
self.daal_model_ = get_gbt_model_from_xgboost(booster, xgb_params)
|
|
104
108
|
self._get_params_from_xgboost(xgb_params)
|
|
109
|
+
self.supports_shap_ = self.n_classes_ < 3
|
|
105
110
|
|
|
106
111
|
def _convert_model_from_catboost(self, booster):
|
|
107
112
|
catboost_params = get_catboost_params(booster)
|
|
108
|
-
self.daal_model_ = get_gbt_model_from_catboost(booster)
|
|
113
|
+
self.daal_model_, self.supports_shap_ = get_gbt_model_from_catboost(booster)
|
|
109
114
|
self._get_params_from_catboost(catboost_params)
|
|
110
115
|
|
|
111
116
|
def _convert_model(self, model):
|
|
@@ -249,21 +254,17 @@ class GBTDAALBaseModel:
|
|
|
249
254
|
X, fptype, pred_contribs, pred_interactions
|
|
250
255
|
)
|
|
251
256
|
except TypeError as e:
|
|
252
|
-
if "unexpected keyword argument 'resultsToCompute'" in str(e)
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
257
|
+
if "unexpected keyword argument 'resultsToCompute'" in str(e) and (
|
|
258
|
+
pred_contribs or pred_interactions
|
|
259
|
+
):
|
|
260
|
+
# SHAP values requested, but not supported by this version
|
|
261
|
+
raise TypeError(
|
|
262
|
+
f"{'pred_contribs' if pred_contribs else 'pred_interactions'} not supported by this version of daalp4y"
|
|
263
|
+
) from e
|
|
258
264
|
else:
|
|
259
265
|
# unknown type error
|
|
260
266
|
raise
|
|
261
267
|
|
|
262
|
-
# fallback to calculation without `resultsToCompute`
|
|
263
|
-
predict_algo = d4p.gbt_regression_prediction(fptype=fptype)
|
|
264
|
-
predict_result = predict_algo.compute(X, self.daal_model_)
|
|
265
|
-
return predict_result.prediction.ravel()
|
|
266
|
-
|
|
267
268
|
def _predict_regression_with_results_to_compute(
|
|
268
269
|
self, X, fptype, pred_contribs=False, pred_interactions=False
|
|
269
270
|
):
|
|
@@ -316,6 +317,15 @@ class GBTDAALModel(GBTDAALBaseModel):
|
|
|
316
317
|
model : booster object from another library
|
|
317
318
|
The fitted GBT model from which this object will be created. See rest of the documentation
|
|
318
319
|
for supported input types.
|
|
320
|
+
|
|
321
|
+
Attributes
|
|
322
|
+
----------
|
|
323
|
+
is_classifier_ : bool
|
|
324
|
+
Whether this is a classification model.
|
|
325
|
+
is_regressor_ : bool
|
|
326
|
+
Whether this is a regression model.
|
|
327
|
+
supports_shap_ : bool
|
|
328
|
+
Whether the model supports SHAP calculations.
|
|
319
329
|
"""
|
|
320
330
|
|
|
321
331
|
def __init__(self, model):
|
|
@@ -345,14 +355,19 @@ class GBTDAALModel(GBTDAALBaseModel):
|
|
|
345
355
|
|
|
346
356
|
:rtype: np.ndarray
|
|
347
357
|
"""
|
|
358
|
+
if pred_contribs or pred_interactions:
|
|
359
|
+
if not self.supports_shap_:
|
|
360
|
+
raise TypeError("SHAP calculations are not available for this model.")
|
|
361
|
+
if self.model_type == "catboost":
|
|
362
|
+
warnings.warn(
|
|
363
|
+
"SHAP values from models converted from CatBoost do not match "
|
|
364
|
+
"against those of the original library. See "
|
|
365
|
+
"https://github.com/catboost/catboost/issues/2556 for more details."
|
|
366
|
+
)
|
|
348
367
|
fptype = getFPType(X)
|
|
349
368
|
if self._is_regression:
|
|
350
369
|
return self._predict_regression(X, fptype, pred_contribs, pred_interactions)
|
|
351
370
|
else:
|
|
352
|
-
if (pred_contribs or pred_interactions) and self.model_type != "xgboost":
|
|
353
|
-
raise NotImplementedError(
|
|
354
|
-
f"{'pred_contribs' if pred_contribs else 'pred_interactions'} is not implemented for classification models"
|
|
355
|
-
)
|
|
356
371
|
return self._predict_classification(
|
|
357
372
|
X, fptype, "computeClassLabels", pred_contribs, pred_interactions
|
|
358
373
|
)
|
|
Binary file
|
|
@@ -188,7 +188,7 @@ def enable(name=None, verbose=True, deprecation=True, get_map=_get_map_of_algori
|
|
|
188
188
|
if verbose and deprecation and sys.stderr is not None:
|
|
189
189
|
sys.stderr.write(
|
|
190
190
|
"oneAPI Data Analytics Library solvers for sklearn enabled: "
|
|
191
|
-
"https://
|
|
191
|
+
"https://uxlfoundation.github.io/scikit-learn-intelex/\n"
|
|
192
192
|
)
|
|
193
193
|
|
|
194
194
|
|
onedal/_device_offload.py
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
# ==============================================================================
|
|
16
16
|
|
|
17
17
|
import inspect
|
|
18
|
+
import logging
|
|
18
19
|
from collections.abc import Iterable
|
|
19
20
|
from functools import wraps
|
|
20
21
|
|
|
@@ -34,6 +35,8 @@ else:
|
|
|
34
35
|
|
|
35
36
|
SyclQueue = getattr(_dpc_backend, "SyclQueue", None)
|
|
36
37
|
|
|
38
|
+
logger = logging.getLogger("sklearnex")
|
|
39
|
+
|
|
37
40
|
|
|
38
41
|
def supports_queue(func):
|
|
39
42
|
"""
|
|
@@ -158,12 +161,17 @@ def support_input_format(func):
|
|
|
158
161
|
else:
|
|
159
162
|
self = None
|
|
160
163
|
|
|
161
|
-
#
|
|
164
|
+
# KNeighbors*.fit can not be used with raw inputs, ignore `use_raw_input=True`
|
|
162
165
|
override_raw_input = (
|
|
163
166
|
self
|
|
164
167
|
and self.__class__.__name__ in ("KNeighborsClassifier", "KNeighborsRegressor")
|
|
165
168
|
and func.__name__ == "fit"
|
|
166
169
|
)
|
|
170
|
+
if override_raw_input:
|
|
171
|
+
pretty_name = f"{self.__class__.__name__}.{func.__name__}"
|
|
172
|
+
logger.warning(
|
|
173
|
+
f"Using raw inputs is not supported for {pretty_name}. Ignoring `use_raw_input=True` setting."
|
|
174
|
+
)
|
|
167
175
|
if _get_config()["use_raw_input"] is True and not override_raw_input:
|
|
168
176
|
if "queue" not in kwargs:
|
|
169
177
|
usm_iface = getattr(args[0], "__sycl_usm_array_interface__", None)
|
|
@@ -190,9 +198,9 @@ def support_input_format(func):
|
|
|
190
198
|
result = _convert_to_dpnp(result)
|
|
191
199
|
return result
|
|
192
200
|
|
|
193
|
-
if
|
|
201
|
+
if get_config().get("transform_output") in ("default", None):
|
|
194
202
|
input_array_api = getattr(data[0], "__array_namespace__", lambda: None)()
|
|
195
|
-
if input_array_api:
|
|
203
|
+
if input_array_api and not _is_numpy_namespace(input_array_api):
|
|
196
204
|
input_array_api_device = data[0].device
|
|
197
205
|
result = _asarray(result, input_array_api, device=input_array_api_device)
|
|
198
206
|
return result
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -63,6 +63,8 @@ def get_memory_usm():
|
|
|
63
63
|
|
|
64
64
|
|
|
65
65
|
def is_dpctl_device_available(targets):
|
|
66
|
+
if not isinstance(targets, (list, tuple)):
|
|
67
|
+
raise TypeError("`targets` should be a list or tuple of strings.")
|
|
66
68
|
if dpctl_available:
|
|
67
69
|
for device in targets:
|
|
68
70
|
if device == "cpu" and not dpctl.has_cpu_devices():
|
onedal/utils/_array_api.py
CHANGED
|
@@ -40,9 +40,19 @@ if dpnp_available:
|
|
|
40
40
|
return array
|
|
41
41
|
|
|
42
42
|
|
|
43
|
+
def _supports_buffer_protocol(obj):
|
|
44
|
+
# the array_api standard mandates conversion with the buffer protocol,
|
|
45
|
+
# which can only be checked via a try-catch in native python
|
|
46
|
+
try:
|
|
47
|
+
memoryview(obj)
|
|
48
|
+
except TypeError:
|
|
49
|
+
return False
|
|
50
|
+
return True
|
|
51
|
+
|
|
52
|
+
|
|
43
53
|
def _asarray(data, xp, *args, **kwargs):
|
|
44
54
|
"""Converted input object to array format of xp namespace provided."""
|
|
45
|
-
if hasattr(data, "__array_namespace__"):
|
|
55
|
+
if hasattr(data, "__array_namespace__") or _supports_buffer_protocol(data):
|
|
46
56
|
return xp.asarray(data, *args, **kwargs)
|
|
47
57
|
elif isinstance(data, Iterable):
|
|
48
58
|
if isinstance(data, tuple):
|
|
@@ -26,6 +26,11 @@ else:
|
|
|
26
26
|
|
|
27
27
|
SyclQueue = getattr(_dpc_backend, "SyclQueue", None)
|
|
28
28
|
|
|
29
|
+
# This special object signifies that the queue system should be
|
|
30
|
+
# disabled. It will force computation to host. This occurs when the
|
|
31
|
+
# global queue is set to this value (and therefore should not be
|
|
32
|
+
# modified).
|
|
33
|
+
__fallback_queue = object()
|
|
29
34
|
# single instance of global queue
|
|
30
35
|
__global_queue = None
|
|
31
36
|
|
|
@@ -46,8 +51,11 @@ def __create_sycl_queue(target):
|
|
|
46
51
|
def get_global_queue():
|
|
47
52
|
"""Get the global queue. Retrieve it from the config if not set."""
|
|
48
53
|
if (queue := __global_queue) is not None:
|
|
49
|
-
if SyclQueue
|
|
50
|
-
|
|
54
|
+
if SyclQueue:
|
|
55
|
+
if queue is __fallback_queue:
|
|
56
|
+
return None
|
|
57
|
+
elif not isinstance(queue, SyclQueue):
|
|
58
|
+
raise ValueError("Global queue is not a SyclQueue object.")
|
|
51
59
|
return queue
|
|
52
60
|
|
|
53
61
|
target = _get_config()["target_offload"]
|
|
@@ -73,6 +81,12 @@ def update_global_queue(queue):
|
|
|
73
81
|
__global_queue = queue
|
|
74
82
|
|
|
75
83
|
|
|
84
|
+
def fallback_to_host():
|
|
85
|
+
"""Enforce a host queue."""
|
|
86
|
+
global __global_queue
|
|
87
|
+
__global_queue = __fallback_queue
|
|
88
|
+
|
|
89
|
+
|
|
76
90
|
def from_data(*data):
|
|
77
91
|
"""Extract the queue from provided data. This updates the global queue as well."""
|
|
78
92
|
for item in data:
|
{scikit_learn_intelex-2025.5.0.dist-info → scikit_learn_intelex-2025.6.0.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: scikit-learn-intelex
|
|
3
|
-
Version: 2025.
|
|
3
|
+
Version: 2025.6.0
|
|
4
4
|
Summary: Intel(R) Extension for Scikit-learn is a seamless way to speed up your Scikit-learn application.
|
|
5
5
|
Home-page: https://github.com/intel/scikit-learn-intelex
|
|
6
6
|
Author: Intel Corporation
|
|
@@ -17,7 +17,6 @@ Classifier: Environment :: Console
|
|
|
17
17
|
Classifier: Intended Audience :: Developers
|
|
18
18
|
Classifier: Intended Audience :: Other Audience
|
|
19
19
|
Classifier: Intended Audience :: Science/Research
|
|
20
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
21
20
|
Classifier: Operating System :: Microsoft :: Windows
|
|
22
21
|
Classifier: Operating System :: POSIX :: Linux
|
|
23
22
|
Classifier: Programming Language :: Python :: 3.9
|
|
@@ -31,7 +30,7 @@ Classifier: Topic :: Software Development
|
|
|
31
30
|
Requires-Python: >=3.7
|
|
32
31
|
Description-Content-Type: text/markdown
|
|
33
32
|
License-File: LICENSE.txt
|
|
34
|
-
Requires-Dist: daal (==2025.
|
|
33
|
+
Requires-Dist: daal (==2025.6.0)
|
|
35
34
|
Requires-Dist: numpy (>=1.19)
|
|
36
35
|
Requires-Dist: scikit-learn (>=0.22)
|
|
37
36
|
|