scikit-learn-intelex 2025.5.0__py311-none-manylinux_2_28_x86_64.whl → 2025.6.1__py311-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (52)
  1. daal4py/_daal4py.cpython-311-x86_64-linux-gnu.so +0 -0
  2. daal4py/mb/gbt_convertors.py +119 -52
  3. daal4py/mb/tree_based_builders.py +31 -16
  4. daal4py/mpi_transceiver.cpython-311-x86_64-linux-gnu.so +0 -0
  5. daal4py/sklearn/ensemble/_forest.py +2 -2
  6. daal4py/sklearn/manifold/_t_sne.py +7 -1
  7. daal4py/sklearn/metrics/_pairwise.py +1 -1
  8. daal4py/sklearn/monkeypatch/dispatcher.py +1 -1
  9. onedal/_device_offload.py +11 -3
  10. onedal/_onedal_py_dpc.cpython-311-x86_64-linux-gnu.so +0 -0
  11. onedal/_onedal_py_host.cpython-311-x86_64-linux-gnu.so +0 -0
  12. onedal/_onedal_py_spmd_dpc.cpython-311-x86_64-linux-gnu.so +0 -0
  13. onedal/ensemble/forest.py +3 -1
  14. onedal/tests/utils/_device_selection.py +2 -0
  15. onedal/utils/_array_api.py +17 -2
  16. onedal/utils/_sycl_queue_manager.py +16 -2
  17. {scikit_learn_intelex-2025.5.0.dist-info → scikit_learn_intelex-2025.6.1.dist-info}/METADATA +2 -3
  18. {scikit_learn_intelex-2025.5.0.dist-info → scikit_learn_intelex-2025.6.1.dist-info}/RECORD +51 -51
  19. sklearnex/_device_offload.py +140 -89
  20. sklearnex/_utils.py +12 -45
  21. sklearnex/base.py +109 -0
  22. sklearnex/basic_statistics/basic_statistics.py +3 -2
  23. sklearnex/basic_statistics/incremental_basic_statistics.py +3 -6
  24. sklearnex/cluster/dbscan.py +4 -3
  25. sklearnex/cluster/k_means.py +18 -13
  26. sklearnex/covariance/incremental_covariance.py +10 -5
  27. sklearnex/decomposition/pca.py +2 -3
  28. sklearnex/ensemble/_forest.py +5 -4
  29. sklearnex/linear_model/coordinate_descent.py +12 -0
  30. sklearnex/linear_model/incremental_linear.py +2 -2
  31. sklearnex/linear_model/incremental_ridge.py +3 -8
  32. sklearnex/linear_model/linear.py +3 -7
  33. sklearnex/linear_model/logistic_regression.py +11 -13
  34. sklearnex/linear_model/ridge.py +3 -2
  35. sklearnex/manifold/t_sne.py +6 -3
  36. sklearnex/neighbors/common.py +3 -2
  37. sklearnex/preview/covariance/covariance.py +3 -6
  38. sklearnex/preview/decomposition/incremental_pca.py +3 -6
  39. sklearnex/svm/_common.py +3 -3
  40. sklearnex/svm/nusvc.py +1 -1
  41. sklearnex/svm/nusvr.py +1 -1
  42. sklearnex/svm/svc.py +1 -1
  43. sklearnex/svm/svr.py +1 -1
  44. sklearnex/tests/test_common.py +25 -0
  45. sklearnex/tests/test_config.py +62 -0
  46. sklearnex/tests/test_memory_usage.py +3 -2
  47. sklearnex/tests/test_patching.py +89 -60
  48. sklearnex/tests/test_run_to_run_stability.py +7 -0
  49. daal4py/doc/third-party-programs.txt +0 -424
  50. {scikit_learn_intelex-2025.5.0.dist-info → scikit_learn_intelex-2025.6.1.dist-info}/LICENSE.txt +0 -0
  51. {scikit_learn_intelex-2025.5.0.dist-info → scikit_learn_intelex-2025.6.1.dist-info}/WHEEL +0 -0
  52. {scikit_learn_intelex-2025.5.0.dist-info → scikit_learn_intelex-2025.6.1.dist-info}/top_level.txt +0 -0
@@ -19,7 +19,6 @@ from collections import deque
19
19
  from copy import deepcopy
20
20
  from tempfile import NamedTemporaryFile
21
21
  from typing import Any, Deque, Dict, List, Optional, Tuple
22
- from warnings import warn
23
22
 
24
23
  import numpy as np
25
24
 
@@ -94,18 +93,9 @@ class CatBoostModelData:
94
93
  else:
95
94
  return len(self.trees)
96
95
 
97
- @property
98
- def bias(self):
99
- if self.is_classification:
100
- return 0
101
- return self.__data["scale_and_bias"][1][0] / self.n_iterations
102
-
103
96
  @property
104
97
  def scale(self):
105
- if self.is_classification:
106
- return 1
107
- else:
108
- return self.__data["scale_and_bias"][0]
98
+ return self.__data["scale_and_bias"][0]
109
99
 
110
100
  @property
111
101
  def default_left(self):
@@ -223,7 +213,7 @@ class Node:
223
213
  return self.__feature
224
214
  if isinstance(self.__feature, str) and self.__feature.isnumeric():
225
215
  return int(self.__feature)
226
- raise ValueError(
216
+ raise AttributeError(
227
217
  f"Feature names must be integers (got ({type(self.__feature)}){self.__feature})"
228
218
  )
229
219
 
@@ -242,15 +232,15 @@ class TreeView:
242
232
  @property
243
233
  def value(self) -> float:
244
234
  if not self.is_leaf:
245
- raise ValueError("Tree is not a leaf-only tree")
235
+ raise AttributeError("Tree is not a leaf-only tree")
246
236
  if self.root_node.value is None:
247
- raise ValueError("Tree is leaf-only but leaf node has no value")
237
+ raise AttributeError("Tree is leaf-only but leaf node has no value")
248
238
  return self.root_node.value
249
239
 
250
240
  @property
251
241
  def cover(self) -> float:
252
242
  if not self.is_leaf:
253
- raise ValueError("Tree is not a leaf-only tree")
243
+ raise AttributeError("Tree is not a leaf-only tree")
254
244
  return self.root_node.cover
255
245
 
256
246
  @property
@@ -427,6 +417,18 @@ def get_gbt_model_from_tree_list(
427
417
 
428
418
 
429
419
  def get_gbt_model_from_lightgbm(model: Any, booster=None) -> Any:
420
+ model_str = model.model_to_string()
421
+ if "is_linear=1" in model_str:
422
+ raise TypeError("Linear trees are not supported.")
423
+ if "[boosting: dart]" in model_str:
424
+ raise TypeError("'Dart' booster is not supported.")
425
+ if "[boosting: rf]" in model_str:
426
+ raise TypeError("Random forest boosters are not supported.")
427
+ if ("[objective: lambdarank]" in model_str) or (
428
+ "[objective: rank_xendcg]" in model_str
429
+ ):
430
+ raise TypeError("Ranking objectives are not supported.")
431
+
430
432
  if booster is None:
431
433
  booster = model.dump_model()
432
434
 
@@ -437,9 +439,9 @@ def get_gbt_model_from_lightgbm(model: Any, booster=None) -> Any:
437
439
  is_regression = False
438
440
  objective_fun = booster["objective"]
439
441
  if n_classes > 2:
440
- if "multiclass" not in objective_fun:
442
+ if ("ova" in objective_fun) or ("ovr" in objective_fun):
441
443
  raise TypeError(
442
- "multiclass (softmax) objective is only supported for multiclass classification"
444
+ "Only multiclass (softmax) objective is supported for multiclass classification"
443
445
  )
444
446
  elif "binary" in objective_fun: # nClasses == 1
445
447
  n_classes = 2
@@ -473,6 +475,13 @@ def get_gbt_model_from_xgboost(booster: Any, xgb_config=None) -> Any:
473
475
  if xgb_config is None:
474
476
  xgb_config = get_xgboost_params(booster)
475
477
 
478
+ if xgb_config["learner"]["learner_train_param"]["booster"] != "gbtree":
479
+ raise TypeError("Only 'gbtree' booster type is supported.")
480
+
481
+ n_targets = xgb_config["learner"]["learner_model_param"].get("num_target")
482
+ if n_targets is not None and int(n_targets) > 1:
483
+ raise TypeError("Multi-target boosters are not supported.")
484
+
476
485
  n_features = int(xgb_config["learner"]["learner_model_param"]["num_feature"])
477
486
  n_classes = int(xgb_config["learner"]["learner_model_param"]["num_class"])
478
487
  base_score = float(xgb_config["learner"]["learner_model_param"]["base_score"])
@@ -504,11 +513,6 @@ def get_gbt_model_from_xgboost(booster: Any, xgb_config=None) -> Any:
504
513
  if objective_fun == "binary:logitraw":
505
514
  # daal4py always applies a sigmoid for pred_proba, wheres XGBoost
506
515
  # returns raw predictions with logitraw
507
- warn(
508
- "objective='binary:logitraw' selected\n"
509
- "XGBoost returns raw class scores when calling pred_proba()\n"
510
- "whilst scikit-learn-intelex always uses binary:logistic\n"
511
- )
512
516
  base_score = float(1 / (1 + np.exp(-base_score)))
513
517
  else:
514
518
  is_regression = True
@@ -567,6 +571,22 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
567
571
  "Categorical features are not supported in daal4py Gradient Boosting Trees"
568
572
  )
569
573
 
574
+ objective = booster.get_params().get("objective", "")
575
+ if (
576
+ "Rank" in objective
577
+ or "Query" in objective
578
+ or "Pair" in objective
579
+ or objective in ["LambdaMart", "StochasticFilter", "GroupQuantile"]
580
+ ):
581
+ raise TypeError("Ranking objectives are not supported.")
582
+ if "Multi" in objective and objective != "MultiClass":
583
+ if model.is_classification:
584
+ raise TypeError(
585
+ "Only 'MultiClass' loss is supported for multi-class classification."
586
+ )
587
+ else:
588
+ raise TypeError("Multi-output models are not supported.")
589
+
570
590
  if model.is_classification:
571
591
  mb = gbt_clf_model_builder(
572
592
  n_features=model.n_features,
@@ -587,21 +607,37 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
587
607
  {"feature_index": feature["feature_index"], "value": feature_border}
588
608
  )
589
609
 
610
+ # Note: catboost models might have a 'bias' (intercept) which gets added
611
+ # to all predictions. In the case of single-output models, this is a scalar,
612
+ # but in the case of multi-output models such as multinomial logistic, it
613
+ # is a vector. Since daal4py doesn't support vector-valued intercepts, this
614
+ # adds the intercept to every terminal node instead, by dividing it equally
615
+ # among all trees. Usually, catboost would anyway set them to zero, but it
616
+ # still allows setting custom intercepts.
617
+ cb_bias = booster.get_scale_and_bias()[1]
618
+ add_intercept_to_each_node = isinstance(cb_bias, list)
619
+ if add_intercept_to_each_node:
620
+ cb_bias = np.array(cb_bias) / model.n_iterations
621
+ if not model.is_classification:
622
+ raise TypeError("Multi-output regression models are not supported.")
623
+
624
+ def add_vector_bias(values: list[float]) -> list[float]:
625
+ return list(np.array(values) + cb_bias)
626
+
590
627
  trees_explicit = []
591
628
  tree_symmetric = []
592
629
 
630
+ all_trees_are_empty = True
631
+
593
632
  if model.is_symmetric_tree:
594
633
  for tree in model.oblivious_trees:
595
- cur_tree_depth = len(tree.get("splits", []))
634
+ tree_splits = tree.get("splits", [])
635
+ cur_tree_depth = len(tree_splits) if tree_splits is not None else 0
596
636
  tree_symmetric.append((tree, cur_tree_depth))
597
637
  else:
598
638
  for tree in model.trees:
599
639
  n_nodes = 1
600
- if "split" not in tree:
601
- # handle leaf node
602
- values = __get_value_as_list(tree)
603
- root_node = CatBoostNode(value=[value * model.scale for value in values])
604
- continue
640
+
605
641
  # Check if node is a leaf (in case of stump)
606
642
  if "split" in tree:
607
643
  # Get number of trees and splits info via BFS
@@ -622,12 +658,15 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
622
658
  nodes_queue.append((cur_node_data["left"], left_node))
623
659
  nodes_queue.append((cur_node_data["right"], right_node))
624
660
  n_nodes += 2
661
+ all_trees_are_empty = False
625
662
  else:
626
663
  root_node = CatBoostNode()
627
664
  if model.is_classification and model.n_classes > 2:
628
665
  root_node.value = [value * model.scale for value in tree["value"]]
666
+ if add_intercept_to_each_node:
667
+ root_node.value = add_vector_bias(root_node.value)
629
668
  else:
630
- root_node.value = [tree["value"] * model.scale + model.bias]
669
+ root_node.value = [tree["value"] * model.scale]
631
670
  trees_explicit.append((root_node, n_nodes))
632
671
 
633
672
  tree_id = []
@@ -646,9 +685,15 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
646
685
  for i in range(model.n_iterations):
647
686
  for _ in range(n_tree_each_iter):
648
687
  if model.is_symmetric_tree:
649
- n_nodes = 2 ** (tree_symmetric[i][1] + 1) - 1
688
+ if not len(tree_symmetric):
689
+ n_nodes = 1
690
+ else:
691
+ n_nodes = 2 ** (tree_symmetric[i][1] + 1) - 1
650
692
  else:
651
- n_nodes = trees_explicit[i][1]
693
+ if not len(trees_explicit):
694
+ n_nodes = 1
695
+ else:
696
+ n_nodes = trees_explicit[i][1]
652
697
 
653
698
  if model.is_classification and model.n_classes > 2:
654
699
  tree_id.append(mb.create_tree(n_nodes, class_label))
@@ -663,9 +708,9 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
663
708
  tree_id.append(mb.create_tree(n_nodes))
664
709
 
665
710
  if model.is_symmetric_tree:
711
+ shap_ready = True # this code branch provides all info for SHAP values
666
712
  for class_label in range(n_tree_each_iter):
667
713
  for i in range(model.n_iterations):
668
- shap_ready = True # this code branch provides all info for SHAP values
669
714
  cur_tree_info = tree_symmetric[i][0]
670
715
  cur_tree_id = tree_id[i * n_tree_each_iter + class_label]
671
716
  cur_tree_leaf_val = cur_tree_info["leaf_values"]
@@ -674,7 +719,8 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
674
719
  if cur_tree_depth == 0:
675
720
  mb.add_leaf(
676
721
  tree_id=cur_tree_id,
677
- response=cur_tree_leaf_val[0],
722
+ response=cur_tree_leaf_val[class_label] * model.scale
723
+ + (cb_bias[class_label] if add_intercept_to_each_node else 0),
678
724
  cover=cur_tree_leaf_weights[0],
679
725
  )
680
726
  else:
@@ -686,6 +732,7 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
686
732
  cur_tree_leaf_weights
687
733
  )
688
734
  root_weight = cur_tree_weights_per_level[0][0]
735
+
689
736
  root_id = mb.add_split(
690
737
  tree_id=cur_tree_id,
691
738
  feature_index=cur_level_split["feature_index"],
@@ -704,6 +751,9 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
704
751
  cur_level_split = splits[
705
752
  cur_tree_info["splits"][cur_level]["split_index"]
706
753
  ]
754
+ cover_nodes = next_level_weights[cur_level_node_index]
755
+ if cover_nodes == 0:
756
+ shap_ready = False
707
757
  cur_left_node = mb.add_split(
708
758
  tree_id=cur_tree_id,
709
759
  parent_id=cur_parent,
@@ -711,7 +761,7 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
711
761
  feature_index=cur_level_split["feature_index"],
712
762
  feature_value=cur_level_split["value"],
713
763
  default_left=model.default_left,
714
- cover=next_level_weights[cur_level_node_index],
764
+ cover=cover_nodes,
715
765
  )
716
766
  # cur_level_node_index += 1
717
767
  cur_right_node = mb.add_split(
@@ -721,7 +771,7 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
721
771
  feature_index=cur_level_split["feature_index"],
722
772
  feature_value=cur_level_split["value"],
723
773
  default_left=model.default_left,
724
- cover=next_level_weights[cur_level_node_index],
774
+ cover=cover_nodes,
725
775
  )
726
776
  # cur_level_node_index += 1
727
777
  cur_level_nodes.append(cur_left_node)
@@ -734,8 +784,7 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
734
784
  mb.add_leaf(
735
785
  tree_id=cur_tree_id,
736
786
  response=cur_tree_leaf_val[2 * last_level_node_num]
737
- * model.scale
738
- + model.bias,
787
+ * model.scale,
739
788
  parent_id=prev_level_nodes[last_level_node_num],
740
789
  position=0,
741
790
  cover=cur_tree_leaf_weights[2 * last_level_node_num],
@@ -743,8 +792,7 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
743
792
  mb.add_leaf(
744
793
  tree_id=cur_tree_id,
745
794
  response=cur_tree_leaf_val[2 * last_level_node_num + 1]
746
- * model.scale
747
- + model.bias,
795
+ * model.scale,
748
796
  parent_id=prev_level_nodes[last_level_node_num],
749
797
  position=1,
750
798
  cover=cur_tree_leaf_weights[2 * last_level_node_num + 1],
@@ -761,7 +809,11 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
761
809
  mb.add_leaf(
762
810
  tree_id=cur_tree_id,
763
811
  response=cur_tree_leaf_val[left_index] * model.scale
764
- + model.bias,
812
+ + (
813
+ cb_bias[class_label]
814
+ if add_intercept_to_each_node
815
+ else 0
816
+ ),
765
817
  parent_id=prev_level_nodes[last_level_node_num],
766
818
  position=0,
767
819
  cover=0.0,
@@ -769,13 +821,18 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
769
821
  mb.add_leaf(
770
822
  tree_id=cur_tree_id,
771
823
  response=cur_tree_leaf_val[right_index] * model.scale
772
- + model.bias,
824
+ + (
825
+ cb_bias[class_label]
826
+ if add_intercept_to_each_node
827
+ else 0
828
+ ),
773
829
  parent_id=prev_level_nodes[last_level_node_num],
774
830
  position=1,
775
831
  cover=0.0,
776
832
  )
777
833
  else:
778
834
  shap_ready = False
835
+ scale = booster.get_scale_and_bias()[0]
779
836
  for class_label in range(n_tree_each_iter):
780
837
  for i in range(model.n_iterations):
781
838
  root_node = trees_explicit[i][0]
@@ -809,7 +866,12 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
809
866
  else:
810
867
  mb.add_leaf(
811
868
  tree_id=cur_tree_id,
812
- response=left_node.value[class_label],
869
+ response=scale * left_node.value[class_label]
870
+ + (
871
+ cb_bias[class_label]
872
+ if add_intercept_to_each_node
873
+ else 0
874
+ ),
813
875
  parent_id=cur_node_id,
814
876
  position=0,
815
877
  cover=0.0,
@@ -830,7 +892,12 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
830
892
  else:
831
893
  mb.add_leaf(
832
894
  tree_id=cur_tree_id,
833
- response=cur_node.right.value[class_label],
895
+ response=scale * cur_node.right.value[class_label]
896
+ + (
897
+ cb_bias[class_label]
898
+ if add_intercept_to_each_node
899
+ else 0
900
+ ),
834
901
  parent_id=cur_node_id,
835
902
  position=1,
836
903
  cover=0.0,
@@ -838,18 +905,18 @@ def get_gbt_model_from_catboost(booster: Any) -> Any:
838
905
 
839
906
  else:
840
907
  # Tree has only one node
908
+ # Note: the root node already has scale and bias added to it,
909
+ # so no need to add them again here like it is done for the leafs.
841
910
  mb.add_leaf(
842
911
  tree_id=cur_tree_id,
843
912
  response=root_node.value[class_label],
844
913
  cover=0.0,
845
914
  )
846
915
 
847
- if not shap_ready:
848
- warn("Converted models of this type do not support SHAP value calculation")
849
- else:
850
- warn(
851
- "CatBoost SHAP values seem to be incorrect. "
852
- "Values from converted models will differ. "
853
- "See https://github.com/catboost/catboost/issues/2556 for more details."
854
- )
855
- return mb.model(base_score=0.0)
916
+ if all_trees_are_empty and not model.is_symmetric_tree:
917
+ shap_ready = True
918
+
919
+ intercept = 0.0
920
+ if not add_intercept_to_each_node:
921
+ intercept = booster.get_scale_and_bias()[1]
922
+ return mb.model(base_score=intercept), shap_ready
@@ -16,6 +16,7 @@
16
16
 
17
17
  # daal4py Model builders API
18
18
 
19
+ import warnings
19
20
  from typing import Literal, Optional
20
21
 
21
22
  import numpy as np
@@ -91,21 +92,25 @@ class GBTDAALBaseModel:
91
92
  def _get_params_from_catboost(self, params):
92
93
  if "class_params" in params["model_info"]:
93
94
  self.n_classes_ = len(params["model_info"]["class_params"]["class_to_label"])
95
+ else:
96
+ self.n_classes_ = 1
94
97
  self.n_features_in_ = len(params["features_info"]["float_features"])
95
98
 
96
99
  def _convert_model_from_lightgbm(self, booster):
97
100
  lgbm_params = get_lightgbm_params(booster)
98
101
  self.daal_model_ = get_gbt_model_from_lightgbm(booster, lgbm_params)
99
102
  self._get_params_from_lightgbm(lgbm_params)
103
+ self.supports_shap_ = self.n_classes_ < 3
100
104
 
101
105
  def _convert_model_from_xgboost(self, booster):
102
106
  xgb_params = get_xgboost_params(booster)
103
107
  self.daal_model_ = get_gbt_model_from_xgboost(booster, xgb_params)
104
108
  self._get_params_from_xgboost(xgb_params)
109
+ self.supports_shap_ = self.n_classes_ < 3
105
110
 
106
111
  def _convert_model_from_catboost(self, booster):
107
112
  catboost_params = get_catboost_params(booster)
108
- self.daal_model_ = get_gbt_model_from_catboost(booster)
113
+ self.daal_model_, self.supports_shap_ = get_gbt_model_from_catboost(booster)
109
114
  self._get_params_from_catboost(catboost_params)
110
115
 
111
116
  def _convert_model(self, model):
@@ -249,21 +254,17 @@ class GBTDAALBaseModel:
249
254
  X, fptype, pred_contribs, pred_interactions
250
255
  )
251
256
  except TypeError as e:
252
- if "unexpected keyword argument 'resultsToCompute'" in str(e):
253
- if pred_contribs or pred_interactions:
254
- # SHAP values requested, but not supported by this version
255
- raise TypeError(
256
- f"{'pred_contribs' if pred_contribs else 'pred_interactions'} not supported by this version of daalp4y"
257
- ) from e
257
+ if "unexpected keyword argument 'resultsToCompute'" in str(e) and (
258
+ pred_contribs or pred_interactions
259
+ ):
260
+ # SHAP values requested, but not supported by this version
261
+ raise TypeError(
262
+ f"{'pred_contribs' if pred_contribs else 'pred_interactions'} not supported by this version of daalp4y"
263
+ ) from e
258
264
  else:
259
265
  # unknown type error
260
266
  raise
261
267
 
262
- # fallback to calculation without `resultsToCompute`
263
- predict_algo = d4p.gbt_regression_prediction(fptype=fptype)
264
- predict_result = predict_algo.compute(X, self.daal_model_)
265
- return predict_result.prediction.ravel()
266
-
267
268
  def _predict_regression_with_results_to_compute(
268
269
  self, X, fptype, pred_contribs=False, pred_interactions=False
269
270
  ):
@@ -316,6 +317,15 @@ class GBTDAALModel(GBTDAALBaseModel):
316
317
  model : booster object from another library
317
318
  The fitted GBT model from which this object will be created. See rest of the documentation
318
319
  for supported input types.
320
+
321
+ Attributes
322
+ ----------
323
+ is_classifier_ : bool
324
+ Whether this is a classification model.
325
+ is_regressor_ : bool
326
+ Whether this is a regression model.
327
+ supports_shap_ : bool
328
+ Whether the model supports SHAP calculations.
319
329
  """
320
330
 
321
331
  def __init__(self, model):
@@ -345,14 +355,19 @@ class GBTDAALModel(GBTDAALBaseModel):
345
355
 
346
356
  :rtype: np.ndarray
347
357
  """
358
+ if pred_contribs or pred_interactions:
359
+ if not self.supports_shap_:
360
+ raise TypeError("SHAP calculations are not available for this model.")
361
+ if self.model_type == "catboost":
362
+ warnings.warn(
363
+ "SHAP values from models converted from CatBoost do not match "
364
+ "against those of the original library. See "
365
+ "https://github.com/catboost/catboost/issues/2556 for more details."
366
+ )
348
367
  fptype = getFPType(X)
349
368
  if self._is_regression:
350
369
  return self._predict_regression(X, fptype, pred_contribs, pred_interactions)
351
370
  else:
352
- if (pred_contribs or pred_interactions) and self.model_type != "xgboost":
353
- raise NotImplementedError(
354
- f"{'pred_contribs' if pred_contribs else 'pred_interactions'} is not implemented for classification models"
355
- )
356
371
  return self._predict_classification(
357
372
  X, fptype, "computeClassLabels", pred_contribs, pred_interactions
358
373
  )
@@ -679,8 +679,8 @@ class RandomForestClassifier(RandomForestClassifier_original, RandomForestBase):
679
679
  dfc_predictionResult = dfc_algorithm.compute(X, self.daal_model_)
680
680
 
681
681
  pred = dfc_predictionResult.probabilities
682
-
683
- return pred
682
+ # TODO: fix probabilities out of [0, 1] interval on oneDAL side
683
+ return pred.clip(0.0, 1.0)
684
684
 
685
685
  def _daal_fit_classifier(self, X, y, sample_weight=None):
686
686
  y = check_array(y, ensure_2d=False, dtype=None)
@@ -66,7 +66,13 @@ class TSNE(BaseTSNE):
66
66
  [n_samples],
67
67
  [P.nnz],
68
68
  [self.n_iter_without_progress],
69
- [self._max_iter if sklearn_check_version("1.5") else self.n_iter],
69
+ [
70
+ (
71
+ self.max_iter
72
+ if sklearn_check_version("1.7")
73
+ else (self._max_iter if sklearn_check_version("1.5") else self.n_iter)
74
+ )
75
+ ],
70
76
  ]
71
77
 
72
78
  # Pass params to daal4py backend
@@ -18,6 +18,7 @@ import warnings
18
18
  from functools import partial
19
19
 
20
20
  import numpy as np
21
+ from joblib import effective_n_jobs
21
22
  from sklearn.exceptions import DataConversionWarning
22
23
  from sklearn.metrics import pairwise_distances as pairwise_distances_original
23
24
  from sklearn.metrics.pairwise import (
@@ -28,7 +29,6 @@ from sklearn.metrics.pairwise import (
28
29
  _parallel_pairwise,
29
30
  check_pairwise_arrays,
30
31
  )
31
- from sklearn.utils._joblib import effective_n_jobs
32
32
  from sklearn.utils.validation import check_non_negative
33
33
 
34
34
  try:
@@ -188,7 +188,7 @@ def enable(name=None, verbose=True, deprecation=True, get_map=_get_map_of_algori
188
188
  if verbose and deprecation and sys.stderr is not None:
189
189
  sys.stderr.write(
190
190
  "oneAPI Data Analytics Library solvers for sklearn enabled: "
191
- "https://intelpython.github.io/daal4py/sklearn.html\n"
191
+ "https://uxlfoundation.github.io/scikit-learn-intelex/\n"
192
192
  )
193
193
 
194
194
 
onedal/_device_offload.py CHANGED
@@ -15,6 +15,7 @@
15
15
  # ==============================================================================
16
16
 
17
17
  import inspect
18
+ import logging
18
19
  from collections.abc import Iterable
19
20
  from functools import wraps
20
21
 
@@ -34,6 +35,8 @@ else:
34
35
 
35
36
  SyclQueue = getattr(_dpc_backend, "SyclQueue", None)
36
37
 
38
+ logger = logging.getLogger("sklearnex")
39
+
37
40
 
38
41
  def supports_queue(func):
39
42
  """
@@ -158,12 +161,17 @@ def support_input_format(func):
158
161
  else:
159
162
  self = None
160
163
 
161
- # Check if the function is KNeighborsClassifier.fit
164
+ # KNeighbors*.fit can not be used with raw inputs, ignore `use_raw_input=True`
162
165
  override_raw_input = (
163
166
  self
164
167
  and self.__class__.__name__ in ("KNeighborsClassifier", "KNeighborsRegressor")
165
168
  and func.__name__ == "fit"
166
169
  )
170
+ if override_raw_input:
171
+ pretty_name = f"{self.__class__.__name__}.{func.__name__}"
172
+ logger.warning(
173
+ f"Using raw inputs is not supported for {pretty_name}. Ignoring `use_raw_input=True` setting."
174
+ )
167
175
  if _get_config()["use_raw_input"] is True and not override_raw_input:
168
176
  if "queue" not in kwargs:
169
177
  usm_iface = getattr(args[0], "__sycl_usm_array_interface__", None)
@@ -190,9 +198,9 @@ def support_input_format(func):
190
198
  result = _convert_to_dpnp(result)
191
199
  return result
192
200
 
193
- if not get_config().get("transform_output"):
201
+ if get_config().get("transform_output") in ("default", None):
194
202
  input_array_api = getattr(data[0], "__array_namespace__", lambda: None)()
195
- if input_array_api:
203
+ if input_array_api and not _is_numpy_namespace(input_array_api):
196
204
  input_array_api_device = data[0].device
197
205
  result = _asarray(result, input_array_api, device=input_array_api_device)
198
206
  return result
onedal/ensemble/forest.py CHANGED
@@ -424,7 +424,9 @@ class BaseForest(BaseEnsemble, metaclass=ABCMeta):
424
424
  else:
425
425
  result = self.infer(params, model, X)
426
426
 
427
- return from_table(result.probabilities)
427
+ # TODO: fix probabilities out of [0, 1] interval on oneDAL side
428
+ pred = from_table(result.probabilities)
429
+ return pred.clip(0.0, 1.0)
428
430
 
429
431
 
430
432
  class RandomForestClassifier(ClassifierMixin, BaseForest, metaclass=ABCMeta):
@@ -63,6 +63,8 @@ def get_memory_usm():
63
63
 
64
64
 
65
65
  def is_dpctl_device_available(targets):
66
+ if not isinstance(targets, (list, tuple)):
67
+ raise TypeError("`targets` should be a list or tuple of strings.")
66
68
  if dpctl_available:
67
69
  for device in targets:
68
70
  if device == "cpu" and not dpctl.has_cpu_devices():
@@ -40,9 +40,19 @@ if dpnp_available:
40
40
  return array
41
41
 
42
42
 
43
+ def _supports_buffer_protocol(obj):
44
+ # the array_api standard mandates conversion with the buffer protocol,
45
+ # which can only be checked via a try-catch in native python
46
+ try:
47
+ memoryview(obj)
48
+ except TypeError:
49
+ return False
50
+ return True
51
+
52
+
43
53
  def _asarray(data, xp, *args, **kwargs):
44
54
  """Converted input object to array format of xp namespace provided."""
45
- if hasattr(data, "__array_namespace__"):
55
+ if hasattr(data, "__array_namespace__") or _supports_buffer_protocol(data):
46
56
  return xp.asarray(data, *args, **kwargs)
47
57
  elif isinstance(data, Iterable):
48
58
  if isinstance(data, tuple):
@@ -58,7 +68,12 @@ def _asarray(data, xp, *args, **kwargs):
58
68
 
59
69
  def _is_numpy_namespace(xp):
60
70
  """Return True if xp is backed by NumPy."""
61
- return xp.__name__ in {"numpy", "array_api_compat.numpy", "numpy.array_api"}
71
+ return xp.__name__ in {
72
+ "numpy",
73
+ "array_api_compat.numpy",
74
+ "numpy.array_api",
75
+ "sklearn.externals.array_api_compat.numpy",
76
+ }
62
77
 
63
78
 
64
79
  def _get_sycl_namespace(*arrays):