scikit-learn-intelex 2023.2.1__py38-none-win_amd64.whl → 2024.0.1__py38-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (109) hide show
  1. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/__init__.py +2 -2
  2. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/__main__.py +16 -12
  3. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/_config.py +2 -2
  4. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/_device_offload.py +90 -56
  5. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/_utils.py +95 -0
  6. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/basic_statistics/__init__.py +3 -3
  7. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/basic_statistics/basic_statistics.py +2 -2
  8. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/cluster/__init__.py +4 -4
  9. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/cluster/dbscan.py +187 -0
  10. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/cluster/k_means.py +2 -2
  11. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/cluster/tests/test_dbscan.py +12 -6
  12. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/cluster/tests/test_kmeans.py +5 -5
  13. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/decomposition/__init__.py +3 -3
  14. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/decomposition/pca.py +2 -2
  15. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/decomposition/tests/test_pca.py +5 -4
  16. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/dispatcher.py +102 -72
  17. {scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview → scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex}/ensemble/__init__.py +12 -4
  18. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/ensemble/_forest.py +1947 -0
  19. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/ensemble/tests/test_forest.py +118 -0
  20. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/glob/__main__.py +31 -16
  21. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/glob/dispatcher.py +21 -14
  22. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/__init__.py +10 -10
  23. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/coordinate_descent.py +2 -2
  24. {scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview → scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex}/linear_model/linear.py +173 -83
  25. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/logistic_path.py +3 -3
  26. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/ridge.py +2 -2
  27. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/tests/test_linear.py +23 -7
  28. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/tests/test_logreg.py +4 -3
  29. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/manifold/__init__.py +3 -3
  30. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/manifold/t_sne.py +2 -2
  31. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/manifold/tests/test_tsne.py +4 -3
  32. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/metrics/__init__.py +5 -5
  33. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/metrics/pairwise.py +2 -2
  34. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/metrics/ranking.py +2 -2
  35. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/metrics/tests/test_metrics.py +8 -6
  36. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/model_selection/__init__.py +3 -3
  37. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/model_selection/split.py +2 -2
  38. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/model_selection/tests/test_model_selection.py +6 -3
  39. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/__init__.py +9 -5
  40. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/common.py +100 -77
  41. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/knn_classification.py +331 -0
  42. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/knn_regression.py +307 -0
  43. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/knn_unsupervised.py +116 -58
  44. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/lof.py +118 -56
  45. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/tests/test_neighbors.py +85 -0
  46. {scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview/decomposition → scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/preview}/__init__.py +18 -20
  47. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/preview/cluster/__init__.py +3 -3
  48. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/preview/cluster/_common.py +7 -7
  49. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/preview/cluster/k_means.py +104 -73
  50. scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/linear_model/linear.py → scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/preview/decomposition/__init__.py +4 -1
  51. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/preview/decomposition/pca.py +128 -100
  52. scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview/linear_model/tests/test_preview_linear.py → scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/preview/decomposition/tests/test_preview_pca.py +18 -16
  53. {scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview/linear_model → scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd}/__init__.py +24 -22
  54. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/basic_statistics/__init__.py +3 -3
  55. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/basic_statistics/basic_statistics.py +2 -2
  56. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/cluster/__init__.py +11 -5
  57. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/cluster/dbscan.py +50 -0
  58. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/cluster/kmeans.py +2 -2
  59. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/decomposition/__init__.py +3 -3
  60. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/decomposition/pca.py +2 -2
  61. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/ensemble/__init__.py +3 -3
  62. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/ensemble/forest.py +16 -14
  63. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/linear_model/__init__.py +3 -3
  64. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/linear_model/linear_model.py +2 -2
  65. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/neighbors/__init__.py +3 -3
  66. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/neighbors/neighbors.py +3 -3
  67. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/svm/__init__.py +11 -8
  68. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/svm/_common.py +56 -56
  69. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/svm/nusvc.py +110 -55
  70. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/svm/nusvr.py +65 -31
  71. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/svm/svc.py +136 -78
  72. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/svm/svr.py +65 -31
  73. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/svm/tests/test_svm.py +102 -0
  74. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/_models_info.py +170 -0
  75. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_config.py +9 -8
  76. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_memory_usage.py +63 -69
  77. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_monkeypatch.py +55 -53
  78. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/test_parallel.py +50 -0
  79. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_patching.py +8 -7
  80. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/test_run_to_run_stability_tests.py +428 -0
  81. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/tests/utils/_launch_algorithms.py +39 -39
  82. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/utils/__init__.py +3 -3
  83. scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/utils/parallel.py +59 -0
  84. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/utils/validation.py +2 -2
  85. {scikit_learn_intelex-2023.2.1.dist-info → scikit_learn_intelex-2024.0.1.dist-info}/METADATA +34 -35
  86. scikit_learn_intelex-2024.0.1.dist-info/RECORD +90 -0
  87. scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/_utils.py +0 -82
  88. scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/cluster/dbscan.py +0 -18
  89. scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/ensemble/__init__.py +0 -20
  90. scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/ensemble/forest.py +0 -18
  91. scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/ensemble/tests/test_forest.py +0 -46
  92. scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/neighbors/knn_classification.py +0 -228
  93. scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/neighbors/knn_regression.py +0 -213
  94. scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/neighbors/tests/test_neighbors.py +0 -57
  95. scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview/__init__.py +0 -18
  96. scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview/decomposition/tests/test_preview_pca.py +0 -28
  97. scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview/ensemble/extra_trees.py +0 -1261
  98. scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview/ensemble/forest.py +0 -1155
  99. scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview/ensemble/tests/test_preview_ensemble.py +0 -67
  100. scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview/linear_model/_common.py +0 -66
  101. scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/spmd/__init__.py +0 -23
  102. scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/svm/tests/test_svm.py +0 -63
  103. scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/tests/_models_info.py +0 -159
  104. scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/tests/test_run_to_run_stability_tests.py +0 -383
  105. scikit_learn_intelex-2023.2.1.dist-info/RECORD +0 -95
  106. {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/doc/third-party-programs.txt +0 -0
  107. {scikit_learn_intelex-2023.2.1.dist-info → scikit_learn_intelex-2024.0.1.dist-info}/LICENSE.txt +0 -0
  108. {scikit_learn_intelex-2023.2.1.dist-info → scikit_learn_intelex-2024.0.1.dist-info}/WHEEL +0 -0
  109. {scikit_learn_intelex-2023.2.1.dist-info → scikit_learn_intelex-2024.0.1.dist-info}/top_level.txt +0 -0
@@ -1,1155 +0,0 @@
1
- #!/usr/bin/env python
2
- # ===============================================================================
3
- # Copyright 2021 Intel Corporation
4
- #
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
16
- # ===============================================================================
17
-
18
- from daal4py.sklearn._utils import (
19
- daal_check_version, sklearn_check_version,
20
- make2d, check_tree_nodes
21
- )
22
-
23
- import numpy as np
24
-
25
- import numbers
26
-
27
- import warnings
28
-
29
- from abc import ABC
30
-
31
- from sklearn.exceptions import DataConversionWarning
32
-
33
- from ..._config import get_config
34
- from ..._device_offload import dispatch, wrap_output_data
35
-
36
- from sklearn.ensemble import RandomForestClassifier as sklearn_RandomForestClassifier
37
- from sklearn.ensemble import RandomForestRegressor as sklearn_RandomForestRegressor
38
-
39
- from sklearn.utils.validation import (
40
- check_is_fitted,
41
- check_consistent_length,
42
- check_array,
43
- check_X_y)
44
-
45
- from onedal.datatypes import _num_features, _num_samples
46
-
47
- from sklearn.utils import check_random_state, deprecated
48
-
49
- from sklearn.base import clone
50
-
51
- from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
52
- from sklearn.tree._tree import Tree
53
-
54
- from onedal.ensemble import RandomForestClassifier as onedal_RandomForestClassifier
55
- from onedal.ensemble import RandomForestRegressor as onedal_RandomForestRegressor
56
- from onedal.primitives import get_tree_state_cls, get_tree_state_reg
57
-
58
- from scipy import sparse as sp
59
-
60
- if sklearn_check_version('1.2'):
61
- from sklearn.utils._param_validation import Interval, StrOptions
62
-
63
-
64
- class BaseRandomForest(ABC):
65
- def _fit_proba(self, X, y, sample_weight=None, queue=None):
66
- params = self.get_params()
67
- self.__class__(**params)
68
-
69
- # We use stock metaestimators below, so the only way
70
- # to pass a queue is using config_context.
71
- cfg = get_config()
72
- cfg['target_offload'] = queue
73
-
74
- def _save_attributes(self):
75
- self._onedal_model = self._onedal_estimator._onedal_model
76
- # TODO:
77
- # update for regression
78
- if self.oob_score:
79
- self.oob_score_ = self._onedal_estimator.oob_score_
80
- self.oob_prediction_ = self._onedal_estimator.oob_prediction_
81
- return self
82
-
83
- def _onedal_classifier(self, **onedal_params):
84
- return onedal_RandomForestClassifier(**onedal_params)
85
-
86
- def _onedal_regressor(self, **onedal_params):
87
- return onedal_RandomForestRegressor(**onedal_params)
88
-
89
- # TODO:
90
- # move to onedal modul.
91
- def _check_parameters(self):
92
- if not self.bootstrap and self.max_samples is not None:
93
- raise ValueError(
94
- "`max_sample` cannot be set if `bootstrap=False`. "
95
- "Either switch to `bootstrap=True` or set "
96
- "`max_sample=None`."
97
- )
98
- if isinstance(self.min_samples_leaf, numbers.Integral):
99
- if not 1 <= self.min_samples_leaf:
100
- raise ValueError("min_samples_leaf must be at least 1 "
101
- "or in (0, 0.5], got %s"
102
- % self.min_samples_leaf)
103
- else: # float
104
- if not 0. < self.min_samples_leaf <= 0.5:
105
- raise ValueError("min_samples_leaf must be at least 1 "
106
- "or in (0, 0.5], got %s"
107
- % self.min_samples_leaf)
108
- if isinstance(self.min_samples_split, numbers.Integral):
109
- if not 2 <= self.min_samples_split:
110
- raise ValueError("min_samples_split must be an integer "
111
- "greater than 1 or a float in (0.0, 1.0]; "
112
- "got the integer %s"
113
- % self.min_samples_split)
114
- else: # float
115
- if not 0. < self.min_samples_split <= 1.:
116
- raise ValueError("min_samples_split must be an integer "
117
- "greater than 1 or a float in (0.0, 1.0]; "
118
- "got the float %s"
119
- % self.min_samples_split)
120
- if not 0 <= self.min_weight_fraction_leaf <= 0.5:
121
- raise ValueError("min_weight_fraction_leaf must in [0, 0.5]")
122
- if self.min_impurity_split is not None:
123
- warnings.warn("The min_impurity_split parameter is deprecated. "
124
- "Its default value has changed from 1e-7 to 0 in "
125
- "version 0.23, and it will be removed in 0.25. "
126
- "Use the min_impurity_decrease parameter instead.",
127
- FutureWarning)
128
-
129
- if self.min_impurity_split < 0.:
130
- raise ValueError("min_impurity_split must be greater than "
131
- "or equal to 0")
132
- if self.min_impurity_decrease < 0.:
133
- raise ValueError("min_impurity_decrease must be greater than "
134
- "or equal to 0")
135
- if self.max_leaf_nodes is not None:
136
- if not isinstance(self.max_leaf_nodes, numbers.Integral):
137
- raise ValueError(
138
- "max_leaf_nodes must be integral number but was "
139
- "%r" %
140
- self.max_leaf_nodes)
141
- if self.max_leaf_nodes < 2:
142
- raise ValueError(
143
- ("max_leaf_nodes {0} must be either None "
144
- "or larger than 1").format(
145
- self.max_leaf_nodes))
146
- if isinstance(self.max_bins, numbers.Integral):
147
- if not 2 <= self.max_bins:
148
- raise ValueError("max_bins must be at least 2, got %s"
149
- % self.max_bins)
150
- else:
151
- raise ValueError("max_bins must be integral number but was "
152
- "%r" % self.max_bins)
153
- if isinstance(self.min_bin_size, numbers.Integral):
154
- if not 1 <= self.min_bin_size:
155
- raise ValueError("min_bin_size must be at least 1, got %s"
156
- % self.min_bin_size)
157
- else:
158
- raise ValueError("min_bin_size must be integral number but was "
159
- "%r" % self.min_bin_size)
160
-
161
- def check_sample_weight(self, sample_weight, X, dtype=None):
162
- n_samples = _num_samples(X)
163
-
164
- if dtype is not None and dtype not in [np.float32, np.float64]:
165
- dtype = np.float64
166
-
167
- if sample_weight is None:
168
- sample_weight = np.ones(n_samples, dtype=dtype)
169
- elif isinstance(sample_weight, numbers.Number):
170
- sample_weight = np.full(n_samples, sample_weight, dtype=dtype)
171
- else:
172
- if dtype is None:
173
- dtype = [np.float64, np.float32]
174
- sample_weight = check_array(
175
- sample_weight,
176
- accept_sparse=False,
177
- ensure_2d=False,
178
- dtype=dtype,
179
- order="C")
180
- if sample_weight.ndim != 1:
181
- raise ValueError("Sample weights must be 1D array or scalar")
182
-
183
- if sample_weight.shape != (n_samples,):
184
- raise ValueError("sample_weight.shape == {}, expected {}!"
185
- .format(sample_weight.shape, (n_samples,)))
186
- return sample_weight
187
-
188
-
189
- class RandomForestClassifier(sklearn_RandomForestClassifier, BaseRandomForest):
190
- __doc__ = sklearn_RandomForestClassifier.__doc__
191
-
192
- if sklearn_check_version('1.2'):
193
- _parameter_constraints: dict = {
194
- **sklearn_RandomForestClassifier._parameter_constraints,
195
- "max_bins": [Interval(numbers.Integral, 2, None, closed="left")],
196
- "min_bin_size": [Interval(numbers.Integral, 1, None, closed="left")],
197
- "splitter_mode": [StrOptions({"best", "random"})]
198
- }
199
-
200
- if sklearn_check_version('1.0'):
201
- def __init__(
202
- self,
203
- n_estimators=100,
204
- criterion="gini",
205
- max_depth=None,
206
- min_samples_split=2,
207
- min_samples_leaf=1,
208
- min_weight_fraction_leaf=0.,
209
- max_features='sqrt' if sklearn_check_version('1.1') else 'auto',
210
- max_leaf_nodes=None,
211
- min_impurity_decrease=0.,
212
- bootstrap=True,
213
- oob_score=False,
214
- n_jobs=None,
215
- random_state=None,
216
- verbose=0,
217
- warm_start=False,
218
- class_weight=None,
219
- ccp_alpha=0.0,
220
- max_samples=None,
221
- max_bins=256,
222
- min_bin_size=1,
223
- splitter_mode='best'):
224
- super(RandomForestClassifier, self).__init__(
225
- n_estimators=n_estimators,
226
- criterion=criterion,
227
- max_depth=max_depth,
228
- min_samples_split=min_samples_split,
229
- min_samples_leaf=min_samples_leaf,
230
- min_weight_fraction_leaf=min_weight_fraction_leaf,
231
- max_features=max_features,
232
- max_leaf_nodes=max_leaf_nodes,
233
- min_impurity_decrease=min_impurity_decrease,
234
- bootstrap=bootstrap,
235
- oob_score=oob_score,
236
- n_jobs=n_jobs,
237
- random_state=random_state,
238
- verbose=verbose,
239
- warm_start=warm_start,
240
- class_weight=class_weight
241
- )
242
- self.warm_start = warm_start
243
- self.ccp_alpha = ccp_alpha
244
- self.max_samples = max_samples
245
- self.max_bins = max_bins
246
- self.min_bin_size = min_bin_size
247
- self.min_impurity_split = None
248
- self.splitter_mode = splitter_mode
249
- # self._estimator = DecisionTreeClassifier()
250
- else:
251
- def __init__(self,
252
- n_estimators=100,
253
- criterion="gini",
254
- max_depth=None,
255
- min_samples_split=2,
256
- min_samples_leaf=1,
257
- min_weight_fraction_leaf=0.,
258
- max_features="auto",
259
- max_leaf_nodes=None,
260
- min_impurity_decrease=0.,
261
- min_impurity_split=None,
262
- bootstrap=True,
263
- oob_score=False,
264
- n_jobs=None,
265
- random_state=None,
266
- verbose=0,
267
- warm_start=False,
268
- class_weight=None,
269
- ccp_alpha=0.0,
270
- max_samples=None,
271
- max_bins=256,
272
- min_bin_size=1,
273
- splitter_mode='best'):
274
- super(RandomForestClassifier, self).__init__(
275
- n_estimators=n_estimators,
276
- criterion=criterion,
277
- max_depth=max_depth,
278
- min_samples_split=min_samples_split,
279
- min_samples_leaf=min_samples_leaf,
280
- min_weight_fraction_leaf=min_weight_fraction_leaf,
281
- max_features=max_features,
282
- max_leaf_nodes=max_leaf_nodes,
283
- min_impurity_decrease=min_impurity_decrease,
284
- min_impurity_split=min_impurity_split,
285
- bootstrap=bootstrap,
286
- oob_score=oob_score,
287
- n_jobs=n_jobs,
288
- random_state=random_state,
289
- verbose=verbose,
290
- warm_start=warm_start,
291
- class_weight=class_weight,
292
- ccp_alpha=ccp_alpha,
293
- max_samples=max_samples
294
- )
295
- self.warm_start = warm_start
296
- self.ccp_alpha = ccp_alpha
297
- self.max_samples = max_samples
298
- self.max_bins = max_bins
299
- self.min_bin_size = min_bin_size
300
- self.min_impurity_split = None
301
- self.splitter_mode = splitter_mode
302
- # self._estimator = DecisionTreeClassifier()
303
-
304
- def fit(self, X, y, sample_weight=None):
305
- """
306
- Build a forest of trees from the training set (X, y).
307
-
308
- Parameters
309
- ----------
310
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
311
- The training input samples. Internally, its dtype will be converted
312
- to ``dtype=np.float32``. If a sparse matrix is provided, it will be
313
- converted into a sparse ``csc_matrix``.
314
-
315
- y : array-like of shape (n_samples,) or (n_samples, n_outputs)
316
- The target values (class labels in classification, real numbers in
317
- regression).
318
-
319
- sample_weight : array-like of shape (n_samples,), default=None
320
- Sample weights. If None, then samples are equally weighted. Splits
321
- that would create child nodes with net zero or negative weight are
322
- ignored while searching for a split in each node. In the case of
323
- classification, splits are also ignored if they would result in any
324
- single class carrying a negative weight in either child node.
325
-
326
- Returns
327
- -------
328
- self : object
329
- """
330
- dispatch(self, 'fit', {
331
- 'onedal': self.__class__._onedal_fit,
332
- 'sklearn': sklearn_RandomForestClassifier.fit,
333
- }, X, y, sample_weight)
334
- return self
335
-
336
- def _onedal_ready(self, X, y, sample_weight):
337
- if sp.issparse(y):
338
- raise ValueError(
339
- "sparse multilabel-indicator for y is not supported."
340
- )
341
- if not self.bootstrap and self.max_samples is not None:
342
- raise ValueError(
343
- "`max_sample` cannot be set if `bootstrap=False`. "
344
- "Either switch to `bootstrap=True` or set "
345
- "`max_sample=None`."
346
- )
347
- if not self.bootstrap and self.oob_score:
348
- raise ValueError("Out of bag estimation only available"
349
- " if bootstrap=True")
350
- if sklearn_check_version("1.2"):
351
- self._validate_params()
352
- else:
353
- self._check_parameters()
354
-
355
- correct_sparsity = not sp.issparse(X)
356
- correct_ccp_alpha = self.ccp_alpha == 0.0
357
- correct_criterion = self.criterion == "gini"
358
- correct_warm_start = self.warm_start is False
359
-
360
- if daal_check_version((2021, 'P', 500)):
361
- correct_oob_score = not self.oob_score
362
- else:
363
- correct_oob_score = self.oob_score
364
-
365
- ready = all([correct_oob_score,
366
- correct_sparsity,
367
- correct_ccp_alpha,
368
- correct_criterion,
369
- correct_warm_start])
370
- if ready:
371
- if sklearn_check_version("1.0"):
372
- self._check_feature_names(X, reset=True)
373
- X = check_array(X, dtype=[np.float32, np.float64])
374
- y = np.asarray(y)
375
- y = np.atleast_1d(y)
376
- if y.ndim == 2 and y.shape[1] == 1:
377
- warnings.warn(
378
- "A column-vector y was passed when a 1d array was"
379
- " expected. Please change the shape of y to "
380
- "(n_samples,), for example using ravel().",
381
- DataConversionWarning,
382
- stacklevel=2)
383
- check_consistent_length(X, y)
384
-
385
- y = make2d(y)
386
- self.n_outputs_ = y.shape[1]
387
- ready = ready and self.n_outputs_ == 1
388
- # TODO: Fix to support integers as input
389
- ready = ready and (y.dtype in [np.float32, np.float64, np.int32, np.int64])
390
-
391
- return ready, X, y, sample_weight
392
-
393
- @wrap_output_data
394
- def predict(self, X):
395
- """
396
- Predict class for X.
397
-
398
- The predicted class of an input sample is a vote by the trees in
399
- the forest, weighted by their probability estimates. That is,
400
- the predicted class is the one with highest mean probability
401
- estimate across the trees.
402
-
403
- Parameters
404
- ----------
405
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
406
- The input samples. Internally, its dtype will be converted to
407
- ``dtype=np.float32``. If a sparse matrix is provided, it will be
408
- converted into a sparse ``csr_matrix``.
409
-
410
- Returns
411
- -------
412
- y : ndarray of shape (n_samples,) or (n_samples, n_outputs)
413
- The predicted classes.
414
- """
415
- return dispatch(self, 'predict', {
416
- 'onedal': self.__class__._onedal_predict,
417
- 'sklearn': sklearn_RandomForestClassifier.predict,
418
- }, X)
419
-
420
- @wrap_output_data
421
- def predict_proba(self, X):
422
- """
423
- Predict class probabilities for X.
424
-
425
- The predicted class probabilities of an input sample are computed as
426
- the mean predicted class probabilities of the trees in the forest.
427
- The class probability of a single tree is the fraction of samples of
428
- the same class in a leaf.
429
-
430
- Parameters
431
- ----------
432
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
433
- The input samples. Internally, its dtype will be converted to
434
- ``dtype=np.float32``. If a sparse matrix is provided, it will be
435
- converted into a sparse ``csr_matrix``.
436
-
437
- Returns
438
- -------
439
- p : ndarray of shape (n_samples, n_classes), or a list of n_outputs
440
- such arrays if n_outputs > 1.
441
- The class probabilities of the input samples. The order of the
442
- classes corresponds to that in the attribute :term:`classes_`.
443
- """
444
- # TODO:
445
- # _check_proba()
446
- # self._check_proba()
447
- if sklearn_check_version("1.0"):
448
- self._check_feature_names(X, reset=False)
449
- if hasattr(self, 'n_features_in_'):
450
- try:
451
- num_features = _num_features(X)
452
- except TypeError:
453
- num_features = _num_samples(X)
454
- if num_features != self.n_features_in_:
455
- raise ValueError(
456
- (f'X has {num_features} features, '
457
- f'but RandomForestClassifier is expecting '
458
- f'{self.n_features_in_} features as input'))
459
- return dispatch(self, 'predict_proba', {
460
- 'onedal': self.__class__._onedal_predict_proba,
461
- 'sklearn': sklearn_RandomForestClassifier.predict_proba,
462
- }, X)
463
-
464
- if sklearn_check_version('1.0'):
465
- @deprecated(
466
- "Attribute `n_features_` was deprecated in version 1.0 and will be "
467
- "removed in 1.2. Use `n_features_in_` instead.")
468
- @property
469
- def n_features_(self):
470
- return self.n_features_in_
471
-
472
- @property
473
- def _estimators_(self):
474
- if hasattr(self, '_cached_estimators_'):
475
- if self._cached_estimators_:
476
- return self._cached_estimators_
477
- if sklearn_check_version('0.22'):
478
- check_is_fitted(self)
479
- else:
480
- check_is_fitted(self, '_onedal_model')
481
- classes_ = self.classes_[0]
482
- n_classes_ = self.n_classes_[0]
483
- # convert model to estimators
484
- params = {
485
- 'criterion': self.criterion,
486
- 'max_depth': self.max_depth,
487
- 'min_samples_split': self.min_samples_split,
488
- 'min_samples_leaf': self.min_samples_leaf,
489
- 'min_weight_fraction_leaf': self.min_weight_fraction_leaf,
490
- 'max_features': self.max_features,
491
- 'max_leaf_nodes': self.max_leaf_nodes,
492
- 'min_impurity_decrease': self.min_impurity_decrease,
493
- 'random_state': None,
494
- }
495
- if not sklearn_check_version('1.0'):
496
- params['min_impurity_split'] = self.min_impurity_split
497
- est = DecisionTreeClassifier(**params)
498
- # we need to set est.tree_ field with Trees constructed from Intel(R)
499
- # oneAPI Data Analytics Library solution
500
- estimators_ = []
501
- random_state_checked = check_random_state(self.random_state)
502
- for i in range(self.n_estimators):
503
- est_i = clone(est)
504
- est_i.set_params(
505
- random_state=random_state_checked.randint(
506
- np.iinfo(
507
- np.int32).max))
508
- if sklearn_check_version('1.0'):
509
- est_i.n_features_in_ = self.n_features_in_
510
- else:
511
- est_i.n_features_ = self.n_features_in_
512
- est_i.n_outputs_ = self.n_outputs_
513
- est_i.classes_ = classes_
514
- est_i.n_classes_ = n_classes_
515
- tree_i_state_class = get_tree_state_cls(
516
- self._onedal_model, i, n_classes_)
517
- tree_i_state_dict = {
518
- 'max_depth': tree_i_state_class.max_depth,
519
- 'node_count': tree_i_state_class.node_count,
520
- 'nodes': check_tree_nodes(tree_i_state_class.node_ar),
521
- 'values': tree_i_state_class.value_ar}
522
- est_i.tree_ = Tree(
523
- self.n_features_in_,
524
- np.array(
525
- [n_classes_],
526
- dtype=np.intp),
527
- self.n_outputs_)
528
- est_i.tree_.__setstate__(tree_i_state_dict)
529
- estimators_.append(est_i)
530
-
531
- self._cached_estimators_ = estimators_
532
- return estimators_
533
-
534
- def _onedal_cpu_supported(self, method_name, *data):
535
- if method_name == 'fit':
536
- ready, X, y, sample_weight = self._onedal_ready(*data)
537
- if self.splitter_mode == 'random':
538
- warnings.warn("'random' splitter mode supports GPU devices only "
539
- "and requires oneDAL version >= 2023.1.1. "
540
- "Using 'best' mode instead.", RuntimeWarning)
541
- self.splitter_mode = 'best'
542
- if not ready:
543
- return False
544
- elif sp.issparse(X):
545
- return False
546
- elif sp.issparse(y):
547
- return False
548
- elif sp.issparse(sample_weight):
549
- return False
550
- elif not self.ccp_alpha == 0.0:
551
- return False
552
- elif self.warm_start:
553
- return False
554
- elif self.oob_score and not daal_check_version((2023, 'P', 101)):
555
- return False
556
- elif not self.n_outputs_ == 1:
557
- return False
558
- elif hasattr(self, 'estimators_'):
559
- return False
560
- else:
561
- return True
562
- if method_name in ['predict', 'predict_proba']:
563
- X = data[0]
564
- if not hasattr(self, '_onedal_model'):
565
- return False
566
- elif sp.issparse(X):
567
- return False
568
- elif not (hasattr(self, 'n_outputs_') and self.n_outputs_ == 1):
569
- return False
570
- elif not daal_check_version((2021, 'P', 400)):
571
- return False
572
- elif self.warm_start:
573
- return False
574
- else:
575
- return True
576
- raise RuntimeError(
577
- f'Unknown method {method_name} in {self.__class__.__name__}')
578
-
579
- def _onedal_gpu_supported(self, method_name, *data):
580
- if method_name == 'fit':
581
- ready, X, y, sample_weight = self._onedal_ready(*data)
582
- if self.splitter_mode == 'random' and \
583
- not daal_check_version((2023, 'P', 101)):
584
- warnings.warn("'random' splitter mode requires OneDAL >= 2023.1.1. "
585
- "Using 'best' mode instead.", RuntimeWarning)
586
- self.splitter_mode = 'best'
587
- if not ready:
588
- return False
589
- elif sp.issparse(X):
590
- return False
591
- elif sp.issparse(y):
592
- return False
593
- elif sp.issparse(sample_weight):
594
- return False
595
- elif sample_weight is not None: # `sample_weight` is not supported.
596
- return False
597
- elif not self.ccp_alpha == 0.0:
598
- return False
599
- elif self.warm_start:
600
- return False
601
- elif self.oob_score:
602
- return False
603
- elif not self.n_outputs_ == 1:
604
- return False
605
- elif hasattr(self, 'estimators_'):
606
- return False
607
- else:
608
- return True
609
- if method_name in ['predict', 'predict_proba']:
610
- X = data[0]
611
- if not hasattr(self, '_onedal_model'):
612
- return False
613
- elif sp.issparse(X):
614
- return False
615
- elif not (hasattr(self, 'n_outputs_') and self.n_outputs_ == 1):
616
- return False
617
- elif not daal_check_version((2021, 'P', 400)):
618
- return False
619
- elif self.warm_start:
620
- return False
621
- else:
622
- return True
623
- raise RuntimeError(
624
- f'Unknown method {method_name} in {self.__class__.__name__}')
625
-
626
- def _onedal_fit(self, X, y, sample_weight=None, queue=None):
627
- if sklearn_check_version('1.2'):
628
- X, y = self._validate_data(
629
- X, y, multi_output=False, accept_sparse=False,
630
- dtype=[np.float64, np.float32]
631
- )
632
- else:
633
- X, y = check_X_y(
634
- X, y, accept_sparse=False, dtype=[np.float64, np.float32],
635
- multi_output=False
636
- )
637
-
638
- if sample_weight is not None:
639
- sample_weight = self.check_sample_weight(sample_weight, X)
640
-
641
- y = np.atleast_1d(y)
642
- if y.ndim == 2 and y.shape[1] == 1:
643
- warnings.warn(
644
- "A column-vector y was passed when a 1d array was"
645
- " expected. Please change the shape of y to "
646
- "(n_samples,), for example using ravel().",
647
- DataConversionWarning,
648
- stacklevel=2,
649
- )
650
- if y.ndim == 1:
651
- # reshape is necessary to preserve the data contiguity against vs
652
- # [:, np.newaxis] that does not.
653
- y = np.reshape(y, (-1, 1))
654
-
655
- y, expanded_class_weight = self._validate_y_class_weight(y)
656
-
657
- n_classes_ = self.n_classes_[0]
658
- self.n_features_in_ = X.shape[1]
659
- if not sklearn_check_version('1.0'):
660
- self.n_features_ = self.n_features_in_
661
-
662
- if expanded_class_weight is not None:
663
- if sample_weight is not None:
664
- sample_weight = sample_weight * expanded_class_weight
665
- else:
666
- sample_weight = expanded_class_weight
667
- if sample_weight is not None:
668
- sample_weight = [sample_weight]
669
-
670
- if n_classes_ < 2:
671
- raise ValueError(
672
- "Training data only contain information about one class.")
673
-
674
- if self.oob_score:
675
- err = 'out_of_bag_error_accuracy|out_of_bag_error_decision_function'
676
- else:
677
- err = 'none'
678
-
679
- onedal_params = {
680
- 'n_estimators': self.n_estimators,
681
- 'criterion': self.criterion,
682
- 'max_depth': self.max_depth,
683
- 'min_samples_split': self.min_samples_split,
684
- 'min_samples_leaf': self.min_samples_leaf,
685
- 'min_weight_fraction_leaf': self.min_weight_fraction_leaf,
686
- 'max_features': self.max_features,
687
- 'max_leaf_nodes': self.max_leaf_nodes,
688
- 'min_impurity_decrease': self.min_impurity_decrease,
689
- 'min_impurity_split': self.min_impurity_split,
690
- 'bootstrap': self.bootstrap,
691
- 'oob_score': self.oob_score,
692
- 'n_jobs': self.n_jobs,
693
- 'random_state': self.random_state,
694
- 'verbose': self.verbose,
695
- 'warm_start': self.warm_start,
696
- 'error_metric_mode': err,
697
- 'variable_importance_mode': 'mdi',
698
- 'class_weight': self.class_weight,
699
- 'max_bins': self.max_bins,
700
- 'min_bin_size': self.min_bin_size,
701
- 'max_samples': self.max_samples
702
- }
703
- if daal_check_version((2023, 'P', 101)):
704
- onedal_params['splitter_mode'] = self.splitter_mode
705
- self._cached_estimators_ = None
706
-
707
- # Compute
708
- self._onedal_estimator = self._onedal_classifier(**onedal_params)
709
- self._onedal_estimator.fit(X, y, sample_weight, queue=queue)
710
-
711
- self._save_attributes()
712
- if sklearn_check_version("1.2"):
713
- self._estimator = DecisionTreeClassifier()
714
- self.estimators_ = self._estimators_
715
- # Decapsulate classes_ attributes
716
- self.n_classes_ = self.n_classes_[0]
717
- self.classes_ = self.classes_[0]
718
- return self
719
-
720
- def _onedal_predict(self, X, queue=None):
721
- X = check_array(X, dtype=[np.float32, np.float64])
722
- check_is_fitted(self)
723
- if sklearn_check_version("1.0"):
724
- self._check_feature_names(X, reset=False)
725
-
726
- res = self._onedal_estimator.predict(X, queue=queue)
727
- return np.take(self.classes_,
728
- res.ravel().astype(np.int64, casting='unsafe'))
729
-
730
- def _onedal_predict_proba(self, X, queue=None):
731
- X = check_array(X, dtype=[np.float64, np.float32])
732
- check_is_fitted(self)
733
- if sklearn_check_version('0.23'):
734
- self._check_n_features(X, reset=False)
735
- if sklearn_check_version("1.0"):
736
- self._check_feature_names(X, reset=False)
737
- return self._onedal_estimator.predict_proba(X, queue=queue)
738
-
739
-
740
- class RandomForestRegressor(sklearn_RandomForestRegressor, BaseRandomForest):
741
- __doc__ = sklearn_RandomForestRegressor.__doc__
742
-
743
- if sklearn_check_version('1.2'):
744
- _parameter_constraints: dict = {
745
- **sklearn_RandomForestRegressor._parameter_constraints,
746
- "max_bins": [Interval(numbers.Integral, 2, None, closed="left")],
747
- "min_bin_size": [Interval(numbers.Integral, 1, None, closed="left")],
748
- "splitter_mode": [StrOptions({"best", "random"})]
749
- }
750
-
751
- if sklearn_check_version('1.0'):
752
- def __init__(
753
- self,
754
- n_estimators=100,
755
- *,
756
- criterion="squared_error",
757
- max_depth=None,
758
- min_samples_split=2,
759
- min_samples_leaf=1,
760
- min_weight_fraction_leaf=0.,
761
- max_features=1.0 if sklearn_check_version('1.1') else 'auto',
762
- max_leaf_nodes=None,
763
- min_impurity_decrease=0.,
764
- bootstrap=True,
765
- oob_score=False,
766
- n_jobs=None,
767
- random_state=None,
768
- verbose=0,
769
- warm_start=False,
770
- ccp_alpha=0.0,
771
- max_samples=None,
772
- max_bins=256,
773
- min_bin_size=1,
774
- splitter_mode='best'):
775
- super(RandomForestRegressor, self).__init__(
776
- n_estimators=n_estimators,
777
- criterion=criterion,
778
- max_depth=max_depth,
779
- min_samples_split=min_samples_split,
780
- min_samples_leaf=min_samples_leaf,
781
- min_weight_fraction_leaf=min_weight_fraction_leaf,
782
- max_features=max_features,
783
- max_leaf_nodes=max_leaf_nodes,
784
- min_impurity_decrease=min_impurity_decrease,
785
- bootstrap=bootstrap,
786
- oob_score=oob_score,
787
- n_jobs=n_jobs,
788
- random_state=random_state,
789
- verbose=verbose,
790
- warm_start=warm_start
791
- )
792
- self.warm_start = warm_start
793
- self.ccp_alpha = ccp_alpha
794
- self.max_samples = max_samples
795
- self.max_bins = max_bins
796
- self.min_bin_size = min_bin_size
797
- self.min_impurity_split = None
798
- self.splitter_mode = splitter_mode
799
- else:
800
- def __init__(self,
801
- n_estimators=100, *,
802
- criterion="mse",
803
- max_depth=None,
804
- min_samples_split=2,
805
- min_samples_leaf=1,
806
- min_weight_fraction_leaf=0.,
807
- max_features="auto",
808
- max_leaf_nodes=None,
809
- min_impurity_decrease=0.,
810
- min_impurity_split=None,
811
- bootstrap=True,
812
- oob_score=False,
813
- n_jobs=None,
814
- random_state=None,
815
- verbose=0,
816
- warm_start=False,
817
- ccp_alpha=0.0,
818
- max_samples=None,
819
- max_bins=256,
820
- min_bin_size=1,
821
- splitter_mode='best'):
822
- super(RandomForestRegressor, self).__init__(
823
- n_estimators=n_estimators,
824
- criterion=criterion,
825
- max_depth=max_depth,
826
- min_samples_split=min_samples_split,
827
- min_samples_leaf=min_samples_leaf,
828
- min_weight_fraction_leaf=min_weight_fraction_leaf,
829
- max_features=max_features,
830
- max_leaf_nodes=max_leaf_nodes,
831
- min_impurity_decrease=min_impurity_decrease,
832
- min_impurity_split=min_impurity_split,
833
- bootstrap=bootstrap,
834
- oob_score=oob_score,
835
- n_jobs=n_jobs,
836
- random_state=random_state,
837
- verbose=verbose,
838
- warm_start=warm_start,
839
- ccp_alpha=ccp_alpha,
840
- max_samples=max_samples
841
- )
842
- self.warm_start = warm_start
843
- self.ccp_alpha = ccp_alpha
844
- self.max_samples = max_samples
845
- self.max_bins = max_bins
846
- self.min_bin_size = min_bin_size
847
- self.min_impurity_split = None
848
- self.splitter_mode = splitter_mode
849
-
850
- @property
851
- def _estimators_(self):
852
- if hasattr(self, '_cached_estimators_'):
853
- if self._cached_estimators_:
854
- return self._cached_estimators_
855
- if sklearn_check_version('0.22'):
856
- check_is_fitted(self)
857
- else:
858
- check_is_fitted(self, '_onedal_model')
859
- # convert model to estimators
860
- params = {
861
- 'criterion': self.criterion,
862
- 'max_depth': self.max_depth,
863
- 'min_samples_split': self.min_samples_split,
864
- 'min_samples_leaf': self.min_samples_leaf,
865
- 'min_weight_fraction_leaf': self.min_weight_fraction_leaf,
866
- 'max_features': self.max_features,
867
- 'max_leaf_nodes': self.max_leaf_nodes,
868
- 'min_impurity_decrease': self.min_impurity_decrease,
869
- 'random_state': None,
870
- }
871
- if not sklearn_check_version('1.0'):
872
- params['min_impurity_split'] = self.min_impurity_split
873
- est = DecisionTreeRegressor(**params)
874
- # we need to set est.tree_ field with Trees constructed from Intel(R)
875
- # oneAPI Data Analytics Library solution
876
- estimators_ = []
877
- random_state_checked = check_random_state(self.random_state)
878
- for i in range(self.n_estimators):
879
- est_i = clone(est)
880
- est_i.set_params(
881
- random_state=random_state_checked.randint(
882
- np.iinfo(
883
- np.int32).max))
884
- if sklearn_check_version('1.0'):
885
- est_i.n_features_in_ = self.n_features_in_
886
- else:
887
- est_i.n_features_ = self.n_features_in_
888
- est_i.n_classes_ = 1
889
- est_i.n_outputs_ = self.n_outputs_
890
- tree_i_state_class = get_tree_state_reg(
891
- self._onedal_model, i)
892
- tree_i_state_dict = {
893
- 'max_depth': tree_i_state_class.max_depth,
894
- 'node_count': tree_i_state_class.node_count,
895
- 'nodes': check_tree_nodes(tree_i_state_class.node_ar),
896
- 'values': tree_i_state_class.value_ar}
897
-
898
- est_i.tree_ = Tree(
899
- self.n_features_in_, np.array(
900
- [1], dtype=np.intp), self.n_outputs_)
901
- est_i.tree_.__setstate__(tree_i_state_dict)
902
- estimators_.append(est_i)
903
-
904
- return estimators_
905
-
906
- def _onedal_ready(self, X, y, sample_weight):
907
- # TODO:
908
- # move some common checks for both devices here.
909
-
910
- # We have to get `n_outputs_` before dispatching
911
- # oneDAL requirements: Number of outputs `n_outputs_` should be 1.
912
- y = np.asarray(y)
913
-
914
- if y.ndim == 1:
915
- # reshape is necessary to preserve the data contiguity against vs
916
- # [:, np.newaxis] that does not.
917
- y = np.reshape(y, (-1, 1))
918
- self.n_outputs_ = y.shape[1]
919
- ready = self.n_outputs_ == 1
920
- return ready, X, y, sample_weight
921
-
922
- def _onedal_cpu_supported(self, method_name, *data):
923
- if method_name == 'fit':
924
- ready, X, y, sample_weight = self._onedal_ready(*data)
925
- if self.splitter_mode == 'random':
926
- warnings.warn("'random' splitter mode supports GPU devices only "
927
- "and requires oneDAL version >= 2023.1.1. "
928
- "Using 'best' mode instead.", RuntimeWarning)
929
- self.splitter_mode = 'best'
930
- if not ready:
931
- return False
932
- elif not (self.oob_score and daal_check_version(
933
- (2021, 'P', 500)) or not self.oob_score):
934
- return False
935
- elif self.criterion not in ["mse", "squared_error"]:
936
- return False
937
- elif sp.issparse(X):
938
- return False
939
- elif sp.issparse(y):
940
- return False
941
- elif sp.issparse(sample_weight):
942
- return False
943
- elif not self.ccp_alpha == 0.0:
944
- return False
945
- elif self.warm_start:
946
- return False
947
- elif self.oob_score and not daal_check_version((2023, 'P', 101)):
948
- return False
949
- elif not self.n_outputs_ == 1:
950
- return False
951
- elif hasattr(self, 'estimators_'):
952
- return False
953
- else:
954
- return True
955
- if method_name == 'predict':
956
- if not hasattr(self, '_onedal_model'):
957
- return False
958
- elif sp.issparse(data[0]):
959
- return False
960
- elif not (hasattr(self, 'n_outputs_') and self.n_outputs_ == 1):
961
- return False
962
- elif not daal_check_version((2021, 'P', 400)):
963
- return False
964
- elif self.warm_start:
965
- return False
966
- else:
967
- return True
968
- raise RuntimeError(
969
- f'Unknown method {method_name} in {self.__class__.__name__}')
970
-
971
- def _onedal_gpu_supported(self, method_name, *data):
972
- if method_name == 'fit':
973
- ready, X, y, sample_weight = self._onedal_ready(*data)
974
- if self.splitter_mode == 'random' and \
975
- not daal_check_version((2023, 'P', 101)):
976
- warnings.warn("'random' splitter mode requires OneDAL >= 2023.1.1. "
977
- "Using 'best' mode instead.", RuntimeWarning)
978
- self.splitter_mode = 'best'
979
- if not ready:
980
- return False
981
- elif not (self.oob_score and daal_check_version(
982
- (2021, 'P', 500)) or not self.oob_score):
983
- return False
984
- elif self.criterion not in ["mse", "squared_error"]:
985
- return False
986
- elif sp.issparse(X):
987
- return False
988
- elif sp.issparse(y):
989
- return False
990
- elif sample_weight is not None: # `sample_weight` is not supported.
991
- return False
992
- elif not self.ccp_alpha == 0.0:
993
- return False
994
- elif self.warm_start:
995
- return False
996
- elif self.oob_score:
997
- return False
998
- elif hasattr(self, 'estimators_'):
999
- return False
1000
- else:
1001
- return True
1002
- if method_name == 'predict':
1003
- X = data[0]
1004
- if not hasattr(self, '_onedal_model'):
1005
- return False
1006
- elif sp.issparse(X):
1007
- return False
1008
- elif not (hasattr(self, 'n_outputs_') and self.n_outputs_ == 1):
1009
- return False
1010
- elif not daal_check_version((2021, 'P', 400)):
1011
- return False
1012
- elif self.warm_start:
1013
- return False
1014
- else:
1015
- return True
1016
- raise RuntimeError(
1017
- f'Unknown method {method_name} in {self.__class__.__name__}')
1018
-
1019
- def _onedal_fit(self, X, y, sample_weight=None, queue=None):
1020
- if sp.issparse(y):
1021
- raise ValueError(
1022
- "sparse multilabel-indicator for y is not supported."
1023
- )
1024
- if sklearn_check_version("1.2"):
1025
- self._validate_params()
1026
- else:
1027
- self._check_parameters()
1028
- if sample_weight is not None:
1029
- sample_weight = self.check_sample_weight(sample_weight, X)
1030
- if sklearn_check_version("1.0"):
1031
- self._check_feature_names(X, reset=True)
1032
- X = check_array(X, dtype=[np.float64, np.float32])
1033
- y = np.atleast_1d(np.asarray(y))
1034
- y = check_array(y, ensure_2d=False, dtype=X.dtype)
1035
- check_consistent_length(X, y)
1036
- self.n_features_in_ = X.shape[1]
1037
- if not sklearn_check_version('1.0'):
1038
- self.n_features_ = self.n_features_in_
1039
- rs_ = check_random_state(self.random_state)
1040
-
1041
- if self.oob_score:
1042
- err = 'out_of_bag_error_r2|out_of_bag_error_prediction'
1043
- else:
1044
- err = 'none'
1045
-
1046
- onedal_params = {
1047
- 'n_estimators': self.n_estimators,
1048
- 'criterion': self.criterion,
1049
- 'max_depth': self.max_depth,
1050
- 'min_samples_split': self.min_samples_split,
1051
- 'min_samples_leaf': self.min_samples_leaf,
1052
- 'min_weight_fraction_leaf': self.min_weight_fraction_leaf,
1053
- 'max_features': self.max_features,
1054
- 'max_leaf_nodes': self.max_leaf_nodes,
1055
- 'min_impurity_decrease': self.min_impurity_decrease,
1056
- 'bootstrap': self.bootstrap,
1057
- 'oob_score': self.oob_score,
1058
- 'n_jobs': self.n_jobs,
1059
- 'random_state': rs_,
1060
- 'verbose': self.verbose,
1061
- 'warm_start': self.warm_start,
1062
- 'error_metric_mode': err,
1063
- 'variable_importance_mode': 'mdi',
1064
- 'max_samples': self.max_samples
1065
- }
1066
- if daal_check_version((2023, 'P', 101)):
1067
- onedal_params['splitter_mode'] = self.splitter_mode
1068
- self._cached_estimators_ = None
1069
- self._onedal_estimator = self._onedal_regressor(**onedal_params)
1070
- self._onedal_estimator.fit(X, y, sample_weight, queue=queue)
1071
-
1072
- self._save_attributes()
1073
- if sklearn_check_version("1.2"):
1074
- self._estimator = DecisionTreeRegressor()
1075
- self.estimators_ = self._estimators_
1076
- return self
1077
-
1078
- def _onedal_predict(self, X, queue=None):
1079
- if sklearn_check_version("1.0"):
1080
- self._check_feature_names(X, reset=False)
1081
- X = self._validate_X_predict(X)
1082
- return self._onedal_estimator.predict(X, queue=queue)
1083
-
1084
- def fit(self, X, y, sample_weight=None):
1085
- """
1086
- Build a forest of trees from the training set (X, y).
1087
-
1088
- Parameters
1089
- ----------
1090
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
1091
- The training input samples. Internally, its dtype will be converted
1092
- to ``dtype=np.float32``. If a sparse matrix is provided, it will be
1093
- converted into a sparse ``csc_matrix``.
1094
-
1095
- y : array-like of shape (n_samples,) or (n_samples, n_outputs)
1096
- The target values (class labels in classification, real numbers in
1097
- regression).
1098
-
1099
- sample_weight : array-like of shape (n_samples,), default=None
1100
- Sample weights. If None, then samples are equally weighted. Splits
1101
- that would create child nodes with net zero or negative weight are
1102
- ignored while searching for a split in each node. In the case of
1103
- classification, splits are also ignored if they would result in any
1104
- single class carrying a negative weight in either child node.
1105
-
1106
- Returns
1107
- -------
1108
- self : object
1109
- """
1110
- if not self.bootstrap and self.max_samples is not None:
1111
- raise ValueError(
1112
- "`max_sample` cannot be set if `bootstrap=False`. "
1113
- "Either switch to `bootstrap=True` or set "
1114
- "`max_sample=None`."
1115
- )
1116
- dispatch(self, 'fit', {
1117
- 'onedal': self.__class__._onedal_fit,
1118
- 'sklearn': sklearn_RandomForestRegressor.fit,
1119
- }, X, y, sample_weight)
1120
- return self
1121
-
1122
- @wrap_output_data
1123
- def predict(self, X):
1124
- """
1125
- Predict class for X.
1126
-
1127
- The predicted class of an input sample is a vote by the trees in
1128
- the forest, weighted by their probability estimates. That is,
1129
- the predicted class is the one with highest mean probability
1130
- estimate across the trees.
1131
-
1132
- Parameters
1133
- ----------
1134
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
1135
- The input samples. Internally, its dtype will be converted to
1136
- ``dtype=np.float32``. If a sparse matrix is provided, it will be
1137
- converted into a sparse ``csr_matrix``.
1138
-
1139
- Returns
1140
- -------
1141
- y : ndarray of shape (n_samples,) or (n_samples, n_outputs)
1142
- The predicted classes.
1143
- """
1144
- return dispatch(self, 'predict', {
1145
- 'onedal': self.__class__._onedal_predict,
1146
- 'sklearn': sklearn_RandomForestRegressor.predict,
1147
- }, X)
1148
-
1149
- if sklearn_check_version('1.0'):
1150
- @deprecated(
1151
- "Attribute `n_features_` was deprecated in version 1.0 and will be "
1152
- "removed in 1.2. Use `n_features_in_` instead.")
1153
- @property
1154
- def n_features_(self):
1155
- return self.n_features_in_