scikit-learn-intelex 2024.0.0__py311-none-win_amd64.whl → 2024.0.1__py311-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-learn-intelex might be problematic. Click here for more details.
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/_utils.py +2 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/dispatcher.py +70 -77
- {scikit_learn_intelex-2024.0.0.data/data/Lib/site-packages/sklearnex/preview → scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex}/ensemble/__init__.py +6 -2
- scikit_learn_intelex-2024.0.0.data/data/Lib/site-packages/sklearnex/preview/ensemble/extra_trees.py → scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/ensemble/_forest.py +960 -494
- scikit_learn_intelex-2024.0.0.data/data/Lib/site-packages/sklearnex/preview/ensemble/tests/test_preview_ensemble.py → scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/ensemble/tests/test_forest.py +18 -15
- {scikit_learn_intelex-2024.0.0.data/data/Lib/site-packages/sklearnex/preview → scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex}/linear_model/linear.py +59 -12
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/tests/test_linear.py +15 -4
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/preview/__init__.py +1 -1
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/preview/cluster/k_means.py +3 -1
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/ensemble/forest.py +2 -6
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_memory_usage.py +0 -14
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_monkeypatch.py +8 -5
- {scikit_learn_intelex-2024.0.0.dist-info → scikit_learn_intelex-2024.0.1.dist-info}/METADATA +34 -35
- scikit_learn_intelex-2024.0.1.dist-info/RECORD +90 -0
- scikit_learn_intelex-2024.0.0.data/data/Lib/site-packages/sklearnex/ensemble/__init__.py +0 -20
- scikit_learn_intelex-2024.0.0.data/data/Lib/site-packages/sklearnex/ensemble/forest.py +0 -18
- scikit_learn_intelex-2024.0.0.data/data/Lib/site-packages/sklearnex/ensemble/tests/test_forest.py +0 -54
- scikit_learn_intelex-2024.0.0.data/data/Lib/site-packages/sklearnex/linear_model/linear.py +0 -17
- scikit_learn_intelex-2024.0.0.data/data/Lib/site-packages/sklearnex/preview/ensemble/forest.py +0 -1557
- scikit_learn_intelex-2024.0.0.data/data/Lib/site-packages/sklearnex/preview/linear_model/__init__.py +0 -20
- scikit_learn_intelex-2024.0.0.data/data/Lib/site-packages/sklearnex/preview/linear_model/_common.py +0 -66
- scikit_learn_intelex-2024.0.0.data/data/Lib/site-packages/sklearnex/preview/linear_model/tests/test_preview_linear.py +0 -47
- scikit_learn_intelex-2024.0.0.dist-info/RECORD +0 -98
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/__init__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/__main__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/_config.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/_device_offload.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/basic_statistics/__init__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/basic_statistics/basic_statistics.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/cluster/__init__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/cluster/dbscan.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/cluster/k_means.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/cluster/tests/test_dbscan.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/cluster/tests/test_kmeans.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/decomposition/__init__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/decomposition/pca.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/decomposition/tests/test_pca.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/doc/third-party-programs.txt +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/glob/__main__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/glob/dispatcher.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/__init__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/coordinate_descent.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/logistic_path.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/ridge.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/tests/test_logreg.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/manifold/__init__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/manifold/t_sne.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/manifold/tests/test_tsne.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/metrics/__init__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/metrics/pairwise.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/metrics/ranking.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/metrics/tests/test_metrics.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/model_selection/__init__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/model_selection/split.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/model_selection/tests/test_model_selection.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/__init__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/common.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/knn_classification.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/knn_regression.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/knn_unsupervised.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/lof.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/tests/test_neighbors.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/preview/cluster/__init__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/preview/cluster/_common.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/preview/decomposition/__init__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/preview/decomposition/pca.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/preview/decomposition/tests/test_preview_pca.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/__init__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/basic_statistics/__init__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/basic_statistics/basic_statistics.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/cluster/__init__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/cluster/dbscan.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/cluster/kmeans.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/decomposition/__init__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/decomposition/pca.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/ensemble/__init__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/linear_model/__init__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/linear_model/linear_model.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/neighbors/__init__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/neighbors/neighbors.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/svm/__init__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/svm/_common.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/svm/nusvc.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/svm/nusvr.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/svm/svc.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/svm/svr.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/svm/tests/test_svm.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/tests/_models_info.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_config.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_parallel.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_patching.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_run_to_run_stability_tests.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/tests/utils/_launch_algorithms.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/utils/__init__.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/utils/parallel.py +0 -0
- {scikit_learn_intelex-2024.0.0.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/utils/validation.py +0 -0
- {scikit_learn_intelex-2024.0.0.dist-info → scikit_learn_intelex-2024.0.1.dist-info}/LICENSE.txt +0 -0
- {scikit_learn_intelex-2024.0.0.dist-info → scikit_learn_intelex-2024.0.1.dist-info}/WHEEL +0 -0
- {scikit_learn_intelex-2024.0.0.dist-info → scikit_learn_intelex-2024.0.1.dist-info}/top_level.txt +0 -0
scikit_learn_intelex-2024.0.0.data/data/Lib/site-packages/sklearnex/preview/ensemble/forest.py
DELETED
|
@@ -1,1557 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python
|
|
2
|
-
# ===============================================================================
|
|
3
|
-
# Copyright 2021 Intel Corporation
|
|
4
|
-
#
|
|
5
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
-
# you may not use this file except in compliance with the License.
|
|
7
|
-
# You may obtain a copy of the License at
|
|
8
|
-
#
|
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
-
#
|
|
11
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
-
# See the License for the specific language governing permissions and
|
|
15
|
-
# limitations under the License.
|
|
16
|
-
# ===============================================================================
|
|
17
|
-
|
|
18
|
-
import numbers
|
|
19
|
-
import warnings
|
|
20
|
-
from abc import ABC
|
|
21
|
-
|
|
22
|
-
import numpy as np
|
|
23
|
-
from scipy import sparse as sp
|
|
24
|
-
from sklearn.base import clone
|
|
25
|
-
from sklearn.ensemble import RandomForestClassifier as sklearn_RandomForestClassifier
|
|
26
|
-
from sklearn.ensemble import RandomForestRegressor as sklearn_RandomForestRegressor
|
|
27
|
-
from sklearn.exceptions import DataConversionWarning
|
|
28
|
-
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
|
|
29
|
-
from sklearn.tree._tree import Tree
|
|
30
|
-
from sklearn.utils import check_random_state, deprecated
|
|
31
|
-
from sklearn.utils.validation import (
|
|
32
|
-
check_array,
|
|
33
|
-
check_consistent_length,
|
|
34
|
-
check_is_fitted,
|
|
35
|
-
check_X_y,
|
|
36
|
-
)
|
|
37
|
-
|
|
38
|
-
from daal4py.sklearn._utils import (
|
|
39
|
-
check_tree_nodes,
|
|
40
|
-
daal_check_version,
|
|
41
|
-
make2d,
|
|
42
|
-
sklearn_check_version,
|
|
43
|
-
)
|
|
44
|
-
from onedal.ensemble import RandomForestClassifier as onedal_RandomForestClassifier
|
|
45
|
-
from onedal.ensemble import RandomForestRegressor as onedal_RandomForestRegressor
|
|
46
|
-
from onedal.primitives import get_tree_state_cls, get_tree_state_reg
|
|
47
|
-
from onedal.utils import _num_features, _num_samples
|
|
48
|
-
|
|
49
|
-
from ..._config import get_config
|
|
50
|
-
from ..._device_offload import dispatch, wrap_output_data
|
|
51
|
-
from ..._utils import PatchingConditionsChain
|
|
52
|
-
|
|
53
|
-
if sklearn_check_version("1.2"):
|
|
54
|
-
from sklearn.utils._param_validation import Interval, StrOptions
|
|
55
|
-
if sklearn_check_version("1.4"):
|
|
56
|
-
from daal4py.sklearn.utils import _assert_all_finite
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
class BaseRandomForest(ABC):
|
|
60
|
-
def _fit_proba(self, X, y, sample_weight=None, queue=None):
|
|
61
|
-
params = self.get_params()
|
|
62
|
-
self.__class__(**params)
|
|
63
|
-
|
|
64
|
-
# We use stock metaestimators below, so the only way
|
|
65
|
-
# to pass a queue is using config_context.
|
|
66
|
-
cfg = get_config()
|
|
67
|
-
cfg["target_offload"] = queue
|
|
68
|
-
|
|
69
|
-
def _save_attributes(self):
|
|
70
|
-
self._onedal_model = self._onedal_estimator._onedal_model
|
|
71
|
-
|
|
72
|
-
if self.oob_score:
|
|
73
|
-
self.oob_score_ = self._onedal_estimator.oob_score_
|
|
74
|
-
if hasattr(self._onedal_estimator, "oob_prediction_"):
|
|
75
|
-
self.oob_prediction_ = self._onedal_estimator.oob_prediction_
|
|
76
|
-
if hasattr(self._onedal_estimator, "oob_decision_function_"):
|
|
77
|
-
self.oob_decision_function_ = (
|
|
78
|
-
self._onedal_estimator.oob_decision_function_
|
|
79
|
-
)
|
|
80
|
-
return self
|
|
81
|
-
|
|
82
|
-
def _onedal_classifier(self, **onedal_params):
|
|
83
|
-
return onedal_RandomForestClassifier(**onedal_params)
|
|
84
|
-
|
|
85
|
-
def _onedal_regressor(self, **onedal_params):
|
|
86
|
-
return onedal_RandomForestRegressor(**onedal_params)
|
|
87
|
-
|
|
88
|
-
# TODO:
|
|
89
|
-
# move to onedal modul.
|
|
90
|
-
def _check_parameters(self):
|
|
91
|
-
if not self.bootstrap and self.max_samples is not None:
|
|
92
|
-
raise ValueError(
|
|
93
|
-
"`max_sample` cannot be set if `bootstrap=False`. "
|
|
94
|
-
"Either switch to `bootstrap=True` or set "
|
|
95
|
-
"`max_sample=None`."
|
|
96
|
-
)
|
|
97
|
-
if isinstance(self.min_samples_leaf, numbers.Integral):
|
|
98
|
-
if not 1 <= self.min_samples_leaf:
|
|
99
|
-
raise ValueError(
|
|
100
|
-
"min_samples_leaf must be at least 1 "
|
|
101
|
-
"or in (0, 0.5], got %s" % self.min_samples_leaf
|
|
102
|
-
)
|
|
103
|
-
else: # float
|
|
104
|
-
if not 0.0 < self.min_samples_leaf <= 0.5:
|
|
105
|
-
raise ValueError(
|
|
106
|
-
"min_samples_leaf must be at least 1 "
|
|
107
|
-
"or in (0, 0.5], got %s" % self.min_samples_leaf
|
|
108
|
-
)
|
|
109
|
-
if isinstance(self.min_samples_split, numbers.Integral):
|
|
110
|
-
if not 2 <= self.min_samples_split:
|
|
111
|
-
raise ValueError(
|
|
112
|
-
"min_samples_split must be an integer "
|
|
113
|
-
"greater than 1 or a float in (0.0, 1.0]; "
|
|
114
|
-
"got the integer %s" % self.min_samples_split
|
|
115
|
-
)
|
|
116
|
-
else: # float
|
|
117
|
-
if not 0.0 < self.min_samples_split <= 1.0:
|
|
118
|
-
raise ValueError(
|
|
119
|
-
"min_samples_split must be an integer "
|
|
120
|
-
"greater than 1 or a float in (0.0, 1.0]; "
|
|
121
|
-
"got the float %s" % self.min_samples_split
|
|
122
|
-
)
|
|
123
|
-
if not 0 <= self.min_weight_fraction_leaf <= 0.5:
|
|
124
|
-
raise ValueError("min_weight_fraction_leaf must in [0, 0.5]")
|
|
125
|
-
if self.min_impurity_split is not None:
|
|
126
|
-
warnings.warn(
|
|
127
|
-
"The min_impurity_split parameter is deprecated. "
|
|
128
|
-
"Its default value has changed from 1e-7 to 0 in "
|
|
129
|
-
"version 0.23, and it will be removed in 0.25. "
|
|
130
|
-
"Use the min_impurity_decrease parameter instead.",
|
|
131
|
-
FutureWarning,
|
|
132
|
-
)
|
|
133
|
-
|
|
134
|
-
if self.min_impurity_split < 0.0:
|
|
135
|
-
raise ValueError(
|
|
136
|
-
"min_impurity_split must be greater than " "or equal to 0"
|
|
137
|
-
)
|
|
138
|
-
if self.min_impurity_decrease < 0.0:
|
|
139
|
-
raise ValueError(
|
|
140
|
-
"min_impurity_decrease must be greater than " "or equal to 0"
|
|
141
|
-
)
|
|
142
|
-
if self.max_leaf_nodes is not None:
|
|
143
|
-
if not isinstance(self.max_leaf_nodes, numbers.Integral):
|
|
144
|
-
raise ValueError(
|
|
145
|
-
"max_leaf_nodes must be integral number but was "
|
|
146
|
-
"%r" % self.max_leaf_nodes
|
|
147
|
-
)
|
|
148
|
-
if self.max_leaf_nodes < 2:
|
|
149
|
-
raise ValueError(
|
|
150
|
-
("max_leaf_nodes {0} must be either None " "or larger than 1").format(
|
|
151
|
-
self.max_leaf_nodes
|
|
152
|
-
)
|
|
153
|
-
)
|
|
154
|
-
if isinstance(self.max_bins, numbers.Integral):
|
|
155
|
-
if not 2 <= self.max_bins:
|
|
156
|
-
raise ValueError("max_bins must be at least 2, got %s" % self.max_bins)
|
|
157
|
-
else:
|
|
158
|
-
raise ValueError(
|
|
159
|
-
"max_bins must be integral number but was " "%r" % self.max_bins
|
|
160
|
-
)
|
|
161
|
-
if isinstance(self.min_bin_size, numbers.Integral):
|
|
162
|
-
if not 1 <= self.min_bin_size:
|
|
163
|
-
raise ValueError(
|
|
164
|
-
"min_bin_size must be at least 1, got %s" % self.min_bin_size
|
|
165
|
-
)
|
|
166
|
-
else:
|
|
167
|
-
raise ValueError(
|
|
168
|
-
"min_bin_size must be integral number but was " "%r" % self.min_bin_size
|
|
169
|
-
)
|
|
170
|
-
|
|
171
|
-
def check_sample_weight(self, sample_weight, X, dtype=None):
|
|
172
|
-
n_samples = _num_samples(X)
|
|
173
|
-
|
|
174
|
-
if dtype is not None and dtype not in [np.float32, np.float64]:
|
|
175
|
-
dtype = np.float64
|
|
176
|
-
|
|
177
|
-
if sample_weight is None:
|
|
178
|
-
sample_weight = np.ones(n_samples, dtype=dtype)
|
|
179
|
-
elif isinstance(sample_weight, numbers.Number):
|
|
180
|
-
sample_weight = np.full(n_samples, sample_weight, dtype=dtype)
|
|
181
|
-
else:
|
|
182
|
-
if dtype is None:
|
|
183
|
-
dtype = [np.float64, np.float32]
|
|
184
|
-
sample_weight = check_array(
|
|
185
|
-
sample_weight,
|
|
186
|
-
accept_sparse=False,
|
|
187
|
-
ensure_2d=False,
|
|
188
|
-
dtype=dtype,
|
|
189
|
-
order="C",
|
|
190
|
-
)
|
|
191
|
-
if sample_weight.ndim != 1:
|
|
192
|
-
raise ValueError("Sample weights must be 1D array or scalar")
|
|
193
|
-
|
|
194
|
-
if sample_weight.shape != (n_samples,):
|
|
195
|
-
raise ValueError(
|
|
196
|
-
"sample_weight.shape == {}, expected {}!".format(
|
|
197
|
-
sample_weight.shape, (n_samples,)
|
|
198
|
-
)
|
|
199
|
-
)
|
|
200
|
-
return sample_weight
|
|
201
|
-
|
|
202
|
-
@property
|
|
203
|
-
def estimators_(self):
|
|
204
|
-
if hasattr(self, "_cached_estimators_"):
|
|
205
|
-
if self._cached_estimators_ is None and self._onedal_model:
|
|
206
|
-
self._estimators_()
|
|
207
|
-
return self._cached_estimators_
|
|
208
|
-
else:
|
|
209
|
-
raise AttributeError(
|
|
210
|
-
f"'{self.__class__.__name__}' has no attribute 'estimators_'"
|
|
211
|
-
)
|
|
212
|
-
|
|
213
|
-
@estimators_.setter
|
|
214
|
-
def estimators_(self, estimators):
|
|
215
|
-
# Needed to allow for proper sklearn operation in fallback mode
|
|
216
|
-
self._cached_estimators_ = estimators
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
class RandomForestClassifier(sklearn_RandomForestClassifier, BaseRandomForest):
|
|
220
|
-
__doc__ = sklearn_RandomForestClassifier.__doc__
|
|
221
|
-
|
|
222
|
-
if sklearn_check_version("1.2"):
|
|
223
|
-
_parameter_constraints: dict = {
|
|
224
|
-
**sklearn_RandomForestClassifier._parameter_constraints,
|
|
225
|
-
"max_bins": [Interval(numbers.Integral, 2, None, closed="left")],
|
|
226
|
-
"min_bin_size": [Interval(numbers.Integral, 1, None, closed="left")],
|
|
227
|
-
"splitter_mode": [StrOptions({"best", "random"})],
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
if sklearn_check_version("1.4"):
|
|
231
|
-
|
|
232
|
-
def __init__(
|
|
233
|
-
self,
|
|
234
|
-
n_estimators=100,
|
|
235
|
-
criterion="gini",
|
|
236
|
-
max_depth=None,
|
|
237
|
-
min_samples_split=2,
|
|
238
|
-
min_samples_leaf=1,
|
|
239
|
-
min_weight_fraction_leaf=0.0,
|
|
240
|
-
max_features="sqrt",
|
|
241
|
-
max_leaf_nodes=None,
|
|
242
|
-
min_impurity_decrease=0.0,
|
|
243
|
-
bootstrap=True,
|
|
244
|
-
oob_score=False,
|
|
245
|
-
n_jobs=None,
|
|
246
|
-
random_state=None,
|
|
247
|
-
verbose=0,
|
|
248
|
-
warm_start=False,
|
|
249
|
-
class_weight=None,
|
|
250
|
-
ccp_alpha=0.0,
|
|
251
|
-
max_samples=None,
|
|
252
|
-
monotonic_cst=None,
|
|
253
|
-
max_bins=256,
|
|
254
|
-
min_bin_size=1,
|
|
255
|
-
splitter_mode="best",
|
|
256
|
-
):
|
|
257
|
-
super(RandomForestClassifier, self).__init__(
|
|
258
|
-
n_estimators=n_estimators,
|
|
259
|
-
criterion=criterion,
|
|
260
|
-
max_depth=max_depth,
|
|
261
|
-
min_samples_split=min_samples_split,
|
|
262
|
-
min_samples_leaf=min_samples_leaf,
|
|
263
|
-
min_weight_fraction_leaf=min_weight_fraction_leaf,
|
|
264
|
-
max_features=max_features,
|
|
265
|
-
max_leaf_nodes=max_leaf_nodes,
|
|
266
|
-
min_impurity_decrease=min_impurity_decrease,
|
|
267
|
-
bootstrap=bootstrap,
|
|
268
|
-
oob_score=oob_score,
|
|
269
|
-
n_jobs=n_jobs,
|
|
270
|
-
random_state=random_state,
|
|
271
|
-
verbose=verbose,
|
|
272
|
-
warm_start=warm_start,
|
|
273
|
-
class_weight=class_weight,
|
|
274
|
-
monotonic_cst=monotonic_cst,
|
|
275
|
-
)
|
|
276
|
-
self.warm_start = warm_start
|
|
277
|
-
self.ccp_alpha = ccp_alpha
|
|
278
|
-
self.max_samples = max_samples
|
|
279
|
-
self.monotonic_cst = monotonic_cst
|
|
280
|
-
self.max_bins = max_bins
|
|
281
|
-
self.min_bin_size = min_bin_size
|
|
282
|
-
self.min_impurity_split = None
|
|
283
|
-
self.splitter_mode = splitter_mode
|
|
284
|
-
|
|
285
|
-
elif sklearn_check_version("1.0"):
|
|
286
|
-
|
|
287
|
-
def __init__(
|
|
288
|
-
self,
|
|
289
|
-
n_estimators=100,
|
|
290
|
-
criterion="gini",
|
|
291
|
-
max_depth=None,
|
|
292
|
-
min_samples_split=2,
|
|
293
|
-
min_samples_leaf=1,
|
|
294
|
-
min_weight_fraction_leaf=0.0,
|
|
295
|
-
max_features="sqrt" if sklearn_check_version("1.1") else "auto",
|
|
296
|
-
max_leaf_nodes=None,
|
|
297
|
-
min_impurity_decrease=0.0,
|
|
298
|
-
bootstrap=True,
|
|
299
|
-
oob_score=False,
|
|
300
|
-
n_jobs=None,
|
|
301
|
-
random_state=None,
|
|
302
|
-
verbose=0,
|
|
303
|
-
warm_start=False,
|
|
304
|
-
class_weight=None,
|
|
305
|
-
ccp_alpha=0.0,
|
|
306
|
-
max_samples=None,
|
|
307
|
-
max_bins=256,
|
|
308
|
-
min_bin_size=1,
|
|
309
|
-
splitter_mode="best",
|
|
310
|
-
):
|
|
311
|
-
super(RandomForestClassifier, self).__init__(
|
|
312
|
-
n_estimators=n_estimators,
|
|
313
|
-
criterion=criterion,
|
|
314
|
-
max_depth=max_depth,
|
|
315
|
-
min_samples_split=min_samples_split,
|
|
316
|
-
min_samples_leaf=min_samples_leaf,
|
|
317
|
-
min_weight_fraction_leaf=min_weight_fraction_leaf,
|
|
318
|
-
max_features=max_features,
|
|
319
|
-
max_leaf_nodes=max_leaf_nodes,
|
|
320
|
-
min_impurity_decrease=min_impurity_decrease,
|
|
321
|
-
bootstrap=bootstrap,
|
|
322
|
-
oob_score=oob_score,
|
|
323
|
-
n_jobs=n_jobs,
|
|
324
|
-
random_state=random_state,
|
|
325
|
-
verbose=verbose,
|
|
326
|
-
warm_start=warm_start,
|
|
327
|
-
class_weight=class_weight,
|
|
328
|
-
)
|
|
329
|
-
self.warm_start = warm_start
|
|
330
|
-
self.ccp_alpha = ccp_alpha
|
|
331
|
-
self.max_samples = max_samples
|
|
332
|
-
self.max_bins = max_bins
|
|
333
|
-
self.min_bin_size = min_bin_size
|
|
334
|
-
self.min_impurity_split = None
|
|
335
|
-
self.splitter_mode = splitter_mode
|
|
336
|
-
# self._estimator = DecisionTreeClassifier()
|
|
337
|
-
|
|
338
|
-
else:
|
|
339
|
-
|
|
340
|
-
def __init__(
|
|
341
|
-
self,
|
|
342
|
-
n_estimators=100,
|
|
343
|
-
criterion="gini",
|
|
344
|
-
max_depth=None,
|
|
345
|
-
min_samples_split=2,
|
|
346
|
-
min_samples_leaf=1,
|
|
347
|
-
min_weight_fraction_leaf=0.0,
|
|
348
|
-
max_features="auto",
|
|
349
|
-
max_leaf_nodes=None,
|
|
350
|
-
min_impurity_decrease=0.0,
|
|
351
|
-
min_impurity_split=None,
|
|
352
|
-
bootstrap=True,
|
|
353
|
-
oob_score=False,
|
|
354
|
-
n_jobs=None,
|
|
355
|
-
random_state=None,
|
|
356
|
-
verbose=0,
|
|
357
|
-
warm_start=False,
|
|
358
|
-
class_weight=None,
|
|
359
|
-
ccp_alpha=0.0,
|
|
360
|
-
max_samples=None,
|
|
361
|
-
max_bins=256,
|
|
362
|
-
min_bin_size=1,
|
|
363
|
-
splitter_mode="best",
|
|
364
|
-
):
|
|
365
|
-
super(RandomForestClassifier, self).__init__(
|
|
366
|
-
n_estimators=n_estimators,
|
|
367
|
-
criterion=criterion,
|
|
368
|
-
max_depth=max_depth,
|
|
369
|
-
min_samples_split=min_samples_split,
|
|
370
|
-
min_samples_leaf=min_samples_leaf,
|
|
371
|
-
min_weight_fraction_leaf=min_weight_fraction_leaf,
|
|
372
|
-
max_features=max_features,
|
|
373
|
-
max_leaf_nodes=max_leaf_nodes,
|
|
374
|
-
min_impurity_decrease=min_impurity_decrease,
|
|
375
|
-
min_impurity_split=min_impurity_split,
|
|
376
|
-
bootstrap=bootstrap,
|
|
377
|
-
oob_score=oob_score,
|
|
378
|
-
n_jobs=n_jobs,
|
|
379
|
-
random_state=random_state,
|
|
380
|
-
verbose=verbose,
|
|
381
|
-
warm_start=warm_start,
|
|
382
|
-
class_weight=class_weight,
|
|
383
|
-
ccp_alpha=ccp_alpha,
|
|
384
|
-
max_samples=max_samples,
|
|
385
|
-
)
|
|
386
|
-
self.warm_start = warm_start
|
|
387
|
-
self.ccp_alpha = ccp_alpha
|
|
388
|
-
self.max_samples = max_samples
|
|
389
|
-
self.max_bins = max_bins
|
|
390
|
-
self.min_bin_size = min_bin_size
|
|
391
|
-
self.min_impurity_split = None
|
|
392
|
-
self.splitter_mode = splitter_mode
|
|
393
|
-
# self._estimator = DecisionTreeClassifier()
|
|
394
|
-
|
|
395
|
-
def fit(self, X, y, sample_weight=None):
|
|
396
|
-
"""
|
|
397
|
-
Build a forest of trees from the training set (X, y).
|
|
398
|
-
|
|
399
|
-
Parameters
|
|
400
|
-
----------
|
|
401
|
-
X : {array-like, sparse matrix} of shape (n_samples, n_features)
|
|
402
|
-
The training input samples. Internally, its dtype will be converted
|
|
403
|
-
to ``dtype=np.float32``. If a sparse matrix is provided, it will be
|
|
404
|
-
converted into a sparse ``csc_matrix``.
|
|
405
|
-
|
|
406
|
-
y : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
|
407
|
-
The target values (class labels in classification, real numbers in
|
|
408
|
-
regression).
|
|
409
|
-
|
|
410
|
-
sample_weight : array-like of shape (n_samples,), default=None
|
|
411
|
-
Sample weights. If None, then samples are equally weighted. Splits
|
|
412
|
-
that would create child nodes with net zero or negative weight are
|
|
413
|
-
ignored while searching for a split in each node. In the case of
|
|
414
|
-
classification, splits are also ignored if they would result in any
|
|
415
|
-
single class carrying a negative weight in either child node.
|
|
416
|
-
|
|
417
|
-
Returns
|
|
418
|
-
-------
|
|
419
|
-
self : object
|
|
420
|
-
"""
|
|
421
|
-
dispatch(
|
|
422
|
-
self,
|
|
423
|
-
"fit",
|
|
424
|
-
{
|
|
425
|
-
"onedal": self.__class__._onedal_fit,
|
|
426
|
-
"sklearn": sklearn_RandomForestClassifier.fit,
|
|
427
|
-
},
|
|
428
|
-
X,
|
|
429
|
-
y,
|
|
430
|
-
sample_weight,
|
|
431
|
-
)
|
|
432
|
-
return self
|
|
433
|
-
|
|
434
|
-
def _onedal_ready(self, patching_status, X, y, sample_weight):
    """
    Run the pre-dispatch checks used by the ``fit`` support queries.

    Invalid configurations raise immediately. All other restrictions are
    recorded as conditions on ``patching_status``. As a side effect,
    ``self.n_outputs_`` is set once every earlier condition has passed.

    Parameters
    ----------
    patching_status : PatchingConditionsChain
        Chain on which the conditions checked here are accumulated.

    X : array-like
        Training samples. Replaced by the result of ``check_array`` when all
        earlier conditions hold; otherwise returned as passed in.

    y : array-like
        Target values. Replaced by the result of ``make2d`` when all earlier
        conditions hold; otherwise returned as passed in.

    sample_weight : array-like or None
        Returned unchanged.

    Returns
    -------
    tuple
        ``(patching_status, X, y, sample_weight)``.

    Raises
    ------
    ValueError
        If ``y`` is sparse, or if ``max_samples`` or ``oob_score`` is
        requested while ``bootstrap`` is false.
    """
    if sp.issparse(y):
        raise ValueError("sparse multilabel-indicator for y is not supported.")
    if not self.bootstrap:
        if self.max_samples is not None:
            raise ValueError(
                "`max_sample` cannot be set if `bootstrap=False`. "
                "Either switch to `bootstrap=True` or set "
                "`max_sample=None`."
            )
        if self.oob_score:
            raise ValueError("Out of bag estimation only available if bootstrap=True")

    if sklearn_check_version("1.2"):
        self._validate_params()
    else:
        self._check_parameters()

    patching_status.and_conditions(
        [
            (
                # The oneDAL version only matters when OOB scoring is requested.
                not self.oob_score or daal_check_version((2021, "P", 500)),
                "OOB score is only supported starting from 2021.5 version of oneDAL.",
            ),
            (not sp.issparse(X), "X is sparse. Sparse input is not supported."),
            (
                self.ccp_alpha == 0.0,
                f"Non-zero 'ccp_alpha' ({self.ccp_alpha}) is not supported.",
            ),
            (
                self.criterion == "gini",
                f"'{self.criterion}' criterion is not supported. "
                "Only 'gini' criterion is supported.",
            ),
            (self.warm_start is False, "Warm start is not supported."),
            (
                self.n_estimators <= 6024,
                "More than 6024 estimators is not supported.",
            ),
            (
                # `monotonic_cst` is read with getattr because not every
                # constructor variant defines it.
                getattr(self, "monotonic_cst", None) is None,
                "Monotonic constraints are not supported.",
            ),
            (
                self.class_weight != "balanced_subsample",
                f"'{self.class_weight}' `class_weight` is not supported. ",
            ),
        ]
    )

    if patching_status.get_status():
        if sklearn_check_version("1.4"):
            # Non-finite X is reported as an unmet condition, not an error.
            try:
                _assert_all_finite(X)
            except ValueError:
                correct_finiteness = False
            else:
                correct_finiteness = True
        else:
            correct_finiteness = True

        patching_status.and_conditions(
            [
                (
                    correct_finiteness,
                    "Non-correct X finiteness for sklearn v1.4.",
                ),
            ]
        )

    # Checked again: the finiteness condition above may have changed the status.
    if patching_status.get_status():
        if sklearn_check_version("1.0"):
            self._check_feature_names(X, reset=True)
        X = check_array(X, dtype=[np.float32, np.float64])
        y = np.atleast_1d(np.asarray(y))
        if y.ndim == 2 and y.shape[1] == 1:
            warnings.warn(
                "A column-vector y was passed when a 1d array was"
                " expected. Please change the shape of y to "
                "(n_samples,), for example using ravel().",
                DataConversionWarning,
                stacklevel=2,
            )
        check_consistent_length(X, y)
        y = make2d(y)
        self.n_outputs_ = y.shape[1]
        # TODO: Fix to support integers as input
        patching_status.and_conditions(
            [
                (
                    self.n_outputs_ == 1,
                    f"Number of outputs ({self.n_outputs_}) is not 1.",
                ),
                (
                    y.dtype in [np.float32, np.float64, np.int32, np.int64],
                    f"Datatype ({y.dtype}) for y is not supported.",
                ),
            ]
        )

    return patching_status, X, y, sample_weight
|
|
535
|
-
|
|
536
|
-
@wrap_output_data
def predict(self, X):
    """
    Predict class for X.

    The call is routed through ``dispatch``, which is handed this class's
    ``_onedal_predict`` and ``sklearn_RandomForestClassifier.predict`` as
    candidate implementations.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples.

    Returns
    -------
    y : ndarray of shape (n_samples,) or (n_samples, n_outputs)
        The predicted classes.
    """
    backends = {
        "onedal": self.__class__._onedal_predict,
        "sklearn": sklearn_RandomForestClassifier.predict,
    }
    return dispatch(self, "predict", backends, X)
|
|
567
|
-
|
|
568
|
-
@wrap_output_data
def predict_proba(self, X):
    """
    Predict class probabilities for X.

    Before dispatching, the feature names (scikit-learn >= 1.0) and the
    number of features of ``X`` are compared against what was recorded on
    the estimator.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples.

    Returns
    -------
    p : ndarray of shape (n_samples, n_classes), or a list of n_outputs
        such arrays if n_outputs > 1.
        The class probabilities of the input samples. The order of the
        classes corresponds to that in the attribute :term:`classes_`.

    Raises
    ------
    ValueError
        If ``n_features_in_`` is set and differs from the feature count
        measured on ``X``.
    """
    # TODO:
    # self._check_proba()
    if sklearn_check_version("1.0"):
        self._check_feature_names(X, reset=False)

    if hasattr(self, "n_features_in_"):
        expected = self.n_features_in_
        try:
            seen = _num_features(X)
        except TypeError:
            # When _num_features rejects X, the sample count is used instead.
            seen = _num_samples(X)
        if seen != expected:
            raise ValueError(
                f"X has {seen} features, "
                "but RandomForestClassifier is expecting "
                f"{expected} features as input"
            )

    backends = {
        "onedal": self.__class__._onedal_predict_proba,
        "sklearn": sklearn_RandomForestClassifier.predict_proba,
    }
    return dispatch(self, "predict_proba", backends, X)
|
|
619
|
-
|
|
620
|
-
if sklearn_check_version("1.0"):

    # Legacy alias: `n_features_` simply forwards to `n_features_in_`.
    # It is only defined for scikit-learn >= 1.0 and is wrapped by
    # `deprecated` with the message below.
    @deprecated(
        "Attribute `n_features_` was deprecated in version 1.0 and will be "
        "removed in 1.2. "
        "Use `n_features_in_` instead."
    )
    @property
    def n_features_(self):
        return self.n_features_in_
|
|
629
|
-
|
|
630
|
-
def _estimators_(self):
    """
    Rebuild one ``DecisionTreeClassifier`` per tree of the oneDAL model.

    Each estimator is a clone of a template configured from this forest's
    hyper-parameters, given its own random seed, and populated with a
    ``Tree`` whose state comes from ``get_tree_state_cls``. The resulting
    list is stored in ``self._cached_estimators_``; nothing is returned.
    """
    check_is_fitted(self, "_onedal_model")
    # NOTE(review): this indexes `classes_` as if it were still a per-output
    # list, while `_onedal_fit` replaces it with its first element - confirm.
    classes_ = self.classes_[0]
    if isinstance(self.n_classes_, int):
        n_classes_ = self.n_classes_
    else:
        n_classes_ = self.n_classes_[0]

    # Hyper-parameters copied onto every per-tree estimator.
    tree_params = dict(
        criterion=self.criterion,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        min_weight_fraction_leaf=self.min_weight_fraction_leaf,
        max_features=self.max_features,
        max_leaf_nodes=self.max_leaf_nodes,
        min_impurity_decrease=self.min_impurity_decrease,
        random_state=None,
    )
    if not sklearn_check_version("1.0"):
        tree_params["min_impurity_split"] = self.min_impurity_split
    template = DecisionTreeClassifier(**tree_params)

    # est.tree_ has to be filled with Trees built from the Intel(R) oneAPI
    # Data Analytics Library solution.
    rng = check_random_state(self.random_state)
    built = []
    for tree_idx in range(self.n_estimators):
        tree_est = clone(template)
        tree_est.set_params(random_state=rng.randint(np.iinfo(np.int32).max))
        if sklearn_check_version("1.0"):
            tree_est.n_features_in_ = self.n_features_in_
        else:
            tree_est.n_features_ = self.n_features_in_
        tree_est.n_outputs_ = self.n_outputs_
        tree_est.classes_ = classes_
        tree_est.n_classes_ = n_classes_

        state = get_tree_state_cls(self._onedal_model, tree_idx, n_classes_)
        tree_est.tree_ = Tree(
            self.n_features_in_,
            np.array([n_classes_], dtype=np.intp),
            self.n_outputs_,
        )
        tree_est.tree_.__setstate__(
            {
                "max_depth": state.max_depth,
                "node_count": state.node_count,
                "nodes": check_tree_nodes(state.node_ar),
                "values": state.value_ar,
            }
        )
        built.append(tree_est)

    self._cached_estimators_ = built
|
|
684
|
-
|
|
685
|
-
def _onedal_cpu_supported(self, method_name, *data):
    """
    Collect the conditions for running ``method_name`` via oneDAL on CPU.

    Parameters
    ----------
    method_name : str
        One of ``"fit"``, ``"predict"`` or ``"predict_proba"``.

    *data
        The positional arguments of the method being queried. For ``fit``
        they are forwarded to ``_onedal_ready``; for the prediction methods
        only the first one (``X``) is inspected.

    Returns
    -------
    PatchingConditionsChain
        The chain with every checked condition recorded on it.

    Raises
    ------
    RuntimeError
        If ``method_name`` is not one of the names listed above.

    Notes
    -----
    For ``fit``, a ``splitter_mode`` of ``"random"`` triggers a
    ``RuntimeWarning`` and is overwritten with ``"best"`` on the instance.
    """
    class_name = self.__class__.__name__
    patching_status = PatchingConditionsChain(
        f"sklearn.ensemble.{class_name}.{method_name}"
    )

    if method_name == "fit":
        patching_status, X, y, sample_weight = self._onedal_ready(
            patching_status, *data
        )
        if self.splitter_mode == "random":
            warnings.warn(
                "'random' splitter mode supports GPU devices only "
                "and requires oneDAL version >= 2023.1.1. "
                "Using 'best' mode instead.",
                RuntimeWarning,
            )
            self.splitter_mode = "best"
        # `n_outputs_` is only set by `_onedal_ready` when the status is
        # still positive, so the remaining checks are guarded by it.
        if patching_status.get_status():
            oob_ok = not self.oob_score or daal_check_version((2023, "P", 101))
            fit_conditions = [
                (
                    oob_ok,
                    "OOB score is only supported starting from 2023.1.1 version of oneDAL.",
                ),
                (not sp.issparse(X), "X is sparse. Sparse input is not supported."),
                (not sp.issparse(y), "y is sparse. Sparse input is not supported."),
                (
                    not sp.issparse(sample_weight),
                    "`sample_weight` is sparse. Sparse input is not supported.",
                ),
                (
                    self.ccp_alpha == 0.0,
                    f"Non-zero 'ccp_alpha' ({self.ccp_alpha}) is not supported.",
                ),
                (self.warm_start is False, "Warm start is not supported."),
                (
                    self.n_estimators <= 6024,
                    "More than 6024 estimators is not supported.",
                ),
                (
                    self.n_outputs_ == 1,
                    f"Number of outputs ({self.n_outputs_}) is not 1.",
                ),
            ]
            patching_status.and_conditions(fit_conditions)
        return patching_status

    if method_name not in ["predict", "predict_proba"]:
        raise RuntimeError(
            f"Unknown method {method_name} in {self.__class__.__name__}"
        )

    X = data[0]
    inference_conditions = [
        (hasattr(self, "_onedal_model"), "oneDAL model was not trained."),
        (not sp.issparse(X), "X is sparse. Sparse input is not supported."),
        (self.warm_start is False, "Warm start is not supported."),
        (
            daal_check_version((2021, "P", 400)),
            "RandomForestClassifier inference only supported starting from oneDAL version 2021.4",
        ),
    ]
    patching_status.and_conditions(inference_conditions)
    if hasattr(self, "n_outputs_"):
        patching_status.and_conditions(
            [
                (
                    self.n_outputs_ == 1,
                    f"Number of outputs ({self.n_outputs_}) is not 1.",
                ),
            ]
        )
    return patching_status
|
|
768
|
-
|
|
769
|
-
def _onedal_gpu_supported(self, method_name, *data):
    """
    Collect the conditions for running ``method_name`` via oneDAL on GPU.

    Parameters
    ----------
    method_name : str
        One of ``"fit"``, ``"predict"`` or ``"predict_proba"``.

    *data
        The positional arguments of the method being queried. For ``fit``
        they are forwarded to ``_onedal_ready``; for the prediction methods
        only the first one (``X``) is inspected.

    Returns
    -------
    PatchingConditionsChain
        The chain with every checked condition recorded on it.

    Raises
    ------
    RuntimeError
        If ``method_name`` is not one of the names listed above.

    Notes
    -----
    For ``fit``, a ``splitter_mode`` of ``"random"`` combined with a oneDAL
    older than 2023.1.1 triggers a ``RuntimeWarning`` and is overwritten
    with ``"best"`` on the instance.
    """
    class_name = self.__class__.__name__
    patching_status = PatchingConditionsChain(
        f"sklearn.ensemble.{class_name}.{method_name}"
    )
    if method_name == "fit":
        patching_status, X, y, sample_weight = self._onedal_ready(
            patching_status, *data
        )
        if self.splitter_mode == "random" and not daal_check_version(
            (2023, "P", 101)
        ):
            warnings.warn(
                "'random' splitter mode requires OneDAL >= 2023.1.1. "
                "Using 'best' mode instead.",
                RuntimeWarning,
            )
            self.splitter_mode = "best"

        # `n_outputs_` is only set by `_onedal_ready` when the status is
        # still positive, so the remaining checks are guarded by it.
        if patching_status.get_status():
            patching_status.and_conditions(
                [
                    (
                        not self.oob_score,
                        "OOB score is not supported.",
                    ),
                    (
                        not sp.issparse(X),
                        "X is sparse. Sparse input is not supported.",
                    ),
                    (
                        not sp.issparse(y),
                        "y is sparse. Sparse input is not supported.",
                    ),
                    (
                        # Identity test, not truthiness: `not sample_weight`
                        # raises ValueError for a multi-element NumPy array.
                        sample_weight is None,
                        "`sample_weight` is not supported.",
                    ),
                    (
                        self.ccp_alpha == 0.0,
                        f"Non-zero 'ccp_alpha' ({self.ccp_alpha}) is not supported.",
                    ),
                    (self.warm_start is False, "Warm start is not supported."),
                    (
                        self.n_estimators <= 6024,
                        "More than 6024 estimators is not supported.",
                    ),
                    (
                        self.n_outputs_ == 1,
                        f"Number of outputs ({self.n_outputs_}) is not 1.",
                    ),
                ]
            )
    elif method_name in ["predict", "predict_proba"]:
        X = data[0]

        patching_status.and_conditions(
            [
                (hasattr(self, "_onedal_model"), "oneDAL model was not trained."),
                (not sp.issparse(X), "X is sparse. Sparse input is not supported."),
                (self.warm_start is False, "Warm start is not supported."),
                (
                    daal_check_version((2021, "P", 400)),
                    "RandomForestClassifier inference only supported starting from oneDAL version 2021.4",
                ),
            ]
        )
        if hasattr(self, "n_outputs_"):
            patching_status.and_conditions(
                [
                    (
                        self.n_outputs_ == 1,
                        f"Number of outputs ({self.n_outputs_}) is not 1.",
                    ),
                ]
            )
    else:
        raise RuntimeError(
            f"Unknown method {method_name} in {self.__class__.__name__}"
        )

    return patching_status
|
|
849
|
-
|
|
850
|
-
def _onedal_fit(self, X, y, sample_weight=None, queue=None):
    """
    Fit the forest with the oneDAL classifier backend.

    Parameters
    ----------
    X : array-like
        Training samples; validated as dense float64/float32 data.

    y : array-like
        Class labels; a 1d ``y`` is reshaped to a single column before
        ``_validate_y_class_weight`` is applied.

    sample_weight : array-like, default=None
        Sample weights. When class weights are expanded they are multiplied
        into these weights (or used alone if no weights were given).

    queue : object, default=None
        Passed through to the oneDAL estimator's ``fit``.

    Returns
    -------
    self : object
        This estimator instance.

    Raises
    ------
    ValueError
        If fewer than two classes are present.
    """
    accepted_dtypes = [np.float64, np.float32]
    if sklearn_check_version("1.2"):
        X, y = self._validate_data(
            X,
            y,
            multi_output=False,
            accept_sparse=False,
            dtype=accepted_dtypes,
            force_all_finite=not sklearn_check_version("1.4"),
        )
    else:
        X, y = check_X_y(
            X,
            y,
            accept_sparse=False,
            dtype=accepted_dtypes,
            multi_output=False,
        )

    if sample_weight is not None:
        sample_weight = self.check_sample_weight(sample_weight, X)

    y = np.atleast_1d(y)
    if y.ndim == 2 and y.shape[1] == 1:
        warnings.warn(
            "A column-vector y was passed when a 1d array was"
            " expected. Please change the shape of y to "
            "(n_samples,), for example using ravel().",
            DataConversionWarning,
            stacklevel=2,
        )
    if y.ndim == 1:
        # reshape (rather than [:, np.newaxis]) keeps the data contiguous.
        y = np.reshape(y, (-1, 1))

    y, expanded_class_weight = self._validate_y_class_weight(y)

    n_classes_ = self.n_classes_[0]
    self.n_features_in_ = X.shape[1]
    if not sklearn_check_version("1.0"):
        self.n_features_ = self.n_features_in_

    if expanded_class_weight is not None:
        if sample_weight is None:
            sample_weight = expanded_class_weight
        else:
            sample_weight = sample_weight * expanded_class_weight
    if sample_weight is not None:
        sample_weight = [sample_weight]

    if n_classes_ < 2:
        raise ValueError("Training data only contain information about one class.")

    err = (
        "out_of_bag_error_accuracy|out_of_bag_error_decision_function"
        if self.oob_score
        else "none"
    )

    onedal_params = dict(
        n_estimators=self.n_estimators,
        criterion=self.criterion,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        min_weight_fraction_leaf=self.min_weight_fraction_leaf,
        max_features=self.max_features,
        max_leaf_nodes=self.max_leaf_nodes,
        min_impurity_decrease=self.min_impurity_decrease,
        min_impurity_split=self.min_impurity_split,
        bootstrap=self.bootstrap,
        oob_score=self.oob_score,
        n_jobs=self.n_jobs,
        random_state=self.random_state,
        verbose=self.verbose,
        warm_start=self.warm_start,
        error_metric_mode=err,
        variable_importance_mode="mdi",
        class_weight=self.class_weight,
        max_bins=self.max_bins,
        min_bin_size=self.min_bin_size,
        max_samples=self.max_samples,
    )
    # `splitter_mode` is only forwarded for oneDAL >= 2023.1.1.
    if daal_check_version((2023, "P", 101)):
        onedal_params["splitter_mode"] = self.splitter_mode

    # estimators_ are evaluated lazily, so reset the cache.
    self._cached_estimators_ = None

    # Compute
    self._onedal_estimator = self._onedal_classifier(**onedal_params)
    self._onedal_estimator.fit(X, y, sample_weight, queue=queue)

    self._save_attributes()
    if sklearn_check_version("1.2"):
        self._estimator = DecisionTreeClassifier()

    # Unwrap the single-output containers.
    self.n_classes_ = self.n_classes_[0]
    self.classes_ = self.classes_[0]
    return self
|
|
951
|
-
|
|
952
|
-
def _onedal_predict(self, X, queue=None):
    """
    Predict class labels with the oneDAL estimator.

    ``X`` is validated with ``check_array`` before the fitted-state check.
    The backend output is flattened, cast to int64, and used as indices
    into ``self.classes_``.

    Parameters
    ----------
    X : array-like
        Input samples.

    queue : object, default=None
        Passed through to the oneDAL estimator's ``predict``.

    Returns
    -------
    ndarray
        Entries of ``self.classes_`` selected by the backend output.
    """
    X = check_array(X, dtype=[np.float32, np.float64])
    check_is_fitted(self, "_onedal_model")

    if sklearn_check_version("1.0"):
        self._check_feature_names(X, reset=False)

    raw = self._onedal_estimator.predict(X, queue=queue)
    class_idx = raw.ravel().astype(np.int64, casting="unsafe")
    return np.take(self.classes_, class_idx)
|
|
961
|
-
|
|
962
|
-
def _onedal_predict_proba(self, X, queue=None):
    """
    Predict class probabilities with the oneDAL estimator.

    ``X`` is validated with ``check_array`` before the fitted-state check,
    followed by the version-dependent feature-count and feature-name checks.

    Parameters
    ----------
    X : array-like
        Input samples.

    queue : object, default=None
        Passed through to the oneDAL estimator's ``predict_proba``.

    Returns
    -------
    object
        Whatever the oneDAL estimator's ``predict_proba`` returns.
    """
    X = check_array(X, dtype=[np.float64, np.float32])
    check_is_fitted(self, "_onedal_model")

    if sklearn_check_version("0.23"):
        self._check_n_features(X, reset=False)
    if sklearn_check_version("1.0"):
        self._check_feature_names(X, reset=False)

    probabilities = self._onedal_estimator.predict_proba(X, queue=queue)
    return probabilities
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
class RandomForestRegressor(sklearn_RandomForestRegressor, BaseRandomForest):
|
|
974
|
-
__doc__ = sklearn_RandomForestRegressor.__doc__
|
|
975
|
-
|
|
976
|
-
if sklearn_check_version("1.2"):
|
|
977
|
-
_parameter_constraints: dict = {
|
|
978
|
-
**sklearn_RandomForestRegressor._parameter_constraints,
|
|
979
|
-
"max_bins": [Interval(numbers.Integral, 2, None, closed="left")],
|
|
980
|
-
"min_bin_size": [Interval(numbers.Integral, 1, None, closed="left")],
|
|
981
|
-
"splitter_mode": [StrOptions({"best", "random"})],
|
|
982
|
-
}
|
|
983
|
-
|
|
984
|
-
# Three constructor variants, one per supported scikit-learn signature. All of
# them add the oneDAL-specific `max_bins`, `min_bin_size` and `splitter_mode`
# parameters and set `min_impurity_split` to None on the instance.
if sklearn_check_version("1.4"):

    def __init__(
        self,
        n_estimators=100,
        *,
        criterion="squared_error",
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features=1.0,
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        bootstrap=True,
        oob_score=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
        warm_start=False,
        ccp_alpha=0.0,
        max_samples=None,
        monotonic_cst=None,
        max_bins=256,
        min_bin_size=1,
        splitter_mode="best",
    ):
        # scikit-learn >= 1.4: the signature includes `monotonic_cst`.
        # `ccp_alpha` and `max_samples` are not forwarded to the parent
        # initializer here; they are stored directly below.
        super().__init__(
            n_estimators=n_estimators,
            criterion=criterion,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=min_weight_fraction_leaf,
            max_features=max_features,
            max_leaf_nodes=max_leaf_nodes,
            min_impurity_decrease=min_impurity_decrease,
            bootstrap=bootstrap,
            oob_score=oob_score,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose,
            warm_start=warm_start,
            monotonic_cst=monotonic_cst,
        )
        self.warm_start = warm_start
        self.ccp_alpha = ccp_alpha
        self.max_samples = max_samples
        self.monotonic_cst = monotonic_cst
        self.max_bins = max_bins
        self.min_bin_size = min_bin_size
        self.min_impurity_split = None
        self.splitter_mode = splitter_mode

elif sklearn_check_version("1.0"):

    def __init__(
        self,
        n_estimators=100,
        *,
        criterion="squared_error",
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features=1.0 if sklearn_check_version("1.1") else "auto",
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        bootstrap=True,
        oob_score=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
        warm_start=False,
        ccp_alpha=0.0,
        max_samples=None,
        max_bins=256,
        min_bin_size=1,
        splitter_mode="best",
    ):
        # scikit-learn 1.0 - 1.3: no `monotonic_cst`; the `max_features`
        # default depends on whether the installed version is >= 1.1.
        # `ccp_alpha` and `max_samples` are stored directly below rather
        # than forwarded to the parent initializer.
        super().__init__(
            n_estimators=n_estimators,
            criterion=criterion,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=min_weight_fraction_leaf,
            max_features=max_features,
            max_leaf_nodes=max_leaf_nodes,
            min_impurity_decrease=min_impurity_decrease,
            bootstrap=bootstrap,
            oob_score=oob_score,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose,
            warm_start=warm_start,
        )
        self.warm_start = warm_start
        self.ccp_alpha = ccp_alpha
        self.max_samples = max_samples
        self.max_bins = max_bins
        self.min_bin_size = min_bin_size
        self.min_impurity_split = None
        self.splitter_mode = splitter_mode

else:

    def __init__(
        self,
        n_estimators=100,
        *,
        criterion="mse",
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features="auto",
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        min_impurity_split=None,
        bootstrap=True,
        oob_score=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
        warm_start=False,
        ccp_alpha=0.0,
        max_samples=None,
        max_bins=256,
        min_bin_size=1,
        splitter_mode="best",
    ):
        # scikit-learn < 1.0: the signature still has `min_impurity_split`
        # and uses the "mse" criterion name.
        super().__init__(
            n_estimators=n_estimators,
            criterion=criterion,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=min_weight_fraction_leaf,
            max_features=max_features,
            max_leaf_nodes=max_leaf_nodes,
            min_impurity_decrease=min_impurity_decrease,
            min_impurity_split=min_impurity_split,
            bootstrap=bootstrap,
            oob_score=oob_score,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose,
            warm_start=warm_start,
            ccp_alpha=ccp_alpha,
            max_samples=max_samples,
        )
        self.warm_start = warm_start
        self.ccp_alpha = ccp_alpha
        self.max_samples = max_samples
        self.max_bins = max_bins
        self.min_bin_size = min_bin_size
        # NOTE(review): this overwrites the `min_impurity_split` argument
        # that was just forwarded to the parent - confirm this is intended.
        self.min_impurity_split = None
        self.splitter_mode = splitter_mode
|
|
1143
|
-
|
|
1144
|
-
def _estimators_(self):
    """
    Rebuild one ``DecisionTreeRegressor`` per tree of the oneDAL model.

    Each estimator is a clone of a template configured from this forest's
    hyper-parameters, given its own random seed, and populated with a
    ``Tree`` whose state comes from ``get_tree_state_reg``. The resulting
    list is stored in ``self._cached_estimators_``; nothing is returned.
    """
    check_is_fitted(self, "_onedal_model")

    # Hyper-parameters copied onto every per-tree estimator.
    tree_params = dict(
        criterion=self.criterion,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        min_weight_fraction_leaf=self.min_weight_fraction_leaf,
        max_features=self.max_features,
        max_leaf_nodes=self.max_leaf_nodes,
        min_impurity_decrease=self.min_impurity_decrease,
        random_state=None,
    )
    if not sklearn_check_version("1.0"):
        tree_params["min_impurity_split"] = self.min_impurity_split
    template = DecisionTreeRegressor(**tree_params)

    # est.tree_ has to be filled with Trees built from the Intel(R) oneAPI
    # Data Analytics Library solution.
    rng = check_random_state(self.random_state)
    built = []
    for tree_idx in range(self.n_estimators):
        tree_est = clone(template)
        tree_est.set_params(random_state=rng.randint(np.iinfo(np.int32).max))
        if sklearn_check_version("1.0"):
            tree_est.n_features_in_ = self.n_features_in_
        else:
            tree_est.n_features_ = self.n_features_in_
        tree_est.n_classes_ = 1
        tree_est.n_outputs_ = self.n_outputs_

        state = get_tree_state_reg(self._onedal_model, tree_idx)
        tree_est.tree_ = Tree(
            self.n_features_in_,
            np.array([1], dtype=np.intp),
            self.n_outputs_,
        )
        tree_est.tree_.__setstate__(
            {
                "max_depth": state.max_depth,
                "node_count": state.node_count,
                "nodes": check_tree_nodes(state.node_ar),
                "values": state.value_ar,
            }
        )
        built.append(tree_est)

    self._cached_estimators_ = built
|
|
1191
|
-
|
|
1192
|
-
def _onedal_ready(self, patching_status, X, y, sample_weight):
    """
    Run the pre-dispatch checks used by the ``fit`` support queries.

    ``y`` is converted to an array (and to a single column when 1d) so that
    ``self.n_outputs_`` can be set before dispatching. Restrictions are
    recorded as conditions on ``patching_status``.

    Parameters
    ----------
    patching_status : PatchingConditionsChain
        Chain on which the conditions checked here are accumulated.

    X : array-like
        Training samples; returned unchanged.

    y : array-like
        Target values; returned as an array with at least two dimensions.

    sample_weight : array-like or None
        Returned unchanged.

    Returns
    -------
    tuple
        ``(patching_status, X, y, sample_weight)``.
    """
    # TODO:
    # move some common checks for both devices here.

    # `n_outputs_` is needed before dispatching because oneDAL requires
    # exactly one output.
    y = np.asarray(y)
    if y.ndim == 1:
        # reshape (rather than [:, np.newaxis]) keeps the data contiguous.
        y = np.reshape(y, (-1, 1))
    self.n_outputs_ = y.shape[1]

    basic_conditions = [
        (
            self.n_outputs_ == 1,
            f"Number of outputs ({self.n_outputs_}) is not 1.",
        ),
        (not sp.issparse(X), "X is sparse. Sparse input is not supported."),
    ]
    patching_status.and_conditions(basic_conditions)

    if sklearn_check_version("1.4"):
        # Non-finite X is reported as an unmet condition, not an error.
        try:
            _assert_all_finite(X)
        except ValueError:
            x_is_finite = False
        else:
            x_is_finite = True
        patching_status.and_conditions(
            [
                (
                    x_is_finite,
                    "Non-correct X finiteness for sklearn v1.4.",
                ),
            ]
        )
    return patching_status, X, y, sample_weight
|
|
1230
|
-
|
|
1231
|
-
def _onedal_cpu_supported(self, method_name, *data):
    """
    Collect the conditions for running ``method_name`` via oneDAL on CPU.

    Parameters
    ----------
    method_name : str
        Either ``"fit"`` or ``"predict"``.

    *data
        The positional arguments of the method being queried. For ``fit``
        they are forwarded to ``_onedal_ready``; for ``predict`` only the
        first one (``X``) is inspected.

    Returns
    -------
    PatchingConditionsChain
        The chain with every checked condition recorded on it.

    Raises
    ------
    RuntimeError
        If ``method_name`` is neither ``"fit"`` nor ``"predict"``.

    Notes
    -----
    For ``fit``, a ``splitter_mode`` of ``"random"`` triggers a
    ``RuntimeWarning`` and is overwritten with ``"best"`` on the instance.
    """
    class_name = self.__class__.__name__
    patching_status = PatchingConditionsChain(
        f"sklearn.ensemble.{class_name}.{method_name}"
    )

    if method_name == "fit":
        patching_status, X, y, sample_weight = self._onedal_ready(
            patching_status, *data
        )
        if self.splitter_mode == "random":
            warnings.warn(
                "'random' splitter mode supports GPU devices only "
                "and requires oneDAL version >= 2023.1.1. "
                "Using 'best' mode instead.",
                RuntimeWarning,
            )
            self.splitter_mode = "best"

        oob_ok = not self.oob_score or daal_check_version((2023, "P", 101))
        fit_conditions = [
            (
                oob_ok,
                "OOB score is only supported starting from 2023.1.1 version of oneDAL.",
            ),
            (
                self.criterion in ["mse", "squared_error"],
                f"'{self.criterion}' criterion is not supported. "
                "Only 'mse' and 'squared_error' criterions are supported.",
            ),
            (not sp.issparse(X), "X is sparse. Sparse input is not supported."),
            (not sp.issparse(y), "y is sparse. Sparse input is not supported."),
            (
                not sp.issparse(sample_weight),
                "`sample_weight` is sparse. Sparse input is not supported.",
            ),
            (
                self.ccp_alpha == 0.0,
                f"Non-zero 'ccp_alpha' ({self.ccp_alpha}) is not supported.",
            ),
            (self.warm_start is False, "Warm start is not supported."),
            (
                self.n_estimators <= 6024,
                "More than 6024 estimators is not supported.",
            ),
            (
                self.n_outputs_ == 1,
                f"Number of outputs ({self.n_outputs_}) is not 1.",
            ),
            (
                # `monotonic_cst` is read with getattr because not every
                # constructor variant defines it.
                getattr(self, "monotonic_cst", None) is None,
                "Monotonic constrains are not supported.",
            ),
        ]
        patching_status.and_conditions(fit_conditions)
        return patching_status

    if method_name != "predict":
        raise RuntimeError(
            f"Unknown method {method_name} in {self.__class__.__name__}"
        )

    X = data[0]
    inference_conditions = [
        (hasattr(self, "_onedal_model"), "oneDAL model was not trained."),
        (not sp.issparse(X), "X is sparse. Sparse input is not supported."),
        (self.warm_start is False, "Warm start is not supported."),
        (
            daal_check_version((2021, "P", 400)),
            "RandomForestRegressor inference only supported starting from oneDAL version 2021.4",
        ),
    ]
    patching_status.and_conditions(inference_conditions)
    if hasattr(self, "n_outputs_"):
        patching_status.and_conditions(
            [
                (
                    self.n_outputs_ == 1,
                    f"Number of outputs ({self.n_outputs_}) is not 1.",
                ),
            ]
        )
    return patching_status
|
|
1315
|
-
|
|
1316
|
-
def _onedal_gpu_supported(self, method_name, *data):
    """Collect the conditions required to run ``method_name`` on GPU via oneDAL.

    Parameters
    ----------
    method_name : str
        Name of the dispatched method, ``"fit"`` or ``"predict"``.
    *data : tuple
        Positional arguments of the dispatched call. For ``"fit"`` they are
        forwarded to ``self._onedal_ready``; for ``"predict"`` the first
        element is used as ``X``.

    Returns
    -------
    patching_status : PatchingConditionsChain
        Chain populated with the ``(requirement, message)`` pairs below.

    Raises
    ------
    RuntimeError
        If ``method_name`` is neither ``"fit"`` nor ``"predict"``.

    Notes
    -----
    When ``splitter_mode == "random"`` and the oneDAL version check fails,
    this method emits a ``RuntimeWarning`` and resets ``self.splitter_mode``
    to ``"best"``.
    """
    class_name = self.__class__.__name__
    patching_status = PatchingConditionsChain(
        f"sklearn.ensemble.{class_name}.{method_name}"
    )
    if method_name == "fit":
        patching_status, X, y, sample_weight = self._onedal_ready(
            patching_status, *data
        )
        if self.splitter_mode == "random" and not daal_check_version(
            (2023, "P", 101)
        ):
            warnings.warn(
                "'random' splitter mode requires OneDAL >= 2023.1.1. "
                "Using 'best' mode instead.",
                RuntimeWarning,
            )
            self.splitter_mode = "best"

        patching_status.and_conditions(
            [
                (
                    # Each pair is (requirement, message used when it is not
                    # met), so the requirement here is that no OOB score is
                    # requested. The original passed `self.oob_score`, which
                    # inverted the check.
                    not self.oob_score,
                    "OOB score is not supported.",
                ),
                (
                    self.criterion in ["mse", "squared_error"],
                    f"'{self.criterion}' criterion is not supported. "
                    "Only 'mse' and 'squared_error' criterions are supported.",
                ),
                (not sp.issparse(X), "X is sparse. Sparse input is not supported."),
                (not sp.issparse(y), "y is sparse. Sparse input is not supported."),
                (
                    not sp.issparse(sample_weight),
                    "`sample_weight` is sparse. Sparse input is not supported.",
                ),
                (
                    self.ccp_alpha == 0.0,
                    f"Non-zero 'ccp_alpha' ({self.ccp_alpha}) is not supported.",
                ),
                (self.warm_start is False, "Warm start is not supported."),
                (
                    self.n_estimators <= 6024,
                    "More than 6024 estimators is not supported.",
                ),
                (
                    self.n_outputs_ == 1,
                    f"Number of outputs ({self.n_outputs_}) is not 1.",
                ),
                (
                    # `monotonic_cst` may be absent on the estimator, hence
                    # the getattr with a None default.
                    getattr(self, "monotonic_cst", None) is None,
                    "Monotonic constrains are not supported.",
                ),
            ]
        )
    elif method_name == "predict":
        X = data[0]
        patching_status.and_conditions(
            [
                (hasattr(self, "_onedal_model"), "oneDAL model was not trained."),
                (not sp.issparse(X), "X is sparse. Sparse input is not supported."),
                (self.warm_start is False, "Warm start is not supported."),
                (
                    daal_check_version((2021, "P", 400)),
                    "RandomForestRegressor inference only supported starting from oneDAL version 2021.4",
                ),
            ]
        )
        # `n_outputs_` is only checked when the attribute is present.
        if hasattr(self, "n_outputs_"):
            patching_status.and_conditions(
                [
                    (
                        self.n_outputs_ == 1,
                        f"Number of outputs ({self.n_outputs_}) is not 1.",
                    ),
                ]
            )
    else:
        raise RuntimeError(
            f"Unknown method {method_name} in {self.__class__.__name__}"
        )
    return patching_status
|
|
1398
|
-
|
|
1399
|
-
def _onedal_fit(self, X, y, sample_weight=None, queue=None):
    """Validate the training data and fit the oneDAL regressor backend.

    Parameters
    ----------
    X : array-like
        Training samples; converted with ``check_array`` to float64 or
        float32.
    y : array-like
        Target values; converted to an array of the same dtype as ``X``.
    sample_weight : array-like, default=None
        Optional per-sample weights, passed through
        ``self.check_sample_weight`` when given.
    queue : object, default=None
        Forwarded unchanged to the oneDAL estimator's ``fit``.

    Returns
    -------
    self : object
        The estimator, with ``n_features_in_`` and ``_onedal_estimator`` set
        and ``_save_attributes`` applied.

    Raises
    ------
    ValueError
        If ``y`` is a sparse matrix.
    """
    if sp.issparse(y):
        raise ValueError("sparse multilabel-indicator for y is not supported.")

    # The hyper-parameter validation hook is chosen by scikit-learn version.
    if sklearn_check_version("1.2"):
        self._validate_params()
    else:
        self._check_parameters()

    if sample_weight is not None:
        sample_weight = self.check_sample_weight(sample_weight, X)

    # Record feature names from the raw input, before array conversion.
    if sklearn_check_version("1.0"):
        self._check_feature_names(X, reset=True)

    X = check_array(
        X,
        dtype=[np.float64, np.float32],
        force_all_finite=not sklearn_check_version("1.4"),
    )
    targets = np.atleast_1d(np.asarray(y))
    y = check_array(targets, ensure_2d=False, dtype=X.dtype)
    check_consistent_length(X, y)

    self.n_features_in_ = X.shape[1]
    if not sklearn_check_version("1.0"):
        self.n_features_ = self.n_features_in_

    error_metric = (
        "out_of_bag_error_r2|out_of_bag_error_prediction"
        if self.oob_score
        else "none"
    )

    estimator_kwargs = dict(
        n_estimators=self.n_estimators,
        criterion=self.criterion,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        min_weight_fraction_leaf=self.min_weight_fraction_leaf,
        max_features=self.max_features,
        max_leaf_nodes=self.max_leaf_nodes,
        min_impurity_decrease=self.min_impurity_decrease,
        bootstrap=self.bootstrap,
        oob_score=self.oob_score,
        n_jobs=self.n_jobs,
        random_state=self.random_state,
        verbose=self.verbose,
        warm_start=self.warm_start,
        error_metric_mode=error_metric,
        variable_importance_mode="mdi",
        max_samples=self.max_samples,
    )
    # `splitter_mode` is only passed when the oneDAL version check succeeds.
    if daal_check_version((2023, "P", 101)):
        estimator_kwargs["splitter_mode"] = self.splitter_mode

    # Lazy evaluation of estimators_: drop any previously cached trees.
    self._cached_estimators_ = None

    # Build and train the backend estimator.
    self._onedal_estimator = self._onedal_regressor(**estimator_kwargs)
    self._onedal_estimator.fit(X, y, sample_weight, queue=queue)

    self._save_attributes()
    if sklearn_check_version("1.2"):
        self._estimator = DecisionTreeRegressor()

    return self
|
|
1462
|
-
|
|
1463
|
-
def _onedal_predict(self, X, queue=None):
    """Predict with the fitted oneDAL estimator.

    Parameters
    ----------
    X : array-like
        Input samples; converted with ``check_array`` to float32 or float64
        before being handed to the backend.
    queue : object, default=None
        Forwarded unchanged to the oneDAL estimator's ``predict``.

    Returns
    -------
    Whatever ``self._onedal_estimator.predict`` returns for the validated X.

    Raises
    ------
    Whatever ``check_is_fitted`` raises when ``_onedal_model`` is missing
    (``NotFittedError`` in scikit-learn).
    """
    # Fail fast on an unfitted estimator before touching the input.
    check_is_fitted(self, "_onedal_model")

    # Feature names must be compared on the caller's original object.
    # check_array returns a plain ndarray with no column names, so checking
    # afterwards would warn spuriously for DataFrame input and could never
    # detect mismatched names.
    if sklearn_check_version("1.0"):
        self._check_feature_names(X, reset=False)

    X = check_array(X, dtype=[np.float32, np.float64])

    return self._onedal_estimator.predict(X, queue=queue)
|
|
1471
|
-
|
|
1472
|
-
def fit(self, X, y, sample_weight=None):
    """
    Fit the forest on the training set (X, y).

    The call is routed through ``dispatch``, which is given both this
    class's ``_onedal_fit`` and scikit-learn's ``RandomForestRegressor.fit``
    as candidate implementations.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The training input samples.

    y : array-like of shape (n_samples,) or (n_samples, n_outputs)
        The target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights. If None, no weights are passed on.

    Returns
    -------
    self : object

    Raises
    ------
    ValueError
        If ``max_samples`` is set while ``bootstrap`` is false.
    """
    # Sub-sampling is only meaningful when bootstrapping is enabled.
    if not (self.bootstrap or self.max_samples is None):
        raise ValueError(
            "`max_sample` cannot be set if `bootstrap=False`. "
            "Either switch to `bootstrap=True` or set `max_sample=None`."
        )

    backends = {
        "onedal": self.__class__._onedal_fit,
        "sklearn": sklearn_RandomForestRegressor.fit,
    }
    dispatch(self, "fit", backends, X, y, sample_weight)
    return self
|
|
1516
|
-
|
|
1517
|
-
@wrap_output_data
def predict(self, X):
    """
    Predict regression targets for X.

    The call is routed through ``dispatch``, which is given both this
    class's ``_onedal_predict`` and scikit-learn's
    ``RandomForestRegressor.predict`` as candidate implementations.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples.

    Returns
    -------
    y : ndarray of shape (n_samples,) or (n_samples, n_outputs)
        The predicted values.
    """
    backends = {
        "onedal": self.__class__._onedal_predict,
        "sklearn": sklearn_RandomForestRegressor.predict,
    }
    return dispatch(self, "predict", backends, X)
|
|
1548
|
-
|
|
1549
|
-
if sklearn_check_version("1.0"):
|
|
1550
|
-
|
|
1551
|
-
@deprecated(
|
|
1552
|
-
"Attribute `n_features_` was deprecated in version 1.0 and will be "
|
|
1553
|
-
"removed in 1.2. Use `n_features_in_` instead."
|
|
1554
|
-
)
|
|
1555
|
-
@property
|
|
1556
|
-
def n_features_(self):
|
|
1557
|
-
return self.n_features_in_
|