scikit-learn-intelex 2023.2.1__py311-none-win_amd64.whl → 2024.0.1__py311-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-learn-intelex might be problematic. Click here for more details.
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/__init__.py +2 -2
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/__main__.py +16 -12
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/_config.py +2 -2
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/_device_offload.py +90 -56
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/_utils.py +95 -0
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/basic_statistics/__init__.py +3 -3
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/basic_statistics/basic_statistics.py +2 -2
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/cluster/__init__.py +4 -4
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/cluster/dbscan.py +187 -0
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/cluster/k_means.py +2 -2
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/cluster/tests/test_dbscan.py +12 -6
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/cluster/tests/test_kmeans.py +5 -5
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/decomposition/__init__.py +3 -3
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/decomposition/pca.py +2 -2
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/decomposition/tests/test_pca.py +5 -4
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/dispatcher.py +102 -72
- {scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview → scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex}/ensemble/__init__.py +12 -4
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/ensemble/_forest.py +1947 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/ensemble/tests/test_forest.py +118 -0
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/glob/__main__.py +31 -16
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/glob/dispatcher.py +21 -14
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/__init__.py +10 -10
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/coordinate_descent.py +2 -2
- {scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview → scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex}/linear_model/linear.py +173 -83
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/logistic_path.py +3 -3
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/ridge.py +2 -2
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/tests/test_linear.py +23 -7
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/linear_model/tests/test_logreg.py +4 -3
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/manifold/__init__.py +3 -3
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/manifold/t_sne.py +2 -2
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/manifold/tests/test_tsne.py +4 -3
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/metrics/__init__.py +5 -5
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/metrics/pairwise.py +2 -2
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/metrics/ranking.py +2 -2
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/metrics/tests/test_metrics.py +8 -6
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/model_selection/__init__.py +3 -3
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/model_selection/split.py +2 -2
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/model_selection/tests/test_model_selection.py +6 -3
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/__init__.py +9 -5
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/common.py +100 -77
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/knn_classification.py +331 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/knn_regression.py +307 -0
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/knn_unsupervised.py +116 -58
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/neighbors/lof.py +118 -56
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/neighbors/tests/test_neighbors.py +85 -0
- {scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview/decomposition → scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/preview}/__init__.py +18 -20
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/preview/cluster/__init__.py +3 -3
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/preview/cluster/_common.py +7 -7
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/preview/cluster/k_means.py +104 -73
- scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/linear_model/linear.py → scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/preview/decomposition/__init__.py +4 -1
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/preview/decomposition/pca.py +128 -100
- scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview/linear_model/tests/test_preview_linear.py → scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/preview/decomposition/tests/test_preview_pca.py +18 -16
- {scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview/linear_model → scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd}/__init__.py +24 -22
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/basic_statistics/__init__.py +3 -3
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/basic_statistics/basic_statistics.py +2 -2
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/cluster/__init__.py +11 -5
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/spmd/cluster/dbscan.py +50 -0
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/cluster/kmeans.py +2 -2
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/decomposition/__init__.py +3 -3
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/decomposition/pca.py +2 -2
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/ensemble/__init__.py +3 -3
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/ensemble/forest.py +16 -14
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/linear_model/__init__.py +3 -3
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/linear_model/linear_model.py +2 -2
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/neighbors/__init__.py +3 -3
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/spmd/neighbors/neighbors.py +3 -3
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/svm/__init__.py +11 -8
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/svm/_common.py +56 -56
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/svm/nusvc.py +110 -55
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/svm/nusvr.py +65 -31
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/svm/svc.py +136 -78
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/svm/svr.py +65 -31
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/svm/tests/test_svm.py +102 -0
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/_models_info.py +170 -0
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_config.py +9 -8
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_memory_usage.py +63 -69
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_monkeypatch.py +55 -53
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/test_parallel.py +50 -0
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/tests/test_patching.py +8 -7
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/tests/test_run_to_run_stability_tests.py +428 -0
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/tests/utils/_launch_algorithms.py +39 -39
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/utils/__init__.py +3 -3
- scikit_learn_intelex-2024.0.1.data/data/Lib/site-packages/sklearnex/utils/parallel.py +59 -0
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/utils/validation.py +2 -2
- {scikit_learn_intelex-2023.2.1.dist-info → scikit_learn_intelex-2024.0.1.dist-info}/METADATA +34 -35
- scikit_learn_intelex-2024.0.1.dist-info/RECORD +90 -0
- scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/_utils.py +0 -82
- scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/cluster/dbscan.py +0 -18
- scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/ensemble/__init__.py +0 -20
- scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/ensemble/forest.py +0 -18
- scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/ensemble/tests/test_forest.py +0 -46
- scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/neighbors/knn_classification.py +0 -228
- scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/neighbors/knn_regression.py +0 -213
- scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/neighbors/tests/test_neighbors.py +0 -57
- scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview/__init__.py +0 -18
- scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview/decomposition/tests/test_preview_pca.py +0 -28
- scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview/ensemble/extra_trees.py +0 -1261
- scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview/ensemble/forest.py +0 -1155
- scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview/ensemble/tests/test_preview_ensemble.py +0 -67
- scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview/linear_model/_common.py +0 -66
- scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/spmd/__init__.py +0 -23
- scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/svm/tests/test_svm.py +0 -63
- scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/tests/_models_info.py +0 -159
- scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/tests/test_run_to_run_stability_tests.py +0 -383
- scikit_learn_intelex-2023.2.1.dist-info/RECORD +0 -95
- {scikit_learn_intelex-2023.2.1.data → scikit_learn_intelex-2024.0.1.data}/data/Lib/site-packages/sklearnex/doc/third-party-programs.txt +0 -0
- {scikit_learn_intelex-2023.2.1.dist-info → scikit_learn_intelex-2024.0.1.dist-info}/LICENSE.txt +0 -0
- {scikit_learn_intelex-2023.2.1.dist-info → scikit_learn_intelex-2024.0.1.dist-info}/WHEEL +0 -0
- {scikit_learn_intelex-2023.2.1.dist-info → scikit_learn_intelex-2024.0.1.dist-info}/top_level.txt +0 -0
scikit_learn_intelex-2023.2.1.data/data/Lib/site-packages/sklearnex/preview/ensemble/forest.py
DELETED
|
@@ -1,1155 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python
|
|
2
|
-
# ===============================================================================
|
|
3
|
-
# Copyright 2021 Intel Corporation
|
|
4
|
-
#
|
|
5
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
-
# you may not use this file except in compliance with the License.
|
|
7
|
-
# You may obtain a copy of the License at
|
|
8
|
-
#
|
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
-
#
|
|
11
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
-
# See the License for the specific language governing permissions and
|
|
15
|
-
# limitations under the License.
|
|
16
|
-
# ===============================================================================
|
|
17
|
-
|
|
18
|
-
from daal4py.sklearn._utils import (
|
|
19
|
-
daal_check_version, sklearn_check_version,
|
|
20
|
-
make2d, check_tree_nodes
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
import numpy as np
|
|
24
|
-
|
|
25
|
-
import numbers
|
|
26
|
-
|
|
27
|
-
import warnings
|
|
28
|
-
|
|
29
|
-
from abc import ABC
|
|
30
|
-
|
|
31
|
-
from sklearn.exceptions import DataConversionWarning
|
|
32
|
-
|
|
33
|
-
from ..._config import get_config
|
|
34
|
-
from ..._device_offload import dispatch, wrap_output_data
|
|
35
|
-
|
|
36
|
-
from sklearn.ensemble import RandomForestClassifier as sklearn_RandomForestClassifier
|
|
37
|
-
from sklearn.ensemble import RandomForestRegressor as sklearn_RandomForestRegressor
|
|
38
|
-
|
|
39
|
-
from sklearn.utils.validation import (
|
|
40
|
-
check_is_fitted,
|
|
41
|
-
check_consistent_length,
|
|
42
|
-
check_array,
|
|
43
|
-
check_X_y)
|
|
44
|
-
|
|
45
|
-
from onedal.datatypes import _num_features, _num_samples
|
|
46
|
-
|
|
47
|
-
from sklearn.utils import check_random_state, deprecated
|
|
48
|
-
|
|
49
|
-
from sklearn.base import clone
|
|
50
|
-
|
|
51
|
-
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
|
|
52
|
-
from sklearn.tree._tree import Tree
|
|
53
|
-
|
|
54
|
-
from onedal.ensemble import RandomForestClassifier as onedal_RandomForestClassifier
|
|
55
|
-
from onedal.ensemble import RandomForestRegressor as onedal_RandomForestRegressor
|
|
56
|
-
from onedal.primitives import get_tree_state_cls, get_tree_state_reg
|
|
57
|
-
|
|
58
|
-
from scipy import sparse as sp
|
|
59
|
-
|
|
60
|
-
if sklearn_check_version('1.2'):
|
|
61
|
-
from sklearn.utils._param_validation import Interval, StrOptions
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
class BaseRandomForest(ABC):
    """Helper mixin for the oneDAL-backed random forest estimators.

    Every method reads its hyper-parameters (``bootstrap``, ``max_bins``,
    ``oob_score`` ...) from attributes of ``self``.
    """

    def _fit_proba(self, X, y, sample_weight=None, queue=None):
        """Instantiate a sibling estimator and store ``queue`` in a config dict.

        ``X``, ``y`` and ``sample_weight`` are not used and nothing is
        returned.
        """
        # Only the constructor call matters here; the new object is dropped.
        self.__class__(**self.get_params())

        # We use stock metaestimators below, so the only way
        # to pass a queue is using config_context.
        # NOTE(review): the dict is kept in a local only - confirm whether
        # get_config() returns live state or a copy.
        config = get_config()
        config['target_offload'] = queue

    def _save_attributes(self):
        """Copy fitted state from ``self._onedal_estimator`` and return ``self``."""
        backend = self._onedal_estimator
        self._onedal_model = backend._onedal_model
        # TODO:
        # update for regression
        if self.oob_score:
            self.oob_score_ = backend.oob_score_
            self.oob_prediction_ = backend.oob_prediction_
        return self

    def _onedal_classifier(self, **onedal_params):
        """Build the oneDAL classifier from keyword parameters."""
        estimator = onedal_RandomForestClassifier(**onedal_params)
        return estimator

    def _onedal_regressor(self, **onedal_params):
        """Build the oneDAL regressor from keyword parameters."""
        estimator = onedal_RandomForestRegressor(**onedal_params)
        return estimator

    # TODO:
    # move to onedal modul.
    def _check_parameters(self):
        """Validate the hyper-parameters stored on ``self``.

        Raises
        ------
        ValueError
            On the first hyper-parameter found outside its allowed range
            or of a non-integral type where an integer is required.

        Warns
        -----
        FutureWarning
            When ``min_impurity_split`` is not ``None``.
        """
        bootstrap_off = not self.bootstrap
        if bootstrap_off and self.max_samples is not None:
            raise ValueError(
                "`max_sample` cannot be set if `bootstrap=False`. "
                "Either switch to `bootstrap=True` or set "
                "`max_sample=None`."
            )

        # Integers are absolute counts, anything else is read as a fraction.
        leaf = self.min_samples_leaf
        if isinstance(leaf, numbers.Integral):
            leaf_ok = leaf >= 1
        else:
            leaf_ok = 0. < leaf <= 0.5
        if not leaf_ok:
            raise ValueError("min_samples_leaf must be at least 1 "
                             "or in (0, 0.5], got %s"
                             % leaf)

        split = self.min_samples_split
        if isinstance(split, numbers.Integral):
            split_ok = split >= 2
            split_message = ("min_samples_split must be an integer "
                             "greater than 1 or a float in (0.0, 1.0]; "
                             "got the integer %s")
        else:
            split_ok = 0. < split <= 1.
            split_message = ("min_samples_split must be an integer "
                             "greater than 1 or a float in (0.0, 1.0]; "
                             "got the float %s")
        if not split_ok:
            raise ValueError(split_message % split)

        if not 0 <= self.min_weight_fraction_leaf <= 0.5:
            raise ValueError("min_weight_fraction_leaf must in [0, 0.5]")

        impurity_split = self.min_impurity_split
        if impurity_split is not None:
            warnings.warn("The min_impurity_split parameter is deprecated. "
                          "Its default value has changed from 1e-7 to 0 in "
                          "version 0.23, and it will be removed in 0.25. "
                          "Use the min_impurity_decrease parameter instead.",
                          FutureWarning)

            if impurity_split < 0.:
                raise ValueError("min_impurity_split must be greater than "
                                 "or equal to 0")

        if self.min_impurity_decrease < 0.:
            raise ValueError("min_impurity_decrease must be greater than "
                             "or equal to 0")

        leaf_cap = self.max_leaf_nodes
        if leaf_cap is not None:
            if not isinstance(leaf_cap, numbers.Integral):
                raise ValueError(
                    "max_leaf_nodes must be integral number but was "
                    "%r" % leaf_cap)
            if leaf_cap < 2:
                raise ValueError(
                    ("max_leaf_nodes {0} must be either None "
                     "or larger than 1").format(leaf_cap))

        bins = self.max_bins
        if not isinstance(bins, numbers.Integral):
            raise ValueError("max_bins must be integral number but was "
                             "%r" % bins)
        if bins < 2:
            raise ValueError("max_bins must be at least 2, got %s"
                             % bins)

        bin_size = self.min_bin_size
        if not isinstance(bin_size, numbers.Integral):
            raise ValueError("min_bin_size must be integral number but was "
                             "%r" % bin_size)
        if bin_size < 1:
            raise ValueError("min_bin_size must be at least 1, got %s"
                             % bin_size)

    def check_sample_weight(self, sample_weight, X, dtype=None):
        """Return ``sample_weight`` as a 1D array with one entry per sample of ``X``.

        Parameters
        ----------
        sample_weight : None, number or array-like
            ``None`` gives an array of ones, a number is broadcast to every
            sample, anything else goes through ``check_array``.
        X : array-like
            Only its number of samples is used.
        dtype : dtype, default=None
            Requested dtype; values other than ``np.float32`` and
            ``np.float64`` are replaced by ``np.float64``.

        Raises
        ------
        ValueError
            If an array-like ``sample_weight`` is not 1D or its length does
            not match the number of samples.
        """
        n_samples = _num_samples(X)

        if dtype is not None and dtype not in (np.float32, np.float64):
            dtype = np.float64

        # The two generated cases are 1D with n_samples entries by construction.
        if sample_weight is None:
            return np.ones(n_samples, dtype=dtype)
        if isinstance(sample_weight, numbers.Number):
            return np.full(n_samples, sample_weight, dtype=dtype)

        accepted = [np.float64, np.float32] if dtype is None else dtype
        weights = check_array(
            sample_weight,
            accept_sparse=False,
            ensure_2d=False,
            dtype=accepted,
            order="C")
        if weights.ndim != 1:
            raise ValueError("Sample weights must be 1D array or scalar")

        expected_shape = (n_samples,)
        if weights.shape != expected_shape:
            raise ValueError("sample_weight.shape == {}, expected {}!"
                             .format(weights.shape, expected_shape))
        return weights
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
class RandomForestClassifier(sklearn_RandomForestClassifier, BaseRandomForest):
|
|
190
|
-
__doc__ = sklearn_RandomForestClassifier.__doc__
|
|
191
|
-
|
|
192
|
-
if sklearn_check_version('1.2'):
    # Stock constraints plus entries for the three extra hyper-parameters
    # this estimator accepts (max_bins, min_bin_size, splitter_mode).
    _parameter_constraints: dict = dict(
        sklearn_RandomForestClassifier._parameter_constraints,
        max_bins=[Interval(numbers.Integral, 2, None, closed="left")],
        min_bin_size=[Interval(numbers.Integral, 1, None, closed="left")],
        splitter_mode=[StrOptions({"best", "random"})],
    )
|
|
199
|
-
|
|
200
|
-
if sklearn_check_version('1.0'):
    def __init__(
            self,
            n_estimators=100,
            criterion="gini",
            max_depth=None,
            min_samples_split=2,
            min_samples_leaf=1,
            min_weight_fraction_leaf=0.,
            max_features='sqrt' if sklearn_check_version('1.1') else 'auto',
            max_leaf_nodes=None,
            min_impurity_decrease=0.,
            bootstrap=True,
            oob_score=False,
            n_jobs=None,
            random_state=None,
            verbose=0,
            warm_start=False,
            class_weight=None,
            ccp_alpha=0.0,
            max_samples=None,
            max_bins=256,
            min_bin_size=1,
            splitter_mode='best'):
        """Store the hyper-parameters (signature without ``min_impurity_split``).

        ``max_bins``, ``min_bin_size`` and ``splitter_mode`` are extra
        parameters on top of the ones forwarded to the stock base class.
        """
        super(RandomForestClassifier, self).__init__(
            n_estimators=n_estimators,
            criterion=criterion,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=min_weight_fraction_leaf,
            max_features=max_features,
            max_leaf_nodes=max_leaf_nodes,
            min_impurity_decrease=min_impurity_decrease,
            bootstrap=bootstrap,
            oob_score=oob_score,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose,
            warm_start=warm_start,
            class_weight=class_weight
        )
        self.warm_start = warm_start
        # Not forwarded to the base constructor above, so set them here.
        self.ccp_alpha = ccp_alpha
        self.max_samples = max_samples
        self.max_bins = max_bins
        self.min_bin_size = min_bin_size
        # This signature has no min_impurity_split parameter; the attribute
        # is still defined because _check_parameters() reads it.
        self.min_impurity_split = None
        self.splitter_mode = splitter_mode
else:
    def __init__(self,
                 n_estimators=100,
                 criterion="gini",
                 max_depth=None,
                 min_samples_split=2,
                 min_samples_leaf=1,
                 min_weight_fraction_leaf=0.,
                 max_features="auto",
                 max_leaf_nodes=None,
                 min_impurity_decrease=0.,
                 min_impurity_split=None,
                 bootstrap=True,
                 oob_score=False,
                 n_jobs=None,
                 random_state=None,
                 verbose=0,
                 warm_start=False,
                 class_weight=None,
                 ccp_alpha=0.0,
                 max_samples=None,
                 max_bins=256,
                 min_bin_size=1,
                 splitter_mode='best'):
        """Store the hyper-parameters (signature with ``min_impurity_split``).

        ``max_bins``, ``min_bin_size`` and ``splitter_mode`` are extra
        parameters on top of the ones forwarded to the stock base class.
        """
        super(RandomForestClassifier, self).__init__(
            n_estimators=n_estimators,
            criterion=criterion,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=min_weight_fraction_leaf,
            max_features=max_features,
            max_leaf_nodes=max_leaf_nodes,
            min_impurity_decrease=min_impurity_decrease,
            min_impurity_split=min_impurity_split,
            bootstrap=bootstrap,
            oob_score=oob_score,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose,
            warm_start=warm_start,
            class_weight=class_weight,
            ccp_alpha=ccp_alpha,
            max_samples=max_samples
        )
        self.warm_start = warm_start
        self.ccp_alpha = ccp_alpha
        self.max_samples = max_samples
        self.max_bins = max_bins
        self.min_bin_size = min_bin_size
        # Keep the caller's value: overwriting it with None discarded the
        # argument and made get_params() report a value that was never passed.
        self.min_impurity_split = min_impurity_split
        self.splitter_mode = splitter_mode
|
|
303
|
-
|
|
304
|
-
def fit(self, X, y, sample_weight=None):
    """
    Build a forest of trees from the training set (X, y).

    The call is handed to ``dispatch`` together with two candidate
    implementations: this class's ``_onedal_fit`` and the stock
    scikit-learn ``fit``.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The training input samples.

    y : array-like of shape (n_samples,) or (n_samples, n_outputs)
        The target values (class labels).

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights. If None, then samples are equally weighted.

    Returns
    -------
    self : object
        The estimator itself.
    """
    backends = {
        'onedal': self.__class__._onedal_fit,
        'sklearn': sklearn_RandomForestClassifier.fit,
    }
    dispatch(self, 'fit', backends, X, y, sample_weight)
    return self
|
|
335
|
-
|
|
336
|
-
def _onedal_ready(self, X, y, sample_weight):
    """Validate the estimator and report whether its settings and data pass the checks below.

    Parameters
    ----------
    X, y : array-like
        Training data and targets.
    sample_weight : array-like or None
        Returned unchanged; it is not inspected here.

    Returns
    -------
    tuple
        ``(ready, X, y, sample_weight)``. If the configuration checks pass,
        ``X`` is the output of ``check_array`` and ``y`` the output of
        ``make2d``; otherwise both are returned exactly as received.

    Raises
    ------
    ValueError
        For sparse ``y``, or for ``max_samples`` / ``oob_score`` used with
        ``bootstrap=False``.
    """
    if sp.issparse(y):
        raise ValueError(
            "sparse multilabel-indicator for y is not supported."
        )
    bootstrap_off = not self.bootstrap
    if bootstrap_off and self.max_samples is not None:
        raise ValueError(
            "`max_sample` cannot be set if `bootstrap=False`. "
            "Either switch to `bootstrap=True` or set "
            "`max_sample=None`."
        )
    if bootstrap_off and self.oob_score:
        raise ValueError("Out of bag estimation only available"
                         " if bootstrap=True")

    if sklearn_check_version("1.2"):
        self._validate_params()
    else:
        self._check_parameters()

    config_checks = [
        not sp.issparse(X),           # dense input only
        self.ccp_alpha == 0.0,        # no cost-complexity pruning
        self.criterion == "gini",
        self.warm_start is False,
    ]
    # Which oob_score setting is acceptable depends on the oneDAL version check.
    if daal_check_version((2021, 'P', 500)):
        oob_ok = not self.oob_score
    else:
        oob_ok = self.oob_score

    ready = all([oob_ok, *config_checks])
    if not ready:
        return ready, X, y, sample_weight

    if sklearn_check_version("1.0"):
        self._check_feature_names(X, reset=True)
    X = check_array(X, dtype=[np.float32, np.float64])
    y = np.atleast_1d(np.asarray(y))
    if y.ndim == 2 and y.shape[1] == 1:
        warnings.warn(
            "A column-vector y was passed when a 1d array was"
            " expected. Please change the shape of y to "
            "(n_samples,), for example using ravel().",
            DataConversionWarning,
            stacklevel=2)
    check_consistent_length(X, y)

    y = make2d(y)
    self.n_outputs_ = y.shape[1]
    # TODO: Fix to support integers as input
    ready = (self.n_outputs_ == 1
             and y.dtype in [np.float32, np.float64, np.int32, np.int64])

    return ready, X, y, sample_weight
|
|
392
|
-
|
|
393
|
-
@wrap_output_data
def predict(self, X):
    """
    Predict class for X.

    The call is handed to ``dispatch`` together with two candidate
    implementations: this class's ``_onedal_predict`` and the stock
    scikit-learn ``predict``.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples.

    Returns
    -------
    y : ndarray of shape (n_samples,) or (n_samples, n_outputs)
        The predicted classes.
    """
    backends = {
        'onedal': self.__class__._onedal_predict,
        'sklearn': sklearn_RandomForestClassifier.predict,
    }
    return dispatch(self, 'predict', backends, X)
|
|
419
|
-
|
|
420
|
-
@wrap_output_data
def predict_proba(self, X):
    """
    Estimate class membership probabilities for X.

    The probabilities of a sample are the mean of the class probabilities
    predicted by the individual trees; a single tree reports the fraction
    of same-class samples in the leaf the sample falls into.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples. Internally, its dtype will be converted to
        ``dtype=np.float32``. If a sparse matrix is provided, it will be
        converted into a sparse ``csr_matrix``.

    Returns
    -------
    p : ndarray of shape (n_samples, n_classes), or a list of n_outputs
        such arrays if n_outputs > 1.
        The class probabilities of the input samples. The order of the
        classes corresponds to that in the attribute :term:`classes_`.

    Raises
    ------
    ValueError
        If the feature count of X differs from ``n_features_in_``.
    """
    # TODO:
    #   _check_proba()
    #   self._check_proba()
    if sklearn_check_version("1.0"):
        self._check_feature_names(X, reset=False)

    if hasattr(self, 'n_features_in_'):
        try:
            seen_features = _num_features(X)
        except TypeError:
            # NOTE(review): when the feature count cannot be determined the
            # sample count is compared instead -- confirm this is intended.
            seen_features = _num_samples(X)
        if seen_features != self.n_features_in_:
            raise ValueError(
                f'X has {seen_features} features, '
                f'but RandomForestClassifier is expecting '
                f'{self.n_features_in_} features as input')

    backends = {
        'onedal': self.__class__._onedal_predict_proba,
        'sklearn': sklearn_RandomForestClassifier.predict_proba,
    }
    return dispatch(self, 'predict_proba', backends, X)
|
|
463
|
-
|
|
464
|
-
if sklearn_check_version('1.0'):
    # From scikit-learn 1.0 on, `n_features_` is only exposed as a
    # deprecated read-only alias of `n_features_in_`.
    @deprecated(
        "Attribute `n_features_` was deprecated in version 1.0 and will be "
        "removed in 1.2. Use `n_features_in_` instead."
    )
    @property
    def n_features_(self):
        # Forward to the attribute that replaced it.
        return self.n_features_in_
|
|
471
|
-
|
|
472
|
-
@property
def _estimators_(self):
    """Build (once) the list of per-tree ``DecisionTreeClassifier`` objects.

    A non-empty ``_cached_estimators_`` is returned as is. Otherwise one
    estimator per tree is created, its ``tree_`` is populated from the
    fitted oneDAL model, and the resulting list is cached and returned.
    """
    cached = getattr(self, '_cached_estimators_', None)
    if cached:
        return cached

    if sklearn_check_version('0.22'):
        check_is_fitted(self)
    else:
        check_is_fitted(self, '_onedal_model')

    # At this point classes_/n_classes_ are indexable; element 0 is used.
    classes_ = self.classes_[0]
    n_classes_ = self.n_classes_[0]

    # convert model to estimators
    tree_params = dict(
        criterion=self.criterion,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        min_weight_fraction_leaf=self.min_weight_fraction_leaf,
        max_features=self.max_features,
        max_leaf_nodes=self.max_leaf_nodes,
        min_impurity_decrease=self.min_impurity_decrease,
        random_state=None,
    )
    if not sklearn_check_version('1.0'):
        tree_params['min_impurity_split'] = self.min_impurity_split
    template = DecisionTreeClassifier(**tree_params)

    # we need to set est.tree_ field with Trees constructed from Intel(R)
    # oneAPI Data Analytics Library solution
    rng = check_random_state(self.random_state)
    seed_bound = np.iinfo(np.int32).max
    # Name of the feature-count attribute depends on the sklearn version.
    if sklearn_check_version('1.0'):
        n_features_attr = 'n_features_in_'
    else:
        n_features_attr = 'n_features_'

    built = []
    for tree_idx in range(self.n_estimators):
        tree_est = clone(template)
        tree_est.set_params(random_state=rng.randint(seed_bound))
        setattr(tree_est, n_features_attr, self.n_features_in_)
        tree_est.n_outputs_ = self.n_outputs_
        tree_est.classes_ = classes_
        tree_est.n_classes_ = n_classes_

        state = get_tree_state_cls(self._onedal_model, tree_idx, n_classes_)
        state_dict = {
            'max_depth': state.max_depth,
            'node_count': state.node_count,
            'nodes': check_tree_nodes(state.node_ar),
            'values': state.value_ar,
        }
        tree_est.tree_ = Tree(
            self.n_features_in_,
            np.array([n_classes_], dtype=np.intp),
            self.n_outputs_,
        )
        tree_est.tree_.__setstate__(state_dict)
        built.append(tree_est)

    self._cached_estimators_ = built
    return built
|
|
533
|
-
|
|
534
|
-
def _onedal_cpu_supported(self, method_name, *data):
|
|
535
|
-
if method_name == 'fit':
|
|
536
|
-
ready, X, y, sample_weight = self._onedal_ready(*data)
|
|
537
|
-
if self.splitter_mode == 'random':
|
|
538
|
-
warnings.warn("'random' splitter mode supports GPU devices only "
|
|
539
|
-
"and requires oneDAL version >= 2023.1.1. "
|
|
540
|
-
"Using 'best' mode instead.", RuntimeWarning)
|
|
541
|
-
self.splitter_mode = 'best'
|
|
542
|
-
if not ready:
|
|
543
|
-
return False
|
|
544
|
-
elif sp.issparse(X):
|
|
545
|
-
return False
|
|
546
|
-
elif sp.issparse(y):
|
|
547
|
-
return False
|
|
548
|
-
elif sp.issparse(sample_weight):
|
|
549
|
-
return False
|
|
550
|
-
elif not self.ccp_alpha == 0.0:
|
|
551
|
-
return False
|
|
552
|
-
elif self.warm_start:
|
|
553
|
-
return False
|
|
554
|
-
elif self.oob_score and not daal_check_version((2023, 'P', 101)):
|
|
555
|
-
return False
|
|
556
|
-
elif not self.n_outputs_ == 1:
|
|
557
|
-
return False
|
|
558
|
-
elif hasattr(self, 'estimators_'):
|
|
559
|
-
return False
|
|
560
|
-
else:
|
|
561
|
-
return True
|
|
562
|
-
if method_name in ['predict', 'predict_proba']:
|
|
563
|
-
X = data[0]
|
|
564
|
-
if not hasattr(self, '_onedal_model'):
|
|
565
|
-
return False
|
|
566
|
-
elif sp.issparse(X):
|
|
567
|
-
return False
|
|
568
|
-
elif not (hasattr(self, 'n_outputs_') and self.n_outputs_ == 1):
|
|
569
|
-
return False
|
|
570
|
-
elif not daal_check_version((2021, 'P', 400)):
|
|
571
|
-
return False
|
|
572
|
-
elif self.warm_start:
|
|
573
|
-
return False
|
|
574
|
-
else:
|
|
575
|
-
return True
|
|
576
|
-
raise RuntimeError(
|
|
577
|
-
f'Unknown method {method_name} in {self.__class__.__name__}')
|
|
578
|
-
|
|
579
|
-
def _onedal_gpu_supported(self, method_name, *data):
|
|
580
|
-
if method_name == 'fit':
|
|
581
|
-
ready, X, y, sample_weight = self._onedal_ready(*data)
|
|
582
|
-
if self.splitter_mode == 'random' and \
|
|
583
|
-
not daal_check_version((2023, 'P', 101)):
|
|
584
|
-
warnings.warn("'random' splitter mode requires OneDAL >= 2023.1.1. "
|
|
585
|
-
"Using 'best' mode instead.", RuntimeWarning)
|
|
586
|
-
self.splitter_mode = 'best'
|
|
587
|
-
if not ready:
|
|
588
|
-
return False
|
|
589
|
-
elif sp.issparse(X):
|
|
590
|
-
return False
|
|
591
|
-
elif sp.issparse(y):
|
|
592
|
-
return False
|
|
593
|
-
elif sp.issparse(sample_weight):
|
|
594
|
-
return False
|
|
595
|
-
elif sample_weight is not None: # `sample_weight` is not supported.
|
|
596
|
-
return False
|
|
597
|
-
elif not self.ccp_alpha == 0.0:
|
|
598
|
-
return False
|
|
599
|
-
elif self.warm_start:
|
|
600
|
-
return False
|
|
601
|
-
elif self.oob_score:
|
|
602
|
-
return False
|
|
603
|
-
elif not self.n_outputs_ == 1:
|
|
604
|
-
return False
|
|
605
|
-
elif hasattr(self, 'estimators_'):
|
|
606
|
-
return False
|
|
607
|
-
else:
|
|
608
|
-
return True
|
|
609
|
-
if method_name in ['predict', 'predict_proba']:
|
|
610
|
-
X = data[0]
|
|
611
|
-
if not hasattr(self, '_onedal_model'):
|
|
612
|
-
return False
|
|
613
|
-
elif sp.issparse(X):
|
|
614
|
-
return False
|
|
615
|
-
elif not (hasattr(self, 'n_outputs_') and self.n_outputs_ == 1):
|
|
616
|
-
return False
|
|
617
|
-
elif not daal_check_version((2021, 'P', 400)):
|
|
618
|
-
return False
|
|
619
|
-
elif self.warm_start:
|
|
620
|
-
return False
|
|
621
|
-
else:
|
|
622
|
-
return True
|
|
623
|
-
raise RuntimeError(
|
|
624
|
-
f'Unknown method {method_name} in {self.__class__.__name__}')
|
|
625
|
-
|
|
626
|
-
def _onedal_fit(self, X, y, sample_weight=None, queue=None):
    """Fit the forest classifier through the oneDAL estimator.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training data; validated as dense float32/float64.
    y : array-like
        Target labels (multi-output is rejected by validation).
    sample_weight : array-like, default=None
        Optional per-sample weights; combined with the expanded class
        weights when those are present.
    queue : object, default=None
        Forwarded unchanged to the oneDAL estimator's ``fit``.

    Returns
    -------
    self : object

    Raises
    ------
    ValueError
        If the training data contains fewer than two classes.
    """
    float_dtypes = [np.float64, np.float32]
    if sklearn_check_version('1.2'):
        X, y = self._validate_data(
            X, y, multi_output=False, accept_sparse=False,
            dtype=float_dtypes)
    else:
        X, y = check_X_y(
            X, y, accept_sparse=False, dtype=float_dtypes,
            multi_output=False)

    if sample_weight is not None:
        sample_weight = self.check_sample_weight(sample_weight, X)

    y = np.atleast_1d(y)
    if y.ndim == 2 and y.shape[1] == 1:
        warnings.warn(
            "A column-vector y was passed when a 1d array was"
            " expected. Please change the shape of y to "
            "(n_samples,), for example using ravel().",
            DataConversionWarning,
            stacklevel=2,
        )
    if y.ndim == 1:
        # reshape is necessary to preserve the data contiguity against vs
        # [:, np.newaxis] that does not.
        y = np.reshape(y, (-1, 1))

    y, expanded_class_weight = self._validate_y_class_weight(y)

    n_classes_ = self.n_classes_[0]
    self.n_features_in_ = X.shape[1]
    if not sklearn_check_version('1.0'):
        self.n_features_ = self.n_features_in_

    # Fold class weights into the sample weights.
    if expanded_class_weight is not None:
        if sample_weight is None:
            sample_weight = expanded_class_weight
        else:
            sample_weight = sample_weight * expanded_class_weight
    if sample_weight is not None:
        # The weights are handed to oneDAL wrapped in a one-element list.
        sample_weight = [sample_weight]

    if n_classes_ < 2:
        raise ValueError(
            "Training data only contain information about one class.")

    err = (
        'out_of_bag_error_accuracy|out_of_bag_error_decision_function'
        if self.oob_score else 'none'
    )

    onedal_params = dict(
        n_estimators=self.n_estimators,
        criterion=self.criterion,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        min_weight_fraction_leaf=self.min_weight_fraction_leaf,
        max_features=self.max_features,
        max_leaf_nodes=self.max_leaf_nodes,
        min_impurity_decrease=self.min_impurity_decrease,
        min_impurity_split=self.min_impurity_split,
        bootstrap=self.bootstrap,
        oob_score=self.oob_score,
        n_jobs=self.n_jobs,
        random_state=self.random_state,
        verbose=self.verbose,
        warm_start=self.warm_start,
        error_metric_mode=err,
        variable_importance_mode='mdi',
        class_weight=self.class_weight,
        max_bins=self.max_bins,
        min_bin_size=self.min_bin_size,
        max_samples=self.max_samples,
    )
    if daal_check_version((2023, 'P', 101)):
        onedal_params['splitter_mode'] = self.splitter_mode
    # Drop any trees cached from a previous fit.
    self._cached_estimators_ = None

    # Compute
    self._onedal_estimator = self._onedal_classifier(**onedal_params)
    self._onedal_estimator.fit(X, y, sample_weight, queue=queue)

    self._save_attributes()
    if sklearn_check_version("1.2"):
        self._estimator = DecisionTreeClassifier()
    self.estimators_ = self._estimators_
    # Decapsulate classes_ attributes
    self.n_classes_ = self.n_classes_[0]
    self.classes_ = self.classes_[0]
    return self
|
|
719
|
-
|
|
720
|
-
def _onedal_predict(self, X, queue=None):
    """Predict class labels with the fitted oneDAL estimator.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Samples to classify; converted to a float32/float64 array.
    queue : object, default=None
        Forwarded unchanged to the oneDAL estimator's ``predict``.

    Returns
    -------
    ndarray
        Entries of ``classes_`` selected by the integer-cast oneDAL output.
    """
    check_is_fitted(self)
    # Feature names must be validated on the caller's object: once
    # check_array has produced an ndarray the column names are gone and the
    # check would always report missing feature names.
    if sklearn_check_version("1.0"):
        self._check_feature_names(X, reset=False)
    X = check_array(X, dtype=[np.float32, np.float64])

    res = self._onedal_estimator.predict(X, queue=queue)
    # The raw result is flattened and cast to int64 to index classes_.
    return np.take(self.classes_,
                   res.ravel().astype(np.int64, casting='unsafe'))
|
|
729
|
-
|
|
730
|
-
def _onedal_predict_proba(self, X, queue=None):
    """Predict class probabilities with the fitted oneDAL estimator.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Samples to score; converted to a float64/float32 array.
    queue : object, default=None
        Forwarded unchanged to the oneDAL estimator's ``predict_proba``.

    Returns
    -------
    Whatever the oneDAL estimator's ``predict_proba`` returns.
    """
    check_is_fitted(self)
    # Validate feature names before check_array strips them; doing it on the
    # converted ndarray would always report missing feature names.
    if sklearn_check_version("1.0"):
        self._check_feature_names(X, reset=False)
    X = check_array(X, dtype=[np.float64, np.float32])
    if sklearn_check_version('0.23'):
        self._check_n_features(X, reset=False)
    return self._onedal_estimator.predict_proba(X, queue=queue)
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
class RandomForestRegressor(sklearn_RandomForestRegressor, BaseRandomForest):
|
|
741
|
-
__doc__ = sklearn_RandomForestRegressor.__doc__
|
|
742
|
-
|
|
743
|
-
if sklearn_check_version('1.2'):
    # Start from the stock scikit-learn constraints and add the three
    # parameters this class introduces.
    _parameter_constraints: dict = dict(
        sklearn_RandomForestRegressor._parameter_constraints,
        max_bins=[Interval(numbers.Integral, 2, None, closed="left")],
        min_bin_size=[Interval(numbers.Integral, 1, None, closed="left")],
        splitter_mode=[StrOptions({"best", "random"})],
    )
|
|
750
|
-
|
|
751
|
-
if sklearn_check_version('1.0'):
    def __init__(
            self,
            n_estimators=100,
            *,
            criterion="squared_error",
            max_depth=None,
            min_samples_split=2,
            min_samples_leaf=1,
            min_weight_fraction_leaf=0.,
            max_features=1.0 if sklearn_check_version('1.1') else 'auto',
            max_leaf_nodes=None,
            min_impurity_decrease=0.,
            bootstrap=True,
            oob_score=False,
            n_jobs=None,
            random_state=None,
            verbose=0,
            warm_start=False,
            ccp_alpha=0.0,
            max_samples=None,
            max_bins=256,
            min_bin_size=1,
            splitter_mode='best'):
        """Store the hyper-parameters (signature for scikit-learn >= 1.0).

        ``max_bins``, ``min_bin_size`` and ``splitter_mode`` are additions
        of this class; the remaining parameters mirror the scikit-learn
        estimator it derives from.
        """
        super(RandomForestRegressor, self).__init__(
            n_estimators=n_estimators,
            criterion=criterion,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=min_weight_fraction_leaf,
            max_features=max_features,
            max_leaf_nodes=max_leaf_nodes,
            min_impurity_decrease=min_impurity_decrease,
            bootstrap=bootstrap,
            oob_score=oob_score,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose,
            warm_start=warm_start
        )
        self.warm_start = warm_start
        self.ccp_alpha = ccp_alpha
        self.max_samples = max_samples
        self.max_bins = max_bins
        self.min_bin_size = min_bin_size
        # Not a constructor parameter in this signature; the attribute is
        # kept (as None) because other methods read it.
        self.min_impurity_split = None
        self.splitter_mode = splitter_mode
else:
    def __init__(self,
                 n_estimators=100, *,
                 criterion="mse",
                 max_depth=None,
                 min_samples_split=2,
                 min_samples_leaf=1,
                 min_weight_fraction_leaf=0.,
                 max_features="auto",
                 max_leaf_nodes=None,
                 min_impurity_decrease=0.,
                 min_impurity_split=None,
                 bootstrap=True,
                 oob_score=False,
                 n_jobs=None,
                 random_state=None,
                 verbose=0,
                 warm_start=False,
                 ccp_alpha=0.0,
                 max_samples=None,
                 max_bins=256,
                 min_bin_size=1,
                 splitter_mode='best'):
        """Store the hyper-parameters (signature for scikit-learn < 1.0).

        ``max_bins``, ``min_bin_size`` and ``splitter_mode`` are additions
        of this class; the remaining parameters mirror the scikit-learn
        estimator it derives from.
        """
        super(RandomForestRegressor, self).__init__(
            n_estimators=n_estimators,
            criterion=criterion,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=min_weight_fraction_leaf,
            max_features=max_features,
            max_leaf_nodes=max_leaf_nodes,
            min_impurity_decrease=min_impurity_decrease,
            min_impurity_split=min_impurity_split,
            bootstrap=bootstrap,
            oob_score=oob_score,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose,
            warm_start=warm_start,
            ccp_alpha=ccp_alpha,
            max_samples=max_samples
        )
        self.warm_start = warm_start
        self.ccp_alpha = ccp_alpha
        self.max_samples = max_samples
        self.max_bins = max_bins
        self.min_bin_size = min_bin_size
        # Keep the value the caller passed: resetting it to None here would
        # silently discard an explicitly requested `min_impurity_split`.
        self.min_impurity_split = min_impurity_split
        self.splitter_mode = splitter_mode
|
|
849
|
-
|
|
850
|
-
@property
def _estimators_(self):
    """Build (once) the list of per-tree ``DecisionTreeRegressor`` objects.

    A non-empty ``_cached_estimators_`` is returned as is. Otherwise one
    estimator per tree is created, its ``tree_`` is populated from the
    fitted oneDAL model, and the resulting list is cached and returned.
    """
    cached = getattr(self, '_cached_estimators_', None)
    if cached:
        return cached

    if sklearn_check_version('0.22'):
        check_is_fitted(self)
    else:
        check_is_fitted(self, '_onedal_model')

    # convert model to estimators
    params = {
        'criterion': self.criterion,
        'max_depth': self.max_depth,
        'min_samples_split': self.min_samples_split,
        'min_samples_leaf': self.min_samples_leaf,
        'min_weight_fraction_leaf': self.min_weight_fraction_leaf,
        'max_features': self.max_features,
        'max_leaf_nodes': self.max_leaf_nodes,
        'min_impurity_decrease': self.min_impurity_decrease,
        'random_state': None,
    }
    if not sklearn_check_version('1.0'):
        params['min_impurity_split'] = self.min_impurity_split
    est = DecisionTreeRegressor(**params)

    # we need to set est.tree_ field with Trees constructed from Intel(R)
    # oneAPI Data Analytics Library solution
    estimators_ = []
    random_state_checked = check_random_state(self.random_state)
    seed_bound = np.iinfo(np.int32).max
    for i in range(self.n_estimators):
        est_i = clone(est)
        est_i.set_params(
            random_state=random_state_checked.randint(seed_bound))
        if sklearn_check_version('1.0'):
            est_i.n_features_in_ = self.n_features_in_
        else:
            est_i.n_features_ = self.n_features_in_
        est_i.n_classes_ = 1
        est_i.n_outputs_ = self.n_outputs_

        tree_i_state_class = get_tree_state_reg(self._onedal_model, i)
        tree_i_state_dict = {
            'max_depth': tree_i_state_class.max_depth,
            'node_count': tree_i_state_class.node_count,
            'nodes': check_tree_nodes(tree_i_state_class.node_ar),
            'values': tree_i_state_class.value_ar,
        }

        est_i.tree_ = Tree(
            self.n_features_in_,
            np.array([1], dtype=np.intp),
            self.n_outputs_)
        est_i.tree_.__setstate__(tree_i_state_dict)
        estimators_.append(est_i)

    # Populate the cache read at the top of this property; without this the
    # trees were rebuilt on every access.
    self._cached_estimators_ = estimators_
    return estimators_
|
|
905
|
-
|
|
906
|
-
def _onedal_ready(self, X, y, sample_weight):
|
|
907
|
-
# TODO:
|
|
908
|
-
# move some common checks for both devices here.
|
|
909
|
-
|
|
910
|
-
# We have to get `n_outputs_` before dispatching
|
|
911
|
-
# oneDAL requirements: Number of outputs `n_outputs_` should be 1.
|
|
912
|
-
y = np.asarray(y)
|
|
913
|
-
|
|
914
|
-
if y.ndim == 1:
|
|
915
|
-
# reshape is necessary to preserve the data contiguity against vs
|
|
916
|
-
# [:, np.newaxis] that does not.
|
|
917
|
-
y = np.reshape(y, (-1, 1))
|
|
918
|
-
self.n_outputs_ = y.shape[1]
|
|
919
|
-
ready = self.n_outputs_ == 1
|
|
920
|
-
return ready, X, y, sample_weight
|
|
921
|
-
|
|
922
|
-
def _onedal_cpu_supported(self, method_name, *data):
|
|
923
|
-
if method_name == 'fit':
|
|
924
|
-
ready, X, y, sample_weight = self._onedal_ready(*data)
|
|
925
|
-
if self.splitter_mode == 'random':
|
|
926
|
-
warnings.warn("'random' splitter mode supports GPU devices only "
|
|
927
|
-
"and requires oneDAL version >= 2023.1.1. "
|
|
928
|
-
"Using 'best' mode instead.", RuntimeWarning)
|
|
929
|
-
self.splitter_mode = 'best'
|
|
930
|
-
if not ready:
|
|
931
|
-
return False
|
|
932
|
-
elif not (self.oob_score and daal_check_version(
|
|
933
|
-
(2021, 'P', 500)) or not self.oob_score):
|
|
934
|
-
return False
|
|
935
|
-
elif self.criterion not in ["mse", "squared_error"]:
|
|
936
|
-
return False
|
|
937
|
-
elif sp.issparse(X):
|
|
938
|
-
return False
|
|
939
|
-
elif sp.issparse(y):
|
|
940
|
-
return False
|
|
941
|
-
elif sp.issparse(sample_weight):
|
|
942
|
-
return False
|
|
943
|
-
elif not self.ccp_alpha == 0.0:
|
|
944
|
-
return False
|
|
945
|
-
elif self.warm_start:
|
|
946
|
-
return False
|
|
947
|
-
elif self.oob_score and not daal_check_version((2023, 'P', 101)):
|
|
948
|
-
return False
|
|
949
|
-
elif not self.n_outputs_ == 1:
|
|
950
|
-
return False
|
|
951
|
-
elif hasattr(self, 'estimators_'):
|
|
952
|
-
return False
|
|
953
|
-
else:
|
|
954
|
-
return True
|
|
955
|
-
if method_name == 'predict':
|
|
956
|
-
if not hasattr(self, '_onedal_model'):
|
|
957
|
-
return False
|
|
958
|
-
elif sp.issparse(data[0]):
|
|
959
|
-
return False
|
|
960
|
-
elif not (hasattr(self, 'n_outputs_') and self.n_outputs_ == 1):
|
|
961
|
-
return False
|
|
962
|
-
elif not daal_check_version((2021, 'P', 400)):
|
|
963
|
-
return False
|
|
964
|
-
elif self.warm_start:
|
|
965
|
-
return False
|
|
966
|
-
else:
|
|
967
|
-
return True
|
|
968
|
-
raise RuntimeError(
|
|
969
|
-
f'Unknown method {method_name} in {self.__class__.__name__}')
|
|
970
|
-
|
|
971
|
-
def _onedal_gpu_supported(self, method_name, *data):
|
|
972
|
-
if method_name == 'fit':
|
|
973
|
-
ready, X, y, sample_weight = self._onedal_ready(*data)
|
|
974
|
-
if self.splitter_mode == 'random' and \
|
|
975
|
-
not daal_check_version((2023, 'P', 101)):
|
|
976
|
-
warnings.warn("'random' splitter mode requires OneDAL >= 2023.1.1. "
|
|
977
|
-
"Using 'best' mode instead.", RuntimeWarning)
|
|
978
|
-
self.splitter_mode = 'best'
|
|
979
|
-
if not ready:
|
|
980
|
-
return False
|
|
981
|
-
elif not (self.oob_score and daal_check_version(
|
|
982
|
-
(2021, 'P', 500)) or not self.oob_score):
|
|
983
|
-
return False
|
|
984
|
-
elif self.criterion not in ["mse", "squared_error"]:
|
|
985
|
-
return False
|
|
986
|
-
elif sp.issparse(X):
|
|
987
|
-
return False
|
|
988
|
-
elif sp.issparse(y):
|
|
989
|
-
return False
|
|
990
|
-
elif sample_weight is not None: # `sample_weight` is not supported.
|
|
991
|
-
return False
|
|
992
|
-
elif not self.ccp_alpha == 0.0:
|
|
993
|
-
return False
|
|
994
|
-
elif self.warm_start:
|
|
995
|
-
return False
|
|
996
|
-
elif self.oob_score:
|
|
997
|
-
return False
|
|
998
|
-
elif hasattr(self, 'estimators_'):
|
|
999
|
-
return False
|
|
1000
|
-
else:
|
|
1001
|
-
return True
|
|
1002
|
-
if method_name == 'predict':
|
|
1003
|
-
X = data[0]
|
|
1004
|
-
if not hasattr(self, '_onedal_model'):
|
|
1005
|
-
return False
|
|
1006
|
-
elif sp.issparse(X):
|
|
1007
|
-
return False
|
|
1008
|
-
elif not (hasattr(self, 'n_outputs_') and self.n_outputs_ == 1):
|
|
1009
|
-
return False
|
|
1010
|
-
elif not daal_check_version((2021, 'P', 400)):
|
|
1011
|
-
return False
|
|
1012
|
-
elif self.warm_start:
|
|
1013
|
-
return False
|
|
1014
|
-
else:
|
|
1015
|
-
return True
|
|
1016
|
-
raise RuntimeError(
|
|
1017
|
-
f'Unknown method {method_name} in {self.__class__.__name__}')
|
|
1018
|
-
|
|
1019
|
-
def _onedal_fit(self, X, y, sample_weight=None, queue=None):
|
|
1020
|
-
if sp.issparse(y):
|
|
1021
|
-
raise ValueError(
|
|
1022
|
-
"sparse multilabel-indicator for y is not supported."
|
|
1023
|
-
)
|
|
1024
|
-
if sklearn_check_version("1.2"):
|
|
1025
|
-
self._validate_params()
|
|
1026
|
-
else:
|
|
1027
|
-
self._check_parameters()
|
|
1028
|
-
if sample_weight is not None:
|
|
1029
|
-
sample_weight = self.check_sample_weight(sample_weight, X)
|
|
1030
|
-
if sklearn_check_version("1.0"):
|
|
1031
|
-
self._check_feature_names(X, reset=True)
|
|
1032
|
-
X = check_array(X, dtype=[np.float64, np.float32])
|
|
1033
|
-
y = np.atleast_1d(np.asarray(y))
|
|
1034
|
-
y = check_array(y, ensure_2d=False, dtype=X.dtype)
|
|
1035
|
-
check_consistent_length(X, y)
|
|
1036
|
-
self.n_features_in_ = X.shape[1]
|
|
1037
|
-
if not sklearn_check_version('1.0'):
|
|
1038
|
-
self.n_features_ = self.n_features_in_
|
|
1039
|
-
rs_ = check_random_state(self.random_state)
|
|
1040
|
-
|
|
1041
|
-
if self.oob_score:
|
|
1042
|
-
err = 'out_of_bag_error_r2|out_of_bag_error_prediction'
|
|
1043
|
-
else:
|
|
1044
|
-
err = 'none'
|
|
1045
|
-
|
|
1046
|
-
onedal_params = {
|
|
1047
|
-
'n_estimators': self.n_estimators,
|
|
1048
|
-
'criterion': self.criterion,
|
|
1049
|
-
'max_depth': self.max_depth,
|
|
1050
|
-
'min_samples_split': self.min_samples_split,
|
|
1051
|
-
'min_samples_leaf': self.min_samples_leaf,
|
|
1052
|
-
'min_weight_fraction_leaf': self.min_weight_fraction_leaf,
|
|
1053
|
-
'max_features': self.max_features,
|
|
1054
|
-
'max_leaf_nodes': self.max_leaf_nodes,
|
|
1055
|
-
'min_impurity_decrease': self.min_impurity_decrease,
|
|
1056
|
-
'bootstrap': self.bootstrap,
|
|
1057
|
-
'oob_score': self.oob_score,
|
|
1058
|
-
'n_jobs': self.n_jobs,
|
|
1059
|
-
'random_state': rs_,
|
|
1060
|
-
'verbose': self.verbose,
|
|
1061
|
-
'warm_start': self.warm_start,
|
|
1062
|
-
'error_metric_mode': err,
|
|
1063
|
-
'variable_importance_mode': 'mdi',
|
|
1064
|
-
'max_samples': self.max_samples
|
|
1065
|
-
}
|
|
1066
|
-
if daal_check_version((2023, 'P', 101)):
|
|
1067
|
-
onedal_params['splitter_mode'] = self.splitter_mode
|
|
1068
|
-
self._cached_estimators_ = None
|
|
1069
|
-
self._onedal_estimator = self._onedal_regressor(**onedal_params)
|
|
1070
|
-
self._onedal_estimator.fit(X, y, sample_weight, queue=queue)
|
|
1071
|
-
|
|
1072
|
-
self._save_attributes()
|
|
1073
|
-
if sklearn_check_version("1.2"):
|
|
1074
|
-
self._estimator = DecisionTreeRegressor()
|
|
1075
|
-
self.estimators_ = self._estimators_
|
|
1076
|
-
return self
|
|
1077
|
-
|
|
1078
|
-
def _onedal_predict(self, X, queue=None):
    """Predict with the fitted oneDAL estimator.

    Feature names are checked (scikit-learn >= 1.0) on the input as given,
    the input is then validated with ``_validate_X_predict`` and passed,
    together with ``queue``, to the oneDAL estimator's ``predict``.
    """
    if sklearn_check_version("1.0"):
        self._check_feature_names(X, reset=False)
    validated = self._validate_X_predict(X)
    predictions = self._onedal_estimator.predict(validated, queue=queue)
    return predictions
|
|
1083
|
-
|
|
1084
|
-
def fit(self, X, y, sample_weight=None):
    """
    Train the forest on the training set (X, y).

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The training input samples. Internally, its dtype will be converted
        to ``dtype=np.float32``. If a sparse matrix is provided, it will be
        converted into a sparse ``csc_matrix``.

    y : array-like of shape (n_samples,) or (n_samples, n_outputs)
        The target values (class labels in classification, real numbers in
        regression).

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights. If None, then samples are equally weighted. Splits
        that would create child nodes with net zero or negative weight are
        ignored while searching for a split in each node. In the case of
        classification, splits are also ignored if they would result in any
        single class carrying a negative weight in either child node.

    Returns
    -------
    self : object

    Raises
    ------
    ValueError
        If ``max_samples`` is set while ``bootstrap`` is False.
    """
    # max_samples is rejected unless bootstrap is enabled.
    if not (self.bootstrap or self.max_samples is None):
        raise ValueError(
            "`max_sample` cannot be set if `bootstrap=False`. "
            "Either switch to `bootstrap=True` or set "
            "`max_sample=None`."
        )

    backends = {
        'onedal': self.__class__._onedal_fit,
        'sklearn': sklearn_RandomForestRegressor.fit,
    }
    dispatch(self, 'fit', backends, X, y, sample_weight)
    return self
|
|
1121
|
-
|
|
1122
|
-
@wrap_output_data
def predict(self, X):
    """
    Predict regression target for X.

    The call is dispatched to either the oneDAL implementation
    (``_onedal_predict``) or the stock scikit-learn
    ``RandomForestRegressor.predict``.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples. Internally, its dtype will be converted to
        ``dtype=np.float32``. If a sparse matrix is provided, it will be
        converted into a sparse ``csr_matrix``.

    Returns
    -------
    y : ndarray of shape (n_samples,) or (n_samples, n_outputs)
        The predicted values.
    """
    return dispatch(self, 'predict', {
        'onedal': self.__class__._onedal_predict,
        'sklearn': sklearn_RandomForestRegressor.predict,
    }, X)
|
|
1148
|
-
|
|
1149
|
-
if sklearn_check_version('1.0'):
    # From scikit-learn 1.0 on, `n_features_` is only exposed as a
    # deprecated read-only alias of `n_features_in_`.
    @deprecated(
        "Attribute `n_features_` was deprecated in version 1.0 and will be "
        "removed in 1.2. Use `n_features_in_` instead."
    )
    @property
    def n_features_(self):
        # Forward to the attribute that replaced it.
        return self.n_features_in_
|