scikit-learn-intelex 2025.0.0__py312-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (278)
  1. daal4py/__init__.py +73 -0
  2. daal4py/__main__.py +58 -0
  3. daal4py/_daal4py.cpython-312-x86_64-linux-gnu.so +0 -0
  4. daal4py/doc/third-party-programs.txt +424 -0
  5. daal4py/mb/__init__.py +19 -0
  6. daal4py/mb/model_builders.py +377 -0
  7. daal4py/mpi_transceiver.cpython-312-x86_64-linux-gnu.so +0 -0
  8. daal4py/sklearn/__init__.py +40 -0
  9. daal4py/sklearn/_n_jobs_support.py +242 -0
  10. daal4py/sklearn/_utils.py +241 -0
  11. daal4py/sklearn/cluster/__init__.py +20 -0
  12. daal4py/sklearn/cluster/dbscan.py +165 -0
  13. daal4py/sklearn/cluster/k_means.py +597 -0
  14. daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
  15. daal4py/sklearn/decomposition/__init__.py +19 -0
  16. daal4py/sklearn/decomposition/_pca.py +524 -0
  17. daal4py/sklearn/ensemble/AdaBoostClassifier.py +192 -0
  18. daal4py/sklearn/ensemble/GBTDAAL.py +318 -0
  19. daal4py/sklearn/ensemble/__init__.py +27 -0
  20. daal4py/sklearn/ensemble/_forest.py +1397 -0
  21. daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
  22. daal4py/sklearn/linear_model/__init__.py +29 -0
  23. daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
  24. daal4py/sklearn/linear_model/_linear.py +272 -0
  25. daal4py/sklearn/linear_model/_ridge.py +325 -0
  26. daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
  27. daal4py/sklearn/linear_model/linear.py +17 -0
  28. daal4py/sklearn/linear_model/logistic_loss.py +195 -0
  29. daal4py/sklearn/linear_model/logistic_path.py +1026 -0
  30. daal4py/sklearn/linear_model/ridge.py +17 -0
  31. daal4py/sklearn/linear_model/tests/test_linear.py +196 -0
  32. daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
  33. daal4py/sklearn/manifold/__init__.py +19 -0
  34. daal4py/sklearn/manifold/_t_sne.py +405 -0
  35. daal4py/sklearn/metrics/__init__.py +20 -0
  36. daal4py/sklearn/metrics/_pairwise.py +155 -0
  37. daal4py/sklearn/metrics/_ranking.py +210 -0
  38. daal4py/sklearn/model_selection/__init__.py +19 -0
  39. daal4py/sklearn/model_selection/_split.py +309 -0
  40. daal4py/sklearn/model_selection/tests/test_split.py +56 -0
  41. daal4py/sklearn/monkeypatch/__init__.py +0 -0
  42. daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
  43. daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
  44. daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
  45. daal4py/sklearn/monkeypatch/tests/test_patching.py +87 -0
  46. daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +118 -0
  47. daal4py/sklearn/neighbors/__init__.py +21 -0
  48. daal4py/sklearn/neighbors/_base.py +503 -0
  49. daal4py/sklearn/neighbors/_classification.py +139 -0
  50. daal4py/sklearn/neighbors/_regression.py +74 -0
  51. daal4py/sklearn/neighbors/_unsupervised.py +55 -0
  52. daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
  53. daal4py/sklearn/svm/__init__.py +19 -0
  54. daal4py/sklearn/svm/svm.py +734 -0
  55. daal4py/sklearn/utils/__init__.py +21 -0
  56. daal4py/sklearn/utils/base.py +75 -0
  57. daal4py/sklearn/utils/tests/test_utils.py +51 -0
  58. daal4py/sklearn/utils/validation.py +693 -0
  59. onedal/__init__.py +83 -0
  60. onedal/_config.py +53 -0
  61. onedal/_device_offload.py +229 -0
  62. onedal/_onedal_py_dpc.cpython-312-x86_64-linux-gnu.so +0 -0
  63. onedal/_onedal_py_host.cpython-312-x86_64-linux-gnu.so +0 -0
  64. onedal/_onedal_py_spmd_dpc.cpython-312-x86_64-linux-gnu.so +0 -0
  65. onedal/basic_statistics/__init__.py +20 -0
  66. onedal/basic_statistics/basic_statistics.py +107 -0
  67. onedal/basic_statistics/incremental_basic_statistics.py +160 -0
  68. onedal/basic_statistics/tests/test_basic_statistics.py +298 -0
  69. onedal/basic_statistics/tests/test_incremental_basic_statistics.py +196 -0
  70. onedal/cluster/__init__.py +27 -0
  71. onedal/cluster/dbscan.py +110 -0
  72. onedal/cluster/kmeans.py +560 -0
  73. onedal/cluster/kmeans_init.py +115 -0
  74. onedal/cluster/tests/test_dbscan.py +125 -0
  75. onedal/cluster/tests/test_kmeans.py +88 -0
  76. onedal/cluster/tests/test_kmeans_init.py +93 -0
  77. onedal/common/_base.py +38 -0
  78. onedal/common/_estimator_checks.py +47 -0
  79. onedal/common/_mixin.py +62 -0
  80. onedal/common/_policy.py +59 -0
  81. onedal/common/_spmd_policy.py +30 -0
  82. onedal/common/hyperparameters.py +116 -0
  83. onedal/common/tests/test_policy.py +75 -0
  84. onedal/covariance/__init__.py +20 -0
  85. onedal/covariance/covariance.py +125 -0
  86. onedal/covariance/incremental_covariance.py +146 -0
  87. onedal/covariance/tests/test_covariance.py +50 -0
  88. onedal/covariance/tests/test_incremental_covariance.py +122 -0
  89. onedal/datatypes/__init__.py +19 -0
  90. onedal/datatypes/_data_conversion.py +95 -0
  91. onedal/datatypes/tests/test_data.py +235 -0
  92. onedal/decomposition/__init__.py +20 -0
  93. onedal/decomposition/incremental_pca.py +204 -0
  94. onedal/decomposition/pca.py +186 -0
  95. onedal/decomposition/tests/test_incremental_pca.py +198 -0
  96. onedal/ensemble/__init__.py +29 -0
  97. onedal/ensemble/forest.py +720 -0
  98. onedal/ensemble/tests/test_random_forest.py +97 -0
  99. onedal/linear_model/__init__.py +27 -0
  100. onedal/linear_model/incremental_linear_model.py +258 -0
  101. onedal/linear_model/linear_model.py +329 -0
  102. onedal/linear_model/logistic_regression.py +249 -0
  103. onedal/linear_model/tests/test_incremental_linear_regression.py +168 -0
  104. onedal/linear_model/tests/test_incremental_ridge_regression.py +107 -0
  105. onedal/linear_model/tests/test_linear_regression.py +149 -0
  106. onedal/linear_model/tests/test_logistic_regression.py +95 -0
  107. onedal/linear_model/tests/test_ridge.py +95 -0
  108. onedal/neighbors/__init__.py +19 -0
  109. onedal/neighbors/neighbors.py +778 -0
  110. onedal/neighbors/tests/test_knn_classification.py +49 -0
  111. onedal/primitives/__init__.py +27 -0
  112. onedal/primitives/get_tree.py +25 -0
  113. onedal/primitives/kernel_functions.py +153 -0
  114. onedal/primitives/tests/test_kernel_functions.py +159 -0
  115. onedal/spmd/__init__.py +25 -0
  116. onedal/spmd/_base.py +30 -0
  117. onedal/spmd/basic_statistics/__init__.py +20 -0
  118. onedal/spmd/basic_statistics/basic_statistics.py +30 -0
  119. onedal/spmd/basic_statistics/incremental_basic_statistics.py +69 -0
  120. onedal/spmd/cluster/__init__.py +28 -0
  121. onedal/spmd/cluster/dbscan.py +23 -0
  122. onedal/spmd/cluster/kmeans.py +56 -0
  123. onedal/spmd/covariance/__init__.py +20 -0
  124. onedal/spmd/covariance/covariance.py +26 -0
  125. onedal/spmd/covariance/incremental_covariance.py +82 -0
  126. onedal/spmd/decomposition/__init__.py +20 -0
  127. onedal/spmd/decomposition/incremental_pca.py +117 -0
  128. onedal/spmd/decomposition/pca.py +26 -0
  129. onedal/spmd/ensemble/__init__.py +19 -0
  130. onedal/spmd/ensemble/forest.py +28 -0
  131. onedal/spmd/linear_model/__init__.py +21 -0
  132. onedal/spmd/linear_model/incremental_linear_model.py +97 -0
  133. onedal/spmd/linear_model/linear_model.py +30 -0
  134. onedal/spmd/linear_model/logistic_regression.py +38 -0
  135. onedal/spmd/neighbors/__init__.py +19 -0
  136. onedal/spmd/neighbors/neighbors.py +75 -0
  137. onedal/svm/__init__.py +19 -0
  138. onedal/svm/svm.py +556 -0
  139. onedal/svm/tests/test_csr_svm.py +351 -0
  140. onedal/svm/tests/test_nusvc.py +204 -0
  141. onedal/svm/tests/test_nusvr.py +210 -0
  142. onedal/svm/tests/test_svc.py +168 -0
  143. onedal/svm/tests/test_svr.py +243 -0
  144. onedal/tests/test_common.py +41 -0
  145. onedal/tests/utils/_dataframes_support.py +168 -0
  146. onedal/tests/utils/_device_selection.py +107 -0
  147. onedal/utils/__init__.py +49 -0
  148. onedal/utils/_array_api.py +91 -0
  149. onedal/utils/validation.py +432 -0
  150. scikit_learn_intelex-2025.0.0.dist-info/LICENSE.txt +202 -0
  151. scikit_learn_intelex-2025.0.0.dist-info/METADATA +231 -0
  152. scikit_learn_intelex-2025.0.0.dist-info/RECORD +278 -0
  153. scikit_learn_intelex-2025.0.0.dist-info/WHEEL +5 -0
  154. scikit_learn_intelex-2025.0.0.dist-info/top_level.txt +3 -0
  155. sklearnex/__init__.py +65 -0
  156. sklearnex/__main__.py +58 -0
  157. sklearnex/_config.py +98 -0
  158. sklearnex/_device_offload.py +121 -0
  159. sklearnex/_utils.py +109 -0
  160. sklearnex/basic_statistics/__init__.py +20 -0
  161. sklearnex/basic_statistics/basic_statistics.py +140 -0
  162. sklearnex/basic_statistics/incremental_basic_statistics.py +288 -0
  163. sklearnex/basic_statistics/tests/test_basic_statistics.py +251 -0
  164. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +384 -0
  165. sklearnex/cluster/__init__.py +20 -0
  166. sklearnex/cluster/dbscan.py +192 -0
  167. sklearnex/cluster/k_means.py +383 -0
  168. sklearnex/cluster/tests/test_dbscan.py +38 -0
  169. sklearnex/cluster/tests/test_kmeans.py +153 -0
  170. sklearnex/conftest.py +73 -0
  171. sklearnex/covariance/__init__.py +19 -0
  172. sklearnex/covariance/incremental_covariance.py +368 -0
  173. sklearnex/covariance/tests/test_incremental_covariance.py +226 -0
  174. sklearnex/decomposition/__init__.py +19 -0
  175. sklearnex/decomposition/pca.py +414 -0
  176. sklearnex/decomposition/tests/test_pca.py +58 -0
  177. sklearnex/dispatcher.py +543 -0
  178. sklearnex/doc/third-party-programs.txt +424 -0
  179. sklearnex/ensemble/__init__.py +29 -0
  180. sklearnex/ensemble/_forest.py +2016 -0
  181. sklearnex/ensemble/tests/test_forest.py +120 -0
  182. sklearnex/glob/__main__.py +72 -0
  183. sklearnex/glob/dispatcher.py +101 -0
  184. sklearnex/linear_model/__init__.py +32 -0
  185. sklearnex/linear_model/coordinate_descent.py +30 -0
  186. sklearnex/linear_model/incremental_linear.py +463 -0
  187. sklearnex/linear_model/incremental_ridge.py +418 -0
  188. sklearnex/linear_model/linear.py +302 -0
  189. sklearnex/linear_model/logistic_path.py +17 -0
  190. sklearnex/linear_model/logistic_regression.py +403 -0
  191. sklearnex/linear_model/ridge.py +24 -0
  192. sklearnex/linear_model/tests/test_incremental_linear.py +203 -0
  193. sklearnex/linear_model/tests/test_incremental_ridge.py +153 -0
  194. sklearnex/linear_model/tests/test_linear.py +142 -0
  195. sklearnex/linear_model/tests/test_logreg.py +134 -0
  196. sklearnex/manifold/__init__.py +19 -0
  197. sklearnex/manifold/t_sne.py +21 -0
  198. sklearnex/manifold/tests/test_tsne.py +26 -0
  199. sklearnex/metrics/__init__.py +23 -0
  200. sklearnex/metrics/pairwise.py +22 -0
  201. sklearnex/metrics/ranking.py +20 -0
  202. sklearnex/metrics/tests/test_metrics.py +39 -0
  203. sklearnex/model_selection/__init__.py +21 -0
  204. sklearnex/model_selection/split.py +22 -0
  205. sklearnex/model_selection/tests/test_model_selection.py +34 -0
  206. sklearnex/neighbors/__init__.py +27 -0
  207. sklearnex/neighbors/_lof.py +231 -0
  208. sklearnex/neighbors/common.py +310 -0
  209. sklearnex/neighbors/knn_classification.py +226 -0
  210. sklearnex/neighbors/knn_regression.py +203 -0
  211. sklearnex/neighbors/knn_unsupervised.py +170 -0
  212. sklearnex/neighbors/tests/test_neighbors.py +80 -0
  213. sklearnex/preview/__init__.py +17 -0
  214. sklearnex/preview/covariance/__init__.py +19 -0
  215. sklearnex/preview/covariance/covariance.py +133 -0
  216. sklearnex/preview/covariance/tests/test_covariance.py +66 -0
  217. sklearnex/preview/decomposition/__init__.py +19 -0
  218. sklearnex/preview/decomposition/incremental_pca.py +228 -0
  219. sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
  220. sklearnex/preview/linear_model/__init__.py +19 -0
  221. sklearnex/preview/linear_model/ridge.py +419 -0
  222. sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
  223. sklearnex/spmd/__init__.py +25 -0
  224. sklearnex/spmd/basic_statistics/__init__.py +20 -0
  225. sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
  226. sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
  227. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  228. sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +307 -0
  229. sklearnex/spmd/cluster/__init__.py +30 -0
  230. sklearnex/spmd/cluster/dbscan.py +50 -0
  231. sklearnex/spmd/cluster/kmeans.py +21 -0
  232. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  233. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
  234. sklearnex/spmd/covariance/__init__.py +20 -0
  235. sklearnex/spmd/covariance/covariance.py +21 -0
  236. sklearnex/spmd/covariance/incremental_covariance.py +37 -0
  237. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  238. sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
  239. sklearnex/spmd/decomposition/__init__.py +20 -0
  240. sklearnex/spmd/decomposition/incremental_pca.py +30 -0
  241. sklearnex/spmd/decomposition/pca.py +21 -0
  242. sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
  243. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  244. sklearnex/spmd/ensemble/__init__.py +19 -0
  245. sklearnex/spmd/ensemble/forest.py +71 -0
  246. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  247. sklearnex/spmd/linear_model/__init__.py +21 -0
  248. sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
  249. sklearnex/spmd/linear_model/linear_model.py +21 -0
  250. sklearnex/spmd/linear_model/logistic_regression.py +21 -0
  251. sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +329 -0
  252. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  253. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +166 -0
  254. sklearnex/spmd/neighbors/__init__.py +19 -0
  255. sklearnex/spmd/neighbors/neighbors.py +25 -0
  256. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  257. sklearnex/svm/__init__.py +29 -0
  258. sklearnex/svm/_common.py +328 -0
  259. sklearnex/svm/nusvc.py +332 -0
  260. sklearnex/svm/nusvr.py +148 -0
  261. sklearnex/svm/svc.py +360 -0
  262. sklearnex/svm/svr.py +149 -0
  263. sklearnex/svm/tests/test_svm.py +93 -0
  264. sklearnex/tests/_utils.py +328 -0
  265. sklearnex/tests/_utils_spmd.py +198 -0
  266. sklearnex/tests/test_common.py +54 -0
  267. sklearnex/tests/test_config.py +43 -0
  268. sklearnex/tests/test_memory_usage.py +291 -0
  269. sklearnex/tests/test_monkeypatch.py +276 -0
  270. sklearnex/tests/test_n_jobs_support.py +103 -0
  271. sklearnex/tests/test_parallel.py +48 -0
  272. sklearnex/tests/test_patching.py +385 -0
  273. sklearnex/tests/test_run_to_run_stability.py +296 -0
  274. sklearnex/utils/__init__.py +19 -0
  275. sklearnex/utils/_array_api.py +82 -0
  276. sklearnex/utils/parallel.py +59 -0
  277. sklearnex/utils/tests/test_finite.py +89 -0
  278. sklearnex/utils/validation.py +17 -0
@@ -0,0 +1,192 @@
1
+ # ==============================================================================
2
+ # Copyright 2014 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ # daal4py AdaBoost (Adaptive Boosting) scikit-learn-compatible estimator class
18
+
19
+ import numbers
20
+
21
+ import numpy as np
22
+ from sklearn import preprocessing
23
+ from sklearn.base import BaseEstimator, ClassifierMixin
24
+ from sklearn.utils.multiclass import check_classification_targets
25
+ from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
26
+
27
+ import daal4py as d4p
28
+
29
+ from .._n_jobs_support import control_n_jobs
30
+ from .._utils import getFPType
31
+
32
+
33
@control_n_jobs(decorated_methods=["fit", "predict"])
class AdaBoostClassifier(BaseEstimator, ClassifierMixin):
    """scikit-learn-compatible AdaBoost classifier built on daal4py.

    The ensemble is trained with ``d4p.adaboost_training`` using daal4py
    decision-tree classifiers as weak learners.

    Parameters
    ----------
    split_criterion : {"gini", "infoGain"}, default="gini"
        Split criterion passed to the decision-tree weak learner.
    max_tree_depth : int, default=1
        Depth limit of each weak learner; must be an integer >= 0.
    min_observations_in_leaf_node : int, default=1
        Must be an integer > 0.
    max_iterations : int, default=100
        Must be an integer > 0.
    learning_rate : float, default=1.0
        Must be > 0.
    accuracy_threshold : float, default=0.01
        Must satisfy ``0 <= accuracy_threshold < 1``.
    """

    def __init__(
        self,
        split_criterion="gini",
        max_tree_depth=1,
        min_observations_in_leaf_node=1,
        max_iterations=100,
        learning_rate=1.0,
        accuracy_threshold=0.01,
    ):
        self.split_criterion = split_criterion
        self.max_tree_depth = max_tree_depth
        self.min_observations_in_leaf_node = min_observations_in_leaf_node
        self.max_iterations = max_iterations
        self.learning_rate = learning_rate
        self.accuracy_threshold = accuracy_threshold

    def _check_params(self):
        """Validate the hyper-parameters stored on the estimator.

        Raises
        ------
        ValueError
            If any hyper-parameter is outside its documented domain.
        """
        if self.split_criterion not in ("gini", "infoGain"):
            raise ValueError(
                'Parameter "split_criterion" must be ' '"gini" or "infoGain".'
            )
        if (
            not isinstance(self.max_tree_depth, numbers.Integral)
            or self.max_tree_depth < 0
        ):
            raise ValueError(
                'Parameter "max_tree_depth" must be ' "positive integer value or zero."
            )
        if (
            not isinstance(self.min_observations_in_leaf_node, numbers.Integral)
            or self.min_observations_in_leaf_node <= 0
        ):
            raise ValueError(
                'Parameter "min_observations_in_leaf_node" must be '
                "non-zero positive integer value."
            )
        if (
            not isinstance(self.max_iterations, numbers.Integral)
            or self.max_iterations <= 0
        ):
            raise ValueError(
                'Parameter "max_iterations" must be ' "non-zero positive integer value."
            )
        if self.learning_rate <= 0:
            raise ValueError(
                'Parameter "learning_rate" must be ' "non-zero positive value."
            )
        # it is not clear why it is so but we will get error from
        # Intel(R) oneAPI Data Analytics Library otherwise.
        # The two bounds must be joined with ``or``: with ``and`` the
        # condition can never hold and the check is dead code.
        if self.accuracy_threshold < 0 or self.accuracy_threshold >= 1:
            raise ValueError(
                'Parameter "accuracy_threshold" must be '
                "more or equal to 0 and less than 1."
            )

    def fit(self, X, y):
        """Train the AdaBoost ensemble on ``X`` with class labels ``y``.

        Returns
        -------
        self
            The estimator. When only one class is present no daal4py model
            is trained and ``daal_model_`` is not set.

        Raises
        ------
        ValueError
            If a hyper-parameter is invalid (see ``_check_params``).
        """
        self._check_params()

        # Check that X and y have correct shape
        X, y = check_X_y(X, y, y_numeric=False, dtype=[np.single, np.double])

        check_classification_targets(y)

        # Encode labels
        le = preprocessing.LabelEncoder()
        le.fit(y)
        self.classes_ = le.classes_
        y_ = le.transform(y)

        # Convert to 2d array
        y_ = y_.reshape((-1, 1))

        self.n_classes_ = len(self.classes_)

        self.n_features_in_ = X.shape[1]

        # Classifier can't train when only one class is present.
        # Trivial case
        if self.n_classes_ == 1:
            return self

        # Define type of data
        fptype = getFPType(X)

        # Fit the model
        tr = d4p.decision_tree_classification_training(
            fptype=fptype,
            nClasses=self.n_classes_,
            # this parameter is strict upper bound in DAAL
            maxTreeDepth=self.max_tree_depth + 1,
            minObservationsInLeafNodes=self.min_observations_in_leaf_node,
            splitCriterion=self.split_criterion,
            pruning="none",
        )

        pr = d4p.decision_tree_classification_prediction(
            fptype=fptype, nClasses=self.n_classes_
        )

        train_algo = d4p.adaboost_training(
            fptype=fptype,
            nClasses=self.n_classes_,
            weakLearnerTraining=tr,
            weakLearnerPrediction=pr,
            maxIterations=self.max_iterations,
            learningRate=self.learning_rate,
            accuracyThreshold=self.accuracy_threshold,
        )

        train_result = train_algo.compute(X, y_)

        # Store the model
        self.daal_model_ = train_result.model

        # Return the classifier
        return self

    def predict(self, X):
        """Predict class labels for ``X``.

        Returns
        -------
        ndarray
            Labels decoded back into the values stored in ``classes_``.

        Raises
        ------
        ValueError
            If the number of columns of ``X`` differs from the one seen in
            ``fit``, or if no ``daal_model_`` is available.
        """
        check_is_fitted(self)

        # Input validation
        X = check_array(X, dtype=[np.single, np.double])
        if X.shape[1] != self.n_features_in_:
            raise ValueError("Shape of input is different from what was seen in `fit`")

        # Trivial case
        if self.n_classes_ == 1:
            return np.full(X.shape[0], self.classes_[0])

        if not hasattr(self, "daal_model_"):
            raise ValueError(
                (
                    "The class {} instance does not have 'daal_model_' attribute set. "
                    "Call 'fit' with appropriate arguments before using this method."
                ).format(type(self).__name__)
            )

        # Define type of data
        fptype = getFPType(X)

        pr = d4p.decision_tree_classification_prediction(
            fptype=fptype, nClasses=self.n_classes_
        )

        # Prediction
        predict_algo = d4p.adaboost_prediction(
            fptype=fptype, nClasses=self.n_classes_, weakLearnerPrediction=pr
        )
        predict_result = predict_algo.compute(X, self.daal_model_)

        prediction = predict_result.prediction

        # in binary classification labels "-1, 1" are returned but "0, 1" are expected
        if self.n_classes_ == 2:
            prediction[prediction == -1] = 0

        # Decode labels
        le = preprocessing.LabelEncoder()
        le.classes_ = self.classes_
        return le.inverse_transform(prediction.ravel().astype(np.int64, copy=False))
@@ -0,0 +1,318 @@
1
+ # ==============================================================================
2
+ # Copyright 2014 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ # daal4py GBT scikit-learn-compatible estimator class
18
+
19
+ import numbers
20
+
21
+ import numpy as np
22
+ from sklearn import preprocessing
23
+ from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
24
+ from sklearn.utils import check_random_state
25
+ from sklearn.utils.multiclass import check_classification_targets
26
+ from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
27
+
28
+ import daal4py as d4p
29
+
30
+ from .._n_jobs_support import control_n_jobs
31
+ from .._utils import getFPType
32
+
33
+
34
class GBTDAALBase(BaseEstimator, d4p.mb.GBTDAALBaseModel):
    """Common constructor and hyper-parameter validation for the GBT estimators."""

    # Class-level default; ``_more_tags`` reports it as the "allow_nan" tag.
    allow_nan_ = False

    def __init__(
        self,
        split_method="inexact",
        max_iterations=50,
        max_tree_depth=6,
        shrinkage=0.3,
        min_split_loss=0,
        reg_lambda=1,
        observations_per_tree_fraction=1,
        features_per_node=0,
        min_observations_in_leaf_node=5,
        memory_saving_mode=False,
        max_bins=256,
        min_bin_size=5,
        random_state=None,
    ):
        # Store every constructor argument unchanged under the same name.
        self.split_method = split_method
        self.max_iterations = max_iterations
        self.max_tree_depth = max_tree_depth
        self.shrinkage = shrinkage
        self.min_split_loss = min_split_loss
        self.reg_lambda = reg_lambda
        self.observations_per_tree_fraction = observations_per_tree_fraction
        self.features_per_node = features_per_node
        self.min_observations_in_leaf_node = min_observations_in_leaf_node
        self.memory_saving_mode = memory_saving_mode
        self.max_bins = max_bins
        self.min_bin_size = min_bin_size
        self.random_state = random_state

    def _check_params(self):
        """Raise ``ValueError`` for the first hyper-parameter found out of range."""
        integral = numbers.Integral

        if self.split_method not in ("inexact", "exact"):
            raise ValueError('Parameter "split_method" must be "inexact" or "exact".')

        n_iter = self.max_iterations
        if not (isinstance(n_iter, integral) and n_iter > 0):
            raise ValueError(
                'Parameter "max_iterations" must be non-zero positive integer value.'
            )

        depth = self.max_tree_depth
        if not (isinstance(depth, integral) and depth >= 0):
            raise ValueError(
                'Parameter "max_tree_depth" must be positive integer value or zero.'
            )

        # Real-valued bounds keep the direct comparison form.
        if self.shrinkage < 0 or self.shrinkage >= 1:
            raise ValueError(
                'Parameter "shrinkage" must be more or equal to 0 and less than 1.'
            )

        if self.min_split_loss < 0:
            raise ValueError('Parameter "min_split_loss" must be more or equal to zero.')

        if self.reg_lambda < 0:
            raise ValueError('Parameter "reg_lambda" must be more or equal to zero.')

        if (
            self.observations_per_tree_fraction <= 0
            or self.observations_per_tree_fraction > 1
        ):
            raise ValueError(
                'Parameter "observations_per_tree_fraction" must be '
                "more than 0 and less or equal to 1."
            )

        per_node = self.features_per_node
        if not (isinstance(per_node, integral) and per_node >= 0):
            raise ValueError(
                'Parameter "features_per_node" must be positive integer value or zero.'
            )

        leaf_min = self.min_observations_in_leaf_node
        if not (isinstance(leaf_min, integral) and leaf_min > 0):
            raise ValueError(
                'Parameter "min_observations_in_leaf_node" must be '
                "non-zero positive integer value."
            )

        if not isinstance(self.memory_saving_mode, bool):
            raise ValueError('Parameter "memory_saving_mode" must be boolean value.')

        bins = self.max_bins
        if not (isinstance(bins, integral) and bins > 0):
            raise ValueError(
                'Parameter "max_bins" must be non-zero positive integer value.'
            )

        bin_size = self.min_bin_size
        if not (isinstance(bin_size, integral) and bin_size > 0):
            raise ValueError(
                'Parameter "min_bin_size" must be non-zero positive integer value.'
            )

    def _more_tags(self):
        """Return the estimator tags contributed by this class."""
        tags = dict(allow_nan=self.allow_nan_)
        return tags
130
+
131
+
132
@control_n_jobs(decorated_methods=["fit", "predict"])
class GBTDAALClassifier(GBTDAALBase, ClassifierMixin):
    """Gradient-boosted-trees classifier trained through daal4py."""

    def fit(self, X, y):
        """Validate the inputs, encode the labels and train the GBT model.

        Returns ``self``. With a single class present no model is trained.
        """
        self._check_params()

        # Validate shapes and coerce X to single or double precision.
        X, y = check_X_y(X, y, y_numeric=False, dtype=[np.single, np.double])
        check_classification_targets(y)

        # Encode the labels and shape them as a single column.
        encoder = preprocessing.LabelEncoder()
        encoder.fit(y)
        self.classes_ = encoder.classes_
        y_ = encoder.transform(y).reshape((-1, 1))

        self.n_outputs_ = y_.shape[1]
        self.n_classes_ = len(self.classes_)
        self.n_features_in_ = X.shape[1]

        # A classifier cannot be trained on a single class.
        if self.n_classes_ == 1:
            return self

        # Draw the engine seed from the configured random state.
        rng = check_random_state(self.random_state)
        seed_ = rng.randint(0, np.iinfo("i").max)

        training_kwargs = {
            "fptype": getFPType(X),
            "nClasses": self.n_classes_,
            "splitMethod": self.split_method,
            "maxIterations": self.max_iterations,
            "maxTreeDepth": self.max_tree_depth,
            "shrinkage": self.shrinkage,
            "minSplitLoss": self.min_split_loss,
            "lambda_": self.reg_lambda,
            "observationsPerTreeFraction": self.observations_per_tree_fraction,
            "featuresPerNode": self.features_per_node,
            "minObservationsInLeafNode": self.min_observations_in_leaf_node,
            "memorySavingMode": self.memory_saving_mode,
            "maxBins": self.max_bins,
            "minBinSize": self.min_bin_size,
            "engine": d4p.engines_mcg59(seed=seed_),
        }
        train_algo = d4p.gbt_classification_training(**training_kwargs)

        # Keep the trained model for prediction.
        self.daal_model_ = train_algo.compute(X, y_).model
        return self

    def _predict(
        self, X, resultsToEvaluate, pred_contribs=False, pred_interactions=False
    ):
        """Shared implementation behind ``predict`` and ``predict_proba``."""
        # NaN is accepted only when ``allow_nan_`` is set.
        finite_kwargs = {"force_all_finite": "allow-nan"} if self.allow_nan_ else {}
        X = check_array(X, dtype=[np.single, np.double], **finite_kwargs)

        # Make sure the estimator has been fitted.
        check_is_fitted(self, ["n_features_in_", "n_classes_"])

        # Single-class shortcut.
        if self.n_classes_ == 1:
            return np.full(X.shape[0], self.classes_[0])

        predict_result = self._predict_classification(
            X, getFPType(X), resultsToEvaluate, pred_contribs, pred_interactions
        )

        wants_labels = resultsToEvaluate == "computeClassLabels"
        wants_extras = pred_contribs or pred_interactions
        if not wants_labels or wants_extras:
            return predict_result

        # Map encoded labels back to the original class values.
        decoder = preprocessing.LabelEncoder()
        decoder.classes_ = self.classes_
        return decoder.inverse_transform(predict_result)

    def predict(self, X, pred_contribs=False, pred_interactions=False):
        """Run ``_predict`` in "computeClassLabels" mode."""
        return self._predict(X, "computeClassLabels", pred_contribs, pred_interactions)

    def predict_proba(self, X):
        """Run ``_predict`` in "computeClassProbabilities" mode."""
        return self._predict(X, "computeClassProbabilities")

    def predict_log_proba(self, X):
        """Return the natural logarithm of ``predict_proba(X)``."""
        proba = self.predict_proba(X)
        n_outputs = self.n_outputs_

        if n_outputs == 1:
            return np.log(proba)

        for idx in range(n_outputs):
            proba[idx] = np.log(proba[idx])
        return proba

    @staticmethod
    def convert_model(model):
        """Build a ``GBTDAALClassifier`` from ``model`` via ``_convert_model``."""
        converted = GBTDAALClassifier()
        converted._convert_model(model)

        converted.classes_ = model.classes_
        converted.allow_nan_ = True
        return converted
251
+
252
+
253
@control_n_jobs(decorated_methods=["fit", "predict"])
class GBTDAALRegressor(GBTDAALBase, RegressorMixin):
    """Gradient-boosted-trees regressor trained through daal4py."""

    def fit(self, X, y):
        """Validate the inputs and train the GBT regression model.

        Returns ``self``.
        """
        self._check_params()

        # Validate shapes and coerce X to single or double precision.
        X, y = check_X_y(X, y, y_numeric=True, dtype=[np.single, np.double])

        # Targets are passed to daal4py as a single column.
        y_ = y.reshape((-1, 1))

        self.n_features_in_ = X.shape[1]

        # Draw the engine seed from the configured random state.
        rng = check_random_state(self.random_state)
        seed_ = rng.randint(0, np.iinfo("i").max)

        training_kwargs = {
            "fptype": getFPType(X),
            "splitMethod": self.split_method,
            "maxIterations": self.max_iterations,
            "maxTreeDepth": self.max_tree_depth,
            "shrinkage": self.shrinkage,
            "minSplitLoss": self.min_split_loss,
            "lambda_": self.reg_lambda,
            "observationsPerTreeFraction": self.observations_per_tree_fraction,
            "featuresPerNode": self.features_per_node,
            "minObservationsInLeafNode": self.min_observations_in_leaf_node,
            "memorySavingMode": self.memory_saving_mode,
            "maxBins": self.max_bins,
            "minBinSize": self.min_bin_size,
            "engine": d4p.engines_mcg59(seed=seed_),
        }
        train_algo = d4p.gbt_regression_training(**training_kwargs)

        # Keep the trained model for prediction.
        self.daal_model_ = train_algo.compute(X, y_).model
        return self

    def predict(self, X, pred_contribs=False, pred_interactions=False):
        """Validate ``X`` and delegate to ``_predict_regression``."""
        # NaN is accepted only when ``allow_nan_`` is set.
        finite_kwargs = {"force_all_finite": "allow-nan"} if self.allow_nan_ else {}
        X = check_array(X, dtype=[np.single, np.double], **finite_kwargs)

        # Make sure the estimator has been fitted.
        check_is_fitted(self, ["n_features_in_"])

        return self._predict_regression(
            X, getFPType(X), pred_contribs, pred_interactions
        )

    @staticmethod
    def convert_model(model):
        """Build a ``GBTDAALRegressor`` from ``model`` via ``_convert_model``."""
        converted = GBTDAALRegressor()
        converted._convert_model(model)

        converted.allow_nan_ = True
        return converted
@@ -0,0 +1,27 @@
1
+ # ==============================================================================
2
+ # Copyright 2014 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ from ._forest import RandomForestClassifier, RandomForestRegressor
18
+ from .AdaBoostClassifier import AdaBoostClassifier
19
+ from .GBTDAAL import GBTDAALClassifier, GBTDAALRegressor
20
+
21
+ __all__ = [
22
+ "RandomForestClassifier",
23
+ "RandomForestRegressor",
24
+ "GBTDAALClassifier",
25
+ "GBTDAALRegressor",
26
+ "AdaBoostClassifier",
27
+ ]