obliquetree 1.0.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of obliquetree might be problematic. Click here for more details.

obliquetree/_pywrap.py ADDED
@@ -0,0 +1,730 @@
1
+ from __future__ import annotations
2
+
3
+ from .src.base import TreeClassifier
4
+
5
+ from typing import List, Optional
6
+ from numpy.typing import ArrayLike, NDArray
7
+ import numpy as np
8
+ from math import comb
9
+ import warnings
10
+
11
+
12
def formatwarning(message, category, filename, lineno, line=None, **kwargs):
    """
    Format a warning as a single compact line.

    Drop-in replacement for ``warnings.formatwarning`` that omits the file
    name, line number and source line, keeping only the category name and the
    message.

    Parameters
    ----------
    message : Warning or str
        The warning message.
    category : type
        The warning class. Its ``__name__`` is used as the label.
    filename, lineno, line, **kwargs
        Accepted for signature compatibility and ignored.

    Returns
    -------
    str
        ``"<CategoryName>: <message>\\n"``.
    """
    # Use the real category instead of a hard-coded "UserWarning": this
    # formatter is installed globally below, so it also formats warnings that
    # are raised by other libraries with other categories.
    label = getattr(category, "__name__", "UserWarning")
    return f"{label}: {message}\n"


# NOTE: this replaces the process-wide warning formatter as an import side
# effect, so every warning printed after this module is imported uses the
# compact format above.
warnings.formatwarning = formatwarning
17
+
18
+
19
class BaseTree(TreeClassifier):
    """
    Base class for decision tree classifiers and regressors.

    This class provides foundational functionality for building decision trees,
    including parameter validation, data preprocessing, and interfacing with the
    underlying `TreeClassifier`. It handles both classification and regression
    tasks based on the `task` parameter.

    Parameters
    ----------
    task : bool
        - If `True`, construct regression tree.
        - If `False`, construct classification tree.

    max_depth : int
        Maximum depth of the tree. Controls model complexity and prevents overfitting.

        - If `-1`: Expands until leaves are pure or contain fewer than `min_samples_split` samples.
        - If `int > 0`: Limits the tree to the specified depth.

        Values above 255 (and `-1`) are stored as 255.

    min_samples_leaf : int
        Minimum number of samples required at leaf nodes.

    min_samples_split : int
        Minimum number of samples required to split an internal node.

    min_impurity_decrease : float
        Minimum required decrease in impurity to create a split.

    ccp_alpha : float
        Complexity parameter for Minimal Cost-Complexity Pruning.

    categories : List[int]
        Indices of categorical features in the dataset.

    use_oblique : bool
        - If `True`, enables oblique splits using linear combinations of features.
        - If `False`, uses traditional axis-aligned splits only.

    random_state : int
        Seed for random number generation in oblique splits.

        - Only used when `use_oblique=True`.

    n_pair : int
        Number of features to combine in oblique splits.

        - Only used when `use_oblique=True`.

    gamma : float
        Separation strength parameter for oblique splits.

        - Only used when `use_oblique=True`.

    max_iter : int
        Maximum iterations for L-BFGS optimization in oblique splits.

        - Only used when `use_oblique=True`.

    relative_change : float
        Early stopping threshold for L-BFGS optimization.

        - Only used when `use_oblique=True`.
    """

    def __init__(
        self,
        task: bool,
        max_depth: int,
        min_samples_leaf: int,
        min_samples_split: int,
        min_impurity_decrease: float,
        ccp_alpha: float,
        categories: Optional[List[int]],
        use_oblique: bool,
        random_state: Optional[int],
        n_pair: int,
        gamma: float,
        max_iter: int,
        relative_change: float,
    ) -> None:
        """Validate every hyper-parameter, store it, and initialise the base tree.

        Raises
        ------
        ValueError
            If any hyper-parameter has the wrong type or is out of range.
        """
        # Validate and assign parameters. `use_oblique` goes first because
        # `_validate_relative_change` needs the validated flag.
        self.task = task
        self.use_oblique = self._validate_use_oblique(use_oblique)
        self.max_depth = self._validate_max_depth(max_depth)
        self.min_samples_leaf = self._validate_min_samples_leaf(min_samples_leaf)
        self.min_samples_split = self._validate_min_samples_split(min_samples_split)
        self.min_impurity_decrease = self._validate_min_impurity_decrease(
            min_impurity_decrease
        )
        self.ccp_alpha = self._validate_ccp_alpha(ccp_alpha)
        self.n_pair = self._validate_n_pair(n_pair)
        self.gamma = self._validate_gamma(gamma)
        self.max_iter = self._validate_max_iter(max_iter)
        self.relative_change = self._validate_relative_change(
            relative_change, self.use_oblique
        )
        self.random_state = self._validate_random_state(random_state)
        self.categories = self._validate_categories(categories)
        self._fit = False

        # Initialize the TreeClassifier. Arguments are positional, so their
        # order must match the base-class constructor.
        # NOTE(review): the meaning of the trailing literal `1` is not visible
        # from this file -- confirm against TreeClassifier.
        super().__init__(
            self.max_depth,
            self.min_samples_leaf,
            self.min_samples_split,
            self.min_impurity_decrease,
            self.random_state,
            self.n_pair,
            self.gamma,
            self.max_iter,
            self.relative_change,
            self.categories,
            self.ccp_alpha,
            self.use_oblique,
            self.task,
            1,
        )

    def __getstate__(self):
        """Return the base-class pickling state extended with the fitted flag."""
        state = super().__getstate__()
        state["_fit"] = self._fit

        return state

    def __setstate__(self, state):
        """Restore the state from pickle.

        The `_fit` entry is removed from `state` before the remainder is
        handed to the base class.
        """
        # Extract special attributes
        _fit = state.pop("_fit", False)
        super().__setstate__(state)

        # Restore state directly without re-initialization
        self.__dict__.update(state)
        self._fit = _fit

    def __repr__(self):
        """Return ``ClassName(param=value, ...)``; missing attributes show as None."""
        param_str = (
            f"use_oblique={getattr(self, 'use_oblique', None)}, "
            f"max_depth={getattr(self, 'max_depth', None)}, "
            f"min_samples_leaf={getattr(self, 'min_samples_leaf', None)}, "
            f"min_samples_split={getattr(self, 'min_samples_split', None)}, "
            f"min_impurity_decrease={getattr(self, 'min_impurity_decrease', None)}, "
            f"ccp_alpha={getattr(self, 'ccp_alpha', None)}, "
            f"categories={getattr(self, 'categories', None)}, "
            f"random_state={getattr(self, 'random_state', None)}, "
            f"n_pair={getattr(self, 'n_pair', None)}, "
            f"gamma={getattr(self, 'gamma', None)}, "
            f"max_iter={getattr(self, 'max_iter', None)}, "
            f"relative_change={getattr(self, 'relative_change', None)}"
        )
        return f"{self.__class__.__name__}({param_str})"

    def _validate_max_depth(self, max_depth: int) -> int:
        """Return the effective depth limit: `-1` maps to 255, others are capped at 255."""
        if not isinstance(max_depth, int):
            raise ValueError("max_depth must be an integer")
        if max_depth < -1:
            raise ValueError("max_depth must be >= -1")
        return 255 if max_depth == -1 else min(max_depth, 255)

    def _validate_min_samples_leaf(self, min_samples_leaf: int) -> int:
        """Require an integer >= 1 and return it unchanged."""
        if not isinstance(min_samples_leaf, int):
            raise ValueError("min_samples_leaf must be an integer")
        if min_samples_leaf < 1:
            raise ValueError("min_samples_leaf must be >= 1")
        return min_samples_leaf

    def _validate_min_samples_split(self, min_samples_split: int) -> int:
        """Require an integer >= 2 and return it unchanged."""
        if not isinstance(min_samples_split, int):
            raise ValueError("min_samples_split must be an integer")
        if min_samples_split < 2:
            raise ValueError("min_samples_split must be >= 2")
        return min_samples_split

    def _validate_min_impurity_decrease(self, min_impurity_decrease: float) -> float:
        """Require a non-negative number and return it as a float."""
        if not isinstance(min_impurity_decrease, (int, float)):
            raise ValueError("min_impurity_decrease must be a number")
        if min_impurity_decrease < 0.0:
            raise ValueError("min_impurity_decrease must be >= 0.0")
        return float(min_impurity_decrease)

    def _validate_ccp_alpha(self, ccp_alpha: float) -> float:
        """Require a non-negative number and return it as a float."""
        if not isinstance(ccp_alpha, (int, float)):
            raise ValueError("ccp_alpha must be a number")
        if ccp_alpha < 0.0:
            raise ValueError("ccp_alpha must be >= 0.0")
        return float(ccp_alpha)

    def _validate_n_pair(self, n_pair: int) -> int:
        """Require an integer >= 2 and return it unchanged."""
        if not isinstance(n_pair, int):
            raise ValueError("n_pair must be an integer")
        if n_pair < 2:
            raise ValueError("n_pair must be >= 2")
        return n_pair

    def _validate_gamma(self, gamma: float) -> float:
        """Require a strictly positive number and return it as a float."""
        if not isinstance(gamma, (int, float)):
            raise ValueError("gamma must be a number")
        if gamma <= 0.0:
            raise ValueError("gamma must be > 0.0")
        return float(gamma)

    def _validate_max_iter(self, max_iter: int) -> int:
        """Require an integer >= 1 and return it unchanged."""
        if not isinstance(max_iter, int):
            raise ValueError("max_iter must be an integer")
        if max_iter < 1:
            raise ValueError("max_iter must be >= 1")
        return max_iter

    def _validate_relative_change(
        self, relative_change: float, use_oblique: bool
    ) -> float:
        """Require a non-negative number and return it as a float.

        Emits a warning when oblique splits are enabled and the threshold is
        at most 1e-5.
        """
        if not isinstance(relative_change, (int, float)):
            raise ValueError("relative_change must be a number")
        if relative_change < 0.0:
            raise ValueError("relative_change must be >= 0.0")
        if use_oblique and relative_change <= 1e-5:
            warnings.warn(
                "relative_change is set very low. This may prolong the oblique training time."
            )
        return float(relative_change)

    def _validate_random_state(self, random_state: Optional[int]) -> int:
        """Return the given integer seed, or draw a random one when it is None."""
        if random_state is not None and not isinstance(random_state, int):
            raise ValueError("random_state must be None or an integer")
        return (
            random_state
            if random_state is not None
            else np.random.randint(0, np.iinfo(np.int32).max)
        )

    def _validate_categories(self, categories: Optional[List[int]]) -> List[int]:
        """Return the categorical column indices as a list (empty when None)."""
        if categories is not None:
            if not isinstance(categories, (list, tuple)):
                raise ValueError("categories must be None or a list/tuple of integers")
            if not all(isinstance(x, int) for x in categories):
                raise ValueError("All elements in categories must be integers")
            if any(x < 0 for x in categories):
                raise ValueError("All elements in categories must be non-negative integers")
            return list(categories)
        return []

    def _validate_use_oblique(self, use_oblique: bool) -> bool:
        """Require a real `bool` and return it unchanged."""
        if not isinstance(use_oblique, bool):
            raise ValueError("use_oblique must be a boolean")
        return use_oblique

    def fit(
        self, X: ArrayLike, y: ArrayLike, sample_weight: Optional[ArrayLike] = None
    ) -> "BaseTree":
        """
        Fit the decision tree to the training data.

        Parameters
        ----------
        X : ArrayLike
            Training input samples of shape (n_samples, n_features).
        y : ArrayLike
            Target values of shape (n_samples,).
        sample_weight : Optional[ArrayLike], default=None
            Sample weights of shape (n_samples,). If None, all samples are given equal weight.

        Returns
        -------
        self : BaseTree
            Fitted estimator.

        Raises
        ------
        ValueError
            If input data is invalid (wrong dimensionality, mismatched number
            of samples, bad labels or weights) or contains NaN/Inf values
            where not allowed.
        """
        X = np.asarray(X, order="F", dtype=np.float64)
        y = np.asarray(y, order="C", dtype=np.float64)

        # Validate target vector
        self._validate_target(y)

        # Validate sample weights
        sample_weight = self._process_sample_weight(sample_weight, y.shape[0])

        # The checks below index X.shape[1]; reject non-2-D input with a clear
        # error instead of letting that surface as an IndexError.
        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y have inconsistent numbers of samples: "
                f"{X.shape[0]} != {y.shape[0]}"
            )

        # Validate feature matrix (may lower n_pair or disable use_oblique)
        self._validate_features(X)

        # Classification or Regression setup
        self.n_classes = self._setup_task(y)

        # Validate categorical features
        self._validate_categories_in_data(X)

        # Warn if the number of feature combinations is too large for oblique splits
        if self.use_oblique:
            self._warn_large_combinations(X.shape[1] - len(self.categories))

        super().fit(X, y, sample_weight)

        self._fit = True

        return self

    def _validate_target(self, y: NDArray) -> None:
        """Require a 1-D target; for classification also require labels 0..k-1."""
        if y.ndim != 1:
            raise ValueError("y must be 1-dimensional")

        if self.task:  # Regression
            return
        else:  # Classification
            unique_labels = np.unique(y)
            expected_labels = np.arange(len(unique_labels))
            if not np.array_equal(unique_labels, expected_labels):
                raise ValueError(
                    "Classification labels must start from 0 and increment by 1"
                )

    def _process_sample_weight(
        self, sample_weight: Optional[ArrayLike], n_samples: int
    ) -> NDArray:
        """
        Validate and normalise the sample weights.

        Parameters
        ----------
        sample_weight : Optional[ArrayLike]
            User-supplied weights, or None for uniform weights.
        n_samples : int
            Expected number of weights.

        Returns
        -------
        NDArray
            float64 weights of shape (n_samples,). Supplied weights are
            divided by the smallest strictly positive weight; None yields
            all ones.

        Raises
        ------
        ValueError
            If the shape is wrong or any weight is negative, NaN or infinite.
        """
        if sample_weight is not None:
            sample_weight = np.asarray(sample_weight, order="C", dtype=np.float64)

            if sample_weight.shape != (n_samples,):
                raise ValueError(
                    f"sample_weight has incompatible shape: {sample_weight.shape} "
                    f"while y has shape ({n_samples},)"
                )

            if (
                np.any(np.isnan(sample_weight))
                or np.any(np.isinf(sample_weight))
                or np.any(sample_weight < 0)
            ):
                raise ValueError(
                    "sample_weight cannot contain negative, NaN or inf values"
                )

            # Zero weights are valid input, so scale by the smallest *positive*
            # weight: dividing by a plain minimum of 0 would turn the weights
            # into inf/NaN. If there is no positive weight (all zeros or an
            # empty vector) the weights are left untouched.
            positive = sample_weight[sample_weight > 0]
            if positive.size:
                min_val = positive.min()
                if min_val != 1:
                    sample_weight = sample_weight / min_val

        else:
            sample_weight = np.ones(n_samples, dtype=np.float64)

        return sample_weight

    def _validate_features(self, X: NDArray) -> None:
        """
        Check X for oblique splitting and reconcile `n_pair` with the data.

        Only acts when `use_oblique` is True. NaN/Inf values are rejected,
        `n_pair` is lowered (with a warning) when it exceeds the number of
        non-categorical features, and `use_oblique` is switched off (with a
        warning) when categorical columns leave fewer than two usable features.

        Raises
        ------
        ValueError
            If oblique splits are enabled and X contains NaN or Inf.
        """
        if self.use_oblique:
            if np.any(np.isnan(X)) or np.any(np.isinf(X)):
                # This branch runs only when oblique splits are enabled, so
                # the message must name use_oblique=True.
                raise ValueError(
                    "X cannot contain NaN or Inf values when use_oblique is True"
                )

            # Number of non-categorical features available for combinations.
            max_possible_pairs = (
                X.shape[1] - len(self.categories) if self.categories else X.shape[1]
            )

            if self.categories:
                if max_possible_pairs < 2:
                    warnings.warn(
                        f"Total features: {X.shape[1]}, categorical features: {len(self.categories)}. "
                        f"The number of possible feature pairs ({max_possible_pairs}) is less than 2. "
                        f"As a result, 'use_oblique' set 'False'."
                    )
                    self.use_oblique = False

                elif self.n_pair > max_possible_pairs:
                    warnings.warn(
                        f"Total features: {X.shape[1]}, categorical features: {len(self.categories)}. "
                        f"n_pair ({self.n_pair}) exceeds the usable features, adjusting n_pair to {max_possible_pairs}."
                    )
                    self.n_pair = max_possible_pairs
            else:  # If there are no categorical features
                if self.n_pair > X.shape[1]:
                    warnings.warn(
                        f"n_pair ({self.n_pair}) exceeds the total features ({X.shape[1]}). "
                        f"Adjusting n_pair to {X.shape[1]}."
                    )
                    self.n_pair = X.shape[1]

    def _setup_task(self, y: NDArray) -> int:
        """Return the number of distinct labels for classification, 1 for regression."""
        if not self.task:
            n_classes = len(np.unique(y))
            return n_classes
        else:
            return 1  # Regression

    def _validate_categories_in_data(self, X: NDArray) -> None:
        """Check that categorical column indices exist in X and hold no negative values.

        Raises
        ------
        ValueError
            If an index is out of range or a categorical column contains a
            negative value.
        """
        if self.categories:
            for col_idx in self.categories:
                # A category index must not exceed the matrix dimensions
                if col_idx >= X.shape[1]:
                    raise ValueError(
                        f"Category column index {col_idx} exceeds X dimensions ({X.shape[1]} features)."
                    )

            # Values in categorical columns must not be negative
            if (X[:, self.categories] < 0).any():
                raise ValueError(
                    "X contains negative values in the specified category columns, which are not allowed."
                )

    def _warn_large_combinations(self, n_features: int) -> None:
        """Warn when C(n_features, n_pair) exceeds 1000 candidate combinations."""
        total_combinations = comb(n_features, self.n_pair)
        if total_combinations > 1000:  # Optimal threshold can be adjusted
            warnings.warn(
                "The number of feature combinations for oblique splits is very large, which may lead to long training times. "
                "Consider reducing `n_pair` or the number of features."
            )

    def predict(self, X: ArrayLike) -> NDArray:
        """
        Predict target values for the input samples.

        Parameters
        ----------
        X : ArrayLike
            Input samples of shape (n_samples, n_features).

        Returns
        -------
        NDArray
            Predicted values.

        Raises
        ------
        ValueError
            If the model has not been fitted yet.
        """
        if not self._fit:
            raise ValueError(
                "The model has not been fitted yet. Please call `fit` first."
            )

        # Same conversion as in `fit`: Fortran-ordered float64.
        X = np.asarray(X, order="F", dtype=np.float64)
        return super().predict(X)
455
+
456
+
457
class Classifier(BaseTree):
    """Decision tree classifier: a `BaseTree` constructed with `task=False`."""

    def __init__(
        self,
        use_oblique: bool = True,
        max_depth: int = -1,
        min_samples_leaf: int = 1,
        min_samples_split: int = 2,
        min_impurity_decrease: float = 0.0,
        ccp_alpha: float = 0.0,
        categories: Optional[List[int]] = None,
        random_state: Optional[int] = None,
        n_pair: int = 2,
        gamma: float = 1.0,
        max_iter: int = 100,
        relative_change: float = 0.001,
    ) -> None:
        """
        A decision tree classifier supporting both traditional axis-aligned and oblique splits.

        This decision tree classifier extends traditional classification trees by supporting oblique
        splits (linear combinations of features) alongside conventional axis-aligned splits. It offers
        enhanced flexibility in separating classes while maintaining the interpretability of decision trees.

        Parameters
        ----------
        use_oblique : bool, default=True
            - If `True`, enables oblique splits using linear combinations of features.
            - If `False`, uses traditional axis-aligned splits only.

        max_depth : int, default=-1
            Maximum depth of the tree. Controls model complexity and prevents overfitting.

            - If `-1`: Expands until leaves are pure or contain fewer than `min_samples_split` samples.
            - If `int > 0`: Limits the tree to the specified depth.

        min_samples_leaf : int, default=1
            Minimum number of samples required at leaf nodes.

        min_samples_split : int, default=2
            Minimum number of samples required to split an internal node.

        min_impurity_decrease : float, default=0.0
            Minimum required decrease in impurity to create a split.

        ccp_alpha : float, default=0.0
            Complexity parameter for Minimal Cost-Complexity Pruning.

        categories : List[int], default=None
            Indices of categorical features in the dataset.

        random_state : int, default=None
            Seed for random number generation in oblique splits.

            - Only used when `use_oblique=True`.

        n_pair : int, default=2
            Number of features to combine in oblique splits.

            - Only used when `use_oblique=True`.

        gamma : float, default=1.0
            Separation strength parameter for oblique splits.

            - Only used when `use_oblique=True`.

        max_iter : int, default=100
            Maximum iterations for L-BFGS optimization in oblique splits.

            - Only used when `use_oblique=True`.

        relative_change : float, default=0.001
            Early stopping threshold for L-BFGS optimization.

            - Only used when `use_oblique=True`.
        """
        # task=False selects the classification mode of BaseTree.
        super().__init__(
            task=False,
            max_depth=max_depth,
            min_samples_leaf=min_samples_leaf,
            min_samples_split=min_samples_split,
            min_impurity_decrease=min_impurity_decrease,
            ccp_alpha=ccp_alpha,
            categories=categories,
            use_oblique=use_oblique,
            random_state=random_state,
            n_pair=n_pair,
            gamma=gamma,
            max_iter=max_iter,
            relative_change=relative_change,
        )

    def fit(
        self, X: ArrayLike, y: ArrayLike, sample_weight: Optional[ArrayLike] = None
    ) -> "Classifier":
        """
        Build a decision tree classifier from the training set (X, y).

        Delegates to `BaseTree.fit`; overridden to narrow the documented
        return type.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The training input samples.
        y : array-like of shape (n_samples,)
            Target values (class labels).
        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        self : Classifier
            Fitted estimator.
        """
        return super().fit(X, y, sample_weight)

    def predict(self, X: ArrayLike) -> NDArray:
        """
        Predict class labels for X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples to predict.

        Returns
        -------
        y : NDArray of shape (n_samples,)
            For each sample, the column index of the largest value in the
            base-class prediction output.
        """
        # argmax over axis 1 picks the highest-scoring column per sample.
        return np.argmax(super().predict(X), axis=1)

    def predict_proba(self, X: ArrayLike) -> NDArray:
        """
        Predict class probabilities for X.

        Returns the base-class `predict` output unchanged.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        proba : NDArray
            The base-class prediction output; presumably class probabilities
            of shape (n_samples, n_classes) -- confirm against TreeClassifier.
        """
        return super().predict(X)
601
+
602
+
603
class Regressor(BaseTree):
    """Decision tree regressor: a `BaseTree` constructed with `task=True`."""

    def __init__(
        self,
        use_oblique: bool = True,
        max_depth: int = -1,
        min_samples_leaf: int = 1,
        min_samples_split: int = 2,
        min_impurity_decrease: float = 0.0,
        ccp_alpha: float = 0.0,
        categories: Optional[List[int]] = None,
        random_state: Optional[int] = None,
        n_pair: int = 2,
        gamma: float = 1.0,
        max_iter: int = 100,
        relative_change: float = 0.001,
    ) -> None:
        """
        A decision tree regressor supporting both traditional axis-aligned and oblique splits.

        This advanced decision tree regressor extends traditional regression trees by supporting oblique
        splits (linear combinations of features) alongside conventional axis-aligned splits. It offers enhanced
        flexibility in modeling continuous outputs while maintaining the interpretability of decision trees.

        Parameters
        ----------
        use_oblique : bool, default=True
            - If `True`, enables oblique splits using linear combinations of features.
            - If `False`, uses traditional axis-aligned splits only.

        max_depth : int, default=-1
            Maximum depth of the tree. Controls model complexity and prevents overfitting.

            - If `-1`: Expands until leaves are pure or contain fewer than `min_samples_split` samples.
            - If `int > 0`: Limits the tree to the specified depth.

        min_samples_leaf : int, default=1
            Minimum number of samples required at leaf nodes.

        min_samples_split : int, default=2
            Minimum number of samples required to split an internal node.

        min_impurity_decrease : float, default=0.0
            Minimum required decrease in impurity to create a split.

        ccp_alpha : float, default=0.0
            Complexity parameter for Minimal Cost-Complexity Pruning.

        categories : List[int], default=None
            Indices of categorical features in the dataset.

        random_state : int, default=None
            Seed for random number generation in oblique splits.

            - Only used when `use_oblique=True`.

        n_pair : int, default=2
            Number of features to combine in oblique splits.

            - Only used when `use_oblique=True`.

        gamma : float, default=1.0
            Separation strength parameter for oblique splits.

            - Only used when `use_oblique=True`.

        max_iter : int, default=100
            Maximum iterations for L-BFGS optimization in oblique splits.

            - Only used when `use_oblique=True`.

        relative_change : float, default=0.001
            Early stopping threshold for L-BFGS optimization.

            - Only used when `use_oblique=True`.
        """
        # task=True selects the regression mode of BaseTree.
        super().__init__(
            task=True,
            max_depth=max_depth,
            min_samples_leaf=min_samples_leaf,
            min_samples_split=min_samples_split,
            min_impurity_decrease=min_impurity_decrease,
            ccp_alpha=ccp_alpha,
            categories=categories,
            use_oblique=use_oblique,
            random_state=random_state,
            n_pair=n_pair,
            gamma=gamma,
            max_iter=max_iter,
            relative_change=relative_change,
        )

    def fit(
        self, X: ArrayLike, y: ArrayLike, sample_weight: Optional[ArrayLike] = None
    ) -> "Regressor":
        """
        Build a decision tree regressor from the training set (X, y).

        Delegates to `BaseTree.fit`; overridden to narrow the documented
        return type.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The training input samples.
        y : array-like of shape (n_samples,)
            Target values.
        sample_weight : array-like of shape (n_samples,), optional, default=None
            Sample weights.

        Returns
        -------
        self : Regressor
            Fitted estimator.
        """
        return super().fit(X, y, sample_weight)

    def predict(self, X: ArrayLike) -> NDArray:
        """
        Predict regression target for X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples to predict.

        Returns
        -------
        y : NDArray
            The base-class prediction output flattened to one dimension.
        """
        # ravel() flattens whatever shape the base class returns to 1-D.
        return super().predict(X).ravel()