obliquetree 1.0.5__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
obliquetree/_pywrap.py ADDED
@@ -0,0 +1,769 @@
1
+ from __future__ import annotations
2
+
3
+ from .src.base import TreeClassifier
4
+
5
+ from typing import List, Optional
6
+ from numpy.typing import ArrayLike, NDArray
7
+ import numpy as np
8
+ from math import comb
9
+ import warnings
10
+
11
+
12
def formatwarning(message, category, filename, lineno, line=None, **kwargs):
    """
    Format a warning as a single ``"<Category>: <message>"`` line.

    The file name, line number and source line are intentionally dropped so
    that warnings raised by this package stay short.

    Parameters
    ----------
    message : Warning or str
        The warning message.
    category : type
        The warning class; its name is used as the label.
    filename, lineno, line, **kwargs
        Accepted for compatibility with ``warnings.formatwarning``; unused.

    Returns
    -------
    str
        The formatted warning, terminated by a newline.
    """
    # This formatter is installed globally below, so it must label each
    # warning with its real category rather than always "UserWarning".
    label = getattr(category, "__name__", "UserWarning")
    return f"{label}: {message}\n"


# Install the compact formatter process-wide (module-level side effect).
warnings.formatwarning = formatwarning
17
+
18
+
19
class BaseTree(TreeClassifier):
    """
    Base class for decision tree classifiers and regressors.

    This class provides foundational functionality for building decision trees,
    including parameter validation, data preprocessing, and interfacing with the
    underlying `TreeClassifier`. It handles both classification and regression
    tasks based on the `task` parameter.

    Parameters
    ----------
    task : bool
        - If `True`, construct regression tree.
        - If `False`, construct classification tree.

    max_depth : int
        Maximum depth of the tree. Controls model complexity and prevents overfitting.

        - If `-1`: Expands until leaves are pure or contain fewer than `min_samples_split` samples
          (internally stored as a depth limit of 255).
        - If `int > 0`: Limits the tree to the specified depth (values above 255 are capped at 255).

    min_samples_leaf : int
        Minimum number of samples required at leaf nodes.

    min_samples_split : int
        Minimum number of samples required to split an internal node.

    min_impurity_decrease : float
        Minimum required decrease in impurity to create a split.

    ccp_alpha : float
        Complexity parameter for Minimal Cost-Complexity Pruning.

    categories : List[int]
        Indices of categorical features in the dataset.

    use_oblique : bool
        - If `True`, enables oblique splits using linear combinations of features.
        - If `False`, uses traditional axis-aligned splits only.

    random_state : int
        Seed for random number generation in oblique splits. If `None`, a
        random seed is drawn with `np.random.randint`.

        - Only used when `use_oblique=True`.

    n_pair : int
        Number of features to combine in oblique splits.

        - Only used when `use_oblique=True`.

    gamma : float
        Separation strength parameter for oblique splits.

        - Only used when `use_oblique=True`.

    max_iter : int
        Maximum iterations for L-BFGS optimization in oblique splits.

        - Only used when `use_oblique=True`.

    relative_change : float
        Early stopping threshold for L-BFGS optimization.

        - Only used when `use_oblique=True`.
    """

    def __init__(
        self,
        task: bool,
        max_depth: int,
        min_samples_leaf: int,
        min_samples_split: int,
        min_impurity_decrease: float,
        ccp_alpha: float,
        categories: Optional[List[int]],
        use_oblique: bool,
        random_state: Optional[int],
        n_pair: int,
        gamma: float,
        max_iter: int,
        relative_change: float,
    ) -> None:
        # Validate and assign parameters. `use_oblique` is validated first
        # because the relative_change check below depends on it.
        self.task = task
        self.use_oblique = self._validate_use_oblique(use_oblique)
        self.max_depth = self._validate_max_depth(max_depth)
        self.min_samples_leaf = self._validate_min_samples_leaf(min_samples_leaf)
        self.min_samples_split = self._validate_min_samples_split(min_samples_split)
        self.min_impurity_decrease = self._validate_min_impurity_decrease(
            min_impurity_decrease
        )
        self.ccp_alpha = self._validate_ccp_alpha(ccp_alpha)
        self.n_pair = self._validate_n_pair(n_pair)
        self.gamma = self._validate_gamma(gamma)
        self.max_iter = self._validate_max_iter(max_iter)
        self.relative_change = self._validate_relative_change(
            relative_change, self.use_oblique
        )
        self.random_state = self._validate_random_state(random_state)
        self.categories = self._validate_categories(categories)
        self._fit = False
        # Filled in by `fit`: maps each categorical column index to the
        # unique values seen during training.
        self._categories: dict[int, NDArray]

        # Initialize the TreeClassifier
        # NOTE(review): the trailing literal `1` is an extra positional
        # argument whose meaning is defined by TreeClassifier - confirm there.
        super().__init__(
            self.max_depth,
            self.min_samples_leaf,
            self.min_samples_split,
            self.min_impurity_decrease,
            self.random_state,
            self.n_pair,
            self.gamma,
            self.max_iter,
            self.relative_change,
            self.categories,
            self.ccp_alpha,
            self.use_oblique,
            self.task,
            1,
        )

    def __getstate__(self):
        """Return the state for pickling (base-class state plus `_fit`)."""
        state = super().__getstate__()
        state["_fit"] = self._fit
        # NOTE(review): `_categories` is not added here; confirm the base
        # class state preserves it, otherwise `predict` on an unpickled model
        # with categorical features cannot check for unknown categories.

        return state

    def __setstate__(self, state):
        """Restore the state from pickle."""
        # `_fit` is removed before the remaining state is handed to the
        # base class.
        _fit = state.pop("_fit", False)
        super().__setstate__(state)

        # Restore state directly without re-initialization
        self.__dict__.update(state)
        self._fit = _fit

    def __repr__(self):
        """Return `ClassName(param=value, ...)` using the current attributes."""
        # getattr with a default keeps repr usable on partially
        # initialised instances.
        param_str = (
            f"use_oblique={getattr(self, 'use_oblique', None)}, "
            f"max_depth={getattr(self, 'max_depth', None)}, "
            f"min_samples_leaf={getattr(self, 'min_samples_leaf', None)}, "
            f"min_samples_split={getattr(self, 'min_samples_split', None)}, "
            f"min_impurity_decrease={getattr(self, 'min_impurity_decrease', None)}, "
            f"ccp_alpha={getattr(self, 'ccp_alpha', None)}, "
            f"categories={getattr(self, 'categories', None)}, "
            f"random_state={getattr(self, 'random_state', None)}, "
            f"n_pair={getattr(self, 'n_pair', None)}, "
            f"gamma={getattr(self, 'gamma', None)}, "
            f"max_iter={getattr(self, 'max_iter', None)}, "
            f"relative_change={getattr(self, 'relative_change', None)}"
        )
        return f"{self.__class__.__name__}({param_str})"

    def _validate_max_depth(self, max_depth: int) -> int:
        """Return the effective depth limit: -1 maps to 255, larger values are capped at 255."""
        if not isinstance(max_depth, int):
            raise ValueError("max_depth must be an integer")
        if max_depth < -1:
            raise ValueError("max_depth must be >= -1")
        return 255 if max_depth == -1 else min(max_depth, 255)

    def _validate_min_samples_leaf(self, min_samples_leaf: int) -> int:
        """Require an integer `min_samples_leaf` >= 1."""
        if not isinstance(min_samples_leaf, int):
            raise ValueError("min_samples_leaf must be an integer")
        if min_samples_leaf < 1:
            raise ValueError("min_samples_leaf must be >= 1")
        return min_samples_leaf

    def _validate_min_samples_split(self, min_samples_split: int) -> int:
        """Require an integer `min_samples_split` >= 2."""
        if not isinstance(min_samples_split, int):
            raise ValueError("min_samples_split must be an integer")
        if min_samples_split < 2:
            raise ValueError("min_samples_split must be >= 2")
        return min_samples_split

    def _validate_min_impurity_decrease(self, min_impurity_decrease: float) -> float:
        """Require a non-negative number and return it as `float`."""
        if not isinstance(min_impurity_decrease, (int, float)):
            raise ValueError("min_impurity_decrease must be a number")
        if min_impurity_decrease < 0.0:
            raise ValueError("min_impurity_decrease must be >= 0.0")
        return float(min_impurity_decrease)

    def _validate_ccp_alpha(self, ccp_alpha: float) -> float:
        """Require a non-negative number and return it as `float`."""
        if not isinstance(ccp_alpha, (int, float)):
            raise ValueError("ccp_alpha must be a number")
        if ccp_alpha < 0.0:
            raise ValueError("ccp_alpha must be >= 0.0")
        return float(ccp_alpha)

    def _validate_n_pair(self, n_pair: int) -> int:
        """Require an integer `n_pair` >= 2."""
        if not isinstance(n_pair, int):
            raise ValueError("n_pair must be an integer")
        if n_pair < 2:
            raise ValueError("n_pair must be >= 2")
        return n_pair

    def _validate_gamma(self, gamma: float) -> float:
        """Require a strictly positive number and return it as `float`."""
        if not isinstance(gamma, (int, float)):
            raise ValueError("gamma must be a number")
        if gamma <= 0.0:
            raise ValueError("gamma must be > 0.0")
        return float(gamma)

    def _validate_max_iter(self, max_iter: int) -> int:
        """Require an integer `max_iter` >= 1."""
        if not isinstance(max_iter, int):
            raise ValueError("max_iter must be an integer")
        if max_iter < 1:
            raise ValueError("max_iter must be >= 1")
        return max_iter

    def _validate_relative_change(
        self, relative_change: float, use_oblique: bool
    ) -> float:
        """
        Require a non-negative number and return it as `float`.

        Emits a warning when oblique splits are enabled and the threshold is
        at or below 1e-5.
        """
        if not isinstance(relative_change, (int, float)):
            raise ValueError("relative_change must be a number")
        if relative_change < 0.0:
            raise ValueError("relative_change must be >= 0.0")
        if use_oblique and relative_change <= 1e-5:
            warnings.warn(
                "relative_change is set very low. This may prolong the oblique training time."
            )
        return float(relative_change)

    def _validate_random_state(self, random_state: Optional[int]) -> int:
        """Return the given integer seed, or draw a random one when `None`."""
        if random_state is not None and not isinstance(random_state, int):
            raise ValueError("random_state must be None or an integer")
        return (
            random_state
            if random_state is not None
            else np.random.randint(0, np.iinfo(np.int32).max)
        )

    def _validate_categories(self, categories: Optional[List[int]]) -> List[int]:
        """Return the categorical column indices as a new list (`[]` for `None`)."""
        if categories is not None:
            if not isinstance(categories, (list, tuple)):
                raise ValueError("categories must be None or a list/tuple of integers")
            if not all(isinstance(x, int) for x in categories):
                raise ValueError("All elements in categories must be integers")
            if any(x < 0 for x in categories):
                raise ValueError(
                    "All elements in categories must be non-negative integers"
                )
            return list(categories)
        return []

    def _validate_use_oblique(self, use_oblique: bool) -> bool:
        """Require `use_oblique` to be a real `bool`."""
        if not isinstance(use_oblique, bool):
            raise ValueError("use_oblique must be a boolean")
        return use_oblique

    def fit(
        self, X: ArrayLike, y: ArrayLike, sample_weight: Optional[ArrayLike] = None
    ) -> "BaseTree":
        """
        Fit the decision tree to the training data.

        Parameters
        ----------
        X : ArrayLike
            Training input samples of shape (n_samples, n_features).
        y : ArrayLike
            Target values of shape (n_samples,).
        sample_weight : Optional[ArrayLike], default=None
            Sample weights of shape (n_samples,). If None, all samples are given equal weight.

        Returns
        -------
        self : BaseTree
            Fitted estimator.

        Raises
        ------
        ValueError
            If input data is invalid or contains NaN/Inf values where not allowed.
        """
        X = np.asarray(X, order="F", dtype=np.float64)
        y = np.asarray(y, order="C", dtype=np.float64)

        if X.ndim != 2:
            raise ValueError(
                f"Expected a 2D array for input samples, but got an array with {X.ndim} dimensions."
            )

        # Check the rank of y before reading y.shape[0]; a 0-d target would
        # otherwise fail with an IndexError instead of a ValueError.
        if y.ndim != 1:
            raise ValueError("y must be 1-dimensional")

        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"The number of samples in `X` ({X.shape[0]}) does not match the number of target values in `y` ({y.shape[0]})."
            )

        # Validate target vector
        self._validate_target(y)

        # Validate sample weights
        sample_weight = self._process_sample_weight(sample_weight, y.shape[0])

        # Validate feature matrix (may adjust n_pair / use_oblique)
        self._validate_features(X)

        # Classification or Regression setup
        self.n_classes = self._setup_task(y)

        # Validate categorical features and record the categories seen
        self._validate_categories_in_data(X, is_fit=True)

        # Warn if the number of feature combinations is too large for oblique splits
        if self.use_oblique:
            self._warn_large_combinations(X.shape[1] - len(self.categories))

        super().fit(X, y, sample_weight)

        self._fit = True

        return self

    def _validate_target(self, y: NDArray) -> None:
        """
        Check the target vector.

        `y` must be 1-dimensional. For classification (`task` is false) the
        distinct labels must be exactly 0, 1, ..., k-1.
        """
        if y.ndim != 1:
            raise ValueError("y must be 1-dimensional")

        if self.task:  # Regression: no label constraints
            return

        # Classification
        unique_labels = np.unique(y)
        expected_labels = np.arange(len(unique_labels))
        if not np.array_equal(unique_labels, expected_labels):
            raise ValueError(
                "Classification labels must start from 0 and increment by 1"
            )

    def _process_sample_weight(
        self, sample_weight: Optional[ArrayLike], n_samples: int
    ) -> NDArray:
        """
        Validate sample weights and rescale them so the smallest positive weight is 1.

        Parameters
        ----------
        sample_weight : Optional[ArrayLike]
            Weights of shape (n_samples,), or None for uniform weights.
        n_samples : int
            Expected number of weights.

        Returns
        -------
        NDArray
            float64 weights of shape (n_samples,).

        Raises
        ------
        ValueError
            If the shape is wrong, if any weight is negative, NaN or infinite,
            or if no weight is strictly positive.
        """
        if sample_weight is None:
            return np.ones(n_samples, dtype=np.float64)

        weights = np.asarray(sample_weight, order="C", dtype=np.float64)

        if weights.shape != (n_samples,):
            raise ValueError(
                f"sample_weight has incompatible shape: {weights.shape} "
                f"while y has shape ({n_samples},)"
            )

        if (
            np.any(np.isnan(weights))
            or np.any(np.isinf(weights))
            or np.any(weights < 0)
        ):
            raise ValueError(
                "sample_weight cannot contain negative, NaN or inf values"
            )

        # Zero weights pass the checks above, so normalise by the smallest
        # strictly positive weight; dividing by a zero minimum would turn
        # every weight into inf/NaN.
        positive = weights[weights > 0]
        if positive.size == 0:
            raise ValueError("sample_weight must contain at least one positive value")

        smallest = positive.min()
        if smallest != 1:
            weights = weights / smallest

        return weights

    def _validate_features(self, X: NDArray) -> None:
        """
        Check the feature matrix against the oblique-split settings.

        Does nothing when `use_oblique` is false. Otherwise `X` must be free of
        NaN/Inf. With fewer than 2 non-categorical features, oblique splits
        are disabled; when `n_pair` exceeds the number of non-categorical
        features it is lowered to that number. Both adjustments emit a warning.
        """
        if not self.use_oblique:
            return

        if np.any(np.isnan(X)) or np.any(np.isinf(X)):
            raise ValueError(
                "X cannot contain NaN or Inf values when use_oblique is True"
            )

        # Only non-categorical columns can take part in an oblique split.
        max_possible_pairs = X.shape[1] - len(self.categories)

        # NOTE(review): these assignments happen after TreeClassifier.__init__
        # already received n_pair/use_oblique; confirm they propagate.
        if max_possible_pairs < 2:
            # Disable oblique splits with or without categorical columns, so
            # n_pair never drops below the minimum of 2 enforced at construction.
            if self.categories:
                warnings.warn(
                    f"Total features: {X.shape[1]}, categorical features: {len(self.categories)}. "
                    f"The number of possible feature pairs ({max_possible_pairs}) is less than 2. "
                    f"As a result, 'use_oblique' set 'False'."
                )
            else:
                warnings.warn(
                    f"Total features: {X.shape[1]}. "
                    f"The number of possible feature pairs ({max_possible_pairs}) is less than 2. "
                    f"As a result, 'use_oblique' set 'False'."
                )
            self.use_oblique = False

        elif self.n_pair > max_possible_pairs:
            if self.categories:
                warnings.warn(
                    f"Total features: {X.shape[1]}, categorical features: {len(self.categories)}. "
                    f"n_pair ({self.n_pair}) exceeds the usable features, adjusting n_pair to {max_possible_pairs}."
                )
            else:  # If there are no categorical features
                warnings.warn(
                    f"n_pair ({self.n_pair}) exceeds the total features ({X.shape[1]}). "
                    f"Adjusting n_pair to {X.shape[1]}."
                )
            self.n_pair = max_possible_pairs

    def _setup_task(self, y: NDArray) -> int:
        """Return the number of distinct labels for classification, or 1 for regression."""
        if not self.task:
            n_classes = len(np.unique(y))
            return n_classes
        else:
            return 1  # Regression

    def _validate_categories_in_data(self, X: NDArray, is_fit: bool) -> None:
        """
        Validate the categorical columns of `X`.

        Parameters
        ----------
        X : NDArray
            2D feature matrix.
        is_fit : bool
            If True, record the unique values of every categorical column in
            `self._categories`. If False, reject values that were not recorded
            during fitting.

        Raises
        ------
        ValueError
            If a category index is out of range, if a categorical column holds
            negative or NaN values, or (when `is_fit` is False) if a
            categorical column holds a value not seen during fitting.
        """
        if not self.categories:
            return

        # The category index must not exceed the matrix dimensions. All
        # indices are checked before any column is accessed, so an invalid
        # index always gives this ValueError rather than a NumPy IndexError.
        for col_idx in self.categories:
            if col_idx >= X.shape[1]:
                raise ValueError(
                    f"Category column index {col_idx} exceeds X dimensions ({X.shape[1]} features)."
                )

        categorical_values = X[:, self.categories]

        # Values in categorical columns must not be negative.
        if (categorical_values < 0).any():
            raise ValueError(
                "X contains negative values in the specified category columns, which are not allowed."
            )

        if np.isnan(categorical_values).any():
            raise ValueError(
                "X contains null values in the specified category columns. Please encode them before passing."
            )

        if is_fit:
            self._categories = {
                idx: np.unique(X[:, idx]) for idx in self.categories
            }
            return

        for idx in self.categories:
            unknown = np.setdiff1d(np.unique(X[:, idx]), self._categories[idx])
            if len(unknown) > 0:
                raise ValueError(
                    f"Unknown categories in column {idx}: {unknown}. "
                    f"Available categories: {self._categories[idx]}"
                )

    def _warn_large_combinations(self, n_features: int) -> None:
        """Warn when C(n_features, n_pair) exceeds 1000 candidate feature combinations."""
        total_combinations = comb(n_features, self.n_pair)
        if total_combinations > 1000:  # Optimal threshold can be adjusted
            warnings.warn(
                "The number of feature combinations for oblique splits is very large, which may lead to long training times. "
                "Consider reducing `n_pair` or the number of features."
            )

    def predict(self, X: ArrayLike) -> NDArray:
        """
        Predict target values for the input samples.

        Parameters
        ----------
        X : ArrayLike
            Input samples of shape (n_samples, n_features).

        Returns
        -------
        NDArray
            Predicted values.

        Raises
        ------
        ValueError
            If the model has not been fitted yet, if `X` is not 2D, or if the
            categorical columns fail validation.
        """
        if not self._fit:
            raise ValueError(
                "The model has not been fitted yet. Please call `fit` first."
            )

        X = np.asarray(X, order="F", dtype=np.float64)

        if X.ndim != 2:
            raise ValueError(
                f"Expected a 2D array for input samples, but got an array with {X.ndim} dimensions. "
            )

        self._validate_categories_in_data(X, is_fit=False)

        return super().predict(X)
494
+
495
+
496
class Classifier(BaseTree):
    def __init__(
        self,
        use_oblique: bool = True,
        max_depth: int = -1,
        min_samples_leaf: int = 1,
        min_samples_split: int = 2,
        min_impurity_decrease: float = 0.0,
        ccp_alpha: float = 0.0,
        categories: Optional[List[int]] = None,
        random_state: Optional[int] = None,
        n_pair: int = 2,
        gamma: float = 1.0,
        max_iter: int = 100,
        relative_change: float = 0.001,
    ):
        """
        A decision tree classifier supporting both traditional axis-aligned and oblique splits.

        In addition to conventional axis-aligned splits, the tree can split on
        linear combinations of features (oblique splits). Class labels passed
        to `fit` must be the integers 0, 1, ..., k-1.

        Parameters
        ----------
        use_oblique : bool, default=True
            - `True`: allow oblique splits built from linear combinations of features.
            - `False`: use axis-aligned splits only.

        max_depth : int, default=-1
            Maximum depth of the tree.

            - `-1`: grow until leaves are pure or hold fewer than `min_samples_split` samples.
            - `int > 0`: stop at the given depth.

        min_samples_leaf : int, default=1
            Minimum number of samples required at a leaf node.

        min_samples_split : int, default=2
            Minimum number of samples required to split an internal node.

        min_impurity_decrease : float, default=0.0
            Minimum impurity decrease required to create a split.

        ccp_alpha : float, default=0.0
            Complexity parameter for Minimal Cost-Complexity Pruning.

        categories : List[int], default=None
            Column indices of the categorical features.

        random_state : int, default=None
            Seed for random number generation in oblique splits.

            - Only used when `use_oblique=True`.

        n_pair : int, default=2
            Number of features combined in an oblique split.

            - Only used when `use_oblique=True`.

        gamma : float, default=1.0
            Separation strength parameter for oblique splits.

            - Only used when `use_oblique=True`.

        max_iter : int, default=100
            Maximum number of L-BFGS iterations for oblique splits.

            - Only used when `use_oblique=True`.

        relative_change : float, default=0.001
            Early stopping threshold for the L-BFGS optimization.

            - Only used when `use_oblique=True`.
        """
        hyperparameters = {
            "max_depth": max_depth,
            "min_samples_leaf": min_samples_leaf,
            "min_samples_split": min_samples_split,
            "min_impurity_decrease": min_impurity_decrease,
            "ccp_alpha": ccp_alpha,
            "categories": categories,
            "use_oblique": use_oblique,
            "random_state": random_state,
            "n_pair": n_pair,
            "gamma": gamma,
            "max_iter": max_iter,
            "relative_change": relative_change,
        }
        # task=False selects the classification mode of BaseTree.
        super().__init__(task=False, **hyperparameters)

    def fit(
        self, X: ArrayLike, y: ArrayLike, sample_weight: Optional[ArrayLike] = None
    ) -> "Classifier":
        """
        Build a decision tree classifier from the training set (X, y).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The training input samples.
        y : array-like of shape (n_samples,)
            Class labels.
        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        self : Classifier
            Fitted estimator.
        """
        fitted = super().fit(X, y, sample_weight)
        return fitted

    def predict(self, X: ArrayLike) -> NDArray:
        """
        Predict class labels for X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples to predict.

        Returns
        -------
        y : NDArray of shape (n_samples,)
            For each sample, the index of the largest entry in the row
            returned by the base `predict`.
        """
        per_class_output = super().predict(X)
        return np.argmax(per_class_output, axis=1)

    def predict_proba(self, X: ArrayLike) -> NDArray:
        """
        Predict class probabilities for X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        proba : NDArray of shape (n_samples, n_classes)
            The class probabilities of the input samples, as returned by the
            base `predict`.
        """
        probabilities = super().predict(X)
        return probabilities
640
+
641
+
642
class Regressor(BaseTree):
    def __init__(
        self,
        use_oblique: bool = True,
        max_depth: int = -1,
        min_samples_leaf: int = 1,
        min_samples_split: int = 2,
        min_impurity_decrease: float = 0.0,
        ccp_alpha: float = 0.0,
        categories: Optional[List[int]] = None,
        random_state: Optional[int] = None,
        n_pair: int = 2,
        gamma: float = 1.0,
        max_iter: int = 100,
        relative_change: float = 0.001,
    ):
        """
        A decision tree regressor supporting both traditional axis-aligned and oblique splits.

        In addition to conventional axis-aligned splits, the tree can split on
        linear combinations of features (oblique splits) when modeling
        continuous targets.

        Parameters
        ----------
        use_oblique : bool, default=True
            - `True`: allow oblique splits built from linear combinations of features.
            - `False`: use axis-aligned splits only.

        max_depth : int, default=-1
            Maximum depth of the tree.

            - `-1`: grow until leaves are pure or hold fewer than `min_samples_split` samples.
            - `int > 0`: stop at the given depth.

        min_samples_leaf : int, default=1
            Minimum number of samples required at a leaf node.

        min_samples_split : int, default=2
            Minimum number of samples required to split an internal node.

        min_impurity_decrease : float, default=0.0
            Minimum impurity decrease required to create a split.

        ccp_alpha : float, default=0.0
            Complexity parameter for Minimal Cost-Complexity Pruning.

        categories : List[int], default=None
            Column indices of the categorical features.

        random_state : int, default=None
            Seed for random number generation in oblique splits.

            - Only used when `use_oblique=True`.

        n_pair : int, default=2
            Number of features combined in an oblique split.

            - Only used when `use_oblique=True`.

        gamma : float, default=1.0
            Separation strength parameter for oblique splits.

            - Only used when `use_oblique=True`.

        max_iter : int, default=100
            Maximum number of L-BFGS iterations for oblique splits.

            - Only used when `use_oblique=True`.

        relative_change : float, default=0.001
            Early stopping threshold for the L-BFGS optimization.

            - Only used when `use_oblique=True`.
        """
        hyperparameters = {
            "max_depth": max_depth,
            "min_samples_leaf": min_samples_leaf,
            "min_samples_split": min_samples_split,
            "min_impurity_decrease": min_impurity_decrease,
            "ccp_alpha": ccp_alpha,
            "categories": categories,
            "use_oblique": use_oblique,
            "random_state": random_state,
            "n_pair": n_pair,
            "gamma": gamma,
            "max_iter": max_iter,
            "relative_change": relative_change,
        }
        # task=True selects the regression mode of BaseTree.
        super().__init__(task=True, **hyperparameters)

    def fit(
        self, X: ArrayLike, y: ArrayLike, sample_weight: Optional[ArrayLike] = None
    ) -> "Regressor":
        """
        Build a decision tree regressor from the training set (X, y).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The training input samples.
        y : array-like of shape (n_samples,)
            Target values.
        sample_weight : array-like of shape (n_samples,), optional, default=None
            Sample weights.

        Returns
        -------
        self : Regressor
            Fitted estimator.
        """
        fitted = super().fit(X, y, sample_weight)
        return fitted

    def predict(self, X: ArrayLike) -> NDArray:
        """
        Predict regression target for X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples to predict.

        Returns
        -------
        y : NDArray of shape (n_samples,)
            The predicted values, flattened to one dimension.
        """
        raw_predictions = super().predict(X)
        return raw_predictions.ravel()