perpetual 1.0.40__cp311-cp311-macosx_10_12_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
perpetual/data.py ADDED
@@ -0,0 +1,27 @@
1
+ from dataclasses import dataclass
2
+ from typing import Any, Iterable, Optional, Union
3
+
4
+
5
@dataclass
class Node:
    """Dataclass representation of a node, this represents all of the fields present in a tree node."""

    # Identifier of the node within its tree.
    num: int
    # Value contributed to the prediction when a sample lands in this node.
    weight_value: float
    # Accumulated hessian statistic for the samples routed through this node.
    hessian_sum: float
    # Depth of the node in the tree (root at 0).
    depth: int = 0
    # Threshold used to route samples to the left/right children.
    split_value: float = 0.0
    # Feature the node splits on — a column name or a column index.
    split_feature: Union[str, int] = ""
    # Gain attributed to this split.
    split_gain: float = 0.0
    # Child node that samples with a missing feature value are routed to.
    missing_node: int = 0
    # `num` of the left child; presumably 0 when the node is a leaf — TODO(review) confirm.
    left_child: int = 0
    # `num` of the right child; presumably 0 when the node is a leaf — TODO(review) confirm.
    right_child: int = 0
    # True when the node is terminal and only carries a prediction value.
    is_leaf: bool = False
    # Kind of node; defaults to "split".
    node_type: str = "split"
    # `num` of the parent node; presumably 0 for the root — TODO(review) confirm.
    parent_node: int = 0
    # Generalization statistic recorded for the node, when available.
    generalization: Optional[float] = None
    # Categories routed to the left child for a categorical split.
    # NOTE(review): element type not visible here — confirm against the core library.
    left_cats: Optional[Iterable] = None
    # Categories routed to the right child for a categorical split.
    right_cats: Optional[Iterable] = None
    # Number of samples associated with this node.
    count: int = 0
    # Per-node weights, when available.
    weights: Optional[Iterable[float]] = None
    # Additional statistics attached to the node; opaque at this layer.
    stats: Optional[Any] = None
perpetual/serialize.py ADDED
@@ -0,0 +1,74 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from abc import ABC, abstractmethod
5
+ from ast import literal_eval
6
+ from dataclasses import dataclass
7
+ from typing import Dict, Generic, List, Tuple, TypeVar, Union
8
+
9
+ import numpy as np
10
+ import numpy.typing as npt
11
+
12
+ T = TypeVar("T")
13
+
14
+
15
+ class BaseSerializer(ABC, Generic[T]):
16
+ @abstractmethod
17
+ def serialize(self, obj: T) -> str:
18
+ """serialize method - should take an object and return a string"""
19
+
20
+ @abstractmethod
21
+ def deserialize(self, obj_repr: str) -> T:
22
+ """deserialize method - should take a string and return original object"""
23
+
24
+
25
+ Scaler = Union[int, float, str]
26
+
27
+
28
+ class ScalerSerializer(BaseSerializer[Scaler]):
29
+ def serialize(self, obj: Scaler) -> str:
30
+ if isinstance(obj, str):
31
+ obj_ = f"'{obj}'"
32
+ else:
33
+ obj_ = str(obj)
34
+ return obj_
35
+
36
+ def deserialize(self, obj_repr: str) -> Scaler:
37
+ return literal_eval(node_or_string=obj_repr)
38
+
39
+
40
+ ObjectItem = Union[
41
+ List[Scaler],
42
+ Dict[str, Scaler],
43
+ Scaler,
44
+ ]
45
+
46
+
47
+ class ObjectSerializer(BaseSerializer[ObjectItem]):
48
+ def serialize(self, obj: ObjectItem) -> str:
49
+ return json.dumps(obj)
50
+
51
+ def deserialize(self, obj_repr: str) -> ObjectItem:
52
+ return json.loads(obj_repr)
53
+
54
+
55
+ @dataclass
56
+ class NumpyData:
57
+ array: Union[List[float], List[int]]
58
+ dtype: str
59
+ shape: Tuple[int, ...]
60
+
61
+
62
+ class NumpySerializer(BaseSerializer[npt.NDArray]):
63
+ def serialize(self, obj: npt.NDArray) -> str:
64
+ return json.dumps(
65
+ {"array": obj.tolist(), "dtype": str(obj.dtype), "shape": obj.shape}
66
+ )
67
+
68
+ def deserialize(self, obj_repr: str) -> npt.NDArray:
69
+ data = NumpyData(**json.loads(obj_repr))
70
+ a = np.array(data.array, dtype=data.dtype) # type: ignore
71
+ if len(data.shape) == 1:
72
+ return a
73
+ else:
74
+ return a.reshape(data.shape)
perpetual/sklearn.py ADDED
@@ -0,0 +1,194 @@
1
+ import warnings
2
+ from types import FunctionType
3
+ from typing import Any, Dict, Optional, Tuple, Union
4
+
5
+ from sklearn.base import ClassifierMixin, RegressorMixin
6
+ from sklearn.metrics import accuracy_score, r2_score
7
+ from typing_extensions import Self
8
+
9
+ from perpetual.booster import PerpetualBooster
10
+
11
+
12
class PerpetualClassifier(PerpetualBooster, ClassifierMixin):
    """
    A scikit-learn compatible classifier based on PerpetualBooster.
    Uses 'LogLoss' as the default objective.
    """

    # Expose the objective explicitly in the __init__ signature to allow
    # scikit-learn to correctly discover and set it via set_params.
    def __init__(
        self,
        *,
        objective: Union[
            str, Tuple[FunctionType, FunctionType, FunctionType]
        ] = "LogLoss",
        budget: float = 0.5,
        num_threads: Optional[int] = None,
        monotone_constraints: Union[Dict[Any, int], None] = None,
        # ... other parameters ...
        max_bin: int = 256,
        max_cat: int = 1000,
        # Capture all parameters in a way that BaseEstimator can handle
        **kwargs,
    ):
        # Any string or (loss, grad, hess) tuple objective is accepted here;
        # fit() warns when a string objective does not look like a
        # classification loss, so custom objectives remain usable.
        super().__init__(
            objective=objective,
            budget=budget,
            num_threads=num_threads,
            monotone_constraints=monotone_constraints,
            # ... pass all other parameters ...
            max_bin=max_bin,
            max_cat=max_cat,
            **kwargs,  # Catch-all for any other parameters passed by user or set_params
        )

    # predict, predict_proba, and predict_log_proba are inherited
    # and properly adapted in PerpetualBooster.

    def fit(self, X, y, sample_weight=None, **fit_params) -> Self:
        """A wrapper for the base fit method.

        Args:
            X: Training data.
            y: Class labels; in classification the labels (classes_) are set
                in the base fit.
            sample_weight: Optional per-instance weights.
            **fit_params: Forwarded to the base fit method.
        """
        # Warn (rather than fail) when a string objective is not a known
        # classification loss; tuple-form custom objectives are never flagged.
        if isinstance(self.objective, str) and self.objective not in ["LogLoss"]:
            warnings.warn(
                f"Objective '{self.objective}' is typically for regression/ranking but used in PerpetualClassifier. Consider 'LogLoss'."
            )

        return super().fit(X, y, sample_weight=sample_weight, **fit_params)

    def score(self, X, y, sample_weight=None):
        """Returns the mean accuracy on the given test data and labels."""
        preds = self.predict(X)
        return accuracy_score(y, preds, sample_weight=sample_weight)
72
+
73
+
74
class PerpetualRegressor(PerpetualBooster, RegressorMixin):
    """
    A scikit-learn compatible regressor based on PerpetualBooster.
    Uses 'SquaredLoss' as the default objective.
    """

    # Built-in string objectives suitable for regression; defined once so the
    # constructor documentation and the fit-time check cannot drift apart.
    _REGRESSION_OBJECTIVES = (
        "SquaredLoss",
        "QuantileLoss",
        "HuberLoss",
        "AdaptiveHuberLoss",
    )

    def __init__(
        self,
        *,
        objective: Union[
            str, Tuple[FunctionType, FunctionType, FunctionType]
        ] = "SquaredLoss",
        budget: float = 0.5,
        num_threads: Optional[int] = None,
        monotone_constraints: Union[Dict[Any, int], None] = None,
        # ... other parameters ...
        max_bin: int = 256,
        max_cat: int = 1000,
        **kwargs,
    ):
        # Custom string or tuple objectives are allowed here; fit() warns when
        # a string objective is not one of _REGRESSION_OBJECTIVES.
        super().__init__(
            objective=objective,
            budget=budget,
            num_threads=num_threads,
            monotone_constraints=monotone_constraints,
            # ... pass all other parameters ...
            max_bin=max_bin,
            max_cat=max_cat,
            **kwargs,
        )

    def fit(self, X, y, sample_weight=None, **fit_params) -> Self:
        """A wrapper for the base fit method.

        Args:
            X: Training data.
            y: Regression targets.
            sample_weight: Optional per-instance weights.
            **fit_params: Forwarded to the base fit method.
        """
        # Warn (rather than fail) on objectives that don't look like
        # regression losses; tuple-form custom objectives are never flagged.
        if (
            isinstance(self.objective, str)
            and self.objective not in self._REGRESSION_OBJECTIVES
        ):
            warnings.warn(
                f"Objective '{self.objective}' may not be suitable for PerpetualRegressor. Consider 'SquaredLoss' or a quantile/huber loss."
            )

        return super().fit(X, y, sample_weight=sample_weight, **fit_params)

    def score(self, X, y, sample_weight=None):
        """Returns the coefficient of determination ($R^2$) of the prediction."""
        preds = self.predict(X)
        return r2_score(y, preds, sample_weight=sample_weight)
134
+
135
+
136
class PerpetualRanker(
    PerpetualBooster, RegressorMixin
):  # Ranking models sometimes inherit from RegressorMixin for compatibility
    """
    A scikit-learn compatible ranker based on PerpetualBooster.
    Uses 'ListNetLoss' as the default objective.
    The 'group' parameter is required by fit for the ranking objective.
    """

    def __init__(
        self,
        *,
        objective: Union[
            str, Tuple[FunctionType, FunctionType, FunctionType]
        ] = "ListNetLoss",
        budget: float = 0.5,
        num_threads: Optional[int] = None,
        monotone_constraints: Union[Dict[Any, int], None] = None,
        # ... other parameters ...
        max_bin: int = 256,
        max_cat: int = 1000,
        **kwargs,
    ):
        # A string objective other than the ranking loss is suspicious here,
        # so warn — but do not fail, to keep experimentation possible.
        if isinstance(objective, str) and objective != "ListNetLoss":
            warnings.warn(
                f"Objective '{objective}' may not be suitable for PerpetualRanker. Consider 'ListNetLoss'."
            )

        super().__init__(
            objective=objective,
            budget=budget,
            num_threads=num_threads,
            monotone_constraints=monotone_constraints,
            # ... pass all other parameters ...
            max_bin=max_bin,
            max_cat=max_cat,
            **kwargs,
        )

    def fit(self, X, y, group=None, sample_weight=None, **fit_params) -> Self:
        """
        Fit the ranker. Requires the 'group' parameter.

        Args:
            X: Training data.
            y: Target relevance scores.
            group: Group lengths to use for a ranking objective. (Required for ListNetLoss).
            sample_weight: Instance weights.
        """
        # The built-in ranking loss cannot be evaluated without group lengths.
        ranking_objective = (
            isinstance(self.objective, str) and self.objective == "ListNetLoss"
        )
        if ranking_objective and group is None:
            raise ValueError(
                "The 'group' parameter must be provided when using the 'ListNetLoss' objective for ranking."
            )

        return super().fit(X, y, sample_weight=sample_weight, group=group, **fit_params)
perpetual/types.py ADDED
@@ -0,0 +1,151 @@
1
+ from typing import Any, Dict, Iterable, Protocol, Set
2
+
3
+ import numpy as np
4
+ from typing_extensions import Self
5
+
6
+
7
class BoosterType(Protocol):
    """Structural (duck-typed) interface of the native single-output booster."""

    # Feature index -> monotonicity constraint; presumably -1/0/1 — TODO(review)
    # confirm value semantics against the core implementation.
    monotone_constraints: Dict[int, int]
    # Feature indices whose missing-value handling terminates — semantics
    # defined by the core implementation.
    terminate_missing_features: Set[int]
    # Number of trees in the fitted model.
    number_of_trees: int
    # Base score added to predictions (single output).
    base_score: float

    def fit(
        self,
        flat_data: np.ndarray,
        rows: int,
        cols: int,
        y: np.ndarray,
        budget: float,
        sample_weight: np.ndarray,
        parallel: bool = False,
    ):
        """Fit the booster in place on a flattened rows x cols feature matrix."""

    def predict(
        self,
        flat_data: np.ndarray,
        rows: int,
        cols: int,
        parallel: bool = True,
    ) -> np.ndarray:
        """Return predictions for a flattened rows x cols feature matrix."""

    def predict_proba(
        self,
        flat_data: np.ndarray,
        rows: int,
        cols: int,
        parallel: bool = True,
    ) -> np.ndarray:
        """Return predicted probabilities for a flattened rows x cols feature matrix."""

    def predict_contributions(
        self,
        flat_data: np.ndarray,
        rows: int,
        cols: int,
        method: str,
        parallel: bool = True,
    ) -> np.ndarray:
        """Return per-feature prediction contributions computed with `method`."""

    def value_partial_dependence(
        self,
        feature: int,
        value: float,
    ) -> float:
        """Return the partial-dependence value of `feature` evaluated at `value`."""

    def calculate_feature_importance(
        self,
        method: str,
        normalize: bool,
    ) -> Dict[int, float]:
        """Return a feature-index -> importance mapping computed with `method`."""

    def text_dump(self) -> Iterable[str]:
        """Return a textual representation of the model as an iterable of strings."""

    @classmethod
    def load_booster(cls, path: str) -> Self:
        """Load a booster previously written with `save_booster` from `path`."""

    def save_booster(self, path: str):
        """Save the booster to `path`."""

    @classmethod
    def from_json(cls, json_str: str) -> Self:
        """Construct a booster from its JSON string representation."""

    def json_dump(self) -> str:
        """Return the booster's JSON string representation."""

    def get_params(self) -> Dict[str, Any]:
        """Return the booster's parameters as a dictionary."""

    def insert_metadata(self, key: str, value: str) -> None:
        """Store a key/value string pair in the model metadata."""

    def get_metadata(self, key: str) -> str:
        """Return the metadata value stored under `key`."""
+
94
class MultiOutputBoosterType(Protocol):
    """Structural (duck-typed) interface of the native multi-output booster."""

    # Feature index -> monotonicity constraint; presumably -1/0/1 — TODO(review)
    # confirm value semantics against the core implementation.
    monotone_constraints: Dict[int, int]
    # Feature indices whose missing-value handling terminates — semantics
    # defined by the core implementation.
    terminate_missing_features: Set[int]
    # Number of trees, one entry per output.
    number_of_trees: Iterable[int]
    # Base score, one entry per output.
    base_score: Iterable[float]

    def fit(
        self,
        flat_data: np.ndarray,
        rows: int,
        cols: int,
        y: np.ndarray,
        budget: float,
        sample_weight: np.ndarray,
        parallel: bool = False,
    ):
        """Fit the booster in place on a flattened rows x cols feature matrix."""

    def predict(
        self,
        flat_data: np.ndarray,
        rows: int,
        cols: int,
        parallel: bool = True,
    ) -> np.ndarray:
        """Return predictions for a flattened rows x cols feature matrix."""

    def predict_proba(
        self,
        flat_data: np.ndarray,
        rows: int,
        cols: int,
        parallel: bool = True,
    ) -> np.ndarray:
        """Return predicted probabilities for a flattened rows x cols feature matrix."""

    @classmethod
    def load_booster(cls, path: str) -> Self:
        """Load a booster previously written with `save_booster` from `path`."""

    def save_booster(self, path: str):
        """Save the booster to `path`."""

    @classmethod
    def from_json(cls, json_str: str) -> Self:
        """Construct a booster from its JSON string representation."""

    def json_dump(self) -> str:
        """Return the booster's JSON string representation."""

    def get_params(self) -> Dict[str, Any]:
        """Return the booster's parameters as a dictionary."""

    def insert_metadata(self, key: str, value: str) -> None:
        """Store a key/value string pair in the model metadata."""

    def get_metadata(self, key: str) -> str:
        """Return the metadata value stored under `key`."""