perpetual-0.9.5-cp313-cp313-win_amd64.whl → perpetual-0.10.0-cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of perpetual might be problematic.
- perpetual/booster.py +69 -14
- perpetual/perpetual.cp313-win_amd64.pyd +0 -0
- perpetual/sklearn.py +193 -0
- perpetual/utils.py +6 -3
- perpetual-0.10.0.dist-info/METADATA +31 -0
- perpetual-0.10.0.dist-info/RECORD +12 -0
- {perpetual-0.9.5.dist-info → perpetual-0.10.0.dist-info}/WHEEL +1 -1
- perpetual-0.9.5.dist-info/METADATA +0 -166
- perpetual-0.9.5.dist-info/RECORD +0 -11
- {perpetual-0.9.5.dist-info → perpetual-0.10.0.dist-info}/licenses/LICENSE +0 -0
perpetual/booster.py
CHANGED
```diff
@@ -1,16 +1,19 @@
-import json
 import inspect
+import json
 import warnings
-from
+from types import FunctionType
 from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union, cast
 
 import numpy as np
-
-from perpetual.perpetual import
-
+from perpetual.data import Node
+from perpetual.perpetual import (
+    MultiOutputBooster as CrateMultiOutputBooster,  # type: ignore
+)
+from perpetual.perpetual import (
+    PerpetualBooster as CratePerpetualBooster,  # type: ignore
+)
 from perpetual.serialize import BaseSerializer, ObjectSerializer
 from perpetual.types import BoosterType, MultiOutputBoosterType
-from perpetual.data import Node
 from perpetual.utils import (
     CONTRIBUTION_METHODS,
     convert_input_array,
@@ -18,6 +21,7 @@ from perpetual.utils import (
     transform_input_frame,
     type_df,
 )
+from typing_extensions import Self
 
 
 class PerpetualBooster:
@@ -37,7 +41,9 @@ class PerpetualBooster:
     def __init__(
         self,
         *,
-        objective:
+        objective: Union[
+            str, Tuple[FunctionType, FunctionType, FunctionType]
+        ] = "LogLoss",
         budget: float = 0.5,
         num_threads: Optional[int] = None,
         monotone_constraints: Union[Dict[Any, int], None] = None,
@@ -68,6 +74,10 @@ class PerpetualBooster:
                 "QuantileLoss" to use quantile error (regression),
                 "HuberLoss" to use huber error (regression),
                 "AdaptiveHuberLoss" to use adaptive huber error (regression).
+                "ListNetLoss" to use ListNet loss (ranking).
+                custom objective in the form of (grad, hess, init)
+                where grad and hess are functions that take (y, pred, sample_weight, group) and return the gradient and hessian
+                init is a function that takes (y, sample_weight, group) and returns the initial prediction value.
             Defaults to "LogLoss".
         budget (float, optional): a positive number for fitting budget. Increasing this number will more
             likely result in more boosting rounds and more increased predictive power.
@@ -165,7 +175,16 @@ class PerpetualBooster:
             {} if monotone_constraints is None else monotone_constraints
         )
 
-
+        if isinstance(objective, str):
+            self.objective = objective
+            self.loss = None
+            self.grad = None
+            self.init = None
+        else:
+            self.objective = None
+            self.loss = objective[0]
+            self.grad = objective[1]
+            self.init = objective[2]
         self.budget = budget
         self.num_threads = num_threads
         self.monotone_constraints = monotone_constraints_
```
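Per the updated docstring, the objective can now also be a `(grad, hess, init)` tuple of callables instead of a string. Below is a minimal sketch of such a custom squared-error objective, written against the documented callback signatures; the return shapes and the unused `sample_weight`/`group` handling are assumptions rather than something stated in the diff:

```python
import numpy as np
from perpetual import PerpetualBooster

# Hypothetical custom squared-error objective following the documented signatures:
# grad/hess receive (y, pred, sample_weight, group); init receives (y, sample_weight, group).
def grad(y, pred, sample_weight, group):
    return pred - y                # first derivative of 0.5 * (pred - y) ** 2

def hess(y, pred, sample_weight, group):
    return np.ones_like(y)         # constant second derivative

def init(y, sample_weight, group):
    return float(np.mean(y))       # start boosting from the target mean

model = PerpetualBooster(objective=(grad, hess, init), budget=0.5)
# model.fit(X, y)                  # X, y supplied by the caller
```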
```diff
@@ -207,10 +226,13 @@ class PerpetualBooster:
             iteration_limit=self.iteration_limit,
             memory_limit=self.memory_limit,
             stopping_rounds=self.stopping_rounds,
+            loss=self.loss,
+            grad=self.grad,
+            init=self.init,
         )
         self.booster = cast(BoosterType, booster)
 
-    def fit(self, X, y, sample_weight=None) -> Self:
+    def fit(self, X, y, sample_weight=None, group=None) -> Self:
         """Fit the gradient booster on a provided dataset.
 
         Args:
@@ -220,11 +242,19 @@ class PerpetualBooster:
             sample_weight (Union[ArrayLike, None], optional): Instance weights to use when
                 training the model. If None is passed, a weight of 1 will be used for every record.
                 Defaults to None.
+            group (Union[ArrayLike, None], optional): Group lengths to use for a ranking objective.
+                If None is passes, all items are assumed to be in the same group.
+                Defaults to None.
         """
 
-
-
-
+        (
+            features_,
+            flat_data,
+            rows,
+            cols,
+            categorical_features_,
+            cat_mapping,
+        ) = convert_input_frame(X, self.categorical_features, self.max_cat)
         self.n_features_ = cols
         self.cat_mapping = cat_mapping
         self.feature_names_in_ = features_
```
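`fit` now accepts a `group` argument holding per-query group lengths for ranking objectives. A hedged usage sketch with purely illustrative data:

```python
import numpy as np
from perpetual import PerpetualBooster

# Illustrative data: 6 items split into two query groups of lengths 4 and 2.
X = np.random.rand(6, 3)
y = np.array([3.0, 2.0, 1.0, 0.0, 1.0, 0.0])   # relevance labels
group = np.array([4, 2])                        # group lengths, as documented

model = PerpetualBooster(objective="ListNetLoss", budget=0.5)
model.fit(X, y, group=group)
```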
```diff
@@ -237,6 +267,11 @@ class PerpetualBooster:
         else:
             sample_weight_, _ = convert_input_array(sample_weight, self.objective)
 
+        if group is None:
+            group_ = None
+        else:
+            group_, _ = convert_input_array(group, self.objective, is_int=True)
+
         # Convert the monotone constraints into the form needed
         # by the rust code.
         crate_mc = self._standardize_monotonicity_map(X)
@@ -265,6 +300,9 @@ class PerpetualBooster:
                 iteration_limit=self.iteration_limit,
                 memory_limit=self.memory_limit,
                 stopping_rounds=self.stopping_rounds,
+                loss=self.loss,
+                grad=self.grad,
+                init=self.init,
             )
             self.booster = cast(BoosterType, booster)
         else:
@@ -289,6 +327,9 @@ class PerpetualBooster:
                 iteration_limit=self.iteration_limit,
                 memory_limit=self.memory_limit,
                 stopping_rounds=self.stopping_rounds,
+                loss=self.loss,
+                grad=self.grad,
+                init=self.init,
             )
             self.booster = cast(MultiOutputBoosterType, booster)
 
@@ -308,11 +349,12 @@ class PerpetualBooster:
             cols=cols,
             y=y_,
             sample_weight=sample_weight_,  # type: ignore
+            group=group_,
         )
 
         return self
 
-    def prune(self, X, y, sample_weight=None) -> Self:
+    def prune(self, X, y, sample_weight=None, group=None) -> Self:
         """Prune the gradient booster on a provided dataset.
 
         Args:
@@ -322,6 +364,9 @@ class PerpetualBooster:
             sample_weight (Union[ArrayLike, None], optional): Instance weights to use when
                 training the model. If None is passed, a weight of 1 will be used for every record.
                 Defaults to None.
+            group (Union[ArrayLike, None], optional): Group lengths to use for a ranking objective.
+                If None is passes, all items are assumed to be in the same group.
+                Defaults to None.
         """
 
         _, flat_data, rows, cols = transform_input_frame(X, self.cat_mapping)
@@ -333,18 +378,24 @@ class PerpetualBooster:
         else:
             sample_weight_, _ = convert_input_array(sample_weight, self.objective)
 
+        if group is None:
+            group_ = None
+        else:
+            group_, _ = convert_input_array(group, self.objective, is_int=True)
+
         self.booster.prune(
             flat_data=flat_data,
             rows=rows,
             cols=cols,
             y=y_,
             sample_weight=sample_weight_,  # type: ignore
+            group=group_,
         )
 
         return self
 
     def calibrate(
-        self, X_train, y_train, X_cal, y_cal, alpha, sample_weight=None
+        self, X_train, y_train, X_cal, y_cal, alpha, sample_weight=None, group=None
     ) -> Self:
         """Calibrate the gradient booster on a provided dataset.
 
@@ -361,6 +412,9 @@ class PerpetualBooster:
             sample_weight (Union[ArrayLike, None], optional): Instance weights to use when
                 training the model. If None is passed, a weight of 1 will be used for every record.
                 Defaults to None.
+            group (Union[ArrayLike, None], optional): Group lengths to use for a ranking objective.
+                If None is passes, all items are assumed to be in the same group.
+                Defaults to None.
         """
 
         _, flat_data_train, rows_train, cols_train = transform_input_frame(
@@ -391,6 +445,7 @@ class PerpetualBooster:
             y_cal=y_cal_,
             alpha=np.array(alpha),
             sample_weight=sample_weight_,  # type: ignore
+            group=group,
         )
 
         return self
```
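`prune` and `calibrate` gain the same `group` keyword as `fit`, so a ranking workflow can presumably thread the group lengths through all three calls. A sketch under that assumption; the data, split names, and `alpha` value are illustrative, and which split `calibrate`'s group lengths describe is not spelled out in the diff:

```python
import numpy as np
from perpetual import PerpetualBooster

# Illustrative ranking data: a couple of query groups per split (all values hypothetical).
X_train, y_train, group_train = np.random.rand(6, 3), np.random.rand(6), np.array([4, 2])
X_valid, y_valid, group_valid = np.random.rand(4, 3), np.random.rand(4), np.array([2, 2])
X_cal, y_cal, group_cal = np.random.rand(4, 3), np.random.rand(4), np.array([2, 2])

model = PerpetualBooster(objective="ListNetLoss", budget=0.5)
model.fit(X_train, y_train, group=group_train)
model.prune(X_valid, y_valid, group=group_valid)      # prune now accepts group as well
model.calibrate(X_train, y_train, X_cal, y_cal, alpha=0.1, group=group_cal)
```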
perpetual/perpetual.cp313-win_amd64.pyd
CHANGED
Binary file
perpetual/sklearn.py
ADDED
@@ -0,0 +1,193 @@
```python
import warnings
from types import FunctionType
from typing import Any, Dict, Optional, Tuple, Union

from perpetual.booster import PerpetualBooster
from sklearn.base import ClassifierMixin, RegressorMixin
from sklearn.metrics import accuracy_score, r2_score
from typing_extensions import Self


class PerpetualClassifier(PerpetualBooster, ClassifierMixin):
    """
    A scikit-learn compatible classifier based on PerpetualBooster.
    Uses 'LogLoss' as the default objective.
    """

    # Expose the objective explicitly in the __init__ signature to allow
    # scikit-learn to correctly discover and set it via set_params.
    def __init__(
        self,
        *,
        objective: Union[
            str, Tuple[FunctionType, FunctionType, FunctionType]
        ] = "LogLoss",
        budget: float = 0.5,
        num_threads: Optional[int] = None,
        monotone_constraints: Union[Dict[Any, int], None] = None,
        # ... other parameters ...
        max_bin: int = 256,
        max_cat: int = 1000,
        # Capture all parameters in a way that BaseEstimator can handle
        **kwargs,
    ):
        # Ensure the objective is one of the valid classification objectives
        valid_objectives = {
            "LogLoss"
        }  # Assuming only LogLoss for classification for simplicity
        if isinstance(objective, str) and objective not in valid_objectives:
            # Custom objectives are allowed via the tuple form
            pass

        super().__init__(
            objective=objective,
            budget=budget,
            num_threads=num_threads,
            monotone_constraints=monotone_constraints,
            # ... pass all other parameters ...
            max_bin=max_bin,
            max_cat=max_cat,
            **kwargs,  # Catch-all for any other parameters passed by user or set_params
        )

    # fit, predict, predict_proba, and predict_log_proba are inherited
    # and properly adapted in PerpetualBooster.

    def score(self, X, y, sample_weight=None):
        """Returns the mean accuracy on the given test data and labels."""
        preds = self.predict(X)
        return accuracy_score(y, preds, sample_weight=sample_weight)

    def fit(self, X, y, sample_weight=None, **fit_params) -> Self:
        """A wrapper for the base fit method."""
        # Check if objective is appropriate for classification if it's a string
        if isinstance(self.objective, str) and self.objective not in ["LogLoss"]:
            warnings.warn(
                f"Objective '{self.objective}' is typically for regression/ranking but used in PerpetualClassifier. Consider 'LogLoss'."
            )

        # In classification, the labels (classes_) are set in the base fit.
        return super().fit(X, y, sample_weight=sample_weight, **fit_params)


class PerpetualRegressor(PerpetualBooster, RegressorMixin):
    """
    A scikit-learn compatible regressor based on PerpetualBooster.
    Uses 'SquaredLoss' as the default objective.
    """

    def __init__(
        self,
        *,
        objective: Union[
            str, Tuple[FunctionType, FunctionType, FunctionType]
        ] = "SquaredLoss",
        budget: float = 0.5,
        num_threads: Optional[int] = None,
        monotone_constraints: Union[Dict[Any, int], None] = None,
        # ... other parameters ...
        max_bin: int = 256,
        max_cat: int = 1000,
        **kwargs,
    ):
        # Enforce or warn about regression objectives
        valid_objectives = {
            "SquaredLoss",
            "QuantileLoss",
            "HuberLoss",
            "AdaptiveHuberLoss",
        }
        if isinstance(objective, str) and objective not in valid_objectives:
            pass  # Allow for custom string or tuple objective

        super().__init__(
            objective=objective,
            budget=budget,
            num_threads=num_threads,
            monotone_constraints=monotone_constraints,
            # ... pass all other parameters ...
            max_bin=max_bin,
            max_cat=max_cat,
            **kwargs,
        )

    def fit(self, X, y, sample_weight=None, **fit_params) -> Self:
        """A wrapper for the base fit method."""
        # For regression, we typically enforce len(self.classes_) == 0 after fit
        if isinstance(self.objective, str) and self.objective not in [
            "SquaredLoss",
            "QuantileLoss",
            "HuberLoss",
            "AdaptiveHuberLoss",
        ]:
            warnings.warn(
                f"Objective '{self.objective}' may not be suitable for PerpetualRegressor. Consider 'SquaredLoss' or a quantile/huber loss."
            )

        return super().fit(X, y, sample_weight=sample_weight, **fit_params)

    def score(self, X, y, sample_weight=None):
        """Returns the coefficient of determination ($R^2$) of the prediction."""
        preds = self.predict(X)
        return r2_score(y, preds, sample_weight=sample_weight)


class PerpetualRanker(
    PerpetualBooster, RegressorMixin
):  # Ranking models sometimes inherit from RegressorMixin for compatibility
    """
    A scikit-learn compatible ranker based on PerpetualBooster.
    Uses 'ListNetLoss' as the default objective.
    Requires the 'group' parameter to be passed to fit.
    """

    def __init__(
        self,
        *,
        objective: Union[
            str, Tuple[FunctionType, FunctionType, FunctionType]
        ] = "ListNetLoss",
        budget: float = 0.5,
        num_threads: Optional[int] = None,
        monotone_constraints: Union[Dict[Any, int], None] = None,
        # ... other parameters ...
        max_bin: int = 256,
        max_cat: int = 1000,
        **kwargs,
    ):
        if isinstance(objective, str) and objective not in {"ListNetLoss"}:
            warnings.warn(
                f"Objective '{objective}' may not be suitable for PerpetualRanker. Consider 'ListNetLoss'."
            )

        super().__init__(
            objective=objective,
            budget=budget,
            num_threads=num_threads,
            monotone_constraints=monotone_constraints,
            # ... pass all other parameters ...
            max_bin=max_bin,
            max_cat=max_cat,
            **kwargs,
        )

    def fit(self, X, y, group=None, sample_weight=None, **fit_params) -> Self:
        """
        Fit the ranker. Requires the 'group' parameter.

        Args:
            X: Training data.
            y: Target relevance scores.
            group: Group lengths to use for a ranking objective. (Required for ListNetLoss).
            sample_weight: Instance weights.
        """
        if (
            group is None
            and isinstance(self.objective, str)
            and self.objective == "ListNetLoss"
        ):
            raise ValueError(
                "The 'group' parameter must be provided when using the 'ListNetLoss' objective for ranking."
            )

        return super().fit(X, y, sample_weight=sample_weight, group=group, **fit_params)
```
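The new module wraps the booster in scikit-learn estimator classes. A small usage sketch, assuming the wrappers otherwise behave like ordinary scikit-learn estimators; the dataset and cross-validation settings below are illustrative:

```python
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score

from perpetual.sklearn import PerpetualClassifier

X, y = make_classification(n_samples=500, n_features=10, random_state=0)

# PerpetualClassifier.score returns accuracy, so cross_val_score can use it directly.
clf = PerpetualClassifier(budget=0.5)
print(cross_val_score(clf, X, y, cv=3))
```

Note that `PerpetualRanker.fit` raises a `ValueError` when `group` is omitted under the default `ListNetLoss` objective, while `PerpetualClassifier` and `PerpetualRegressor` only warn when an unusual objective string is supplied.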
perpetual/utils.py
CHANGED
```diff
@@ -1,7 +1,7 @@
 import logging
-import numpy as np
 from typing import Dict, Iterable, List, Optional, Tuple
 
+import numpy as np
 
 logger = logging.getLogger(__name__)
 
@@ -32,7 +32,7 @@ def type_series(y):
    return ""
 
 
-def convert_input_array(x, objective, is_target=False) -> np.ndarray:
+def convert_input_array(x, objective, is_target=False, is_int=False) -> np.ndarray:
    classes_ = []
 
    if type(x).__module__.split(".")[0] == "numpy":
@@ -55,7 +55,10 @@ def convert_input_array(x, objective, is_target=False) -> np.ndarray:
        if len(classes_) > 2:
            x_ = np.squeeze(np.eye(len(classes_))[x_index])
 
-    if not np.issubdtype(x_.dtype, "
+    if is_int and not np.issubdtype(x_.dtype, "uint64"):
+        x_ = x_.astype(dtype="uint64", copy=False)
+
+    if not is_int and not np.issubdtype(x_.dtype, "float64"):
        x_ = x_.astype(dtype="float64", copy=False)
 
    if len(x_.shape) == 2:
```
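The new `is_int` flag makes `convert_input_array` coerce integer-style inputs such as group lengths to `uint64`, while other inputs still land on `float64`. A rough illustration of the intended dtype behaviour (this mimics the branches above; it is not the helper itself):

```python
import numpy as np

group = np.array([4, 2], dtype="int32")      # e.g. ranking group lengths
target = np.array([1, 0, 1], dtype="int8")   # e.g. labels

# Mirrors the new branches: is_int=True inputs become uint64, everything else float64.
group_uint = group.astype("uint64", copy=False)
target_float = target.astype("float64", copy=False)
print(group_uint.dtype, target_float.dtype)  # uint64 float64
```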
perpetual-0.10.0.dist-info/METADATA
ADDED
@@ -0,0 +1,31 @@
```
Metadata-Version: 2.4
Name: perpetual
Version: 0.10.0
Classifier: Programming Language :: Rust
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Requires-Dist: numpy
Requires-Dist: typing-extensions
Requires-Dist: black ; extra == 'dev'
Requires-Dist: pandas ; extra == 'dev'
Requires-Dist: polars ; extra == 'dev'
Requires-Dist: pyarrow ; extra == 'dev'
Requires-Dist: maturin ; extra == 'dev'
Requires-Dist: pytest ; extra == 'dev'
Requires-Dist: seaborn ; extra == 'dev'
Requires-Dist: scikit-learn ; extra == 'dev'
Requires-Dist: mkdocs-material ; extra == 'dev'
Requires-Dist: mkdocstrings[python] ; extra == 'dev'
Requires-Dist: mkdocs-autorefs ; extra == 'dev'
Requires-Dist: ruff ; extra == 'dev'
Provides-Extra: dev
License-File: LICENSE
Summary: A self-generalizing gradient boosting machine that doesn't need hyperparameter optimization
Keywords: rust,perpetual,machine learning,tree model,decision tree,gradient boosted decision tree,gradient boosting machine
Home-Page: https://perpetual-ml.com
Author-email: Mutlu Simsek <mutlusims3k@gmail.com>, Serkan Korkmaz <serkor1@duck.com>, Pieter Pel <pelpieter@gmail.com>
Requires-Python: >=3.9
```
perpetual-0.10.0.dist-info/RECORD
ADDED
@@ -0,0 +1,12 @@
```
perpetual-0.10.0.dist-info/METADATA,sha256=jz1ubQqMaGY-CjOIcUJdfzP2sTlikFLMShWkEXlG10s,1403
perpetual-0.10.0.dist-info/WHEEL,sha256=K7foeVF-x_RZTycPKa1uE1HH2bAWe3AiJbihrXn5Hhc,96
perpetual-0.10.0.dist-info/licenses/LICENSE,sha256=gcuuhKKc5-dwvyvHsXjlC9oM6N5gZ6umYbC8ewW1Yvg,35821
perpetual/__init__.py,sha256=V0RhghaG0CuKxKrzYUBYqrf7Drb-gjmznsbz9KT12lk,122
perpetual/booster.py,sha256=ZPymfG5L1M8XTld1H4af6k61T3eHPrbToTVFDHH29Ro,53161
perpetual/data.py,sha256=vhjWEc_ESYWoaczz0GkUPtfS0iRSKdVZSrCkQn8yLPw,630
perpetual/perpetual.cp313-win_amd64.pyd,sha256=prs0wdhYeZdZ6x8ypQOGv5GiRRC37jJsp0RhpBVMJQw,1761792
perpetual/serialize.py,sha256=FeW4JsUFVsrft9N7gz-ebn5mXvDv4LiJC2sgBEeGxYo,1957
perpetual/sklearn.py,sha256=5d1clRslX4-Kt8DwE-Jht9xZ01VeSNnz_ZmXWvkg8lc,7203
perpetual/types.py,sha256=idZNsDErNTur_rJ_5Co8Pb6fik-AUn9lkrXmjbQJVX0,3381
perpetual/utils.py,sha256=IiQtM6v7Ve4GNuKhjiAHuGal0QPoYG7CI55q_Ci3yd4,7627
perpetual-0.10.0.dist-info/RECORD,,
```
perpetual-0.9.5.dist-info/METADATA
DELETED
@@ -1,166 +0,0 @@
````text
Metadata-Version: 2.4
Name: perpetual
Version: 0.9.5
Classifier: Programming Language :: Rust
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Requires-Dist: numpy
Requires-Dist: typing-extensions
Requires-Dist: black ; extra == 'dev'
Requires-Dist: pandas ; extra == 'dev'
Requires-Dist: polars ; extra == 'dev'
Requires-Dist: pyarrow ; extra == 'dev'
Requires-Dist: maturin ; extra == 'dev'
Requires-Dist: pytest ; extra == 'dev'
Requires-Dist: seaborn ; extra == 'dev'
Requires-Dist: scikit-learn ; extra == 'dev'
Requires-Dist: mkdocs-material ; extra == 'dev'
Requires-Dist: mkdocstrings[python] ; extra == 'dev'
Requires-Dist: mkdocs-autorefs ; extra == 'dev'
Requires-Dist: ruff ; extra == 'dev'
Provides-Extra: dev
License-File: LICENSE
Summary: A self-generalizing gradient boosting machine that doesn't need hyperparameter optimization
Keywords: rust,perpetual,machine learning,tree model,decision tree,gradient boosted decision tree,gradient boosting machine
Home-Page: https://perpetual-ml.com
Author: Mutlu Simsek
Author-email: Mutlu Simsek <msimsek@perpetual-ml.com>
Requires-Python: >=3.9
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
Project-URL: Source Code, https://github.com/perpetual-ml/perpetual

<p align="center">
  <img height="120" src="https://github.com/perpetual-ml/perpetual/raw/main/resources/perp_logo.png">
</p>

<div align="center">

[](https://pypi.org/project/perpetual)
[](https://pypi.org/project/perpetual)
[](https://crates.io/crates/perpetual)
[](https://discord.gg/AyUK7rr6wy)



</div>

# Perpetual

PerpetualBooster is a gradient boosting machine (GBM) algorithm that doesn't need hyperparameter optimization unlike other GBM algorithms. Similar to AutoML libraries, it has a `budget` parameter. Increasing the `budget` parameter increases the predictive power of the algorithm and gives better results on unseen data. Start with a small budget (e.g. 0.5) and increase it (e.g. 1.0) once you are confident with your features. If you don't see any improvement with further increasing the `budget`, it means that you are already extracting the most predictive power out of your data.

## Usage

You can use the algorithm like in the example below. Check examples folders for both Rust and Python.

```python
from perpetual import PerpetualBooster

model = PerpetualBooster(objective="SquaredLoss", budget=0.5)
model.fit(X, y)
```

## Documentation

Documentation for the Python API can be found [here](https://perpetual-ml.github.io/perpetual) and for the Rust API [here](https://docs.rs/perpetual/latest/perpetual/).


## Benchmark

### PerpetualBooster vs. Optuna + LightGBM

Hyperparameter optimization usually takes 100 iterations with plain GBM algorithms. PerpetualBooster achieves the same accuracy in a single run. Thus, it achieves up to 100x speed-up at the same accuracy with different `budget` levels and with different datasets.

The following table summarizes the results for the [California Housing](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_california_housing.html) dataset (regression):

| Perpetual budget | LightGBM n_estimators | Perpetual mse | LightGBM mse | Speed-up wall time | Speed-up cpu time |
| ---------------- | --------------------- | ------------- | ------------ | ------------------ | ----------------- |
| 1.0 | 100 | 0.192 | 0.192 | 54x | 56x |
| 1.5 | 300 | 0.188 | 0.188 | 59x | 58x |
| 2.1 | 1000 | 0.185 | 0.186 | 42x | 41x |

The following table summarizes the results for the [Cover Types](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_covtype.html) dataset (classification):

| Perpetual budget | LightGBM n_estimators | Perpetual log loss | LightGBM log loss | Speed-up wall time | Speed-up cpu time |
| ---------------- | --------------------- | ------------------ | ----------------- | ------------------ | ----------------- |
| 0.9 | 100 | 0.091 | 0.084 | 72x | 78x |

The results can be reproduced using the scripts in the [examples](./python-package/examples) folder.

### PerpetualBooster vs. AutoGluon

PerpetualBooster is a GBM but behaves like AutoML so it is benchmarked also against AutoGluon (v1.2, best quality preset), the current leader in [AutoML benchmark](https://automlbenchmark.streamlit.app/cd_diagram). Top 10 datasets with the most number of rows are selected from [OpenML datasets](https://www.openml.org/) for both regression and classification tasks.

The results are summarized in the following table for regression tasks:

| OpenML Task | Perpetual Training Duration | Perpetual Inference Duration | Perpetual RMSE | AutoGluon Training Duration | AutoGluon Inference Duration | AutoGluon RMSE |
| -------------------------------------------------------- | ----- | ----- | ------------------- | -------- | ------ | ------------------ |
| [Airlines_DepDelay_10M](https://www.openml.org/t/359929) | 518 | 11.3 | 29.0 | 520 | 30.9 | <ins> 28.8 </ins> |
| [bates_regr_100](https://www.openml.org/t/361940) | 3421 | 15.1 | <ins> 1.084 </ins> | OOM | OOM | OOM |
| [BNG(libras_move)](https://www.openml.org/t/7327) | 1956 | 4.2 | <ins> 2.51 </ins> | 1922 | 97.6 | 2.53 |
| [BNG(satellite_image)](https://www.openml.org/t/7326) | 334 | 1.6 | 0.731 | 337 | 10.0 | <ins> 0.721 </ins> |
| [COMET_MC](https://www.openml.org/t/14949) | 44 | 1.0 | <ins> 0.0615 </ins> | 47 | 5.0 | 0.0662 |
| [friedman1](https://www.openml.org/t/361939) | 275 | 4.2 | <ins> 1.047 </ins> | 278 | 5.1 | 1.487 |
| [poker](https://www.openml.org/t/10102) | 38 | 0.6 | <ins> 0.256 </ins> | 41 | 1.2 | 0.722 |
| [subset_higgs](https://www.openml.org/t/361955) | 868 | 10.6 | <ins> 0.420 </ins> | 870 | 24.5 | 0.421 |
| [BNG(autoHorse)](https://www.openml.org/t/7319) | 107 | 1.1 | <ins> 19.0 </ins> | 107 | 3.2 | 20.5 |
| [BNG(pbc)](https://www.openml.org/t/7318) | 48 | 0.6 | <ins> 836.5 </ins> | 51 | 0.2 | 957.1 |
| average | 465 | 3.9 | - | 464 | 19.7 | - |

PerpetualBooster outperformed AutoGluon on 8 out of 10 regression tasks, training equally fast and inferring 5.1x faster.

The results are summarized in the following table for classification tasks:

| OpenML Task | Perpetual Training Duration | Perpetual Inference Duration | Perpetual AUC | AutoGluon Training Duration | AutoGluon Inference Duration | AutoGluon AUC |
| -------------------------------------------------------- | ------- | ------ | ------------------- | -------- | ------ | ------------------ |
| [BNG(spambase)](https://www.openml.org/t/146163) | 70.1 | 2.1 | <ins> 0.671 </ins> | 73.1 | 3.7 | 0.669 |
| [BNG(trains)](https://www.openml.org/t/208) | 89.5 | 1.7 | <ins> 0.996 </ins> | 106.4 | 2.4 | 0.994 |
| [breast](https://www.openml.org/t/361942) | 13699.3 | 97.7 | <ins> 0.991 </ins> | 13330.7 | 79.7 | 0.949 |
| [Click_prediction_small](https://www.openml.org/t/7291) | 89.1 | 1.0 | <ins> 0.749 </ins> | 101.0 | 2.8 | 0.703 |
| [colon](https://www.openml.org/t/361938) | 12435.2 | 126.7 | <ins> 0.997 </ins> | 12356.2 | 152.3 | 0.997 |
| [Higgs](https://www.openml.org/t/362113) | 3485.3 | 40.9 | <ins> 0.843 </ins> | 3501.4 | 67.9 | 0.816 |
| [SEA(50000)](https://www.openml.org/t/230) | 21.9 | 0.2 | <ins> 0.936 </ins> | 25.6 | 0.5 | 0.935 |
| [sf-police-incidents](https://www.openml.org/t/359994) | 85.8 | 1.5 | <ins> 0.687 </ins> | 99.4 | 2.8 | 0.659 |
| [bates_classif_100](https://www.openml.org/t/361941) | 11152.8 | 50.0 | <ins> 0.864 </ins> | OOM | OOM | OOM |
| [prostate](https://www.openml.org/t/361945) | 13699.9 | 79.8 | <ins> 0.987 </ins> | OOM | OOM | OOM |
| average | 3747.0 | 34.0 | - | 3699.2 | 39.0 | - |

PerpetualBooster outperformed AutoGluon on 10 out of 10 classification tasks, training equally fast and inferring 1.1x faster.

PerpetualBooster demonstrates greater robustness compared to AutoGluon, successfully training on all 20 tasks, whereas AutoGluon encountered out-of-memory errors on 3 of those tasks.

The results can be reproduced using the automlbenchmark fork [here](https://github.com/deadsoul44/automlbenchmark).



## Installation

The package can be installed directly from [pypi](https://pypi.org/project/perpetual):

```shell
pip install perpetual
```

Using [conda-forge](https://anaconda.org/conda-forge/perpetual):

```shell
conda install conda-forge::perpetual
```

To use in a Rust project and to get the package from [crates.io](https://crates.io/crates/perpetual):

```shell
cargo add perpetual
```

## Contribution

Contributions are welcome. Check CONTRIBUTING.md for the guideline.

## Paper

PerpetualBooster prevents overfitting with a generalization algorithm. The paper is work-in-progress to explain how the algorithm works. Check our [blog post](https://perpetual-ml.com/blog/how-perpetual-works) for a high level introduction to the algorithm.
````
perpetual-0.9.5.dist-info/RECORD
DELETED
@@ -1,11 +0,0 @@
```
perpetual-0.9.5.dist-info/METADATA,sha256=Sn-DvsBa-8pKP2NRhJMlVvh_fq0aeF19nI7NgsW7C6s,10724
perpetual-0.9.5.dist-info/WHEEL,sha256=oXe_QNnB5QbkkMcbfZh2d88Kje6edNs5JzpWke0-klE,96
perpetual-0.9.5.dist-info/licenses/LICENSE,sha256=gcuuhKKc5-dwvyvHsXjlC9oM6N5gZ6umYbC8ewW1Yvg,35821
perpetual/__init__.py,sha256=V0RhghaG0CuKxKrzYUBYqrf7Drb-gjmznsbz9KT12lk,122
perpetual/booster.py,sha256=vyZxchCqvPV79At-yoOVMLvCGdv8xISk2wq_Yu90DrI,50929
perpetual/data.py,sha256=vhjWEc_ESYWoaczz0GkUPtfS0iRSKdVZSrCkQn8yLPw,630
perpetual/perpetual.cp313-win_amd64.pyd,sha256=IhPEOjPZMQJ1illSQTWWegGF3LoNImBc0ZMYDZeDho0,1722368
perpetual/serialize.py,sha256=FeW4JsUFVsrft9N7gz-ebn5mXvDv4LiJC2sgBEeGxYo,1957
perpetual/types.py,sha256=idZNsDErNTur_rJ_5Co8Pb6fik-AUn9lkrXmjbQJVX0,3381
perpetual/utils.py,sha256=2ifo-9OXaeZBevSo0HKN4uKVy5qT4LqRAchrtZa9yMM,7486
perpetual-0.9.5.dist-info/RECORD,,
```
{perpetual-0.9.5.dist-info → perpetual-0.10.0.dist-info}/licenses/LICENSE
File without changes