perpetual-0.9.5-cp310-cp310-win_amd64.whl → perpetual-0.10.0-cp310-cp310-win_amd64.whl

This diff shows the published contents of two package versions as released to their public registry. It is provided for informational purposes only.

Potentially problematic release.

perpetual/booster.py CHANGED
@@ -1,16 +1,19 @@
-import json
 import inspect
+import json
 import warnings
-from typing_extensions import Self
+from types import FunctionType
 from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union, cast

 import numpy as np
-
-from perpetual.perpetual import PerpetualBooster as CratePerpetualBooster  # type: ignore
-from perpetual.perpetual import MultiOutputBooster as CrateMultiOutputBooster  # type: ignore
+from perpetual.data import Node
+from perpetual.perpetual import (
+    MultiOutputBooster as CrateMultiOutputBooster,  # type: ignore
+)
+from perpetual.perpetual import (
+    PerpetualBooster as CratePerpetualBooster,  # type: ignore
+)
 from perpetual.serialize import BaseSerializer, ObjectSerializer
 from perpetual.types import BoosterType, MultiOutputBoosterType
-from perpetual.data import Node
 from perpetual.utils import (
     CONTRIBUTION_METHODS,
     convert_input_array,
@@ -18,6 +21,7 @@ from perpetual.utils import (
     transform_input_frame,
     type_df,
 )
+from typing_extensions import Self


 class PerpetualBooster:
@@ -37,7 +41,9 @@ class PerpetualBooster:
     def __init__(
         self,
         *,
-        objective: str = "LogLoss",
+        objective: Union[
+            str, Tuple[FunctionType, FunctionType, FunctionType]
+        ] = "LogLoss",
         budget: float = 0.5,
         num_threads: Optional[int] = None,
         monotone_constraints: Union[Dict[Any, int], None] = None,
@@ -68,6 +74,10 @@
                "QuantileLoss" to use quantile error (regression),
                "HuberLoss" to use huber error (regression),
                "AdaptiveHuberLoss" to use adaptive huber error (regression).
+                "ListNetLoss" to use ListNet loss (ranking).
+                custom objective in the form of (grad, hess, init)
+                where grad and hess are functions that take (y, pred, sample_weight, group) and return the gradient and hessian
+                init is a function that takes (y, sample_weight, group) and returns the initial prediction value.
             Defaults to "LogLoss".
         budget (float, optional): a positive number for fitting budget. Increasing this number will more
             likely result in more boosting rounds and more increased predictive power.
@@ -165,7 +175,16 @@
            {} if monotone_constraints is None else monotone_constraints
        )

-        self.objective = objective
+        if isinstance(objective, str):
+            self.objective = objective
+            self.loss = None
+            self.grad = None
+            self.init = None
+        else:
+            self.objective = None
+            self.loss = objective[0]
+            self.grad = objective[1]
+            self.init = objective[2]
         self.budget = budget
         self.num_threads = num_threads
         self.monotone_constraints = monotone_constraints_
@@ -207,10 +226,13 @@
            iteration_limit=self.iteration_limit,
            memory_limit=self.memory_limit,
            stopping_rounds=self.stopping_rounds,
+            loss=self.loss,
+            grad=self.grad,
+            init=self.init,
        )
        self.booster = cast(BoosterType, booster)

-    def fit(self, X, y, sample_weight=None) -> Self:
+    def fit(self, X, y, sample_weight=None, group=None) -> Self:
        """Fit the gradient booster on a provided dataset.

        Args:
@@ -220,11 +242,19 @@
            sample_weight (Union[ArrayLike, None], optional): Instance weights to use when
                training the model. If None is passed, a weight of 1 will be used for every record.
                Defaults to None.
+            group (Union[ArrayLike, None], optional): Group lengths to use for a ranking objective.
+                If None is passed, all items are assumed to be in the same group.
+                Defaults to None.
        """

-        features_, flat_data, rows, cols, categorical_features_, cat_mapping = (
-            convert_input_frame(X, self.categorical_features, self.max_cat)
-        )
+        (
+            features_,
+            flat_data,
+            rows,
+            cols,
+            categorical_features_,
+            cat_mapping,
+        ) = convert_input_frame(X, self.categorical_features, self.max_cat)
        self.n_features_ = cols
        self.cat_mapping = cat_mapping
        self.feature_names_in_ = features_
@@ -237,6 +267,11 @@
        else:
            sample_weight_, _ = convert_input_array(sample_weight, self.objective)

+        if group is None:
+            group_ = None
+        else:
+            group_, _ = convert_input_array(group, self.objective, is_int=True)
+
        # Convert the monotone constraints into the form needed
        # by the rust code.
        crate_mc = self._standardize_monotonicity_map(X)
@@ -265,6 +300,9 @@
                iteration_limit=self.iteration_limit,
                memory_limit=self.memory_limit,
                stopping_rounds=self.stopping_rounds,
+                loss=self.loss,
+                grad=self.grad,
+                init=self.init,
            )
            self.booster = cast(BoosterType, booster)
        else:
@@ -289,6 +327,9 @@
                iteration_limit=self.iteration_limit,
                memory_limit=self.memory_limit,
                stopping_rounds=self.stopping_rounds,
+                loss=self.loss,
+                grad=self.grad,
+                init=self.init,
            )
            self.booster = cast(MultiOutputBoosterType, booster)

@@ -308,11 +349,12 @@
            cols=cols,
            y=y_,
            sample_weight=sample_weight_,  # type: ignore
+            group=group_,
        )

        return self

-    def prune(self, X, y, sample_weight=None) -> Self:
+    def prune(self, X, y, sample_weight=None, group=None) -> Self:
        """Prune the gradient booster on a provided dataset.

        Args:
@@ -322,6 +364,9 @@
            sample_weight (Union[ArrayLike, None], optional): Instance weights to use when
                training the model. If None is passed, a weight of 1 will be used for every record.
                Defaults to None.
+            group (Union[ArrayLike, None], optional): Group lengths to use for a ranking objective.
+                If None is passed, all items are assumed to be in the same group.
+                Defaults to None.
        """

        _, flat_data, rows, cols = transform_input_frame(X, self.cat_mapping)
@@ -333,18 +378,24 @@
        else:
            sample_weight_, _ = convert_input_array(sample_weight, self.objective)

+        if group is None:
+            group_ = None
+        else:
+            group_, _ = convert_input_array(group, self.objective, is_int=True)
+
        self.booster.prune(
            flat_data=flat_data,
            rows=rows,
            cols=cols,
            y=y_,
            sample_weight=sample_weight_,  # type: ignore
+            group=group_,
        )

        return self

    def calibrate(
-        self, X_train, y_train, X_cal, y_cal, alpha, sample_weight=None
+        self, X_train, y_train, X_cal, y_cal, alpha, sample_weight=None, group=None
    ) -> Self:
        """Calibrate the gradient booster on a provided dataset.

@@ -361,6 +412,9 @@
            sample_weight (Union[ArrayLike, None], optional): Instance weights to use when
                training the model. If None is passed, a weight of 1 will be used for every record.
                Defaults to None.
+            group (Union[ArrayLike, None], optional): Group lengths to use for a ranking objective.
+                If None is passed, all items are assumed to be in the same group.
+                Defaults to None.
        """

        _, flat_data_train, rows_train, cols_train = transform_input_frame(
@@ -391,6 +445,7 @@
            y_cal=y_cal_,
            alpha=np.array(alpha),
            sample_weight=sample_weight_,  # type: ignore
+            group=group,
        )

        return self
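The custom-objective tuple added above can be exercised end to end. Below is a minimal sketch, not shipped with the package: it assumes the booster calls the three functions with numpy arrays, with the signatures the new docstring describes.

```python
import numpy as np

from perpetual import PerpetualBooster

# Illustrative squared-error objective in the new (grad, hess, init) form.
# Per the docstring in this diff, the first two functions receive
# (y, pred, sample_weight, group); init receives (y, sample_weight, group).
def grad(y, pred, sample_weight, group):
    return pred - y  # derivative of 0.5 * (pred - y) ** 2 w.r.t. pred

def hess(y, pred, sample_weight, group):
    return np.ones_like(pred)  # constant second derivative

def init(y, sample_weight, group):
    return float(np.mean(y))  # start boosting from the target mean

model = PerpetualBooster(objective=(grad, hess, init), budget=0.5)
# model.fit(X, y)  # X, y: any inputs accepted by fit
```

Passing a string keeps the old behavior; a tuple is unpacked into the `loss`, `grad`, and `init` attributes that `__init__` now forwards to the Rust booster.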
perpetual/perpetual.cp310-win_amd64.pyd CHANGED (binary file, contents not shown)
perpetual/sklearn.py ADDED
@@ -0,0 +1,193 @@
+import warnings
+from types import FunctionType
+from typing import Any, Dict, Optional, Tuple, Union
+
+from perpetual.booster import PerpetualBooster
+from sklearn.base import ClassifierMixin, RegressorMixin
+from sklearn.metrics import accuracy_score, r2_score
+from typing_extensions import Self
+
+
+class PerpetualClassifier(PerpetualBooster, ClassifierMixin):
+    """
+    A scikit-learn compatible classifier based on PerpetualBooster.
+    Uses 'LogLoss' as the default objective.
+    """
+
+    # Expose the objective explicitly in the __init__ signature to allow
+    # scikit-learn to correctly discover and set it via set_params.
+    def __init__(
+        self,
+        *,
+        objective: Union[
+            str, Tuple[FunctionType, FunctionType, FunctionType]
+        ] = "LogLoss",
+        budget: float = 0.5,
+        num_threads: Optional[int] = None,
+        monotone_constraints: Union[Dict[Any, int], None] = None,
+        # ... other parameters ...
+        max_bin: int = 256,
+        max_cat: int = 1000,
+        # Capture all parameters in a way that BaseEstimator can handle
+        **kwargs,
+    ):
+        # Ensure the objective is one of the valid classification objectives
+        valid_objectives = {
+            "LogLoss"
+        }  # Assuming only LogLoss for classification for simplicity
+        if isinstance(objective, str) and objective not in valid_objectives:
+            # Custom objectives are allowed via the tuple form
+            pass
+
+        super().__init__(
+            objective=objective,
+            budget=budget,
+            num_threads=num_threads,
+            monotone_constraints=monotone_constraints,
+            # ... pass all other parameters ...
+            max_bin=max_bin,
+            max_cat=max_cat,
+            **kwargs,  # Catch-all for any other parameters passed by user or set_params
+        )
+
+    # fit, predict, predict_proba, and predict_log_proba are inherited
+    # and properly adapted in PerpetualBooster.
+
+    def score(self, X, y, sample_weight=None):
+        """Returns the mean accuracy on the given test data and labels."""
+        preds = self.predict(X)
+        return accuracy_score(y, preds, sample_weight=sample_weight)
+
+    def fit(self, X, y, sample_weight=None, **fit_params) -> Self:
+        """A wrapper for the base fit method."""
+        # Check if objective is appropriate for classification if it's a string
+        if isinstance(self.objective, str) and self.objective not in ["LogLoss"]:
+            warnings.warn(
+                f"Objective '{self.objective}' is typically for regression/ranking but used in PerpetualClassifier. Consider 'LogLoss'."
+            )
+
+        # In classification, the labels (classes_) are set in the base fit.
+        return super().fit(X, y, sample_weight=sample_weight, **fit_params)
+
+
+class PerpetualRegressor(PerpetualBooster, RegressorMixin):
+    """
+    A scikit-learn compatible regressor based on PerpetualBooster.
+    Uses 'SquaredLoss' as the default objective.
+    """
+
+    def __init__(
+        self,
+        *,
+        objective: Union[
+            str, Tuple[FunctionType, FunctionType, FunctionType]
+        ] = "SquaredLoss",
+        budget: float = 0.5,
+        num_threads: Optional[int] = None,
+        monotone_constraints: Union[Dict[Any, int], None] = None,
+        # ... other parameters ...
+        max_bin: int = 256,
+        max_cat: int = 1000,
+        **kwargs,
+    ):
+        # Enforce or warn about regression objectives
+        valid_objectives = {
+            "SquaredLoss",
+            "QuantileLoss",
+            "HuberLoss",
+            "AdaptiveHuberLoss",
+        }
+        if isinstance(objective, str) and objective not in valid_objectives:
+            pass  # Allow for custom string or tuple objective
+
+        super().__init__(
+            objective=objective,
+            budget=budget,
+            num_threads=num_threads,
+            monotone_constraints=monotone_constraints,
+            # ... pass all other parameters ...
+            max_bin=max_bin,
+            max_cat=max_cat,
+            **kwargs,
+        )
+
+    def fit(self, X, y, sample_weight=None, **fit_params) -> Self:
+        """A wrapper for the base fit method."""
+        # For regression, we typically enforce len(self.classes_) == 0 after fit
+        if isinstance(self.objective, str) and self.objective not in [
+            "SquaredLoss",
+            "QuantileLoss",
+            "HuberLoss",
+            "AdaptiveHuberLoss",
+        ]:
+            warnings.warn(
+                f"Objective '{self.objective}' may not be suitable for PerpetualRegressor. Consider 'SquaredLoss' or a quantile/huber loss."
+            )
+
+        return super().fit(X, y, sample_weight=sample_weight, **fit_params)
+
+    def score(self, X, y, sample_weight=None):
+        """Returns the coefficient of determination ($R^2$) of the prediction."""
+        preds = self.predict(X)
+        return r2_score(y, preds, sample_weight=sample_weight)
+
+
+class PerpetualRanker(
+    PerpetualBooster, RegressorMixin
+):  # Ranking models sometimes inherit from RegressorMixin for compatibility
+    """
+    A scikit-learn compatible ranker based on PerpetualBooster.
+    Uses 'ListNetLoss' as the default objective.
+    Requires the 'group' parameter to be passed to fit.
+    """
+
+    def __init__(
+        self,
+        *,
+        objective: Union[
+            str, Tuple[FunctionType, FunctionType, FunctionType]
+        ] = "ListNetLoss",
+        budget: float = 0.5,
+        num_threads: Optional[int] = None,
+        monotone_constraints: Union[Dict[Any, int], None] = None,
+        # ... other parameters ...
+        max_bin: int = 256,
+        max_cat: int = 1000,
+        **kwargs,
+    ):
+        if isinstance(objective, str) and objective not in {"ListNetLoss"}:
+            warnings.warn(
+                f"Objective '{objective}' may not be suitable for PerpetualRanker. Consider 'ListNetLoss'."
+            )
+
+        super().__init__(
+            objective=objective,
+            budget=budget,
+            num_threads=num_threads,
+            monotone_constraints=monotone_constraints,
+            # ... pass all other parameters ...
+            max_bin=max_bin,
+            max_cat=max_cat,
+            **kwargs,
+        )
+
+    def fit(self, X, y, group=None, sample_weight=None, **fit_params) -> Self:
+        """
+        Fit the ranker. Requires the 'group' parameter.
+
+        Args:
+            X: Training data.
+            y: Target relevance scores.
+            group: Group lengths to use for a ranking objective. (Required for ListNetLoss).
+            sample_weight: Instance weights.
+        """
+        if (
+            group is None
+            and isinstance(self.objective, str)
+            and self.objective == "ListNetLoss"
+        ):
+            raise ValueError(
+                "The 'group' parameter must be provided when using the 'ListNetLoss' objective for ranking."
+            )
+
+        return super().fit(X, y, sample_weight=sample_weight, group=group, **fit_params)
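Since perpetual/sklearn.py is a new file, a short usage sketch may help. It is illustrative only: it assumes the module imports as shown and uses synthetic placeholder data; the `group` array gives per-query group lengths, which `PerpetualRanker.fit` requires for the default "ListNetLoss" objective.

```python
import numpy as np

from perpetual.sklearn import PerpetualClassifier, PerpetualRanker

X = np.random.rand(100, 3)

# Classifier: plain scikit-learn interface, "LogLoss" by default.
clf = PerpetualClassifier(budget=0.5).fit(X, np.random.randint(0, 2, 100))
print(clf.score(X, np.random.randint(0, 2, 100)))  # mean accuracy

# Ranker: group lengths must sum to the number of rows (60 + 40 = 100).
ranker = PerpetualRanker(budget=0.5).fit(
    X, np.random.rand(100), group=np.array([60, 40])
)
```

Because both wrappers subclass `PerpetualBooster` and expose their parameters in `__init__`, they should also compose with `set_params`, pipelines, and cross-validation utilities from scikit-learn.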
perpetual/utils.py CHANGED
@@ -1,7 +1,7 @@
 import logging
-import numpy as np
 from typing import Dict, Iterable, List, Optional, Tuple

+import numpy as np

 logger = logging.getLogger(__name__)

@@ -32,7 +32,7 @@ def type_series(y):
     return ""


-def convert_input_array(x, objective, is_target=False) -> np.ndarray:
+def convert_input_array(x, objective, is_target=False, is_int=False) -> np.ndarray:
     classes_ = []

     if type(x).__module__.split(".")[0] == "numpy":
@@ -55,7 +55,10 @@ def convert_input_array(x, objective, is_target=False) -> np.ndarray:
     if len(classes_) > 2:
         x_ = np.squeeze(np.eye(len(classes_))[x_index])

-    if not np.issubdtype(x_.dtype, "float64"):
+    if is_int and not np.issubdtype(x_.dtype, "uint64"):
+        x_ = x_.astype(dtype="uint64", copy=False)
+
+    if not is_int and not np.issubdtype(x_.dtype, "float64"):
         x_ = x_.astype(dtype="float64", copy=False)

     if len(x_.shape) == 2:
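A small sketch of what the new `is_int` flag changes, assuming a numpy array as input and that the function returns the converted array plus the inferred class list (the two-value unpacking used throughout booster.py):

```python
import numpy as np

from perpetual.utils import convert_input_array

# Group lengths take the new integer path and come back as uint64 ...
group_, _ = convert_input_array(
    np.array([3, 2, 4], dtype="int32"), "ListNetLoss", is_int=True
)
print(group_.dtype)  # uint64

# ... while the default path still normalizes everything to float64.
weights_, _ = convert_input_array(np.array([1, 1, 1]), "SquaredLoss")
print(weights_.dtype)  # float64
```

The unsigned cast lines up with the `group` arrays handed to the Rust booster in `fit` and `prune`; targets and weights keep the existing `float64` path.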
perpetual-0.10.0.dist-info/METADATA ADDED
@@ -0,0 +1,31 @@
+Metadata-Version: 2.4
+Name: perpetual
+Version: 0.10.0
+Classifier: Programming Language :: Rust
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Requires-Dist: numpy
+Requires-Dist: typing-extensions
+Requires-Dist: black ; extra == 'dev'
+Requires-Dist: pandas ; extra == 'dev'
+Requires-Dist: polars ; extra == 'dev'
+Requires-Dist: pyarrow ; extra == 'dev'
+Requires-Dist: maturin ; extra == 'dev'
+Requires-Dist: pytest ; extra == 'dev'
+Requires-Dist: seaborn ; extra == 'dev'
+Requires-Dist: scikit-learn ; extra == 'dev'
+Requires-Dist: mkdocs-material ; extra == 'dev'
+Requires-Dist: mkdocstrings[python] ; extra == 'dev'
+Requires-Dist: mkdocs-autorefs ; extra == 'dev'
+Requires-Dist: ruff ; extra == 'dev'
+Provides-Extra: dev
+License-File: LICENSE
+Summary: A self-generalizing gradient boosting machine that doesn't need hyperparameter optimization
+Keywords: rust,perpetual,machine learning,tree model,decision tree,gradient boosted decision tree,gradient boosting machine
+Home-Page: https://perpetual-ml.com
+Author-email: Mutlu Simsek <mutlusims3k@gmail.com>, Serkan Korkmaz <serkor1@duck.com>, Pieter Pel <pelpieter@gmail.com>
+Requires-Python: >=3.9
perpetual-0.10.0.dist-info/RECORD ADDED
@@ -0,0 +1,12 @@
+perpetual-0.10.0.dist-info/METADATA,sha256=jz1ubQqMaGY-CjOIcUJdfzP2sTlikFLMShWkEXlG10s,1403
+perpetual-0.10.0.dist-info/WHEEL,sha256=Iz7QqxpWQRXToFIDkGspPPKDuV_klwuhW8ziiU5jhR8,96
+perpetual-0.10.0.dist-info/licenses/LICENSE,sha256=gcuuhKKc5-dwvyvHsXjlC9oM6N5gZ6umYbC8ewW1Yvg,35821
+perpetual/__init__.py,sha256=V0RhghaG0CuKxKrzYUBYqrf7Drb-gjmznsbz9KT12lk,122
+perpetual/booster.py,sha256=ZPymfG5L1M8XTld1H4af6k61T3eHPrbToTVFDHH29Ro,53161
+perpetual/data.py,sha256=vhjWEc_ESYWoaczz0GkUPtfS0iRSKdVZSrCkQn8yLPw,630
+perpetual/perpetual.cp310-win_amd64.pyd,sha256=FPo7v23dIgR8sPAW5cLnfuYu4gvpUgARsphV8D_A2g8,1767936
+perpetual/serialize.py,sha256=FeW4JsUFVsrft9N7gz-ebn5mXvDv4LiJC2sgBEeGxYo,1957
+perpetual/sklearn.py,sha256=5d1clRslX4-Kt8DwE-Jht9xZ01VeSNnz_ZmXWvkg8lc,7203
+perpetual/types.py,sha256=idZNsDErNTur_rJ_5Co8Pb6fik-AUn9lkrXmjbQJVX0,3381
+perpetual/utils.py,sha256=IiQtM6v7Ve4GNuKhjiAHuGal0QPoYG7CI55q_Ci3yd4,7627
+perpetual-0.10.0.dist-info/RECORD,,
perpetual-0.10.0.dist-info/WHEEL CHANGED
@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: maturin (1.9.1)
+Generator: maturin (1.9.4)
 Root-Is-Purelib: false
 Tag: cp310-cp310-win_amd64
@@ -1,166 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: perpetual
3
- Version: 0.9.5
4
- Classifier: Programming Language :: Rust
5
- Classifier: Programming Language :: Python :: 3
6
- Classifier: Programming Language :: Python :: 3.9
7
- Classifier: Programming Language :: Python :: 3.10
8
- Classifier: Programming Language :: Python :: 3.11
9
- Classifier: Programming Language :: Python :: 3.12
10
- Classifier: Programming Language :: Python :: 3.13
11
- Requires-Dist: numpy
12
- Requires-Dist: typing-extensions
13
- Requires-Dist: black ; extra == 'dev'
14
- Requires-Dist: pandas ; extra == 'dev'
15
- Requires-Dist: polars ; extra == 'dev'
16
- Requires-Dist: pyarrow ; extra == 'dev'
17
- Requires-Dist: maturin ; extra == 'dev'
18
- Requires-Dist: pytest ; extra == 'dev'
19
- Requires-Dist: seaborn ; extra == 'dev'
20
- Requires-Dist: scikit-learn ; extra == 'dev'
21
- Requires-Dist: mkdocs-material ; extra == 'dev'
22
- Requires-Dist: mkdocstrings[python] ; extra == 'dev'
23
- Requires-Dist: mkdocs-autorefs ; extra == 'dev'
24
- Requires-Dist: ruff ; extra == 'dev'
25
- Provides-Extra: dev
26
- License-File: LICENSE
27
- Summary: A self-generalizing gradient boosting machine that doesn't need hyperparameter optimization
28
- Keywords: rust,perpetual,machine learning,tree model,decision tree,gradient boosted decision tree,gradient boosting machine
29
- Home-Page: https://perpetual-ml.com
30
- Author: Mutlu Simsek
31
- Author-email: Mutlu Simsek <msimsek@perpetual-ml.com>
32
- Requires-Python: >=3.9
33
- Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
34
- Project-URL: Source Code, https://github.com/perpetual-ml/perpetual
35
-
36
- <p align="center">
37
- <img height="120" src="https://github.com/perpetual-ml/perpetual/raw/main/resources/perp_logo.png">
38
- </p>
39
-
40
- <div align="center">
41
-
42
- [![Python Versions](https://img.shields.io/pypi/pyversions/perpetual.svg?logo=python&logoColor=white)](https://pypi.org/project/perpetual)
43
- [![PyPI Version](https://img.shields.io/pypi/v/perpetual.svg?logo=pypi&logoColor=white)](https://pypi.org/project/perpetual)
44
- [![Crates.io Version](https://img.shields.io/crates/v/perpetual?logo=rust&logoColor=white)](https://crates.io/crates/perpetual)
45
- [![Static Badge](https://img.shields.io/badge/join-discord-blue?logo=discord)](https://discord.gg/AyUK7rr6wy)
46
- ![PyPI - Downloads](https://img.shields.io/pypi/dm/perpetual)
47
-
48
-
49
- </div>
50
-
51
- # Perpetual
52
-
53
- PerpetualBooster is a gradient boosting machine (GBM) algorithm that doesn't need hyperparameter optimization unlike other GBM algorithms. Similar to AutoML libraries, it has a `budget` parameter. Increasing the `budget` parameter increases the predictive power of the algorithm and gives better results on unseen data. Start with a small budget (e.g. 0.5) and increase it (e.g. 1.0) once you are confident with your features. If you don't see any improvement with further increasing the `budget`, it means that you are already extracting the most predictive power out of your data.
54
-
55
- ## Usage
56
-
57
- You can use the algorithm like in the example below. Check examples folders for both Rust and Python.
58
-
59
- ```python
60
- from perpetual import PerpetualBooster
61
-
62
- model = PerpetualBooster(objective="SquaredLoss", budget=0.5)
63
- model.fit(X, y)
64
- ```
65
-
66
- ## Documentation
67
-
68
- Documentation for the Python API can be found [here](https://perpetual-ml.github.io/perpetual) and for the Rust API [here](https://docs.rs/perpetual/latest/perpetual/).
69
-
70
-
71
- ## Benchmark
72
-
73
- ### PerpetualBooster vs. Optuna + LightGBM
74
-
75
- Hyperparameter optimization usually takes 100 iterations with plain GBM algorithms. PerpetualBooster achieves the same accuracy in a single run. Thus, it achieves up to 100x speed-up at the same accuracy with different `budget` levels and with different datasets.
76
-
77
- The following table summarizes the results for the [California Housing](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_california_housing.html) dataset (regression):
78
-
79
- | Perpetual budget | LightGBM n_estimators | Perpetual mse | LightGBM mse | Speed-up wall time | Speed-up cpu time |
80
- | ---------------- | --------------------- | ------------- | ------------ | ------------------ | ----------------- |
81
- | 1.0 | 100 | 0.192 | 0.192 | 54x | 56x |
82
- | 1.5 | 300 | 0.188 | 0.188 | 59x | 58x |
83
- | 2.1 | 1000 | 0.185 | 0.186 | 42x | 41x |
84
-
85
- The following table summarizes the results for the [Cover Types](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_covtype.html) dataset (classification):
86
-
87
- | Perpetual budget | LightGBM n_estimators | Perpetual log loss | LightGBM log loss | Speed-up wall time | Speed-up cpu time |
88
- | ---------------- | --------------------- | ------------------ | ----------------- | ------------------ | ----------------- |
89
- | 0.9 | 100 | 0.091 | 0.084 | 72x | 78x |
90
-
91
- The results can be reproduced using the scripts in the [examples](./python-package/examples) folder.
92
-
93
- ### PerpetualBooster vs. AutoGluon
94
-
95
- PerpetualBooster is a GBM but behaves like AutoML so it is benchmarked also against AutoGluon (v1.2, best quality preset), the current leader in [AutoML benchmark](https://automlbenchmark.streamlit.app/cd_diagram). Top 10 datasets with the most number of rows are selected from [OpenML datasets](https://www.openml.org/) for both regression and classification tasks.
96
-
97
- The results are summarized in the following table for regression tasks:
98
-
99
- | OpenML Task | Perpetual Training Duration | Perpetual Inference Duration | Perpetual RMSE | AutoGluon Training Duration | AutoGluon Inference Duration | AutoGluon RMSE |
100
- | -------------------------------------------------------- | ----- | ----- | ------------------- | -------- | ------ | ------------------ |
101
- | [Airlines_DepDelay_10M](https://www.openml.org/t/359929) | 518 | 11.3 | 29.0 | 520 | 30.9 | <ins> 28.8 </ins> |
102
- | [bates_regr_100](https://www.openml.org/t/361940) | 3421 | 15.1 | <ins> 1.084 </ins> | OOM | OOM | OOM |
103
- | [BNG(libras_move)](https://www.openml.org/t/7327) | 1956 | 4.2 | <ins> 2.51 </ins> | 1922 | 97.6 | 2.53 |
104
- | [BNG(satellite_image)](https://www.openml.org/t/7326) | 334 | 1.6 | 0.731 | 337 | 10.0 | <ins> 0.721 </ins> |
105
- | [COMET_MC](https://www.openml.org/t/14949) | 44 | 1.0 | <ins> 0.0615 </ins> | 47 | 5.0 | 0.0662 |
106
- | [friedman1](https://www.openml.org/t/361939) | 275 | 4.2 | <ins> 1.047 </ins> | 278 | 5.1 | 1.487 |
107
- | [poker](https://www.openml.org/t/10102) | 38 | 0.6 | <ins> 0.256 </ins> | 41 | 1.2 | 0.722 |
108
- | [subset_higgs](https://www.openml.org/t/361955) | 868 | 10.6 | <ins> 0.420 </ins> | 870 | 24.5 | 0.421 |
109
- | [BNG(autoHorse)](https://www.openml.org/t/7319) | 107 | 1.1 | <ins> 19.0 </ins> | 107 | 3.2 | 20.5 |
110
- | [BNG(pbc)](https://www.openml.org/t/7318) | 48 | 0.6 | <ins> 836.5 </ins> | 51 | 0.2 | 957.1 |
111
- | average | 465 | 3.9 | - | 464 | 19.7 | - |
112
-
113
- PerpetualBooster outperformed AutoGluon on 8 out of 10 regression tasks, training equally fast and inferring 5.1x faster.
114
-
115
- The results are summarized in the following table for classification tasks:
116
-
117
- | OpenML Task | Perpetual Training Duration | Perpetual Inference Duration | Perpetual AUC | AutoGluon Training Duration | AutoGluon Inference Duration | AutoGluon AUC |
118
- | -------------------------------------------------------- | ------- | ------ | ------------------- | -------- | ------ | ------------------ |
119
- | [BNG(spambase)](https://www.openml.org/t/146163) | 70.1 | 2.1 | <ins> 0.671 </ins> | 73.1 | 3.7 | 0.669 |
120
- | [BNG(trains)](https://www.openml.org/t/208) | 89.5 | 1.7 | <ins> 0.996 </ins> | 106.4 | 2.4 | 0.994 |
121
- | [breast](https://www.openml.org/t/361942) | 13699.3 | 97.7 | <ins> 0.991 </ins> | 13330.7 | 79.7 | 0.949 |
122
- | [Click_prediction_small](https://www.openml.org/t/7291) | 89.1 | 1.0 | <ins> 0.749 </ins> | 101.0 | 2.8 | 0.703 |
123
- | [colon](https://www.openml.org/t/361938) | 12435.2 | 126.7 | <ins> 0.997 </ins> | 12356.2 | 152.3 | 0.997 |
124
- | [Higgs](https://www.openml.org/t/362113) | 3485.3 | 40.9 | <ins> 0.843 </ins> | 3501.4 | 67.9 | 0.816 |
125
- | [SEA(50000)](https://www.openml.org/t/230) | 21.9 | 0.2 | <ins> 0.936 </ins> | 25.6 | 0.5 | 0.935 |
126
- | [sf-police-incidents](https://www.openml.org/t/359994) | 85.8 | 1.5 | <ins> 0.687 </ins> | 99.4 | 2.8 | 0.659 |
127
- | [bates_classif_100](https://www.openml.org/t/361941) | 11152.8 | 50.0 | <ins> 0.864 </ins> | OOM | OOM | OOM |
128
- | [prostate](https://www.openml.org/t/361945) | 13699.9 | 79.8 | <ins> 0.987 </ins> | OOM | OOM | OOM |
129
- | average | 3747.0 | 34.0 | - | 3699.2 | 39.0 | - |
130
-
131
- PerpetualBooster outperformed AutoGluon on 10 out of 10 classification tasks, training equally fast and inferring 1.1x faster.
132
-
133
- PerpetualBooster demonstrates greater robustness compared to AutoGluon, successfully training on all 20 tasks, whereas AutoGluon encountered out-of-memory errors on 3 of those tasks.
134
-
135
- The results can be reproduced using the automlbenchmark fork [here](https://github.com/deadsoul44/automlbenchmark).
136
-
137
-
138
-
139
- ## Installation
140
-
141
- The package can be installed directly from [pypi](https://pypi.org/project/perpetual):
142
-
143
- ```shell
144
- pip install perpetual
145
- ```
146
-
147
- Using [conda-forge](https://anaconda.org/conda-forge/perpetual):
148
-
149
- ```shell
150
- conda install conda-forge::perpetual
151
- ```
152
-
153
- To use in a Rust project and to get the package from [crates.io](https://crates.io/crates/perpetual):
154
-
155
- ```shell
156
- cargo add perpetual
157
- ```
158
-
159
- ## Contribution
160
-
161
- Contributions are welcome. Check CONTRIBUTING.md for the guideline.
162
-
163
- ## Paper
164
-
165
- PerpetualBooster prevents overfitting with a generalization algorithm. The paper is work-in-progress to explain how the algorithm works. Check our [blog post](https://perpetual-ml.com/blog/how-perpetual-works) for a high level introduction to the algorithm.
166
-
perpetual-0.9.5.dist-info/RECORD REMOVED
@@ -1,11 +0,0 @@
-perpetual-0.9.5.dist-info/METADATA,sha256=Sn-DvsBa-8pKP2NRhJMlVvh_fq0aeF19nI7NgsW7C6s,10724
-perpetual-0.9.5.dist-info/WHEEL,sha256=QKV4Sl7MSpV78xozy9-tp6UITYpirrRleZnt0vKJJXI,96
-perpetual-0.9.5.dist-info/licenses/LICENSE,sha256=gcuuhKKc5-dwvyvHsXjlC9oM6N5gZ6umYbC8ewW1Yvg,35821
-perpetual/__init__.py,sha256=V0RhghaG0CuKxKrzYUBYqrf7Drb-gjmznsbz9KT12lk,122
-perpetual/booster.py,sha256=vyZxchCqvPV79At-yoOVMLvCGdv8xISk2wq_Yu90DrI,50929
-perpetual/data.py,sha256=vhjWEc_ESYWoaczz0GkUPtfS0iRSKdVZSrCkQn8yLPw,630
-perpetual/perpetual.cp310-win_amd64.pyd,sha256=djVhR2-4tl_7IPmbIr8xYkYVo5mNXFI1jzBAHtLHoWo,1723392
-perpetual/serialize.py,sha256=FeW4JsUFVsrft9N7gz-ebn5mXvDv4LiJC2sgBEeGxYo,1957
-perpetual/types.py,sha256=idZNsDErNTur_rJ_5Co8Pb6fik-AUn9lkrXmjbQJVX0,3381
-perpetual/utils.py,sha256=2ifo-9OXaeZBevSo0HKN4uKVy5qT4LqRAchrtZa9yMM,7486
-perpetual-0.9.5.dist-info/RECORD,,