perpetual 0.7.12__cp313-none-win_amd64.whl → 0.8.0__cp313-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of perpetual might be problematic. Click here for more details.

perpetual/booster.py CHANGED
@@ -26,18 +26,20 @@ class PerpetualBooster:
26
26
  # this is useful for parameters that should be
27
27
  # attempted to be loaded in and set
28
28
  # as attributes on the booster after it is loaded.
29
- meta_data_attributes: Dict[str, BaseSerializer] = {
29
+ metadata_attributes: Dict[str, BaseSerializer] = {
30
30
  "feature_names_in_": ObjectSerializer(),
31
31
  "n_features_": ObjectSerializer(),
32
32
  "feature_importance_method": ObjectSerializer(),
33
33
  "cat_mapping": ObjectSerializer(),
34
34
  "classes_": ObjectSerializer(),
35
+ # "categorical_features": ObjectSerializer(),
35
36
  }
36
37
 
37
38
  def __init__(
38
39
  self,
39
40
  *,
40
41
  objective: str = "LogLoss",
42
+ budget: float = 0.5,
41
43
  num_threads: Optional[int] = None,
42
44
  monotone_constraints: Union[Dict[Any, int], None] = None,
43
45
  force_children_to_bound_parent: bool = False,
@@ -48,8 +50,7 @@ class PerpetualBooster:
48
50
  missing_node_treatment: str = "None",
49
51
  log_iterations: int = 0,
50
52
  feature_importance_method: str = "Gain",
51
- budget: Optional[float] = None,
52
- alpha: Optional[float] = None,
53
+ quantile: Optional[float] = None,
53
54
  reset: Optional[bool] = None,
54
55
  categorical_features: Union[Iterable[int], Iterable[str], str, None] = "auto",
55
56
  timeout: Optional[float] = None,
@@ -59,16 +60,17 @@ class PerpetualBooster:
59
60
  max_bin: int = 256,
60
61
  max_cat: int = 1000,
61
62
  ):
62
- """PerpetualBooster class, used to generate gradient boosted decision tree ensembles.
63
- The following parameters can also be specified in the fit method to override the values in the constructor:
64
- budget, alpha, reset, categorical_features, timeout, iteration_limit, memory_limit, and stopping_rounds.
63
+ """PerpetualBooster class, used to create gradient boosted decision tree ensembles.
65
64
 
66
65
  Args:
67
- objective (str, optional): Learning objective function to be used for optimization.
68
- Valid options include "LogLoss" to use logistic loss (classification),
66
+ objective (str, optional): Learning objective function to be used for optimization. Valid options are:
67
+ "LogLoss" to use logistic loss (classification),
69
68
  "SquaredLoss" to use squared error (regression),
70
69
  "QuantileLoss" to use quantile error (regression).
71
70
  Defaults to "LogLoss".
71
+ budget (float, optional): a positive number for fitting budget. Increasing this number will more
72
+ likely result in more boosting rounds and greater predictive power.
73
+ Default value is 0.5.
72
74
  num_threads (int, optional): Number of threads to be used during training.
73
75
  monotone_constraints (Dict[Any, int], optional): Constraints that are used to enforce a
74
76
  specific relationship between the training features and the target variable. A dictionary
@@ -105,10 +107,7 @@ class PerpetualBooster:
105
107
  - "AverageNodeWeight": Set the missing node to be equal to the weighted average weight of the left and the right nodes.
106
108
  log_iterations (int, optional): Setting to a value (N) other than zero will result in information being logged about every N iterations; this info can be interacted with directly with the python [`logging`](https://docs.python.org/3/howto/logging.html) module. For an example of how to utilize the logging information, see the example [here](/#logging-output).
107
109
  feature_importance_method (str, optional): The feature importance method type that will be used to calculate the `feature_importances_` attribute on the booster.
108
- budget (float, optional): a positive number for fitting budget. Increasing this number will more
109
- likely result in more boosting rounds and more increased predictive power.
110
- Default value is 1.0.
111
- alpha (float, optional): only used in quantile regression.
110
+ quantile (float, optional): only used in quantile regression.
112
111
  reset (bool, optional): whether to reset the model or continue training.
113
112
  categorical_features (Union[Iterable[int], Iterable[str], str, None], optional): The names or indices for categorical features.
114
113
  Defaults to `auto` for Polars or Pandas categorical data types.
@@ -166,6 +165,7 @@ class PerpetualBooster:
166
165
  )
167
166
 
168
167
  self.objective = objective
168
+ self.budget = budget
169
169
  self.num_threads = num_threads
170
170
  self.monotone_constraints = monotone_constraints_
171
171
  self.force_children_to_bound_parent = force_children_to_bound_parent
@@ -176,8 +176,7 @@ class PerpetualBooster:
176
176
  self.missing_node_treatment = missing_node_treatment
177
177
  self.log_iterations = log_iterations
178
178
  self.feature_importance_method = feature_importance_method
179
- self.budget = budget
180
- self.alpha = alpha
179
+ self.quantile = quantile
181
180
  self.reset = reset
182
181
  self.categorical_features = categorical_features
183
182
  self.timeout = timeout
@@ -189,6 +188,7 @@ class PerpetualBooster:
189
188
 
190
189
  booster = CratePerpetualBooster(
191
190
  objective=self.objective,
191
+ budget=self.budget,
192
192
  max_bin=self.max_bin,
193
193
  num_threads=self.num_threads,
194
194
  monotone_constraints=dict(),
@@ -199,23 +199,17 @@ class PerpetualBooster:
199
199
  terminate_missing_features=set(),
200
200
  missing_node_treatment=self.missing_node_treatment,
201
201
  log_iterations=self.log_iterations,
202
+ quantile=self.quantile,
203
+ reset=self.reset,
204
+ categorical_features=set(),
205
+ timeout=self.timeout,
206
+ iteration_limit=self.iteration_limit,
207
+ memory_limit=self.memory_limit,
208
+ stopping_rounds=self.stopping_rounds,
202
209
  )
203
210
  self.booster = cast(BoosterType, booster)
204
211
 
205
- def fit(
206
- self,
207
- X,
208
- y,
209
- sample_weight=None,
210
- budget: Optional[float] = None,
211
- alpha: Optional[float] = None,
212
- reset: Optional[bool] = None,
213
- categorical_features: Union[Iterable[int], Iterable[str], str, None] = "auto",
214
- timeout: Optional[float] = None,
215
- iteration_limit: Optional[int] = None,
216
- memory_limit: Optional[float] = None,
217
- stopping_rounds: Optional[int] = None,
218
- ) -> Self:
212
+ def fit(self, X, y, sample_weight=None) -> Self:
219
213
  """Fit the gradient booster on a provided dataset.
220
214
 
221
215
  Args:
@@ -225,26 +219,10 @@ class PerpetualBooster:
225
219
  sample_weight (Union[ArrayLike, None], optional): Instance weights to use when
226
220
  training the model. If None is passed, a weight of 1 will be used for every record.
227
221
  Defaults to None.
228
- budget (float, optional): a positive number for fitting budget. Increasing this number will more
229
- likely result in more boosting rounds and more increased predictive power.
230
- Defaults to 1.0.
231
- alpha (float, optional): only used in quantile regression.
232
- reset (bool, optional): whether to reset the model or continue training.
233
- categorical_features (Union[Iterable[int], Iterable[str], str, None], optional): The names or indices for categorical features.
234
- Defaults to `auto` for Polars or Pandas categorical data types.
235
- timeout (float, optional): optional fit timeout in seconds
236
- iteration_limit (int, optional): optional limit for the number of boosting rounds. The default value is 1000 boosting rounds.
237
- The algorithm automatically stops for most of the cases before hitting this limit.
238
- If you want to experiment with very high budget (>2.0), you can also increase this limit.
239
- memory_limit (float, optional): optional limit for memory allocation in GB. If not set, the memory will be allocated based on
240
- available memory and the algorithm requirements.
241
- stopping_rounds (int, optional): optional limit for auto stopping. Defaults to 3.
242
222
  """
243
223
 
244
224
  features_, flat_data, rows, cols, categorical_features_, cat_mapping = (
245
- convert_input_frame(
246
- X, categorical_features or self.categorical_features, self.max_cat
247
- )
225
+ convert_input_frame(X, self.categorical_features, self.max_cat)
248
226
  )
249
227
  self.n_features_ = cols
250
228
  self.cat_mapping = cat_mapping
@@ -268,6 +246,7 @@ class PerpetualBooster:
268
246
  ):
269
247
  booster = CratePerpetualBooster(
270
248
  objective=self.objective,
249
+ budget=self.budget,
271
250
  max_bin=self.max_bin,
272
251
  num_threads=self.num_threads,
273
252
  monotone_constraints=crate_mc,
@@ -278,12 +257,20 @@ class PerpetualBooster:
278
257
  terminate_missing_features=crate_tmf,
279
258
  missing_node_treatment=self.missing_node_treatment,
280
259
  log_iterations=self.log_iterations,
260
+ quantile=self.quantile,
261
+ reset=self.reset,
262
+ categorical_features=categorical_features_,
263
+ timeout=self.timeout,
264
+ iteration_limit=self.iteration_limit,
265
+ memory_limit=self.memory_limit,
266
+ stopping_rounds=self.stopping_rounds,
281
267
  )
282
268
  self.booster = cast(BoosterType, booster)
283
269
  else:
284
270
  booster = CrateMultiOutputBooster(
285
271
  n_boosters=len(classes_),
286
272
  objective=self.objective,
273
+ budget=self.budget,
287
274
  max_bin=self.max_bin,
288
275
  num_threads=self.num_threads,
289
276
  monotone_constraints=crate_mc,
@@ -294,6 +281,13 @@ class PerpetualBooster:
294
281
  terminate_missing_features=crate_tmf,
295
282
  missing_node_treatment=self.missing_node_treatment,
296
283
  log_iterations=self.log_iterations,
284
+ quantile=self.quantile,
285
+ reset=self.reset,
286
+ categorical_features=categorical_features_,
287
+ timeout=self.timeout,
288
+ iteration_limit=self.iteration_limit,
289
+ memory_limit=self.memory_limit,
290
+ stopping_rounds=self.stopping_rounds,
297
291
  )
298
292
  self.booster = cast(MultiOutputBoosterType, booster)
299
293
 
@@ -305,20 +299,97 @@ class PerpetualBooster:
305
299
  )
306
300
  self._set_metadata_attributes("classes_", self.classes_)
307
301
 
302
+ self.categorical_features = categorical_features_
303
+
308
304
  self.booster.fit(
309
305
  flat_data=flat_data,
310
306
  rows=rows,
311
307
  cols=cols,
312
308
  y=y_,
313
- budget=budget or self.budget,
314
309
  sample_weight=sample_weight_, # type: ignore
315
- alpha=alpha or self.alpha,
316
- reset=reset or self.reset,
317
- categorical_features=categorical_features_, # type: ignore
318
- timeout=timeout or self.timeout,
319
- iteration_limit=iteration_limit or self.iteration_limit,
320
- memory_limit=memory_limit or self.memory_limit,
321
- stopping_rounds=stopping_rounds or self.stopping_rounds,
310
+ )
311
+
312
+ return self
313
+
314
+ def prune(self, X, y, sample_weight=None) -> Self:
315
+ """Prune the gradient booster on a provided dataset.
316
+
317
+ Args:
318
+ X (FrameLike): Either a Polars or Pandas DataFrame, or a 2 dimensional Numpy array.
319
+ y (Union[FrameLike, ArrayLike]): Either a Polars or Pandas DataFrame or Series,
320
+ or a 1 or 2 dimensional Numpy array.
321
+ sample_weight (Union[ArrayLike, None], optional): Instance weights to use when
322
+ training the model. If None is passed, a weight of 1 will be used for every record.
323
+ Defaults to None.
324
+ """
325
+
326
+ _, flat_data, rows, cols = transform_input_frame(X, self.cat_mapping)
327
+
328
+ y_, _ = convert_input_array(y, self.objective)
329
+
330
+ if sample_weight is None:
331
+ sample_weight_ = None
332
+ else:
333
+ sample_weight_, _ = convert_input_array(sample_weight, self.objective)
334
+
335
+ self.booster.prune(
336
+ flat_data=flat_data,
337
+ rows=rows,
338
+ cols=cols,
339
+ y=y_,
340
+ sample_weight=sample_weight_, # type: ignore
341
+ )
342
+
343
+ return self
344
+
345
+ def calibrate(
346
+ self, X_train, y_train, X_cal, y_cal, alpha, sample_weight=None
347
+ ) -> Self:
348
+ """Calibrate the gradient booster on a provided dataset.
349
+
350
+ Args:
351
+ X_train (FrameLike): Either a Polars or Pandas DataFrame, or a 2 dimensional Numpy array.
352
+ y_train (Union[FrameLike, ArrayLike]): Either a Polars or Pandas DataFrame or Series,
353
+ or a 1 or 2 dimensional Numpy array.
354
+ X_cal (FrameLike): Either a Polars or Pandas DataFrame, or a 2 dimensional Numpy array.
355
+ y_cal (Union[FrameLike, ArrayLike]): Either a Polars or Pandas DataFrame or Series,
356
+ or a 1 or 2 dimensional Numpy array.
357
+ alpha (ArrayLike): Between 0 and 1, represents the uncertainty of the confidence interval.
358
+ Lower alpha values produce larger (more conservative) prediction intervals.
359
+ alpha is the complement of the target coverage level.
360
+ sample_weight (Union[ArrayLike, None], optional): Instance weights to use when
361
+ training the model. If None is passed, a weight of 1 will be used for every record.
362
+ Defaults to None.
363
+ """
364
+
365
+ _, flat_data_train, rows_train, cols_train = transform_input_frame(
366
+ X_train, self.cat_mapping
367
+ )
368
+
369
+ y_train_, _ = convert_input_array(y_train, self.objective)
370
+
371
+ _, flat_data_cal, rows_cal, cols_cal = transform_input_frame(
372
+ X_cal, self.cat_mapping
373
+ )
374
+
375
+ y_cal_, _ = convert_input_array(y_cal, self.objective)
376
+
377
+ if sample_weight is None:
378
+ sample_weight_ = None
379
+ else:
380
+ sample_weight_, _ = convert_input_array(sample_weight, self.objective)
381
+
382
+ self.booster.calibrate(
383
+ flat_data=flat_data_train,
384
+ rows=rows_train,
385
+ cols=cols_train,
386
+ y=y_train_,
387
+ flat_data_cal=flat_data_cal,
388
+ rows_cal=rows_cal,
389
+ cols_cal=cols_cal,
390
+ y_cal=y_cal_,
391
+ alpha=np.array(alpha),
392
+ sample_weight=sample_weight_, # type: ignore
322
393
  )
323
394
 
324
395
  return self
@@ -331,6 +402,29 @@ class PerpetualBooster:
331
402
  f"Columns mismatch between data {features} passed, and data {self.feature_names_in_} used at fit."
332
403
  )
333
404
 
405
+ def predict_intervals(self, X, parallel: Union[bool, None] = None) -> dict:
406
+ """Predict intervals with the fitted booster on new data.
407
+
408
+ Args:
409
+ X (FrameLike): Either a Polars or Pandas DataFrame, or a 2 dimensional Numpy array.
410
+ parallel (Union[bool, None], optional): Optionally specify if the predict
411
+ function should run in parallel on multiple threads. If `None` is
412
+ passed, the `parallel` attribute of the booster will be used.
413
+ Defaults to `None`.
414
+
415
+ Returns:
416
+ dict: Returns a dictionary of the predicted intervals.
417
+ """
418
+ features_, flat_data, rows, cols = transform_input_frame(X, self.cat_mapping)
419
+ self._validate_features(features_)
420
+
421
+ return self.booster.predict_intervals(
422
+ flat_data=flat_data,
423
+ rows=rows,
424
+ cols=cols,
425
+ parallel=parallel,
426
+ )
427
+
334
428
  def predict(self, X, parallel: Union[bool, None] = None) -> np.ndarray:
335
429
  """Predict with the fitted booster on new data.
336
430
 
@@ -706,7 +800,7 @@ class PerpetualBooster:
706
800
  warnings.simplefilter("ignore")
707
801
  c = cls(**params)
708
802
  c.booster = booster
709
- for m in c.meta_data_attributes:
803
+ for m in c.metadata_attributes:
710
804
  try:
711
805
  m_ = c._get_metadata_attributes(m)
712
806
  setattr(c, m, m_)
@@ -774,12 +868,12 @@ class PerpetualBooster:
774
868
  return v
775
869
 
776
870
  def _set_metadata_attributes(self, key: str, value: Any) -> None:
777
- value_ = self.meta_data_attributes[key].serialize(value)
871
+ value_ = self.metadata_attributes[key].serialize(value)
778
872
  self.insert_metadata(key=key, value=value_)
779
873
 
780
874
  def _get_metadata_attributes(self, key: str) -> Any:
781
875
  value = self.get_metadata(key)
782
- return self.meta_data_attributes[key].deserialize(value)
876
+ return self.metadata_attributes[key].deserialize(value)
783
877
 
784
878
  @property
785
879
  def base_score(self) -> Union[float, Iterable[float]]:
Binary file
perpetual/utils.py CHANGED
@@ -65,7 +65,9 @@ def convert_input_array(x, objective) -> np.ndarray:
65
65
 
66
66
 
67
67
  def convert_input_frame(
68
- X, categorical_features, max_cat
68
+ X,
69
+ categorical_features,
70
+ max_cat,
69
71
  ) -> Tuple[List[str], np.ndarray, int, int, Optional[Iterable[int]], Optional[Dict]]:
70
72
  """Convert data to format needed by booster.
71
73
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: perpetual
3
- Version: 0.7.12
3
+ Version: 0.8.0
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.9
@@ -10,6 +10,7 @@ Classifier: Programming Language :: Python :: 3.12
10
10
  Classifier: Programming Language :: Python :: 3.13
11
11
  Requires-Dist: numpy
12
12
  Requires-Dist: typing-extensions
13
+ Requires-Dist: black ; extra == 'dev'
13
14
  Requires-Dist: pandas ; extra == 'dev'
14
15
  Requires-Dist: polars ; extra == 'dev'
15
16
  Requires-Dist: pyarrow ; extra == 'dev'
@@ -24,7 +25,7 @@ Requires-Dist: ruff ; extra == 'dev'
24
25
  Provides-Extra: dev
25
26
  License-File: LICENSE
26
27
  License-File: LICENSE
27
- Summary: A self-generalizing gradient boosting machine which doesn't need hyperparameter optimization
28
+ Summary: A self-generalizing gradient boosting machine that doesn't need hyperparameter optimization
28
29
  Keywords: rust,perpetual,machine learning,tree model,decision tree,gradient boosted decision tree,gradient boosting machine
29
30
  Home-Page: https://perpetual-ml.com
30
31
  Author: Mutlu Simsek
@@ -49,10 +50,42 @@ Project-URL: Source Code, https://github.com/perpetual-ml/perpetual
49
50
 
50
51
  # Perpetual
51
52
 
52
- PerpetualBooster is a gradient boosting machine (GBM) algorithm which doesn't need hyperparameter optimization unlike other GBM algorithms. Similar to AutoML libraries, it has a `budget` parameter. Increasing the `budget` parameter increases the predictive power of the algorithm and gives better results on unseen data. Start with a small budget (e.g. 1.0) and increase it (e.g. 2.0) once you are confident with your features. If you don't see any improvement with further increasing the `budget`, it means that you are already extracting the most predictive power out of your data.
53
+ PerpetualBooster is a gradient boosting machine (GBM) algorithm that doesn't need hyperparameter optimization unlike other GBM algorithms. Similar to AutoML libraries, it has a `budget` parameter. Increasing the `budget` parameter increases the predictive power of the algorithm and gives better results on unseen data. Start with a small budget (e.g. 0.5) and increase it (e.g. 1.0) once you are confident with your features. If you don't see any improvement with further increasing the `budget`, it means that you are already extracting the most predictive power out of your data.
54
+
55
+ ## Usage
56
+
57
+ You can use the algorithm like in the example below. Check examples folders for both Rust and Python.
58
+
59
+ ```python
60
+ from perpetual import PerpetualBooster
61
+
62
+ model = PerpetualBooster(objective="SquaredLoss", budget=1.0)
63
+ model.fit(X, y)
64
+ ```
65
+
66
+ ## Documentation
67
+
68
+ Documentation for the Python API can be found [here](https://perpetual-ml.github.io/perpetual) and for the Rust API [here](https://docs.rs/perpetual/latest/perpetual/).
69
+
70
+ ## Usage
71
+
72
+ You can use the algorithm like in the example below. Check examples folders for both Rust and Python.
73
+
74
+ ```python
75
+ from perpetual import PerpetualBooster
76
+
77
+ model = PerpetualBooster(objective="SquaredLoss", budget=1.0)
78
+ model.fit(X, y)
79
+ ```
80
+
81
+ ## Documentation
82
+
83
+ Documentation for the Python API can be found [here](https://perpetual-ml.github.io/perpetual) and for the Rust API [here](https://docs.rs/perpetual/latest/perpetual/).
53
84
 
54
85
  ## Benchmark
55
86
 
87
+ ### PerpetualBooster vs. Optuna + LightGBM
88
+
56
89
  Hyperparameter optimization usually takes 100 iterations with plain GBM algorithms. PerpetualBooster achieves the same accuracy in a single run. Thus, it achieves up to 100x speed-up at the same accuracy with different `budget` levels and with different datasets.
57
90
 
58
91
  The following table summarizes the results for the [California Housing](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_california_housing.html) dataset (regression):
@@ -71,38 +104,51 @@ The following table summarizes the results for the [Cover Types](https://scikit-
71
104
 
72
105
  The results can be reproduced using the scripts in the [examples](./python-package/examples) folder.
73
106
 
74
- PerpetualBooster is a GBM but behaves like AutoML so it is benchmarked also against AutoGluon (v1.2, best quality preset), the current leader in [AutoML benchmark](https://automlbenchmark.streamlit.app/cd_diagram). Top 10 datasets with the most number of rows are selected from [OpenML datasets](https://www.openml.org/). The results are summarized in the following table for regression tasks:
107
+ ### PerpetualBooster vs. AutoGluon
75
108
 
76
- | OpenML Task | Perpetual Training Duration | Perpetual Inference Duration | Perpetual RMSE | AutoGluon Training Duration | AutoGluon Inference Duration | AutoGluon RMSE |
77
- | -------------------------------------------- | --------------------------- | ----------------------------------------------------------------- | -------------- | --------------------------- | ----------------------------------------------------------------- | -------------- |
78
- | [Airlines_DepDelay_10M](https://www.openml.org/t/359929) | 518 | 11.3 | 29.0 | 520 | 30.9 <td style="background-color:green;color:white;"> 28.8 </td> |
79
- | [bates_regr_100](https://www.openml.org/t/361940) | 3421 | 15.1 <td style="background-color:green;color:white;"> 1.084 </td> | OOM | OOM | OOM |
80
- | [BNG(libras_move)](https://www.openml.org/t/7327) | 1956 | 4.2 <td style="background-color:green;color:white;"> 2.51 </td> | 1922 | 97.6 | 2.53 |
81
- | [BNG(satellite_image)](https://www.openml.org/t/7326) | 334 | 1.6 | 0.731 | 337 | 10.0 <td style="background-color:green;color:white;"> 0.721 </td> |
82
- | [COMET_MC](https://www.openml.org/t/14949) | 44 | 1.0 <td style="background-color:green;color:white;"> 0.0615 </td> | 47 | 5.0 | 0.0662 |
83
- | [friedman1](https://www.openml.org/t/361939) | 275 | 4.2 <td style="background-color:green;color:white;"> 1.047 </td> | 278 | 5.1 | 1.487 |
84
- | [poker](https://www.openml.org/t/10102) | 38 | 0.6 <td style="background-color:green;color:white;"> 0.256 </td> | 41 | 1.2 | 0.722 |
85
- | [subset_higgs](https://www.openml.org/t/361955) | 868 | 10.6 <td style="background-color:green;color:white;"> 0.420 </td> | 870 | 24.5 | 0.421 |
86
- | [BNG(autoHorse)](https://www.openml.org/t/7319) | 107 | 1.1 <td style="background-color:green;color:white;"> 19.0 </td> | 107 | 3.2 | 20.5 |
87
- | [BNG(pbc)](https://www.openml.org/t/7318) | 48 | 0.6 <td style="background-color:green;color:white;"> 836.5 </td> | 51 | 0.2 | 957.1 |
88
- | average | 465 | 3.9 | - | 464 | 19.7 | - |
109
+ PerpetualBooster is a GBM but behaves like AutoML, so it is also benchmarked against AutoGluon (v1.2, best quality preset), the current leader in the [AutoML benchmark](https://automlbenchmark.streamlit.app/cd_diagram). The 10 datasets with the largest number of rows are selected from [OpenML datasets](https://www.openml.org/) for both regression and classification tasks.
89
110
 
90
- PerpetualBooster outperformed AutoGluon on 8 out of 10 datasets, training equally fast and inferring 5x faster. The results can be reproduced using the automlbenchmark fork [here](https://github.com/deadsoul44/automlbenchmark).
111
+ The results are summarized in the following table for regression tasks:
91
112
 
92
- ## Usage
113
+ | OpenML Task | Perpetual Training Duration | Perpetual Inference Duration | Perpetual RMSE | AutoGluon Training Duration | AutoGluon Inference Duration | AutoGluon RMSE |
114
+ | -------------------------------------------------------- | ----- | ----- | ------------------- | -------- | ------ | ------------------ |
115
+ | [Airlines_DepDelay_10M](https://www.openml.org/t/359929) | 518 | 11.3 | 29.0 | 520 | 30.9 | <ins> 28.8 </ins> |
116
+ | [bates_regr_100](https://www.openml.org/t/361940) | 3421 | 15.1 | <ins> 1.084 </ins> | OOM | OOM | OOM |
117
+ | [BNG(libras_move)](https://www.openml.org/t/7327) | 1956 | 4.2 | <ins> 2.51 </ins> | 1922 | 97.6 | 2.53 |
118
+ | [BNG(satellite_image)](https://www.openml.org/t/7326) | 334 | 1.6 | 0.731 | 337 | 10.0 | <ins> 0.721 </ins> |
119
+ | [COMET_MC](https://www.openml.org/t/14949) | 44 | 1.0 | <ins> 0.0615 </ins> | 47 | 5.0 | 0.0662 |
120
+ | [friedman1](https://www.openml.org/t/361939) | 275 | 4.2 | <ins> 1.047 </ins> | 278 | 5.1 | 1.487 |
121
+ | [poker](https://www.openml.org/t/10102) | 38 | 0.6 | <ins> 0.256 </ins> | 41 | 1.2 | 0.722 |
122
+ | [subset_higgs](https://www.openml.org/t/361955) | 868 | 10.6 | <ins> 0.420 </ins> | 870 | 24.5 | 0.421 |
123
+ | [BNG(autoHorse)](https://www.openml.org/t/7319) | 107 | 1.1 | <ins> 19.0 </ins> | 107 | 3.2 | 20.5 |
124
+ | [BNG(pbc)](https://www.openml.org/t/7318) | 48 | 0.6 | <ins> 836.5 </ins> | 51 | 0.2 | 957.1 |
125
+ | average | 465 | 3.9 | - | 464 | 19.7 | - |
93
126
 
94
- You can use the algorithm like in the example below. Check examples folders for both Rust and Python.
127
+ PerpetualBooster outperformed AutoGluon on 8 out of 10 regression tasks, training equally fast and inferring 5.1x faster.
95
128
 
96
- ```python
97
- from perpetual import PerpetualBooster
129
+ The results are summarized in the following table for classification tasks:
98
130
 
99
- model = PerpetualBooster(objective="SquaredLoss")
100
- model.fit(X, y, budget=1.0)
101
- ```
131
+ | OpenML Task | Perpetual Training Duration | Perpetual Inference Duration | Perpetual AUC | AutoGluon Training Duration | AutoGluon Inference Duration | AutoGluon AUC |
132
+ | -------------------------------------------------------- | ------- | ------ | ------------------- | -------- | ------ | ------------------ |
133
+ | [BNG(spambase)](https://www.openml.org/t/146163) | 70.1 | 2.1 | <ins> 0.671 </ins> | 73.1 | 3.7 | 0.669 |
134
+ | [BNG(trains)](https://www.openml.org/t/208) | 89.5 | 1.7 | <ins> 0.996 </ins> | 106.4 | 2.4 | 0.994 |
135
+ | [breast](https://www.openml.org/t/361942) | 13699.3 | 97.7 | <ins> 0.991 </ins> | 13330.7 | 79.7 | 0.949 |
136
+ | [Click_prediction_small](https://www.openml.org/t/7291) | 89.1 | 1.0 | <ins> 0.749 </ins> | 101.0 | 2.8 | 0.703 |
137
+ | [colon](https://www.openml.org/t/361938) | 12435.2 | 126.7 | <ins> 0.997 </ins> | 12356.2 | 152.3 | 0.997 |
138
+ | [Higgs](https://www.openml.org/t/362113) | 3485.3 | 40.9 | <ins> 0.843 </ins> | 3501.4 | 67.9 | 0.816 |
139
+ | [SEA(50000)](https://www.openml.org/t/230) | 21.9 | 0.2 | <ins> 0.936 </ins> | 25.6 | 0.5 | 0.935 |
140
+ | [sf-police-incidents](https://www.openml.org/t/359994) | 85.8 | 1.5 | <ins> 0.687 </ins> | 99.4 | 2.8 | 0.659 |
141
+ | [bates_classif_100](https://www.openml.org/t/361941) | 11152.8 | 50.0 | <ins> 0.864 </ins> | OOM | OOM | OOM |
142
+ | [prostate](https://www.openml.org/t/361945) | 13699.9 | 79.8 | <ins> 0.987 </ins> | OOM | OOM | OOM |
143
+ | average | 3747.0 | 34.0 | - | 3699.2 | 39.0 | - |
144
+
145
+ PerpetualBooster outperformed AutoGluon on 10 out of 10 classification tasks, training equally fast and inferring 1.1x faster.
146
+
147
+ PerpetualBooster demonstrates greater robustness compared to AutoGluon, successfully training on all 20 tasks, whereas AutoGluon encountered out-of-memory errors on 3 of those tasks.
148
+
149
+ The results can be reproduced using the automlbenchmark fork [here](https://github.com/deadsoul44/automlbenchmark).
102
150
 
103
- ## Documentation
104
151
 
105
- Documentation for the Python API can be found [here](https://perpetual-ml.github.io/perpetual) and for the Rust API [here](https://docs.rs/perpetual/latest/perpetual/).
106
152
 
107
153
  ## Installation
108
154
 
@@ -0,0 +1,12 @@
1
+ perpetual-0.8.0.dist-info/METADATA,sha256=ltP-CG0Mf7qq2jMoDAS3hhZBSPFJdbDQ3MGlFuPwczc,11199
2
+ perpetual-0.8.0.dist-info/WHEEL,sha256=iNzfSeughQ6gviCftXhu6zZQCMTOJAdqefPsfmeKgU8,95
3
+ perpetual-0.8.0.dist-info/license_files/LICENSE,sha256=gcuuhKKc5-dwvyvHsXjlC9oM6N5gZ6umYbC8ewW1Yvg,35821
4
+ perpetual-0.8.0.dist-info/license_files/LICENSE,sha256=gcuuhKKc5-dwvyvHsXjlC9oM6N5gZ6umYbC8ewW1Yvg,35821
5
+ perpetual/booster.py,sha256=HB0y3UNFEc0mL9FdmitFdZbPwUxrCN2-fqnCrN4XNrU,49886
6
+ perpetual/data.py,sha256=HiDsv2i1p9cLkXe8vnekxfpafyuxfWXwXrucdIir3xk,614
7
+ perpetual/serialize.py,sha256=FeW4JsUFVsrft9N7gz-ebn5mXvDv4LiJC2sgBEeGxYo,1957
8
+ perpetual/types.py,sha256=idZNsDErNTur_rJ_5Co8Pb6fik-AUn9lkrXmjbQJVX0,3381
9
+ perpetual/utils.py,sha256=nqwO6GFHi7I5iltuvgLT3NFaPm1h9cHlnomjFcdSfHY,7455
10
+ perpetual/__init__.py,sha256=V0RhghaG0CuKxKrzYUBYqrf7Drb-gjmznsbz9KT12lk,122
11
+ perpetual/perpetual.cp313-win_amd64.pyd,sha256=xxO3kFunCPmB-hBUqKys39EwmqcsNF2JfYDXzlrfjQw,1666560
12
+ perpetual-0.8.0.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- perpetual-0.7.12.dist-info/METADATA,sha256=Sfq0haXk0OttukvSaG6ARYPQipfeQ7ZIO_m7iaqkvys,10014
2
- perpetual-0.7.12.dist-info/WHEEL,sha256=iNzfSeughQ6gviCftXhu6zZQCMTOJAdqefPsfmeKgU8,95
3
- perpetual-0.7.12.dist-info/license_files/LICENSE,sha256=gcuuhKKc5-dwvyvHsXjlC9oM6N5gZ6umYbC8ewW1Yvg,35821
4
- perpetual-0.7.12.dist-info/license_files/LICENSE,sha256=gcuuhKKc5-dwvyvHsXjlC9oM6N5gZ6umYbC8ewW1Yvg,35821
5
- perpetual/booster.py,sha256=ICWJRuSxoaUgRHo9N8hodz1MlyRBVKPhVnfQJOes968,46919
6
- perpetual/data.py,sha256=HiDsv2i1p9cLkXe8vnekxfpafyuxfWXwXrucdIir3xk,614
7
- perpetual/serialize.py,sha256=FeW4JsUFVsrft9N7gz-ebn5mXvDv4LiJC2sgBEeGxYo,1957
8
- perpetual/types.py,sha256=idZNsDErNTur_rJ_5Co8Pb6fik-AUn9lkrXmjbQJVX0,3381
9
- perpetual/utils.py,sha256=i_7EB5xQXAGtODONhrOwfxRfH3YR7U0cQJvL8eUNFK8,7444
10
- perpetual/__init__.py,sha256=V0RhghaG0CuKxKrzYUBYqrf7Drb-gjmznsbz9KT12lk,122
11
- perpetual/perpetual.cp313-win_amd64.pyd,sha256=wB4UC94u1mcKNicec_h62WjjJJwp1WOerPjMAcKyELY,1509376
12
- perpetual-0.7.12.dist-info/RECORD,,