perpetual-0.7.12-cp313-none-win_amd64.whl → perpetual-0.8.1-cp313-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of perpetual might be problematic; review the changes below for details.

perpetual/booster.py CHANGED
@@ -26,7 +26,7 @@ class PerpetualBooster:
26
26
  # this is useful for parameters that should be
27
27
  # attempted to be loaded in and set
28
28
  # as attributes on the booster after it is loaded.
29
- meta_data_attributes: Dict[str, BaseSerializer] = {
29
+ metadata_attributes: Dict[str, BaseSerializer] = {
30
30
  "feature_names_in_": ObjectSerializer(),
31
31
  "n_features_": ObjectSerializer(),
32
32
  "feature_importance_method": ObjectSerializer(),
@@ -38,6 +38,7 @@ class PerpetualBooster:
38
38
  self,
39
39
  *,
40
40
  objective: str = "LogLoss",
41
+ budget: float = 0.5,
41
42
  num_threads: Optional[int] = None,
42
43
  monotone_constraints: Union[Dict[Any, int], None] = None,
43
44
  force_children_to_bound_parent: bool = False,
@@ -48,8 +49,7 @@ class PerpetualBooster:
48
49
  missing_node_treatment: str = "None",
49
50
  log_iterations: int = 0,
50
51
  feature_importance_method: str = "Gain",
51
- budget: Optional[float] = None,
52
- alpha: Optional[float] = None,
52
+ quantile: Optional[float] = None,
53
53
  reset: Optional[bool] = None,
54
54
  categorical_features: Union[Iterable[int], Iterable[str], str, None] = "auto",
55
55
  timeout: Optional[float] = None,
@@ -59,16 +59,17 @@ class PerpetualBooster:
59
59
  max_bin: int = 256,
60
60
  max_cat: int = 1000,
61
61
  ):
62
- """PerpetualBooster class, used to generate gradient boosted decision tree ensembles.
63
- The following parameters can also be specified in the fit method to override the values in the constructor:
64
- budget, alpha, reset, categorical_features, timeout, iteration_limit, memory_limit, and stopping_rounds.
62
+ """PerpetualBooster class, used to create gradient boosted decision tree ensembles.
65
63
 
66
64
  Args:
67
- objective (str, optional): Learning objective function to be used for optimization.
68
- Valid options include "LogLoss" to use logistic loss (classification),
65
+ objective (str, optional): Learning objective function to be used for optimization. Valid options are:
66
+ "LogLoss" to use logistic loss (classification),
69
67
  "SquaredLoss" to use squared error (regression),
70
68
  "QuantileLoss" to use quantile error (regression).
71
69
  Defaults to "LogLoss".
70
+ budget (float, optional): a positive number for fitting budget. Increasing this number will more
71
+ likely result in more boosting rounds and increased predictive power.
72
+ Default value is 0.5.
72
73
  num_threads (int, optional): Number of threads to be used during training.
73
74
  monotone_constraints (Dict[Any, int], optional): Constraints that are used to enforce a
74
75
  specific relationship between the training features and the target variable. A dictionary
@@ -105,10 +106,7 @@ class PerpetualBooster:
105
106
  - "AverageNodeWeight": Set the missing node to be equal to the weighted average weight of the left and the right nodes.
106
107
  log_iterations (int, optional): Setting to a value (N) other than zero will result in information being logged about every N iterations, info can be interacted with directly with the python [`logging`](https://docs.python.org/3/howto/logging.html) module. For an example of how to utilize the logging information see the example [here](/#logging-output).
107
108
  feature_importance_method (str, optional): The feature importance method type that will be used to calculate the `feature_importances_` attribute on the booster.
108
- budget (float, optional): a positive number for fitting budget. Increasing this number will more
109
- likely result in more boosting rounds and more increased predictive power.
110
- Default value is 1.0.
111
- alpha (float, optional): only used in quantile regression.
109
+ quantile (float, optional): only used in quantile regression.
112
110
  reset (bool, optional): whether to reset the model or continue training.
113
111
  categorical_features (Union[Iterable[int], Iterable[str], str, None], optional): The names or indices for categorical features.
114
112
  Defaults to `auto` for Polars or Pandas categorical data types.
@@ -166,6 +164,7 @@ class PerpetualBooster:
166
164
  )
167
165
 
168
166
  self.objective = objective
167
+ self.budget = budget
169
168
  self.num_threads = num_threads
170
169
  self.monotone_constraints = monotone_constraints_
171
170
  self.force_children_to_bound_parent = force_children_to_bound_parent
@@ -176,8 +175,7 @@ class PerpetualBooster:
176
175
  self.missing_node_treatment = missing_node_treatment
177
176
  self.log_iterations = log_iterations
178
177
  self.feature_importance_method = feature_importance_method
179
- self.budget = budget
180
- self.alpha = alpha
178
+ self.quantile = quantile
181
179
  self.reset = reset
182
180
  self.categorical_features = categorical_features
183
181
  self.timeout = timeout
@@ -189,6 +187,7 @@ class PerpetualBooster:
189
187
 
190
188
  booster = CratePerpetualBooster(
191
189
  objective=self.objective,
190
+ budget=self.budget,
192
191
  max_bin=self.max_bin,
193
192
  num_threads=self.num_threads,
194
193
  monotone_constraints=dict(),
@@ -199,23 +198,17 @@ class PerpetualBooster:
199
198
  terminate_missing_features=set(),
200
199
  missing_node_treatment=self.missing_node_treatment,
201
200
  log_iterations=self.log_iterations,
201
+ quantile=self.quantile,
202
+ reset=self.reset,
203
+ categorical_features=set(),
204
+ timeout=self.timeout,
205
+ iteration_limit=self.iteration_limit,
206
+ memory_limit=self.memory_limit,
207
+ stopping_rounds=self.stopping_rounds,
202
208
  )
203
209
  self.booster = cast(BoosterType, booster)
204
210
 
205
- def fit(
206
- self,
207
- X,
208
- y,
209
- sample_weight=None,
210
- budget: Optional[float] = None,
211
- alpha: Optional[float] = None,
212
- reset: Optional[bool] = None,
213
- categorical_features: Union[Iterable[int], Iterable[str], str, None] = "auto",
214
- timeout: Optional[float] = None,
215
- iteration_limit: Optional[int] = None,
216
- memory_limit: Optional[float] = None,
217
- stopping_rounds: Optional[int] = None,
218
- ) -> Self:
211
+ def fit(self, X, y, sample_weight=None) -> Self:
219
212
  """Fit the gradient booster on a provided dataset.
220
213
 
221
214
  Args:
@@ -225,26 +218,10 @@ class PerpetualBooster:
225
218
  sample_weight (Union[ArrayLike, None], optional): Instance weights to use when
226
219
  training the model. If None is passed, a weight of 1 will be used for every record.
227
220
  Defaults to None.
228
- budget (float, optional): a positive number for fitting budget. Increasing this number will more
229
- likely result in more boosting rounds and more increased predictive power.
230
- Defaults to 1.0.
231
- alpha (float, optional): only used in quantile regression.
232
- reset (bool, optional): whether to reset the model or continue training.
233
- categorical_features (Union[Iterable[int], Iterable[str], str, None], optional): The names or indices for categorical features.
234
- Defaults to `auto` for Polars or Pandas categorical data types.
235
- timeout (float, optional): optional fit timeout in seconds
236
- iteration_limit (int, optional): optional limit for the number of boosting rounds. The default value is 1000 boosting rounds.
237
- The algorithm automatically stops for most of the cases before hitting this limit.
238
- If you want to experiment with very high budget (>2.0), you can also increase this limit.
239
- memory_limit (float, optional): optional limit for memory allocation in GB. If not set, the memory will be allocated based on
240
- available memory and the algorithm requirements.
241
- stopping_rounds (int, optional): optional limit for auto stopping. Defaults to 3.
242
221
  """
243
222
 
244
223
  features_, flat_data, rows, cols, categorical_features_, cat_mapping = (
245
- convert_input_frame(
246
- X, categorical_features or self.categorical_features, self.max_cat
247
- )
224
+ convert_input_frame(X, self.categorical_features, self.max_cat)
248
225
  )
249
226
  self.n_features_ = cols
250
227
  self.cat_mapping = cat_mapping
@@ -268,6 +245,7 @@ class PerpetualBooster:
268
245
  ):
269
246
  booster = CratePerpetualBooster(
270
247
  objective=self.objective,
248
+ budget=self.budget,
271
249
  max_bin=self.max_bin,
272
250
  num_threads=self.num_threads,
273
251
  monotone_constraints=crate_mc,
@@ -278,12 +256,20 @@ class PerpetualBooster:
278
256
  terminate_missing_features=crate_tmf,
279
257
  missing_node_treatment=self.missing_node_treatment,
280
258
  log_iterations=self.log_iterations,
259
+ quantile=self.quantile,
260
+ reset=self.reset,
261
+ categorical_features=categorical_features_,
262
+ timeout=self.timeout,
263
+ iteration_limit=self.iteration_limit,
264
+ memory_limit=self.memory_limit,
265
+ stopping_rounds=self.stopping_rounds,
281
266
  )
282
267
  self.booster = cast(BoosterType, booster)
283
268
  else:
284
269
  booster = CrateMultiOutputBooster(
285
270
  n_boosters=len(classes_),
286
271
  objective=self.objective,
272
+ budget=self.budget,
287
273
  max_bin=self.max_bin,
288
274
  num_threads=self.num_threads,
289
275
  monotone_constraints=crate_mc,
@@ -294,6 +280,13 @@ class PerpetualBooster:
294
280
  terminate_missing_features=crate_tmf,
295
281
  missing_node_treatment=self.missing_node_treatment,
296
282
  log_iterations=self.log_iterations,
283
+ quantile=self.quantile,
284
+ reset=self.reset,
285
+ categorical_features=categorical_features_,
286
+ timeout=self.timeout,
287
+ iteration_limit=self.iteration_limit,
288
+ memory_limit=self.memory_limit,
289
+ stopping_rounds=self.stopping_rounds,
297
290
  )
298
291
  self.booster = cast(MultiOutputBoosterType, booster)
299
292
 
@@ -305,20 +298,97 @@ class PerpetualBooster:
305
298
  )
306
299
  self._set_metadata_attributes("classes_", self.classes_)
307
300
 
301
+ self.categorical_features = categorical_features_
302
+
308
303
  self.booster.fit(
309
304
  flat_data=flat_data,
310
305
  rows=rows,
311
306
  cols=cols,
312
307
  y=y_,
313
- budget=budget or self.budget,
314
308
  sample_weight=sample_weight_, # type: ignore
315
- alpha=alpha or self.alpha,
316
- reset=reset or self.reset,
317
- categorical_features=categorical_features_, # type: ignore
318
- timeout=timeout or self.timeout,
319
- iteration_limit=iteration_limit or self.iteration_limit,
320
- memory_limit=memory_limit or self.memory_limit,
321
- stopping_rounds=stopping_rounds or self.stopping_rounds,
309
+ )
310
+
311
+ return self
312
+
313
+ def prune(self, X, y, sample_weight=None) -> Self:
314
+ """Prune the gradient booster on a provided dataset.
315
+
316
+ Args:
317
+ X (FrameLike): Either a Polars or Pandas DataFrame, or a 2 dimensional Numpy array.
318
+ y (Union[FrameLike, ArrayLike]): Either a Polars or Pandas DataFrame or Series,
319
+ or a 1 or 2 dimensional Numpy array.
320
+ sample_weight (Union[ArrayLike, None], optional): Instance weights to use when
321
+ training the model. If None is passed, a weight of 1 will be used for every record.
322
+ Defaults to None.
323
+ """
324
+
325
+ _, flat_data, rows, cols = transform_input_frame(X, self.cat_mapping)
326
+
327
+ y_, _ = convert_input_array(y, self.objective)
328
+
329
+ if sample_weight is None:
330
+ sample_weight_ = None
331
+ else:
332
+ sample_weight_, _ = convert_input_array(sample_weight, self.objective)
333
+
334
+ self.booster.prune(
335
+ flat_data=flat_data,
336
+ rows=rows,
337
+ cols=cols,
338
+ y=y_,
339
+ sample_weight=sample_weight_, # type: ignore
340
+ )
341
+
342
+ return self
343
+
344
+ def calibrate(
345
+ self, X_train, y_train, X_cal, y_cal, alpha, sample_weight=None
346
+ ) -> Self:
347
+ """Calibrate the gradient booster on a provided dataset.
348
+
349
+ Args:
350
+ X_train (FrameLike): Either a Polars or Pandas DataFrame, or a 2 dimensional Numpy array.
351
+ y_train (Union[FrameLike, ArrayLike]): Either a Polars or Pandas DataFrame or Series,
352
+ or a 1 or 2 dimensional Numpy array.
353
+ X_cal (FrameLike): Either a Polars or Pandas DataFrame, or a 2 dimensional Numpy array.
354
+ y_cal (Union[FrameLike, ArrayLike]): Either a Polars or Pandas DataFrame or Series,
355
+ or a 1 or 2 dimensional Numpy array.
356
+ alpha (ArrayLike): Between 0 and 1, represents the uncertainty of the confidence interval.
357
+ Lower alpha produce larger (more conservative) prediction intervals.
358
+ alpha is the complement of the target coverage level.
359
+ sample_weight (Union[ArrayLike, None], optional): Instance weights to use when
360
+ training the model. If None is passed, a weight of 1 will be used for every record.
361
+ Defaults to None.
362
+ """
363
+
364
+ _, flat_data_train, rows_train, cols_train = transform_input_frame(
365
+ X_train, self.cat_mapping
366
+ )
367
+
368
+ y_train_, _ = convert_input_array(y_train, self.objective)
369
+
370
+ _, flat_data_cal, rows_cal, cols_cal = transform_input_frame(
371
+ X_cal, self.cat_mapping
372
+ )
373
+
374
+ y_cal_, _ = convert_input_array(y_cal, self.objective)
375
+
376
+ if sample_weight is None:
377
+ sample_weight_ = None
378
+ else:
379
+ sample_weight_, _ = convert_input_array(sample_weight, self.objective)
380
+
381
+ self.booster.calibrate(
382
+ flat_data=flat_data_train,
383
+ rows=rows_train,
384
+ cols=cols_train,
385
+ y=y_train_,
386
+ flat_data_cal=flat_data_cal,
387
+ rows_cal=rows_cal,
388
+ cols_cal=cols_cal,
389
+ y_cal=y_cal_,
390
+ alpha=np.array(alpha),
391
+ sample_weight=sample_weight_, # type: ignore
322
392
  )
323
393
 
324
394
  return self
@@ -331,6 +401,29 @@ class PerpetualBooster:
331
401
  f"Columns mismatch between data {features} passed, and data {self.feature_names_in_} used at fit."
332
402
  )
333
403
 
404
+ def predict_intervals(self, X, parallel: Union[bool, None] = None) -> dict:
405
+ """Predict intervals with the fitted booster on new data.
406
+
407
+ Args:
408
+ X (FrameLike): Either a Polars or Pandas DataFrame, or a 2 dimensional Numpy array.
409
+ parallel (Union[bool, None], optional): Optionally specify if the predict
410
+ function should run in parallel on multiple threads. If `None` is
411
+ passed, the `parallel` attribute of the booster will be used.
412
+ Defaults to `None`.
413
+
414
+ Returns:
415
+ np.ndarray: Returns a numpy array of the predictions.
416
+ """
417
+ features_, flat_data, rows, cols = transform_input_frame(X, self.cat_mapping)
418
+ self._validate_features(features_)
419
+
420
+ return self.booster.predict_intervals(
421
+ flat_data=flat_data,
422
+ rows=rows,
423
+ cols=cols,
424
+ parallel=parallel,
425
+ )
426
+
334
427
  def predict(self, X, parallel: Union[bool, None] = None) -> np.ndarray:
335
428
  """Predict with the fitted booster on new data.
336
429
 
@@ -699,14 +792,17 @@ class PerpetualBooster:
699
792
  Returns:
700
793
  PerpetualBooster: An initialized booster object.
701
794
  """
702
- booster = CratePerpetualBooster.load_booster(str(path))
795
+ try:
796
+ booster = CratePerpetualBooster.load_booster(str(path))
797
+ except ValueError:
798
+ booster = CrateMultiOutputBooster.load_booster(str(path))
703
799
 
704
800
  params = booster.get_params()
705
801
  with warnings.catch_warnings():
706
802
  warnings.simplefilter("ignore")
707
803
  c = cls(**params)
708
804
  c.booster = booster
709
- for m in c.meta_data_attributes:
805
+ for m in c.metadata_attributes:
710
806
  try:
711
807
  m_ = c._get_metadata_attributes(m)
712
808
  setattr(c, m, m_)
@@ -774,12 +870,12 @@ class PerpetualBooster:
774
870
  return v
775
871
 
776
872
  def _set_metadata_attributes(self, key: str, value: Any) -> None:
777
- value_ = self.meta_data_attributes[key].serialize(value)
873
+ value_ = self.metadata_attributes[key].serialize(value)
778
874
  self.insert_metadata(key=key, value=value_)
779
875
 
780
876
  def _get_metadata_attributes(self, key: str) -> Any:
781
877
  value = self.get_metadata(key)
782
- return self.meta_data_attributes[key].deserialize(value)
878
+ return self.metadata_attributes[key].deserialize(value)
783
879
 
784
880
  @property
785
881
  def base_score(self) -> Union[float, Iterable[float]]:
@@ -810,7 +906,12 @@ class PerpetualBooster:
810
906
 
811
907
  def __setstate__(self, d: Dict[Any, Any]) -> None:
812
908
  # Load the booster object the pickled JSon string.
813
- booster_object = CratePerpetualBooster.from_json(d["__booster_json_file__"])
909
+ try:
910
+ booster_object = CratePerpetualBooster.from_json(d["__booster_json_file__"])
911
+ except ValueError:
912
+ booster_object = CrateMultiOutputBooster.from_json(
913
+ d["__booster_json_file__"]
914
+ )
814
915
  d["booster"] = booster_object
815
916
  # Are there any new parameters, that need to be added to the python object,
816
917
  # that would have been loaded in as defaults on the json object?
Binary file
perpetual/utils.py CHANGED
@@ -65,7 +65,9 @@ def convert_input_array(x, objective) -> np.ndarray:
65
65
 
66
66
 
67
67
  def convert_input_frame(
68
- X, categorical_features, max_cat
68
+ X,
69
+ categorical_features,
70
+ max_cat,
69
71
  ) -> Tuple[List[str], np.ndarray, int, int, Optional[Iterable[int]], Optional[Dict]]:
70
72
  """Convert data to format needed by booster.
71
73
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: perpetual
3
- Version: 0.7.12
3
+ Version: 0.8.1
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.9
@@ -10,6 +10,7 @@ Classifier: Programming Language :: Python :: 3.12
10
10
  Classifier: Programming Language :: Python :: 3.13
11
11
  Requires-Dist: numpy
12
12
  Requires-Dist: typing-extensions
13
+ Requires-Dist: black ; extra == 'dev'
13
14
  Requires-Dist: pandas ; extra == 'dev'
14
15
  Requires-Dist: polars ; extra == 'dev'
15
16
  Requires-Dist: pyarrow ; extra == 'dev'
@@ -24,7 +25,7 @@ Requires-Dist: ruff ; extra == 'dev'
24
25
  Provides-Extra: dev
25
26
  License-File: LICENSE
26
27
  License-File: LICENSE
27
- Summary: A self-generalizing gradient boosting machine which doesn't need hyperparameter optimization
28
+ Summary: A self-generalizing gradient boosting machine that doesn't need hyperparameter optimization
28
29
  Keywords: rust,perpetual,machine learning,tree model,decision tree,gradient boosted decision tree,gradient boosting machine
29
30
  Home-Page: https://perpetual-ml.com
30
31
  Author: Mutlu Simsek
@@ -49,10 +50,28 @@ Project-URL: Source Code, https://github.com/perpetual-ml/perpetual
49
50
 
50
51
  # Perpetual
51
52
 
52
- PerpetualBooster is a gradient boosting machine (GBM) algorithm which doesn't need hyperparameter optimization unlike other GBM algorithms. Similar to AutoML libraries, it has a `budget` parameter. Increasing the `budget` parameter increases the predictive power of the algorithm and gives better results on unseen data. Start with a small budget (e.g. 1.0) and increase it (e.g. 2.0) once you are confident with your features. If you don't see any improvement with further increasing the `budget`, it means that you are already extracting the most predictive power out of your data.
53
+ PerpetualBooster is a gradient boosting machine (GBM) algorithm that doesn't need hyperparameter optimization unlike other GBM algorithms. Similar to AutoML libraries, it has a `budget` parameter. Increasing the `budget` parameter increases the predictive power of the algorithm and gives better results on unseen data. Start with a small budget (e.g. 0.5) and increase it (e.g. 1.0) once you are confident with your features. If you don't see any improvement with further increasing the `budget`, it means that you are already extracting the most predictive power out of your data.
54
+
55
+ ## Usage
56
+
57
+ You can use the algorithm like in the example below. Check examples folders for both Rust and Python.
58
+
59
+ ```python
60
+ from perpetual import PerpetualBooster
61
+
62
+ model = PerpetualBooster(objective="SquaredLoss", budget=0.5)
63
+ model.fit(X, y)
64
+ ```
65
+
66
+ ## Documentation
67
+
68
+ Documentation for the Python API can be found [here](https://perpetual-ml.github.io/perpetual) and for the Rust API [here](https://docs.rs/perpetual/latest/perpetual/).
69
+
53
70
 
54
71
  ## Benchmark
55
72
 
73
+ ### PerpetualBooster vs. Optuna + LightGBM
74
+
56
75
  Hyperparameter optimization usually takes 100 iterations with plain GBM algorithms. PerpetualBooster achieves the same accuracy in a single run. Thus, it achieves up to 100x speed-up at the same accuracy with different `budget` levels and with different datasets.
57
76
 
58
77
  The following table summarizes the results for the [California Housing](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_california_housing.html) dataset (regression):
@@ -71,38 +90,51 @@ The following table summarizes the results for the [Cover Types](https://scikit-
71
90
 
72
91
  The results can be reproduced using the scripts in the [examples](./python-package/examples) folder.
73
92
 
74
- PerpetualBooster is a GBM but behaves like AutoML so it is benchmarked also against AutoGluon (v1.2, best quality preset), the current leader in [AutoML benchmark](https://automlbenchmark.streamlit.app/cd_diagram). Top 10 datasets with the most number of rows are selected from [OpenML datasets](https://www.openml.org/). The results are summarized in the following table for regression tasks:
93
+ ### PerpetualBooster vs. AutoGluon
75
94
 
76
- | OpenML Task | Perpetual Training Duration | Perpetual Inference Duration | Perpetual RMSE | AutoGluon Training Duration | AutoGluon Inference Duration | AutoGluon RMSE |
77
- | -------------------------------------------- | --------------------------- | ----------------------------------------------------------------- | -------------- | --------------------------- | ----------------------------------------------------------------- | -------------- |
78
- | [Airlines_DepDelay_10M](https://www.openml.org/t/359929) | 518 | 11.3 | 29.0 | 520 | 30.9 <td style="background-color:green;color:white;"> 28.8 </td> |
79
- | [bates_regr_100](https://www.openml.org/t/361940) | 3421 | 15.1 <td style="background-color:green;color:white;"> 1.084 </td> | OOM | OOM | OOM |
80
- | [BNG(libras_move)](https://www.openml.org/t/7327) | 1956 | 4.2 <td style="background-color:green;color:white;"> 2.51 </td> | 1922 | 97.6 | 2.53 |
81
- | [BNG(satellite_image)](https://www.openml.org/t/7326) | 334 | 1.6 | 0.731 | 337 | 10.0 <td style="background-color:green;color:white;"> 0.721 </td> |
82
- | [COMET_MC](https://www.openml.org/t/14949) | 44 | 1.0 <td style="background-color:green;color:white;"> 0.0615 </td> | 47 | 5.0 | 0.0662 |
83
- | [friedman1](https://www.openml.org/t/361939) | 275 | 4.2 <td style="background-color:green;color:white;"> 1.047 </td> | 278 | 5.1 | 1.487 |
84
- | [poker](https://www.openml.org/t/10102) | 38 | 0.6 <td style="background-color:green;color:white;"> 0.256 </td> | 41 | 1.2 | 0.722 |
85
- | [subset_higgs](https://www.openml.org/t/361955) | 868 | 10.6 <td style="background-color:green;color:white;"> 0.420 </td> | 870 | 24.5 | 0.421 |
86
- | [BNG(autoHorse)](https://www.openml.org/t/7319) | 107 | 1.1 <td style="background-color:green;color:white;"> 19.0 </td> | 107 | 3.2 | 20.5 |
87
- | [BNG(pbc)](https://www.openml.org/t/7318) | 48 | 0.6 <td style="background-color:green;color:white;"> 836.5 </td> | 51 | 0.2 | 957.1 |
88
- | average | 465 | 3.9 | - | 464 | 19.7 | - |
95
+ PerpetualBooster is a GBM but behaves like AutoML so it is benchmarked also against AutoGluon (v1.2, best quality preset), the current leader in [AutoML benchmark](https://automlbenchmark.streamlit.app/cd_diagram). Top 10 datasets with the most number of rows are selected from [OpenML datasets](https://www.openml.org/) for both regression and classification tasks.
89
96
 
90
- PerpetualBooster outperformed AutoGluon on 8 out of 10 datasets, training equally fast and inferring 5x faster. The results can be reproduced using the automlbenchmark fork [here](https://github.com/deadsoul44/automlbenchmark).
97
+ The results are summarized in the following table for regression tasks:
91
98
 
92
- ## Usage
99
+ | OpenML Task | Perpetual Training Duration | Perpetual Inference Duration | Perpetual RMSE | AutoGluon Training Duration | AutoGluon Inference Duration | AutoGluon RMSE |
100
+ | -------------------------------------------------------- | ----- | ----- | ------------------- | -------- | ------ | ------------------ |
101
+ | [Airlines_DepDelay_10M](https://www.openml.org/t/359929) | 518 | 11.3 | 29.0 | 520 | 30.9 | <ins> 28.8 </ins> |
102
+ | [bates_regr_100](https://www.openml.org/t/361940) | 3421 | 15.1 | <ins> 1.084 </ins> | OOM | OOM | OOM |
103
+ | [BNG(libras_move)](https://www.openml.org/t/7327) | 1956 | 4.2 | <ins> 2.51 </ins> | 1922 | 97.6 | 2.53 |
104
+ | [BNG(satellite_image)](https://www.openml.org/t/7326) | 334 | 1.6 | 0.731 | 337 | 10.0 | <ins> 0.721 </ins> |
105
+ | [COMET_MC](https://www.openml.org/t/14949) | 44 | 1.0 | <ins> 0.0615 </ins> | 47 | 5.0 | 0.0662 |
106
+ | [friedman1](https://www.openml.org/t/361939) | 275 | 4.2 | <ins> 1.047 </ins> | 278 | 5.1 | 1.487 |
107
+ | [poker](https://www.openml.org/t/10102) | 38 | 0.6 | <ins> 0.256 </ins> | 41 | 1.2 | 0.722 |
108
+ | [subset_higgs](https://www.openml.org/t/361955) | 868 | 10.6 | <ins> 0.420 </ins> | 870 | 24.5 | 0.421 |
109
+ | [BNG(autoHorse)](https://www.openml.org/t/7319) | 107 | 1.1 | <ins> 19.0 </ins> | 107 | 3.2 | 20.5 |
110
+ | [BNG(pbc)](https://www.openml.org/t/7318) | 48 | 0.6 | <ins> 836.5 </ins> | 51 | 0.2 | 957.1 |
111
+ | average | 465 | 3.9 | - | 464 | 19.7 | - |
93
112
 
94
- You can use the algorithm like in the example below. Check examples folders for both Rust and Python.
113
+ PerpetualBooster outperformed AutoGluon on 8 out of 10 regression tasks, training equally fast and inferring 5.1x faster.
95
114
 
96
- ```python
97
- from perpetual import PerpetualBooster
115
+ The results are summarized in the following table for classification tasks:
98
116
 
99
- model = PerpetualBooster(objective="SquaredLoss")
100
- model.fit(X, y, budget=1.0)
101
- ```
117
+ | OpenML Task | Perpetual Training Duration | Perpetual Inference Duration | Perpetual AUC | AutoGluon Training Duration | AutoGluon Inference Duration | AutoGluon AUC |
118
+ | -------------------------------------------------------- | ------- | ------ | ------------------- | -------- | ------ | ------------------ |
119
+ | [BNG(spambase)](https://www.openml.org/t/146163) | 70.1 | 2.1 | <ins> 0.671 </ins> | 73.1 | 3.7 | 0.669 |
120
+ | [BNG(trains)](https://www.openml.org/t/208) | 89.5 | 1.7 | <ins> 0.996 </ins> | 106.4 | 2.4 | 0.994 |
121
+ | [breast](https://www.openml.org/t/361942) | 13699.3 | 97.7 | <ins> 0.991 </ins> | 13330.7 | 79.7 | 0.949 |
122
+ | [Click_prediction_small](https://www.openml.org/t/7291) | 89.1 | 1.0 | <ins> 0.749 </ins> | 101.0 | 2.8 | 0.703 |
123
+ | [colon](https://www.openml.org/t/361938) | 12435.2 | 126.7 | <ins> 0.997 </ins> | 12356.2 | 152.3 | 0.997 |
124
+ | [Higgs](https://www.openml.org/t/362113) | 3485.3 | 40.9 | <ins> 0.843 </ins> | 3501.4 | 67.9 | 0.816 |
125
+ | [SEA(50000)](https://www.openml.org/t/230) | 21.9 | 0.2 | <ins> 0.936 </ins> | 25.6 | 0.5 | 0.935 |
126
+ | [sf-police-incidents](https://www.openml.org/t/359994) | 85.8 | 1.5 | <ins> 0.687 </ins> | 99.4 | 2.8 | 0.659 |
127
+ | [bates_classif_100](https://www.openml.org/t/361941) | 11152.8 | 50.0 | <ins> 0.864 </ins> | OOM | OOM | OOM |
128
+ | [prostate](https://www.openml.org/t/361945) | 13699.9 | 79.8 | <ins> 0.987 </ins> | OOM | OOM | OOM |
129
+ | average | 3747.0 | 34.0 | - | 3699.2 | 39.0 | - |
130
+
131
+ PerpetualBooster outperformed AutoGluon on 10 out of 10 classification tasks, training equally fast and inferring 1.1x faster.
132
+
133
+ PerpetualBooster demonstrates greater robustness compared to AutoGluon, successfully training on all 20 tasks, whereas AutoGluon encountered out-of-memory errors on 3 of those tasks.
134
+
135
+ The results can be reproduced using the automlbenchmark fork [here](https://github.com/deadsoul44/automlbenchmark).
102
136
 
103
- ## Documentation
104
137
 
105
- Documentation for the Python API can be found [here](https://perpetual-ml.github.io/perpetual) and for the Rust API [here](https://docs.rs/perpetual/latest/perpetual/).
106
138
 
107
139
  ## Installation
108
140
 
@@ -0,0 +1,12 @@
1
+ perpetual-0.8.1.dist-info/METADATA,sha256=UNcSudsW5App4W9EnFAgjxFrhbHwfbomtz7jwGIVi5s,10752
2
+ perpetual-0.8.1.dist-info/WHEEL,sha256=iNzfSeughQ6gviCftXhu6zZQCMTOJAdqefPsfmeKgU8,95
3
+ perpetual-0.8.1.dist-info/license_files/LICENSE,sha256=gcuuhKKc5-dwvyvHsXjlC9oM6N5gZ6umYbC8ewW1Yvg,35821
4
+ perpetual-0.8.1.dist-info/license_files/LICENSE,sha256=gcuuhKKc5-dwvyvHsXjlC9oM6N5gZ6umYbC8ewW1Yvg,35821
5
+ perpetual/booster.py,sha256=ne-RgsYIjzQAYiTtI1PJ_IpolnS-C89trIFCdoiZoH4,50118
6
+ perpetual/data.py,sha256=HiDsv2i1p9cLkXe8vnekxfpafyuxfWXwXrucdIir3xk,614
7
+ perpetual/serialize.py,sha256=FeW4JsUFVsrft9N7gz-ebn5mXvDv4LiJC2sgBEeGxYo,1957
8
+ perpetual/types.py,sha256=idZNsDErNTur_rJ_5Co8Pb6fik-AUn9lkrXmjbQJVX0,3381
9
+ perpetual/utils.py,sha256=nqwO6GFHi7I5iltuvgLT3NFaPm1h9cHlnomjFcdSfHY,7455
10
+ perpetual/__init__.py,sha256=V0RhghaG0CuKxKrzYUBYqrf7Drb-gjmznsbz9KT12lk,122
11
+ perpetual/perpetual.cp313-win_amd64.pyd,sha256=lrWyuEZiMH8l75Tprn383NMkPDGh7q18LRIlKYzzE14,1661952
12
+ perpetual-0.8.1.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- perpetual-0.7.12.dist-info/METADATA,sha256=Sfq0haXk0OttukvSaG6ARYPQipfeQ7ZIO_m7iaqkvys,10014
2
- perpetual-0.7.12.dist-info/WHEEL,sha256=iNzfSeughQ6gviCftXhu6zZQCMTOJAdqefPsfmeKgU8,95
3
- perpetual-0.7.12.dist-info/license_files/LICENSE,sha256=gcuuhKKc5-dwvyvHsXjlC9oM6N5gZ6umYbC8ewW1Yvg,35821
4
- perpetual-0.7.12.dist-info/license_files/LICENSE,sha256=gcuuhKKc5-dwvyvHsXjlC9oM6N5gZ6umYbC8ewW1Yvg,35821
5
- perpetual/booster.py,sha256=ICWJRuSxoaUgRHo9N8hodz1MlyRBVKPhVnfQJOes968,46919
6
- perpetual/data.py,sha256=HiDsv2i1p9cLkXe8vnekxfpafyuxfWXwXrucdIir3xk,614
7
- perpetual/serialize.py,sha256=FeW4JsUFVsrft9N7gz-ebn5mXvDv4LiJC2sgBEeGxYo,1957
8
- perpetual/types.py,sha256=idZNsDErNTur_rJ_5Co8Pb6fik-AUn9lkrXmjbQJVX0,3381
9
- perpetual/utils.py,sha256=i_7EB5xQXAGtODONhrOwfxRfH3YR7U0cQJvL8eUNFK8,7444
10
- perpetual/__init__.py,sha256=V0RhghaG0CuKxKrzYUBYqrf7Drb-gjmznsbz9KT12lk,122
11
- perpetual/perpetual.cp313-win_amd64.pyd,sha256=wB4UC94u1mcKNicec_h62WjjJJwp1WOerPjMAcKyELY,1509376
12
- perpetual-0.7.12.dist-info/RECORD,,