perpetual 0.9.1__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of perpetual might be problematic.

perpetual/data.py ADDED
@@ -0,0 +1,25 @@
+ from dataclasses import dataclass
+ from typing import Iterable, Optional, Union
+
+
+ @dataclass
+ class Node:
+     """Dataclass representation of a node; it holds all of the fields present in a tree node."""
+
+     num: int
+     weight_value: float
+     hessian_sum: float
+     depth: int
+     split_value: float
+     split_feature: Union[str, int]
+     split_gain: float
+     missing_node: int
+     left_child: int
+     right_child: int
+     is_leaf: bool
+     node_type: str
+     parent_node: int
+     generalization: Optional[float]
+     left_cats: Optional[Iterable]
+     right_cats: Optional[Iterable]
+     count: int
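
As a quick orientation to the fields above, here is an editor's sketch (not from the package) that builds a `Node` by hand. Every value is illustrative, including the `node_type` string, which is just an assumed label:

```python
from perpetual.data import Node

# Purely illustrative values; field semantics follow the dataclass above.
leaf = Node(
    num=3,                # node id within the tree
    weight_value=0.127,   # leaf prediction value
    hessian_sum=42.0,
    depth=2,
    split_value=0.0,      # unused for a leaf
    split_feature=0,
    split_gain=0.0,
    missing_node=0,
    left_child=0,
    right_child=0,
    is_leaf=True,
    node_type="Leaf",     # assumed label; the package may use a different string
    parent_node=1,
    generalization=None,
    left_cats=None,
    right_cats=None,
    count=512,            # assumed: number of training rows reaching this node
)
print(leaf.is_leaf, leaf.weight_value)
```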
perpetual/perpetual.cp312-win_amd64.pyd ADDED
Binary file
perpetual/serialize.py ADDED
@@ -0,0 +1,74 @@
+ from __future__ import annotations
+
+ import json
+ from abc import ABC, abstractmethod
+ from ast import literal_eval
+ from dataclasses import dataclass
+ from typing import Dict, Generic, List, Tuple, TypeVar, Union
+
+ import numpy as np
+ import numpy.typing as npt
+
+ T = TypeVar("T")
+
+
+ class BaseSerializer(ABC, Generic[T]):
+     @abstractmethod
+     def serialize(self, obj: T) -> str:
+         """serialize method - should take an object and return a string"""
+
+     @abstractmethod
+     def deserialize(self, obj_repr: str) -> T:
+         """deserialize method - should take a string and return the original object"""
+
+
+ Scaler = Union[int, float, str]
+
+
+ class ScalerSerializer(BaseSerializer[Scaler]):
+     def serialize(self, obj: Scaler) -> str:
+         if isinstance(obj, str):
+             obj_ = f"'{obj}'"
+         else:
+             obj_ = str(obj)
+         return obj_
+
+     def deserialize(self, obj_repr: str) -> Scaler:
+         return literal_eval(node_or_string=obj_repr)
+
+
+ ObjectItem = Union[
+     List[Scaler],
+     Dict[str, Scaler],
+     Scaler,
+ ]
+
+
+ class ObjectSerializer(BaseSerializer[ObjectItem]):
+     def serialize(self, obj: ObjectItem) -> str:
+         return json.dumps(obj)
+
+     def deserialize(self, obj_repr: str) -> ObjectItem:
+         return json.loads(obj_repr)
+
+
+ @dataclass
+ class NumpyData:
+     array: Union[List[float], List[int]]
+     dtype: str
+     shape: Tuple[int, ...]
+
+
+ class NumpySerializer(BaseSerializer[npt.NDArray]):
+     def serialize(self, obj: npt.NDArray) -> str:
+         return json.dumps(
+             {"array": obj.tolist(), "dtype": str(obj.dtype), "shape": obj.shape}
+         )
+
+     def deserialize(self, obj_repr: str) -> npt.NDArray:
+         data = NumpyData(**json.loads(obj_repr))
+         a = np.array(data.array, dtype=data.dtype)  # type: ignore
+         if len(data.shape) == 1:
+             return a
+         else:
+             return a.reshape(data.shape)
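
A minimal round-trip sketch of the serializers above (editor's example, not package documentation):

```python
import numpy as np

from perpetual.serialize import NumpySerializer, ObjectSerializer, ScalerSerializer

# Round-trip a 2-D array through its JSON string representation.
arr = np.arange(6, dtype="float64").reshape(2, 3)
ns = NumpySerializer()
restored = ns.deserialize(ns.serialize(arr))
assert (restored == arr).all() and restored.shape == (2, 3)

# Scalars keep their type because deserialize uses ast.literal_eval;
# strings are quoted on the way out so they survive the round trip.
ss = ScalerSerializer()
assert ss.deserialize(ss.serialize(1.5)) == 1.5
assert ss.deserialize(ss.serialize("a")) == "a"

# Plain JSON-compatible containers go through ObjectSerializer.
os_ = ObjectSerializer()
assert os_.deserialize(os_.serialize({"k": [1, 2]})) == {"k": [1, 2]}
```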
perpetual/types.py ADDED
@@ -0,0 +1,150 @@
+ import numpy as np
+ from typing_extensions import Self
+ from typing import Any, Dict, Iterable, Protocol, Set
+
+
+ class BoosterType(Protocol):
+     monotone_constraints: Dict[int, int]
+     terminate_missing_features: Set[int]
+     number_of_trees: int
+     base_score: float
+
+     def fit(
+         self,
+         flat_data: np.ndarray,
+         rows: int,
+         cols: int,
+         y: np.ndarray,
+         budget: float,
+         sample_weight: np.ndarray,
+         parallel: bool = False,
+     ):
+         """Fit method"""
+
+     def predict(
+         self,
+         flat_data: np.ndarray,
+         rows: int,
+         cols: int,
+         parallel: bool = True,
+     ) -> np.ndarray:
+         """predict method"""
+
+     def predict_proba(
+         self,
+         flat_data: np.ndarray,
+         rows: int,
+         cols: int,
+         parallel: bool = True,
+     ) -> np.ndarray:
+         """predict probabilities method"""
+
+     def predict_contributions(
+         self,
+         flat_data: np.ndarray,
+         rows: int,
+         cols: int,
+         method: str,
+         parallel: bool = True,
+     ) -> np.ndarray:
+         """predict contributions method"""
+
+     def value_partial_dependence(
+         self,
+         feature: int,
+         value: float,
+     ) -> float:
+         """pass"""
+
+     def calculate_feature_importance(
+         self,
+         method: str,
+         normalize: bool,
+     ) -> Dict[int, float]:
+         """pass"""
+
+     def text_dump(self) -> Iterable[str]:
+         """pass"""
+
+     @classmethod
+     def load_booster(cls, path: str) -> Self:
+         """pass"""
+
+     def save_booster(self, path: str):
+         """pass"""
+
+     @classmethod
+     def from_json(cls, json_str: str) -> Self:
+         """pass"""
+
+     def json_dump(self) -> str:
+         """pass"""
+
+     def get_params(self) -> Dict[str, Any]:
+         """pass"""
+
+     def insert_metadata(self, key: str, value: str) -> None:
+         """pass"""
+
+     def get_metadata(self, key: str) -> str:
+         """pass"""
+
+
+ class MultiOutputBoosterType(Protocol):
+     monotone_constraints: Dict[int, int]
+     terminate_missing_features: Set[int]
+     number_of_trees: Iterable[int]
+     base_score: Iterable[float]
+
+     def fit(
+         self,
+         flat_data: np.ndarray,
+         rows: int,
+         cols: int,
+         y: np.ndarray,
+         budget: float,
+         sample_weight: np.ndarray,
+         parallel: bool = False,
+     ):
+         """Fit method"""
+
+     def predict(
+         self,
+         flat_data: np.ndarray,
+         rows: int,
+         cols: int,
+         parallel: bool = True,
+     ) -> np.ndarray:
+         """predict method"""
+
+     def predict_proba(
+         self,
+         flat_data: np.ndarray,
+         rows: int,
+         cols: int,
+         parallel: bool = True,
+     ) -> np.ndarray:
+         """predict probabilities method"""
+
+     @classmethod
+     def load_booster(cls, path: str) -> Self:
+         """pass"""
+
+     def save_booster(self, path: str):
+         """pass"""
+
+     @classmethod
+     def from_json(cls, json_str: str) -> Self:
+         """pass"""
+
+     def json_dump(self) -> str:
+         """pass"""
+
+     def get_params(self) -> Dict[str, Any]:
+         """pass"""
+
+     def insert_metadata(self, key: str, value: str) -> None:
+         """pass"""
+
+     def get_metadata(self, key: str) -> str:
+         """pass"""
perpetual/utils.py ADDED
@@ -0,0 +1,217 @@
+ import logging
+ import numpy as np
+ from typing import Dict, Iterable, List, Optional, Tuple
+
+
+ logger = logging.getLogger(__name__)
+
+
+ def type_df(df):
+     library_name = type(df).__module__.split(".")[0]
+     if type(df).__name__ == "DataFrame":
+         if library_name == "pandas":
+             return "pandas_df"
+         elif library_name == "polars":
+             return "polars_df"
+     elif library_name == "numpy":
+         return "numpy"
+     else:
+         return ""
+
+
+ def type_series(y):
+     library_name = type(y).__module__.split(".")[0]
+     if type(y).__name__ == "Series":
+         if library_name == "pandas":
+             return "pandas_series"
+         elif library_name == "polars":
+             return "polars_series"
+     elif library_name == "numpy":
+         return "numpy"
+     else:
+         return ""
+
+
+ def convert_input_array(x, objective) -> Tuple[np.ndarray, Iterable]:
+     classes_ = []
+
+     if type(x).__module__.split(".")[0] == "numpy":
+         if len(x.shape) == 2:
+             classes_, x_, *_ = convert_input_frame(x, None, 1000)
+         else:
+             x_ = x
+     elif type_series(x) == "pandas_series":
+         x_ = x.to_numpy()
+     elif type_series(x) == "polars_series":
+         x_ = x.to_numpy(allow_copy=False)
+     elif type_df(x) == "polars_df" or type_df(x) == "pandas_df":
+         classes_, x_, *_ = convert_input_frame(x, None, 1000)
+     else:
+         x_ = x.to_numpy()
+
+     if objective == "LogLoss" and len(x_.shape) == 1:
+         classes_ = np.unique(x_)
+         x_index = np.array([np.where(classes_ == i) for i in x_])
+         if len(classes_) > 2:
+             x_ = np.squeeze(np.eye(len(classes_))[x_index])
+
+     if not np.issubdtype(x_.dtype, "float64"):
+         x_ = x_.astype(dtype="float64", copy=False)
+
+     if len(x_.shape) == 2:
+         x_ = x_.ravel(order="F")
+
+     return x_, classes_
+
+
+ def convert_input_frame(
+     X,
+     categorical_features,
+     max_cat,
+ ) -> Tuple[List[str], np.ndarray, int, int, Optional[Iterable[int]], Optional[Dict]]:
+     """Convert data to the format needed by the booster.
+
+     Returns:
+         Tuple[List[str], np.ndarray, int, int, Optional[Iterable[int]], Optional[Dict]]:
+             Column names, the flat data, the number of rows, the number of
+             columns, the categorical feature indices, and the category mapping.
+     """
+     categorical_features_ = None
+     if type_df(X) == "pandas_df":
+         X_ = X.to_numpy()
+         features_ = X.columns.to_list()
+         if categorical_features == "auto":
+             categorical_columns = X.select_dtypes(include=["category"]).columns.tolist()
+             categorical_features_ = [
+                 features_.index(c) for c in categorical_columns
+             ] or None
+     elif type_df(X) == "polars_df":
+         import polars.selectors as cs
+
+         try:
+             X_ = X.to_numpy(allow_copy=False)
+         except RuntimeError:
+             X_ = X.to_numpy(allow_copy=True)
+
+         features_ = X.columns
+         if categorical_features == "auto":
+             categorical_columns = X.select(cs.categorical()).columns
+             categorical_features_ = [
+                 features_.index(c) for c in categorical_columns
+             ] or None
+     else:
+         # Assume it's a numpy array.
+         X_ = X
+         features_ = list(map(str, range(X_.shape[1])))
+
+     if (
+         categorical_features
+         and all(isinstance(s, int) for s in categorical_features)
+         and isinstance(categorical_features, list)
+     ):
+         categorical_features_ = categorical_features
+     elif (
+         categorical_features
+         and all(isinstance(s, str) for s in categorical_features)
+         and isinstance(categorical_features, list)
+     ):
+         categorical_features_ = [features_.index(c) for c in categorical_features]
+
+     cat_mapping = {}  # key: feature_name, value: ordered category names
+     cat_to_num = []
+     if categorical_features_:
+         for i in categorical_features_:
+             categories, inversed = np.unique(X_[:, i].astype(str), return_inverse=True)
+
+             categories = list(categories)
+             if "nan" in categories:
+                 categories.remove("nan")
+                 categories.insert(0, "nan")
+
+             inversed = inversed + 1.0
+
+             if len(categories) > max_cat:
+                 cat_to_num.append(i)
+                 logger.warning(
+                     f"Feature {features_[i]} will be treated as numerical since the number of categories ({len(categories)}) exceeds the max_cat ({max_cat}) threshold."
+                 )
+
+             feature_name = features_[i]
+             cat_mapping[feature_name] = categories
+             ind_nan = len(categories)
+             inversed[inversed == ind_nan] = np.nan
+             X_[:, i] = inversed
+
+         categorical_features_ = [
+             x for x in categorical_features_ if x not in cat_to_num
+         ]
+
+     logger.info(f"Categorical features: {categorical_features_}")
+     logger.info(f"Mapping of categories: {cat_mapping}")
+
+     if not np.issubdtype(X_.dtype, "float64"):
+         X_ = X_.astype(dtype="float64", copy=False)
+     flat_data = X_.ravel(order="F")
+     rows, cols = X_.shape
+
+     if isinstance(categorical_features_, list):
+         categorical_features_ = set(categorical_features_)
+
+     return features_, flat_data, rows, cols, categorical_features_, cat_mapping
+
+
+ def transform_input_frame(X, cat_mapping) -> Tuple[List[str], np.ndarray, int, int]:
+     """Convert data to the format needed by the booster.
+
+     Returns:
+         Tuple[List[str], np.ndarray, int, int]: Column names, the flat data,
+             the number of rows, and the number of columns.
+     """
+     if type_df(X) == "pandas_df":
+         X_ = X.to_numpy()
+         features_ = X.columns.to_list()
+     elif type_df(X) == "polars_df":
+         try:
+             X_ = X.to_numpy(allow_copy=False)
+         except RuntimeError:
+             X_ = X.to_numpy(allow_copy=True)
+         features_ = X.columns
+     else:
+         # Assume it's a numpy array.
+         X_ = X
+         features_ = list(map(str, range(X_.shape[1])))
+
+     if cat_mapping:
+         for feature_name, categories in cat_mapping.items():
+             feature_index = features_.index(feature_name)
+             cats = categories.copy()
+             cats.remove("nan")
+             x_enc = np.searchsorted(cats, X_[:, feature_index].astype(str))
+             x_enc = x_enc + 1.0
+             ind_nan = len(categories)
+             x_enc[x_enc == ind_nan] = np.nan
+             X_[:, feature_index] = x_enc
+
+     if not np.issubdtype(X_.dtype, "float64"):
+         X_ = X_.astype(dtype="float64", copy=False)
+     flat_data = X_.ravel(order="F")
+     rows, cols = X_.shape
+
+     return features_, flat_data, rows, cols
+
+
+ CONTRIBUTION_METHODS = {
+     "weight": "Weight",
+     "Weight": "Weight",
+     "average": "Average",
+     "Average": "Average",
+     "branch-difference": "BranchDifference",
+     "branchdifference": "BranchDifference",
+     "BranchDifference": "BranchDifference",
+     "midpoint-difference": "MidpointDifference",
+     "midpointdifference": "MidpointDifference",
+     "MidpointDifference": "MidpointDifference",
+     "mode-difference": "ModeDifference",
+     "modedifference": "ModeDifference",
+     "ModeDifference": "ModeDifference",
+     "ProbabilityChange": "ProbabilityChange",
+     "probabilitychange": "ProbabilityChange",
+     "probability-change": "ProbabilityChange",
+ }
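
An editor's sketch of `convert_input_frame` on a small pandas frame with one categorical column that contains a missing value (assuming pandas is installed; the annotated outputs are what the code above produces for this input). Note how categories are encoded starting at 1, with `'nan'` reserved at position 0 of the mapping:

```python
import pandas as pd

from perpetual.utils import CONTRIBUTION_METHODS, convert_input_frame

df = pd.DataFrame(
    {
        "age": [31.0, 47.0, 25.0],
        "city": pd.Categorical(["a", "b", None]),  # one missing value
    }
)

features, flat, rows, cols, cat_idx, cat_map = convert_input_frame(
    df, categorical_features="auto", max_cat=100
)
print(features)    # ['age', 'city']
print(rows, cols)  # 3 2
print(cat_idx)     # {1} - 'city' is detected via its pandas 'category' dtype
print(cat_map)     # {'city': ['nan', 'a', 'b']}
print(flat)        # [31. 47. 25.  1.  2. nan] - column-major, categories encoded

# CONTRIBUTION_METHODS maps user-facing aliases to canonical method names.
assert CONTRIBUTION_METHODS["midpoint-difference"] == "MidpointDifference"
```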
perpetual-0.9.1.dist-info/METADATA ADDED
@@ -0,0 +1,165 @@
+ Metadata-Version: 2.4
+ Name: perpetual
+ Version: 0.9.1
+ Classifier: Programming Language :: Rust
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Requires-Dist: numpy
+ Requires-Dist: typing-extensions
+ Requires-Dist: black ; extra == 'dev'
+ Requires-Dist: pandas ; extra == 'dev'
+ Requires-Dist: polars ; extra == 'dev'
+ Requires-Dist: pyarrow ; extra == 'dev'
+ Requires-Dist: maturin ; extra == 'dev'
+ Requires-Dist: pytest ; extra == 'dev'
+ Requires-Dist: seaborn ; extra == 'dev'
+ Requires-Dist: scikit-learn ; extra == 'dev'
+ Requires-Dist: mkdocs-material ; extra == 'dev'
+ Requires-Dist: mkdocstrings[python] ; extra == 'dev'
+ Requires-Dist: mkdocs-autorefs ; extra == 'dev'
+ Requires-Dist: ruff ; extra == 'dev'
+ Provides-Extra: dev
+ License-File: LICENSE
+ Summary: A self-generalizing gradient boosting machine that doesn't need hyperparameter optimization
+ Keywords: rust,perpetual,machine learning,tree model,decision tree,gradient boosted decision tree,gradient boosting machine
+ Home-Page: https://perpetual-ml.com
+ Author: Mutlu Simsek
+ Author-email: Mutlu Simsek <msimsek@perpetual-ml.com>
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
+ Project-URL: Source Code, https://github.com/perpetual-ml/perpetual
+
+ <p align="center">
+   <img height="120" src="https://github.com/perpetual-ml/perpetual/raw/main/resources/perp_logo.png">
+ </p>
+
+ <div align="center">
+
+ [![Python Versions](https://img.shields.io/pypi/pyversions/perpetual.svg?logo=python&logoColor=white)](https://pypi.org/project/perpetual)
+ [![PyPI Version](https://img.shields.io/pypi/v/perpetual.svg?logo=pypi&logoColor=white)](https://pypi.org/project/perpetual)
+ [![Crates.io Version](https://img.shields.io/crates/v/perpetual?logo=rust&logoColor=white)](https://crates.io/crates/perpetual)
+ [![Static Badge](https://img.shields.io/badge/join-discord-blue?logo=discord)](https://discord.gg/AyUK7rr6wy)
+ ![PyPI - Downloads](https://img.shields.io/pypi/dm/perpetual)
+
+ </div>
+
+ # Perpetual
+
+ Unlike other GBM algorithms, PerpetualBooster is a gradient boosting machine (GBM) that doesn't need hyperparameter optimization. Similar to AutoML libraries, it has a `budget` parameter: increasing the `budget` increases the predictive power of the algorithm and gives better results on unseen data. Start with a small budget (e.g. 0.5) and increase it (e.g. to 1.0) once you are confident in your features. If increasing the `budget` further yields no improvement, you are already extracting the most predictive power out of your data.
+
+ ## Usage
+
+ You can use the algorithm as in the example below. Check the examples folders for both Rust and Python.
+
+ ```python
+ from perpetual import PerpetualBooster
+
+ model = PerpetualBooster(objective="SquaredLoss", budget=0.5)
+ model.fit(X, y)
+ ```
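
As an editor's end-to-end sketch of the snippet above with synthetic data (`predict` matches the booster protocol in `perpetual/types.py`; the exact wrapper signature may differ, and the data here is made up):

```python
import numpy as np

from perpetual import PerpetualBooster

# Synthetic regression data, for illustration only.
rng = np.random.default_rng(seed=0)
X = rng.normal(size=(1_000, 5))
y = 2.0 * X[:, 0] + np.sin(X[:, 1]) + rng.normal(scale=0.1, size=1_000)

model = PerpetualBooster(objective="SquaredLoss", budget=0.5)
model.fit(X, y)

preds = model.predict(X)  # assumed to accept the same input types as fit
print(preds[:5])
```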
+
+ ## Documentation
+
+ Documentation for the Python API can be found [here](https://perpetual-ml.github.io/perpetual) and for the Rust API [here](https://docs.rs/perpetual/latest/perpetual/).
+
+ ## Benchmark
+
+ ### PerpetualBooster vs. Optuna + LightGBM
+
+ Hyperparameter optimization usually takes around 100 iterations with plain GBM algorithms, while PerpetualBooster reaches the same accuracy in a single run. It therefore achieves up to a 100x speed-up at the same accuracy, across different `budget` levels and datasets.
+
+ The following table summarizes the results for the [California Housing](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_california_housing.html) dataset (regression):
+
+ | Perpetual budget | LightGBM n_estimators | Perpetual mse | LightGBM mse | Speed-up (wall time) | Speed-up (CPU time) |
+ | ---------------- | --------------------- | ------------- | ------------ | -------------------- | ------------------- |
+ | 1.0 | 100 | 0.192 | 0.192 | 54x | 56x |
+ | 1.5 | 300 | 0.188 | 0.188 | 59x | 58x |
+ | 2.1 | 1000 | 0.185 | 0.186 | 42x | 41x |
+
+ The following table summarizes the results for the [Cover Types](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_covtype.html) dataset (classification):
+
+ | Perpetual budget | LightGBM n_estimators | Perpetual log loss | LightGBM log loss | Speed-up (wall time) | Speed-up (CPU time) |
+ | ---------------- | --------------------- | ------------------ | ----------------- | -------------------- | ------------------- |
+ | 0.9 | 100 | 0.091 | 0.084 | 72x | 78x |
+
+ The results can be reproduced using the scripts in the [examples](./python-package/examples) folder.
+
+ ### PerpetualBooster vs. AutoGluon
+
+ PerpetualBooster is a GBM but behaves like AutoML, so it is also benchmarked against AutoGluon (v1.2, best quality preset), the current leader in the [AutoML benchmark](https://automlbenchmark.streamlit.app/cd_diagram). For both regression and classification tasks, the 10 [OpenML datasets](https://www.openml.org/) with the largest number of rows were selected.
+
+ The results are summarized in the following table for regression tasks:
+
+ | OpenML Task | Perpetual Training Duration | Perpetual Inference Duration | Perpetual RMSE | AutoGluon Training Duration | AutoGluon Inference Duration | AutoGluon RMSE |
+ | -------------------------------------------------------- | ----- | ----- | ------------------- | -------- | ------ | ------------------ |
+ | [Airlines_DepDelay_10M](https://www.openml.org/t/359929) | 518 | 11.3 | 29.0 | 520 | 30.9 | <ins> 28.8 </ins> |
+ | [bates_regr_100](https://www.openml.org/t/361940) | 3421 | 15.1 | <ins> 1.084 </ins> | OOM | OOM | OOM |
+ | [BNG(libras_move)](https://www.openml.org/t/7327) | 1956 | 4.2 | <ins> 2.51 </ins> | 1922 | 97.6 | 2.53 |
+ | [BNG(satellite_image)](https://www.openml.org/t/7326) | 334 | 1.6 | 0.731 | 337 | 10.0 | <ins> 0.721 </ins> |
+ | [COMET_MC](https://www.openml.org/t/14949) | 44 | 1.0 | <ins> 0.0615 </ins> | 47 | 5.0 | 0.0662 |
+ | [friedman1](https://www.openml.org/t/361939) | 275 | 4.2 | <ins> 1.047 </ins> | 278 | 5.1 | 1.487 |
+ | [poker](https://www.openml.org/t/10102) | 38 | 0.6 | <ins> 0.256 </ins> | 41 | 1.2 | 0.722 |
+ | [subset_higgs](https://www.openml.org/t/361955) | 868 | 10.6 | <ins> 0.420 </ins> | 870 | 24.5 | 0.421 |
+ | [BNG(autoHorse)](https://www.openml.org/t/7319) | 107 | 1.1 | <ins> 19.0 </ins> | 107 | 3.2 | 20.5 |
+ | [BNG(pbc)](https://www.openml.org/t/7318) | 48 | 0.6 | <ins> 836.5 </ins> | 51 | 0.2 | 957.1 |
+ | average | 465 | 3.9 | - | 464 | 19.7 | - |
+
+ PerpetualBooster outperformed AutoGluon on 8 out of 10 regression tasks, training equally fast and inferring 5.1x faster.
+
+ The results are summarized in the following table for classification tasks:
+
+ | OpenML Task | Perpetual Training Duration | Perpetual Inference Duration | Perpetual AUC | AutoGluon Training Duration | AutoGluon Inference Duration | AutoGluon AUC |
+ | -------------------------------------------------------- | ------- | ------ | ------------------- | -------- | ------ | ------------------ |
+ | [BNG(spambase)](https://www.openml.org/t/146163) | 70.1 | 2.1 | <ins> 0.671 </ins> | 73.1 | 3.7 | 0.669 |
+ | [BNG(trains)](https://www.openml.org/t/208) | 89.5 | 1.7 | <ins> 0.996 </ins> | 106.4 | 2.4 | 0.994 |
+ | [breast](https://www.openml.org/t/361942) | 13699.3 | 97.7 | <ins> 0.991 </ins> | 13330.7 | 79.7 | 0.949 |
+ | [Click_prediction_small](https://www.openml.org/t/7291) | 89.1 | 1.0 | <ins> 0.749 </ins> | 101.0 | 2.8 | 0.703 |
+ | [colon](https://www.openml.org/t/361938) | 12435.2 | 126.7 | <ins> 0.997 </ins> | 12356.2 | 152.3 | 0.997 |
+ | [Higgs](https://www.openml.org/t/362113) | 3485.3 | 40.9 | <ins> 0.843 </ins> | 3501.4 | 67.9 | 0.816 |
+ | [SEA(50000)](https://www.openml.org/t/230) | 21.9 | 0.2 | <ins> 0.936 </ins> | 25.6 | 0.5 | 0.935 |
+ | [sf-police-incidents](https://www.openml.org/t/359994) | 85.8 | 1.5 | <ins> 0.687 </ins> | 99.4 | 2.8 | 0.659 |
+ | [bates_classif_100](https://www.openml.org/t/361941) | 11152.8 | 50.0 | <ins> 0.864 </ins> | OOM | OOM | OOM |
+ | [prostate](https://www.openml.org/t/361945) | 13699.9 | 79.8 | <ins> 0.987 </ins> | OOM | OOM | OOM |
+ | average | 3747.0 | 34.0 | - | 3699.2 | 39.0 | - |
+
+ PerpetualBooster outperformed AutoGluon on 10 out of 10 classification tasks, training equally fast and inferring 1.1x faster.
+
+ PerpetualBooster also demonstrates greater robustness than AutoGluon: it trained successfully on all 20 tasks, whereas AutoGluon ran out of memory on 3 of them.
+
+ The results can be reproduced using the automlbenchmark fork [here](https://github.com/deadsoul44/automlbenchmark).
+
+ ## Installation
+
+ The package can be installed directly from [PyPI](https://pypi.org/project/perpetual):
+
+ ```shell
+ pip install perpetual
+ ```
+
+ Using [conda-forge](https://anaconda.org/conda-forge/perpetual):
+
+ ```shell
+ conda install conda-forge::perpetual
+ ```
+
+ To use it in a Rust project, get the package from [crates.io](https://crates.io/crates/perpetual):
+
+ ```shell
+ cargo add perpetual
+ ```
+
+ ## Contribution
+
+ Contributions are welcome. Check CONTRIBUTING.md for the guidelines.
+
+ ## Paper
+
+ PerpetualBooster prevents overfitting with a generalization algorithm. A paper explaining how the algorithm works is in progress. Check our [blog post](https://perpetual-ml.com/blog/how-perpetual-works) for a high-level introduction to the algorithm.
+
perpetual-0.9.1.dist-info/RECORD ADDED
@@ -0,0 +1,11 @@
+ perpetual-0.9.1.dist-info/METADATA,sha256=CVBQY3JrR_yM4_gNV4rmUmwlC2TLdZRUZZ5DR0uQV3g,10722
+ perpetual-0.9.1.dist-info/WHEEL,sha256=jABKVkLC9kJr8mi_er5jOqpiQUjARSLXDUIIxDqsS50,96
+ perpetual-0.9.1.dist-info/licenses/LICENSE,sha256=gcuuhKKc5-dwvyvHsXjlC9oM6N5gZ6umYbC8ewW1Yvg,35821
+ perpetual/booster.py,sha256=QvKEz-tZmhrRwh39quy1WNbeBF7IWtitquW_8Q8LkII,50750
+ perpetual/data.py,sha256=vhjWEc_ESYWoaczz0GkUPtfS0iRSKdVZSrCkQn8yLPw,630
+ perpetual/serialize.py,sha256=FeW4JsUFVsrft9N7gz-ebn5mXvDv4LiJC2sgBEeGxYo,1957
+ perpetual/types.py,sha256=idZNsDErNTur_rJ_5Co8Pb6fik-AUn9lkrXmjbQJVX0,3381
+ perpetual/utils.py,sha256=nqwO6GFHi7I5iltuvgLT3NFaPm1h9cHlnomjFcdSfHY,7455
+ perpetual/__init__.py,sha256=V0RhghaG0CuKxKrzYUBYqrf7Drb-gjmznsbz9KT12lk,122
+ perpetual/perpetual.cp312-win_amd64.pyd,sha256=JWLPSAvcfqllZWRMU6q3ltpy2jBr5Ch2QB3WqFy9RPw,1694720
+ perpetual-0.9.1.dist-info/RECORD,,
perpetual-0.9.1.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: maturin (1.8.3)
+ Root-Is-Purelib: false
+ Tag: cp312-cp312-win_amd64