perpetual 0.7.11__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
perpetual/data.py ADDED
@@ -0,0 +1,24 @@
+ from dataclasses import dataclass
+ from typing import Iterable, Optional, Union
+
+
+ @dataclass
+ class Node:
+     """Dataclass representation of a tree node, covering all of the fields present in a node."""
+
+     num: int
+     weight_value: float
+     hessian_sum: float
+     depth: int
+     split_value: float
+     split_feature: Union[str, int]
+     split_gain: float
+     missing_node: int
+     left_child: int
+     right_child: int
+     is_leaf: bool
+     node_type: str
+     parent_node: int
+     generalization: Optional[float]
+     left_cats: Optional[Iterable]
+     right_cats: Optional[Iterable]
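A minimal sketch of constructing a `Node` by hand; the field values here are purely illustrative (real nodes come from a trained booster's tree dump, and the exact `node_type` strings are not documented in this file):

```python
from perpetual.data import Node

# Hypothetical values for illustration only.
node = Node(
    num=0,
    weight_value=0.12,
    hessian_sum=104.5,
    depth=1,
    split_value=3.5,
    split_feature="age",
    split_gain=0.8,
    missing_node=1,
    left_child=1,
    right_child=2,
    is_leaf=False,
    node_type="split",
    parent_node=0,
    generalization=None,
    left_cats=None,
    right_cats=None,
)
```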
perpetual/serialize.py ADDED
@@ -0,0 +1,74 @@
+ from __future__ import annotations
+
+ import json
+ from abc import ABC, abstractmethod
+ from ast import literal_eval
+ from dataclasses import dataclass
+ from typing import Dict, Generic, List, Tuple, TypeVar, Union
+
+ import numpy as np
+ import numpy.typing as npt
+
+ T = TypeVar("T")
+
+
+ class BaseSerializer(ABC, Generic[T]):
+     @abstractmethod
+     def serialize(self, obj: T) -> str:
+         """serialize method - should take an object and return a string"""
+
+     @abstractmethod
+     def deserialize(self, obj_repr: str) -> T:
+         """deserialize method - should take a string and return the original object"""
+
+
+ Scaler = Union[int, float, str]
+
+
+ class ScalerSerializer(BaseSerializer[Scaler]):
+     def serialize(self, obj: Scaler) -> str:
+         if isinstance(obj, str):
+             obj_ = f"'{obj}'"
+         else:
+             obj_ = str(obj)
+         return obj_
+
+     def deserialize(self, obj_repr: str) -> Scaler:
+         return literal_eval(node_or_string=obj_repr)
+
+
+ ObjectItem = Union[
+     List[Scaler],
+     Dict[str, Scaler],
+     Scaler,
+ ]
+
+
+ class ObjectSerializer(BaseSerializer[ObjectItem]):
+     def serialize(self, obj: ObjectItem) -> str:
+         return json.dumps(obj)
+
+     def deserialize(self, obj_repr: str) -> ObjectItem:
+         return json.loads(obj_repr)
+
+
+ @dataclass
+ class NumpyData:
+     array: Union[List[float], List[int]]
+     dtype: str
+     shape: Tuple[int, ...]
+
+
+ class NumpySerializer(BaseSerializer[npt.NDArray]):
+     def serialize(self, obj: npt.NDArray) -> str:
+         return json.dumps(
+             {"array": obj.tolist(), "dtype": str(obj.dtype), "shape": obj.shape}
+         )
+
+     def deserialize(self, obj_repr: str) -> npt.NDArray:
+         data = NumpyData(**json.loads(obj_repr))
+         a = np.array(data.array, dtype=data.dtype)  # type: ignore
+         if len(data.shape) == 1:
+             return a
+         else:
+             return a.reshape(data.shape)
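A quick round-trip check of the three serializers defined above (scalars go through repr/`literal_eval`, containers through JSON, and arrays through the `NumpyData` envelope that preserves dtype and shape):

```python
import numpy as np

from perpetual.serialize import NumpySerializer, ObjectSerializer, ScalerSerializer

# Scalars round-trip through repr/literal_eval.
s = ScalerSerializer()
assert s.deserialize(s.serialize("hello")) == "hello"
assert s.deserialize(s.serialize(3.14)) == 3.14

# JSON-compatible containers round-trip through json.dumps/json.loads.
o = ObjectSerializer()
assert o.deserialize(o.serialize({"a": 1, "b": "x"})) == {"a": 1, "b": "x"}

# Arrays keep their dtype and shape via the NumpyData envelope.
n = NumpySerializer()
arr = np.arange(6, dtype="float64").reshape(2, 3)
restored = n.deserialize(n.serialize(arr))
assert restored.dtype == arr.dtype and restored.shape == arr.shape
assert (restored == arr).all()
```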
perpetual/types.py ADDED
@@ -0,0 +1,150 @@
+ import numpy as np
+ from typing_extensions import Self
+ from typing import Any, Dict, Iterable, Protocol, Set
+
+
+ class BoosterType(Protocol):
+     monotone_constraints: Dict[int, int]
+     terminate_missing_features: Set[int]
+     number_of_trees: int
+     base_score: float
+
+     def fit(
+         self,
+         flat_data: np.ndarray,
+         rows: int,
+         cols: int,
+         y: np.ndarray,
+         budget: float,
+         sample_weight: np.ndarray,
+         parallel: bool = False,
+     ):
+         """Fit the booster on flattened training data."""
+
+     def predict(
+         self,
+         flat_data: np.ndarray,
+         rows: int,
+         cols: int,
+         parallel: bool = True,
+     ) -> np.ndarray:
+         """Predict on flattened data."""
+
+     def predict_proba(
+         self,
+         flat_data: np.ndarray,
+         rows: int,
+         cols: int,
+         parallel: bool = True,
+     ) -> np.ndarray:
+         """Predict class probabilities on flattened data."""
+
+     def predict_contributions(
+         self,
+         flat_data: np.ndarray,
+         rows: int,
+         cols: int,
+         method: str,
+         parallel: bool = True,
+     ) -> np.ndarray:
+         """Predict feature contributions using the given method."""
+
+     def value_partial_dependence(
+         self,
+         feature: int,
+         value: float,
+     ) -> float:
+         """Partial dependence of the model output on a feature at a given value."""
+
+     def calculate_feature_importance(
+         self,
+         method: str,
+         normalize: bool,
+     ) -> Dict[int, float]:
+         """Calculate feature importance values, keyed by feature index."""
+
+     def text_dump(self) -> Iterable[str]:
+         """Return a text representation of the trees."""
+
+     @classmethod
+     def load_booster(cls, path: str) -> Self:
+         """Load a booster from a file."""
+
+     def save_booster(self, path: str):
+         """Save the booster to a file."""
+
+     @classmethod
+     def from_json(cls, json_str: str) -> Self:
+         """Construct a booster from a JSON string."""
+
+     def json_dump(self) -> str:
+         """Dump the booster to a JSON string."""
+
+     def get_params(self) -> Dict[str, Any]:
+         """Return the booster parameters."""
+
+     def insert_metadata(self, key: str, value: str) -> None:
+         """Store a metadata key-value pair on the booster."""
+
+     def get_metadata(self, key: str) -> str:
+         """Retrieve a stored metadata value by key."""
+
+
+ class MultiOutputBoosterType(Protocol):
+     monotone_constraints: Dict[int, int]
+     terminate_missing_features: Set[int]
+     number_of_trees: Iterable[int]
+     base_score: Iterable[float]
+
+     def fit(
+         self,
+         flat_data: np.ndarray,
+         rows: int,
+         cols: int,
+         y: np.ndarray,
+         budget: float,
+         sample_weight: np.ndarray,
+         parallel: bool = False,
+     ):
+         """Fit the booster on flattened training data."""
+
+     def predict(
+         self,
+         flat_data: np.ndarray,
+         rows: int,
+         cols: int,
+         parallel: bool = True,
+     ) -> np.ndarray:
+         """Predict on flattened data."""
+
+     def predict_proba(
+         self,
+         flat_data: np.ndarray,
+         rows: int,
+         cols: int,
+         parallel: bool = True,
+     ) -> np.ndarray:
+         """Predict class probabilities on flattened data."""
+
+     @classmethod
+     def load_booster(cls, path: str) -> Self:
+         """Load a booster from a file."""
+
+     def save_booster(self, path: str):
+         """Save the booster to a file."""
+
+     @classmethod
+     def from_json(cls, json_str: str) -> Self:
+         """Construct a booster from a JSON string."""
+
+     def json_dump(self) -> str:
+         """Dump the booster to a JSON string."""
+
+     def get_params(self) -> Dict[str, Any]:
+         """Return the booster parameters."""
+
+     def insert_metadata(self, key: str, value: str) -> None:
+         """Store a metadata key-value pair on the booster."""
+
+     def get_metadata(self, key: str) -> str:
+         """Retrieve a stored metadata value by key."""
perpetual/utils.py ADDED
@@ -0,0 +1,215 @@
+ import logging
+ import numpy as np
+ from typing import Dict, Iterable, List, Optional, Tuple
+
+
+ logger = logging.getLogger(__name__)
+
+
+ def type_df(df):
+     library_name = type(df).__module__.split(".")[0]
+     if type(df).__name__ == "DataFrame":
+         if library_name == "pandas":
+             return "pandas_df"
+         elif library_name == "polars":
+             return "polars_df"
+     elif library_name == "numpy":
+         return "numpy"
+     else:
+         return ""
+
+
+ def type_series(y):
+     library_name = type(y).__module__.split(".")[0]
+     if type(y).__name__ == "Series":
+         if library_name == "pandas":
+             return "pandas_series"
+         elif library_name == "polars":
+             return "polars_series"
+     elif library_name == "numpy":
+         return "numpy"
+     else:
+         return ""
+
+
+ def convert_input_array(x, objective) -> Tuple[np.ndarray, Iterable]:
+     classes_ = []
+
+     if type(x).__module__.split(".")[0] == "numpy":
+         if len(x.shape) == 2:
+             classes_, x_, *_ = convert_input_frame(x, None, 1000)
+         else:
+             x_ = x
+     elif type_series(x) == "pandas_series":
+         x_ = x.to_numpy()
+     elif type_series(x) == "polars_series":
+         x_ = x.to_numpy(allow_copy=False)
+     elif type_df(x) == "polars_df" or type_df(x) == "pandas_df":
+         classes_, x_, *_ = convert_input_frame(x, None, 1000)
+     else:
+         x_ = x.to_numpy()
+
+     if objective == "LogLoss" and len(x_.shape) == 1:
+         classes_ = np.unique(x_)
+         x_index = np.array([np.where(classes_ == i) for i in x_])
+         if len(classes_) > 2:
+             x_ = np.squeeze(np.eye(len(classes_))[x_index])
+
+     if not np.issubdtype(x_.dtype, "float64"):
+         x_ = x_.astype(dtype="float64", copy=False)
+
+     if len(x_.shape) == 2:
+         x_ = x_.ravel(order="F")
+
+     return x_, classes_
+
+
+ def convert_input_frame(
+     X, categorical_features, max_cat
+ ) -> Tuple[List[str], np.ndarray, int, int, Optional[Iterable[int]], Optional[Dict]]:
+     """Convert data to the format needed by the booster.
+
+     Returns:
+         Tuple[List[str], np.ndarray, int, int, Optional[Iterable[int]], Optional[Dict]]:
+             Column names, the flat data, number of rows, number of columns,
+             categorical feature indices, and the category mapping.
+     """
+     categorical_features_ = None
+     if type_df(X) == "pandas_df":
+         X_ = X.to_numpy()
+         features_ = X.columns.to_list()
+         if categorical_features == "auto":
+             categorical_columns = X.select_dtypes(include=["category"]).columns.tolist()
+             categorical_features_ = [
+                 features_.index(c) for c in categorical_columns
+             ] or None
+     elif type_df(X) == "polars_df":
+         import polars.selectors as cs
+
+         try:
+             X_ = X.to_numpy(allow_copy=False)
+         except RuntimeError:
+             X_ = X.to_numpy(allow_copy=True)
+
+         features_ = X.columns
+         if categorical_features == "auto":
+             categorical_columns = X.select(cs.categorical()).columns
+             categorical_features_ = [
+                 features_.index(c) for c in categorical_columns
+             ] or None
+     else:
+         # Assume it's a numpy array.
+         X_ = X
+         features_ = list(map(str, range(X_.shape[1])))
+
+     if (
+         categorical_features
+         and all(isinstance(s, int) for s in categorical_features)
+         and isinstance(categorical_features, list)
+     ):
+         categorical_features_ = categorical_features
+     elif (
+         categorical_features
+         and all(isinstance(s, str) for s in categorical_features)
+         and isinstance(categorical_features, list)
+     ):
+         categorical_features_ = [features_.index(c) for c in categorical_features]
+
+     cat_mapping = {}  # key: feature_name, value: ordered category names
+     cat_to_num = []
+     if categorical_features_:
+         for i in categorical_features_:
+             categories, inversed = np.unique(X_[:, i].astype(str), return_inverse=True)
+
+             categories = list(categories)
+             if "nan" in categories:
+                 categories.remove("nan")
+                 categories.insert(0, "nan")
+
+             # Shift to 1-based codes so they line up with the category list
+             # ("nan" occupies slot 0 when present).
+             inversed = inversed + 1.0
+
+             if len(categories) > max_cat:
+                 cat_to_num.append(i)
+                 logger.warning(
+                     f"Feature {features_[i]} will be treated as numerical since the number of categories ({len(categories)}) exceeds the max_cat ({max_cat}) threshold."
+                 )
+
+             feature_name = features_[i]
+             cat_mapping[feature_name] = categories
+             ind_nan = len(categories)
+             inversed[inversed == ind_nan] = np.nan
+             X_[:, i] = inversed
+
+         categorical_features_ = [
+             x for x in categorical_features_ if x not in cat_to_num
+         ]
+
+     logger.info(f"Categorical features: {categorical_features_}")
+     logger.info(f"Mapping of categories: {cat_mapping}")
+
+     if not np.issubdtype(X_.dtype, "float64"):
+         X_ = X_.astype(dtype="float64", copy=False)
+     flat_data = X_.ravel(order="F")
+     rows, cols = X_.shape
+
+     if isinstance(categorical_features_, list):
+         categorical_features_ = set(categorical_features_)
+
+     return features_, flat_data, rows, cols, categorical_features_, cat_mapping
+
+
+ def transform_input_frame(X, cat_mapping) -> Tuple[List[str], np.ndarray, int, int]:
+     """Convert data to the format needed by the booster.
+
+     Returns:
+         Tuple[List[str], np.ndarray, int, int]:
+             Column names, the flat data, number of rows, and number of columns.
+     """
+     if type_df(X) == "pandas_df":
+         X_ = X.to_numpy()
+         features_ = X.columns.to_list()
+     elif type_df(X) == "polars_df":
+         try:
+             X_ = X.to_numpy(allow_copy=False)
+         except RuntimeError:
+             X_ = X.to_numpy(allow_copy=True)
+         features_ = X.columns
+     else:
+         # Assume it's a numpy array.
+         X_ = X
+         features_ = list(map(str, range(X_.shape[1])))
+
+     if cat_mapping:
+         for feature_name, categories in cat_mapping.items():
+             feature_index = features_.index(feature_name)
+             cats = categories.copy()
+             cats.remove("nan")
+             x_enc = np.searchsorted(cats, X_[:, feature_index].astype(str))
+             x_enc = x_enc + 1.0
+             ind_nan = len(categories)
+             x_enc[x_enc == ind_nan] = np.nan
+             X_[:, feature_index] = x_enc
+
+     if not np.issubdtype(X_.dtype, "float64"):
+         X_ = X_.astype(dtype="float64", copy=False)
+     flat_data = X_.ravel(order="F")
+     rows, cols = X_.shape
+
+     return features_, flat_data, rows, cols
+
+
+ CONTRIBUTION_METHODS = {
+     "weight": "Weight",
+     "Weight": "Weight",
+     "average": "Average",
+     "Average": "Average",
+     "branch-difference": "BranchDifference",
+     "branchdifference": "BranchDifference",
+     "BranchDifference": "BranchDifference",
+     "midpoint-difference": "MidpointDifference",
+     "midpointdifference": "MidpointDifference",
+     "MidpointDifference": "MidpointDifference",
+     "mode-difference": "ModeDifference",
+     "modedifference": "ModeDifference",
+     "ModeDifference": "ModeDifference",
+     "ProbabilityChange": "ProbabilityChange",
+     "probabilitychange": "ProbabilityChange",
+     "probability-change": "ProbabilityChange",
+ }
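A hedged sketch of what the frame conversion does, using a small pandas frame with one categorical column that includes a missing value (pandas is a dev dependency of the package; the printed values in the comments are what the logic above should produce, not documented output):

```python
import numpy as np
import pandas as pd

from perpetual.utils import CONTRIBUTION_METHODS, convert_input_frame

df = pd.DataFrame(
    {
        "age": [25.0, 32.0, 41.0],
        "city": pd.Categorical(["a", "b", None]),
    }
)

# "auto" picks up pandas Categorical columns; max_cat caps how many
# categories a feature may have before it falls back to numerical handling.
features, flat, rows, cols, cat_idx, cat_map = convert_input_frame(df, "auto", 1000)
print(features)    # ['age', 'city']
print(rows, cols)  # 3 2
print(cat_idx)     # {1}
print(cat_map)     # {'city': ['nan', 'a', 'b']}

# Aliases normalize to the canonical contribution method names.
assert CONTRIBUTION_METHODS["midpoint-difference"] == "MidpointDifference"
```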
perpetual-0.7.11.dist-info/METADATA ADDED
@@ -0,0 +1,134 @@
+ Metadata-Version: 2.3
+ Name: perpetual
+ Version: 0.7.11
+ Classifier: Programming Language :: Rust
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Requires-Dist: numpy
+ Requires-Dist: typing-extensions
+ Requires-Dist: pandas ; extra == 'dev'
+ Requires-Dist: polars ; extra == 'dev'
+ Requires-Dist: pyarrow ; extra == 'dev'
+ Requires-Dist: maturin ==1.6.0 ; extra == 'dev'
+ Requires-Dist: pytest ; extra == 'dev'
+ Requires-Dist: seaborn ; extra == 'dev'
+ Requires-Dist: scikit-learn ; extra == 'dev'
+ Requires-Dist: mkdocs-material ; extra == 'dev'
+ Requires-Dist: mkdocstrings[python] ; extra == 'dev'
+ Requires-Dist: mkdocs-autorefs ; extra == 'dev'
+ Requires-Dist: ruff ; extra == 'dev'
+ Provides-Extra: dev
+ License-File: LICENSE
+ License-File: LICENSE
+ Summary: A self-generalizing gradient boosting machine which doesn't need hyperparameter optimization
+ Keywords: rust,perpetual,machine learning,tree model,decision tree,gradient boosted decision tree,gradient boosting machine
+ Home-Page: https://perpetual-ml.com
+ Author: Mutlu Simsek
+ Author-email: Mutlu Simsek <msimsek@perpetual-ml.com>
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
+ Project-URL: Source Code, https://github.com/perpetual-ml/perpetual
+
+ <p align="center">
+   <img height="120" src="https://github.com/perpetual-ml/perpetual/raw/main/resources/perp_logo.png">
+ </p>
+
+ <div align="center">
+
+ [![Python Versions](https://img.shields.io/pypi/pyversions/perpetual.svg?logo=python&logoColor=white)](https://pypi.org/project/perpetual)
+ [![PyPI Version](https://img.shields.io/pypi/v/perpetual.svg?logo=pypi&logoColor=white)](https://pypi.org/project/perpetual)
+ [![Crates.io Version](https://img.shields.io/crates/v/perpetual?logo=rust&logoColor=white)](https://crates.io/crates/perpetual)
+ [![Static Badge](https://img.shields.io/badge/join-discord-blue?logo=discord)](https://discord.gg/AyUK7rr6wy)
+ ![PyPI - Downloads](https://img.shields.io/pypi/dm/perpetual)
+
+ </div>
+
+ # Perpetual
+
+ PerpetualBooster is a gradient boosting machine (GBM) algorithm that, unlike other GBM algorithms, doesn't need hyperparameter optimization. Similar to AutoML libraries, it has a `budget` parameter. Increasing the `budget` parameter increases the predictive power of the algorithm and gives better results on unseen data. Start with a small budget (e.g. 1.0) and increase it (e.g. to 2.0) once you are confident in your features, as in the sketch below. If you don't see any improvement from a further increase of the `budget`, you are already extracting the most predictive power out of your data.
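A minimal sketch of that workflow, assuming your own `X` and `y` (the `SquaredLoss` objective and `budget` argument come from the Usage section below):

```python
from perpetual import PerpetualBooster

# First pass with a small budget while iterating on features.
model = PerpetualBooster(objective="SquaredLoss")
model.fit(X, y, budget=1.0)

# Once the features are settled, train a fresh model with a larger budget.
model = PerpetualBooster(objective="SquaredLoss")
model.fit(X, y, budget=2.0)
```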
+
+ ## Benchmark
+
+ Hyperparameter optimization usually takes 100 iterations with plain GBM algorithms. PerpetualBooster achieves the same accuracy in a single run. Thus, it achieves up to a 100x speed-up at the same accuracy, across different `budget` levels and datasets.
+
+ The following table summarizes the results for the [California Housing](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_california_housing.html) dataset (regression):
+
+ | Perpetual budget | LightGBM n_estimators | Perpetual mse | LightGBM mse | Speed-up wall time | Speed-up cpu time |
+ | ---------------- | --------------------- | ------------- | ------------ | ------------------ | ----------------- |
+ | 1.0              | 100                   | 0.192         | 0.192        | 54x                | 56x               |
+ | 1.5              | 300                   | 0.188         | 0.188        | 59x                | 58x               |
+ | 2.1              | 1000                  | 0.185         | 0.186        | 42x                | 41x               |
+
+ The following table summarizes the results for the [Cover Types](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_covtype.html) dataset (classification):
+
+ | Perpetual budget | LightGBM n_estimators | Perpetual log loss | LightGBM log loss | Speed-up wall time | Speed-up cpu time |
+ | ---------------- | --------------------- | ------------------ | ----------------- | ------------------ | ----------------- |
+ | 0.9              | 100                   | 0.091              | 0.084             | 72x                | 78x               |
+
+ The results can be reproduced using the scripts in the [examples](./python-package/examples) folder.
+
+ PerpetualBooster is a GBM but behaves like AutoML, so it is also benchmarked against AutoGluon (v1.2, best-quality preset), the current leader in the [AutoML benchmark](https://automlbenchmark.streamlit.app/cd_diagram). The 10 datasets with the most rows are selected from [OpenML datasets](https://www.openml.org/). The results are summarized in the following table for regression tasks (the better RMSE per row is in bold):
+
+ | OpenML Task | Perpetual Training Duration | Perpetual Inference Duration | Perpetual RMSE | AutoGluon Training Duration | AutoGluon Inference Duration | AutoGluon RMSE |
+ | ----------- | --------------------------- | ---------------------------- | -------------- | --------------------------- | ---------------------------- | -------------- |
+ | [Airlines_DepDelay_10M](https://www.openml.org/t/359929) | 518 | 11.3 | 29.0 | 520 | 30.9 | **28.8** |
+ | [bates_regr_100](https://www.openml.org/t/361940) | 3421 | 15.1 | **1.084** | OOM | OOM | OOM |
+ | [BNG(libras_move)](https://www.openml.org/t/7327) | 1956 | 4.2 | **2.51** | 1922 | 97.6 | 2.53 |
+ | [BNG(satellite_image)](https://www.openml.org/t/7326) | 334 | 1.6 | 0.731 | 337 | 10.0 | **0.721** |
+ | [COMET_MC](https://www.openml.org/t/14949) | 44 | 1.0 | **0.0615** | 47 | 5.0 | 0.0662 |
+ | [friedman1](https://www.openml.org/t/361939) | 275 | 4.2 | **1.047** | 278 | 5.1 | 1.487 |
+ | [poker](https://www.openml.org/t/10102) | 38 | 0.6 | **0.256** | 41 | 1.2 | 0.722 |
+ | [subset_higgs](https://www.openml.org/t/361955) | 868 | 10.6 | **0.420** | 870 | 24.5 | 0.421 |
+ | [BNG(autoHorse)](https://www.openml.org/t/7319) | 107 | 1.1 | **19.0** | 107 | 3.2 | 20.5 |
+ | [BNG(pbc)](https://www.openml.org/t/7318) | 48 | 0.6 | **836.5** | 51 | 0.2 | 957.1 |
+ | average | 465 | 3.9 | - | 464 | 19.7 | - |
+
+ PerpetualBooster outperformed AutoGluon on 8 out of 10 datasets, training equally fast and inferring 5x faster. The results can be reproduced using the automlbenchmark fork [here](https://github.com/deadsoul44/automlbenchmark).
+
+ ## Usage
+
+ You can use the algorithm as in the example below. Check the examples folders for both Rust and Python.
+
+ ```python
+ from perpetual import PerpetualBooster
+
+ model = PerpetualBooster(objective="SquaredLoss")
+ model.fit(X, y, budget=1.0)
+ ```
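For classification, a minimal sketch assuming the Python wrapper exposes `predict_proba` like the booster protocol in `perpetual/types.py` (`LogLoss` is the objective name referenced in `perpetual/utils.py`):

```python
from perpetual import PerpetualBooster

model = PerpetualBooster(objective="LogLoss")
model.fit(X, y, budget=1.0)

probabilities = model.predict_proba(X)
```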
+
+ ## Documentation
+
+ Documentation for the Python API can be found [here](https://perpetual-ml.github.io/perpetual) and for the Rust API [here](https://docs.rs/perpetual/latest/perpetual/).
+
+ ## Installation
+
+ The package can be installed directly from [pypi](https://pypi.org/project/perpetual):
+
+ ```shell
+ pip install perpetual
+ ```
+
+ Using [conda-forge](https://anaconda.org/conda-forge/perpetual):
+
+ ```shell
+ conda install conda-forge::perpetual
+ ```
+
+ To use it in a Rust project, get the package from [crates.io](https://crates.io/crates/perpetual):
+
+ ```shell
+ cargo add perpetual
+ ```
+
+ ## Contribution
+
+ Contributions are welcome. Check CONTRIBUTING.md for the guidelines.
+
+ ## Paper
+
+ PerpetualBooster prevents overfitting with a generalization algorithm. A paper explaining how the algorithm works is in progress. Check our [blog post](https://perpetual-ml.com/blog/how-perpetual-works) for a high-level introduction to the algorithm.
+
perpetual-0.7.11.dist-info/RECORD ADDED
@@ -0,0 +1,12 @@
+ perpetual-0.7.11.dist-info/METADATA,sha256=O8rVHCDWs2m0TM1TnOg5Y3CJoG0vmYr7fzbhS1in844,9916
+ perpetual-0.7.11.dist-info/WHEEL,sha256=ejgwLTbXJe53teqt8O_fka1tRp0QYpCWayNY4w7JtRY,104
+ perpetual-0.7.11.dist-info/license_files/LICENSE,sha256=ixuiBLtpoK3iv89l7ylKkg9rs2GzF9ukPH7ynZYzK5s,35148
+ perpetual-0.7.11.dist-info/license_files/LICENSE,sha256=ixuiBLtpoK3iv89l7ylKkg9rs2GzF9ukPH7ynZYzK5s,35148
+ perpetual/serialize.py,sha256=Tg2BbuA1jKQ5-ITuVhwtj6hgBaRAbZ66eHctR7fcVk4,1883
+ perpetual/__init__.py,sha256=zzFLkwRr8No7DPUNUQlBXdsdGfT7KEt4orHozdZhS7c,116
+ perpetual/types.py,sha256=vBwsX2XnSC8RqdmP2Mr6hDljgWhgGNS6OsuBZM1QdW0,3231
+ perpetual/utils.py,sha256=kj6yOF3wjbt40w7twOI7xqWfqu-y7nNJ4SorPT9xBbs,7229
+ perpetual/data.py,sha256=718adB9qIc0YvBiTQtMkbT6JPb5liR-4z1UD81IyYhY,590
+ perpetual/booster.py,sha256=A5vXeOzzhlHpDB6c2kSIGrmMipG4k8hdcJ0XdSIVGRI,45961
+ perpetual/perpetual.cpython-313-darwin.so,sha256=-HTJ3SrTYCMFjqS0XvGEXBz5veAGhcjTXRKI-DJUprI,1406416
+ perpetual-0.7.11.dist-info/RECORD,,
perpetual-0.7.11.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: maturin (1.6.0)
+ Root-Is-Purelib: false
+ Tag: cp313-cp313-macosx_11_0_arm64