autogluon.tabular 1.4.1b20251014-py3-none-any.whl → 1.5.0b20251222-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/tabular/configs/hyperparameter_configs.py +4 -0
- autogluon/tabular/configs/presets_configs.py +39 -2
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2025.py +2 -44
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_cpu_2025_12_18.py +2 -0
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_gpu_2025_12_18.py +2 -0
- autogluon/tabular/learner/default_learner.py +1 -0
- autogluon/tabular/models/__init__.py +3 -1
- autogluon/tabular/models/abstract/__init__.py +0 -0
- autogluon/tabular/models/abstract/abstract_torch_model.py +148 -0
- autogluon/tabular/models/catboost/catboost_model.py +2 -5
- autogluon/tabular/models/ebm/ebm_model.py +2 -6
- autogluon/tabular/models/fastainn/tabular_nn_fastai.py +9 -3
- autogluon/tabular/models/lgb/lgb_model.py +60 -17
- autogluon/tabular/models/lgb/lgb_utils.py +2 -2
- autogluon/tabular/models/lr/lr_model.py +2 -4
- autogluon/tabular/models/lr/lr_preprocessing_utils.py +6 -7
- autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py +14 -1
- autogluon/tabular/models/mitra/mitra_model.py +55 -29
- autogluon/tabular/models/realmlp/realmlp_model.py +8 -5
- autogluon/tabular/models/rf/rf_model.py +6 -8
- autogluon/tabular/models/tabdpt/__init__.py +0 -0
- autogluon/tabular/models/tabdpt/tabdpt_model.py +253 -0
- autogluon/tabular/models/tabicl/tabicl_model.py +15 -5
- autogluon/tabular/models/tabm/tabm_model.py +25 -8
- autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py +7 -5
- autogluon/tabular/models/tabpfnv2/tabpfnv2_5_model.py +451 -0
- autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +87 -12
- autogluon/tabular/models/tabprep/__init__.py +0 -0
- autogluon/tabular/models/tabprep/prep_lgb_model.py +21 -0
- autogluon/tabular/models/tabprep/prep_mixin.py +220 -0
- autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py +3 -6
- autogluon/tabular/models/tabular_nn/utils/data_preprocessor.py +12 -4
- autogluon/tabular/models/xgboost/xgboost_model.py +3 -4
- autogluon/tabular/predictor/predictor.py +50 -20
- autogluon/tabular/registry/_ag_model_registry.py +8 -2
- autogluon/tabular/testing/fit_helper.py +61 -0
- autogluon/tabular/trainer/abstract_trainer.py +45 -9
- autogluon/tabular/trainer/auto_trainer.py +5 -0
- autogluon/tabular/version.py +1 -1
- autogluon.tabular-1.5.0b20251222-py3.11-nspkg.pth +1 -0
- {autogluon.tabular-1.4.1b20251014.dist-info → autogluon_tabular-1.5.0b20251222.dist-info}/METADATA +97 -87
- {autogluon.tabular-1.4.1b20251014.dist-info → autogluon_tabular-1.5.0b20251222.dist-info}/RECORD +48 -38
- {autogluon.tabular-1.4.1b20251014.dist-info → autogluon_tabular-1.5.0b20251222.dist-info}/WHEEL +1 -1
- autogluon.tabular-1.4.1b20251014-py3.9-nspkg.pth +0 -1
- {autogluon.tabular-1.4.1b20251014.dist-info → autogluon_tabular-1.5.0b20251222.dist-info/licenses}/LICENSE +0 -0
- {autogluon.tabular-1.4.1b20251014.dist-info → autogluon_tabular-1.5.0b20251222.dist-info/licenses}/NOTICE +0 -0
- {autogluon.tabular-1.4.1b20251014.dist-info → autogluon_tabular-1.5.0b20251222.dist-info}/namespace_packages.txt +0 -0
- {autogluon.tabular-1.4.1b20251014.dist-info → autogluon_tabular-1.5.0b20251222.dist-info}/top_level.txt +0 -0
- {autogluon.tabular-1.4.1b20251014.dist-info → autogluon_tabular-1.5.0b20251222.dist-info}/zip-safe +0 -0
autogluon/tabular/models/lgb/lgb_model.py

@@ -46,6 +46,8 @@ class LGBModel(AbstractModel):
     ag_priority_by_problem_type = MappingProxyType({
         SOFTCLASS: 100
     })
+    seed_name = "seed"
+    seed_name_alt = ["seed_value", "random_seed", "random_state"]
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
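The `seed_name` / `seed_name_alt` attributes added here (and to LinearModel, RFModel, RealMLPModel, and MitraModel below) replace the per-model `_get_random_seed_from_hyperparameters` overrides that this diff deletes. A minimal sketch of how a base class could resolve the seed from these declarative attributes; this is an assumption for illustration, not the actual AbstractModel implementation, which is not part of this diff:

from __future__ import annotations


# Hypothetical sketch only: shows how declarative seed_name / seed_name_alt
# attributes could replace per-model _get_random_seed_from_hyperparameters
# overrides. The real AbstractModel logic is not shown in this diff.
class SeedLookupSketch:
    seed_name: str | None = "seed"
    seed_name_alt: list[str] = ["seed_value", "random_seed", "random_state"]

    def _get_random_seed_from_hyperparameters(self, hyperparameters: dict) -> int | None | str:
        # Check the primary key first, then any known aliases.
        candidates = ([self.seed_name] if self.seed_name else []) + list(self.seed_name_alt)
        for key in candidates:
            if key in hyperparameters:
                return hyperparameters[key]
        return "N/A"  # sentinel used by the removed per-model overrides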
@@ -101,10 +103,46 @@ class LGBModel(AbstractModel):
         Scales linearly with the number of estimators, number of classes, and number of leaves.
         Memory usage peaks during model saving, with the peak consuming approximately 2-4x the size of the model in memory.
         """
+        data_mem_usage = get_approximate_df_mem_usage(X).sum()
+        return cls._estimate_memory_usage_common(
+            num_features=X.shape[1],
+            data_mem_usage=data_mem_usage,
+            hyperparameters=hyperparameters,
+            num_classes=num_classes,
+        )
+
+    @classmethod
+    def _estimate_memory_usage_static_lite(
+        cls,
+        num_samples: int,
+        num_features: int,
+        num_bytes_per_cell: float = 4,
+        hyperparameters: dict = None,
+        num_classes: int = 1,
+        **kwargs,
+    ) -> int:
+        data_mem_usage = num_samples * num_features * num_bytes_per_cell
+        return cls._estimate_memory_usage_common(
+            num_features=num_features,
+            data_mem_usage=data_mem_usage,
+            hyperparameters=hyperparameters,
+            num_classes=num_classes,
+        )
+
+    @classmethod
+    def _estimate_memory_usage_common(
+        cls,
+        num_features: int,
+        data_mem_usage: int | float,
+        hyperparameters: dict | None = None,
+        num_classes: int = 1,
+    ) -> int:
+        """
+        Utility method to avoid code duplication
+        """
         if hyperparameters is None:
             hyperparameters = {}
         num_classes = num_classes if num_classes else 1  # num_classes could be None after initialization if it's a regression problem
-        data_mem_usage = get_approximate_df_mem_usage(X).sum()
         data_mem_usage_bytes = data_mem_usage * 5 + data_mem_usage / 4 * num_classes  # TODO: Extremely crude approximation, can be vastly improved
 
         n_trees_per_estimator = num_classes if num_classes > 2 else 1
@@ -112,7 +150,7 @@ class LGBModel(AbstractModel):
         max_bins = hyperparameters.get("max_bins", 255)
         num_leaves = hyperparameters.get("num_leaves", 31)
         # Memory usage of histogram based on https://github.com/microsoft/LightGBM/issues/562#issuecomment-304524592
-        histogram_mem_usage_bytes = 20 * max_bins *
+        histogram_mem_usage_bytes = 20 * max_bins * num_features * num_leaves
         histogram_mem_usage_bytes_max = hyperparameters.get("histogram_pool_size", None)
         if histogram_mem_usage_bytes_max is not None:
             histogram_mem_usage_bytes_max *= 1e6  # Convert megabytes to bytes, `histogram_pool_size` is in MB.
@@ -122,18 +160,11 @@ class LGBModel(AbstractModel):
 
         mem_size_per_estimator = n_trees_per_estimator * num_leaves * 100  # very rough estimate
         n_estimators = hyperparameters.get("num_boost_round", DEFAULT_NUM_BOOST_ROUND)
-        n_estimators_min = min(n_estimators,
-        mem_size_estimators = n_estimators_min * mem_size_per_estimator  # memory estimate after fitting up to
+        n_estimators_min = min(n_estimators, 5000)
+        mem_size_estimators = n_estimators_min * mem_size_per_estimator  # memory estimate after fitting up to 5000 estimators
 
         approx_mem_size_req = data_mem_usage_bytes + histogram_mem_usage_bytes + mem_size_estimators
-        return approx_mem_size_req
-
-    def _get_random_seed_from_hyperparameters(self, hyperparameters: dict) -> int | None | str:
-        if "seed_value" in hyperparameters:
-            return hyperparameters["seed_value"]
-        if "seed" in hyperparameters:
-            return hyperparameters["seed"]
-        return "N/A"
+        return int(approx_mem_size_req)
 
     def _fit(self, X, y, X_val=None, y_val=None, time_limit=None, num_gpus=0, num_cpus=0, sample_weight=None, sample_weight_val=None, verbosity=2, **kwargs):
         try_import_lightgbm()  # raise helpful error message if LightGBM isn't installed
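The refactor above splits the estimate into a shared `_estimate_memory_usage_common` helper plus a `_lite` variant that needs only the data shape. A rough worked example of the resulting arithmetic, using the defaults visible in these hunks and an arbitrary dataset size (illustrative numbers only):

# Illustrative arithmetic only, mirroring the formulas in the hunks above.
# Dataset size and num_boost_round are arbitrary example inputs.
num_samples, num_features, num_classes = 100_000, 50, 2
num_bytes_per_cell = 4
max_bins, num_leaves = 255, 31            # defaults read from hyperparameters
n_trees_per_estimator = 1                 # binary task (num_classes <= 2)

data_mem_usage = num_samples * num_features * num_bytes_per_cell        # 20 MB of raw cells
data_mem_usage_bytes = data_mem_usage * 5 + data_mem_usage / 4 * num_classes
histogram_mem_usage_bytes = 20 * max_bins * num_features * num_leaves   # ~7.9 MB
n_estimators_min = min(1_000, 5000)       # example num_boost_round, capped at 5000
mem_size_estimators = n_estimators_min * n_trees_per_estimator * num_leaves * 100

approx_mem_size_req = int(data_mem_usage_bytes + histogram_mem_usage_bytes + mem_size_estimators)
print(f"approximate fit memory: {approx_mem_size_req / 1e6:.0f} MB")  # ~121 MB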
@@ -292,8 +323,6 @@ class LGBModel(AbstractModel):
         elif self.problem_type == QUANTILE:
             train_params["params"]["quantile_levels"] = self.quantile_levels
 
-        train_params["params"]["seed"] = self.random_seed
-
         # Train LightGBM model:
         # Note that self.model contains a <class 'lightgbm.basic.Booster'> not a LightBGMClassifier or LightGBMRegressor object
         from lightgbm.basic import LightGBMError
@@ -378,6 +407,9 @@ class LGBModel(AbstractModel):
         X = self.preprocess(X, **kwargs)
 
         y_pred_proba = self.model.predict(X, num_threads=num_cpus)
+        return self._post_process_predictions(y_pred_proba=y_pred_proba)
+
+    def _post_process_predictions(self, y_pred_proba) -> np.ndarray:
         if self.problem_type == QUANTILE:
             # y_pred_proba is a pd.DataFrame, need to convert
             y_pred_proba = y_pred_proba.to_numpy()
@@ -430,7 +462,7 @@ class LGBModel(AbstractModel):
         self,
         X: DataFrame,
         y: Series,
-        params,
+        params: dict,
         X_val=None,
         y_val=None,
         X_test=None,
@@ -439,11 +471,14 @@ class LGBModel(AbstractModel):
         sample_weight_val=None,
         sample_weight_test=None,
         save=False,
+        init_train=None,
+        init_val=None,
+        init_test=None,
     ):
         lgb_dataset_params_keys = ["two_round"]  # Keys that are specific to lightGBM Dataset object construction.
         data_params = {key: params[key] for key in lgb_dataset_params_keys if key in params}.copy()
 
-        X = self.preprocess(X, is_train=True)
+        X = self.preprocess(X, y=y, is_train=True)
         if X_val is not None:
             X_val = self.preprocess(X_val)
         if X_test is not None:
@@ -465,7 +500,13 @@ class LGBModel(AbstractModel):
 
         # X, W_train = self.convert_to_weight(X=X)
         dataset_train = construct_dataset(
-            x=X,
+            x=X,
+            y=y,
+            location=os.path.join("self.path", "datasets", "train"),
+            params=data_params,
+            save=save,
+            weight=sample_weight,
+            init_score=init_train,
         )
         # dataset_train = construct_dataset_lowest_memory(X=X, y=y, location=self.path + 'datasets/train', params=data_params)
         if X_val is not None:
@@ -478,6 +519,7 @@ class LGBModel(AbstractModel):
                 params=data_params,
                 save=save,
                 weight=sample_weight_val,
+                init_score=init_val,
             )
             # dataset_val = construct_dataset_lowest_memory(X=X_val, y=y_val, location=self.path + 'datasets/val', reference=dataset_train, params=data_params)
         else:
@@ -492,6 +534,7 @@ class LGBModel(AbstractModel):
                 params=data_params,
                 save=save,
                 weight=sample_weight_test,
+                init_score=init_test,
             )
         else:
             dataset_test = None
autogluon/tabular/models/lgb/lgb_utils.py

@@ -104,11 +104,11 @@ def softclass_lgbobj(preds, train_data):
     return grad.flatten("F"), hess.flatten("F")
 
 
-def construct_dataset(x: DataFrame, y: Series, location=None, reference=None, params=None, save=False, weight=None):
+def construct_dataset(x: DataFrame, y: Series, location=None, reference=None, params=None, save=False, weight=None, init_score=None):
     try_import_lightgbm()
     import lightgbm as lgb
 
-    dataset = lgb.Dataset(data=x, label=y, reference=reference, free_raw_data=True, params=params, weight=weight)
+    dataset = lgb.Dataset(data=x, label=y, reference=reference, free_raw_data=True, params=params, weight=weight, init_score=init_score)
 
     if save:
         assert location is not None
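`construct_dataset` now threads an `init_score` through to `lgb.Dataset`, which LightGBM treats as a per-row starting score (raw margin) to boost from. A minimal standalone illustration of that LightGBM parameter, independent of AutoGluon's wrapper; the data and training settings here are made-up example values:

# Standalone illustration of LightGBM's init_score parameter, which the updated
# construct_dataset() forwards. Data and parameters are arbitrary example values.
import numpy as np
import lightgbm as lgb

rng = np.random.default_rng(0)
X = rng.random((200, 5))
y = rng.integers(0, 2, size=200)
init_score = np.zeros(200)  # e.g. raw-margin predictions from a previously fitted model

dataset = lgb.Dataset(data=X, label=y, weight=None, init_score=init_score, free_raw_data=True)
booster = lgb.train({"objective": "binary", "verbosity": -1}, dataset, num_boost_round=10)
print(booster.num_trees())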
autogluon/tabular/models/lr/lr_model.py

@@ -43,6 +43,7 @@ class LinearModel(AbstractModel):
     ag_key = "LR"
     ag_name = "LinearModel"
     ag_priority = 30
+    seed_name = "random_state"
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -162,9 +163,6 @@ class LinearModel(AbstractModel):
         for param, val in default_params.items():
             self._set_default_param_value(param, val)
 
-    def _get_random_seed_from_hyperparameters(self, hyperparameters: dict) -> int | None | str:
-        return hyperparameters.get("random_seed", "N/A")
-
     def _get_default_searchspace(self):
         return get_default_searchspace(self.problem_type)
 
@@ -218,7 +216,7 @@ class LinearModel(AbstractModel):
         total_iter = 0
         total_iter_used = 0
         total_max_iter = sum(max_iter_list)
-        model = model_cls(max_iter=max_iter_list[0],
+        model = model_cls(max_iter=max_iter_list[0], **params)
         early_stop = False
         for i, cur_max_iter in enumerate(max_iter_list):
             if time_left is not None and (i > 0):
autogluon/tabular/models/lr/lr_preprocessing_utils.py

@@ -5,20 +5,19 @@ from autogluon.features.generators import OneHotEncoderFeatureGenerator
 
 class OheFeaturesGenerator(BaseEstimator, TransformerMixin):
     def __init__(self):
-
-        self._encoder = None
+        pass
 
     def fit(self, X, y=None):
-        self.
-        self.
-        self.
+        self.encoder_ = OneHotEncoderFeatureGenerator(max_levels=10000, verbosity=0)
+        self.encoder_.fit(X)
+        self.feature_names_ = self.encoder_.features_out
         return self
 
     def transform(self, X, y=None):
-        return self.
+        return self.encoder_.transform_ohe(X)
 
     def get_feature_names(self):
-        return self.
+        return self.feature_names_
 
 
 class NlpDataPreprocessor(BaseEstimator, TransformerMixin):
autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py

@@ -73,6 +73,20 @@ class TrainerFinetune(BaseEstimator):
 
         self.metric = self.cfg.hyperparams['metric']
 
+    def set_device(self, device: str):
+        self.device = device
+        self.model = self.model.to(device=device, non_blocking=True)
+
+    def post_fit_optimize(self):
+        # Minimize memory usage post-fit
+        self.checkpoint = None
+        self.optimizer = None
+        self.scaler = None
+        self.scheduler_warmup = None
+        self.scheduler_reduce_on_plateau = None
+        self.loss = None
+        self.early_stopping = None
+        self.metric = None
 
     def train(self, x_train: np.ndarray, y_train: np.ndarray, x_val: np.ndarray, y_val: np.ndarray):
 
@@ -184,7 +198,6 @@ class TrainerFinetune(BaseEstimator):
 
         self.checkpoint.set_to_best(self.model)
 
-
     def evaluate(self, x_support: np.ndarray, y_support: np.ndarray, x_query: np.ndarray, y_query: np.ndarray) -> PredictionMetrics:
 
         self.model.eval()
autogluon/tabular/models/mitra/mitra_model.py

@@ -2,19 +2,21 @@ from __future__ import annotations
 
 import logging
 import os
+from pathlib import Path
 from typing import List, Optional
 
 import pandas as pd
+from typing_extensions import Self
 
 from autogluon.common.utils.resource_utils import ResourceManager
-from autogluon.
+from autogluon.tabular.models.abstract.abstract_torch_model import AbstractTorchModel
 from autogluon.features.generators import LabelEncoderFeatureGenerator
 from autogluon.tabular import __version__
 
 logger = logging.getLogger(__name__)
 
 
-class MitraModel(
+class MitraModel(AbstractTorchModel):
     """
     Mitra is a tabular foundation model pre-trained purely on synthetic data with the goal
     of optimizing fine-tuning performance over in-context learning performance.
@@ -32,6 +34,7 @@ class MitraModel(AbstractModel):
     ag_name = "Mitra"
     weights_file_name = "model.pt"
     ag_priority = 55
+    seed_name = "seed"
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -77,9 +80,6 @@ class MitraModel(AbstractModel):
 
         return X
 
-    def _get_random_seed_from_hyperparameters(self, hyperparameters: dict) -> int | None | str:
-        return hyperparameters.get("seed", "N/A")
-
     def _fit(
         self,
         X: pd.DataFrame,
@@ -157,16 +157,13 @@ class MitraModel(AbstractModel):
         if "verbose" not in hyp:
             hyp["verbose"] = verbosity >= 3
 
-        self.model = model_cls(
-            seed=self.random_seed,
-            **hyp,
-        )
+        self.model = model_cls(**hyp)
 
         X = self.preprocess(X, is_train=True)
         if X_val is not None:
             X_val = self.preprocess(X_val)
 
-
+        model = self.model.fit(
             X=X,
             y=y,
             X_val=X_val,
@@ -174,6 +171,11 @@ class MitraModel(AbstractModel):
             time_limit=time_limit,
         )
 
+        for i in range(len(model.trainers)):
+            model.trainers[i].post_fit_optimize()
+
+        self.model = model
+
         if need_to_reset_torch_threads:
             torch.set_num_threads(torch_threads_og)
 
@@ -195,42 +197,63 @@ class MitraModel(AbstractModel):
         )
         return default_auxiliary_params
 
-
-
-
+    def weights_path(self, path: str | None = None) -> str:
+        if path is None:
+            path = self.path
+        return str(Path(path) / self.weights_file_name)
 
     def save(self, path: str = None, verbose=True) -> str:
         _model_weights_list = None
         if self.model is not None:
+            self._save_model_artifact(path=path)
            _model_weights_list = []
            for i in range(len(self.model.trainers)):
                _model_weights_list.append(self.model.trainers[i].model)
-                self.model.trainers[i].checkpoint = None
                self.model.trainers[i].model = None
-
-                self.model.trainers[i].scheduler_warmup = None
-                self.model.trainers[i].scheduler_reduce_on_plateau = None
-            self._weights_saved = True
+
         path = super().save(path=path, verbose=verbose)
         if _model_weights_list is not None:
-            import torch
-
-            os.makedirs(self.path, exist_ok=True)
-            torch.save(_model_weights_list, self.weights_path)
            for i in range(len(self.model.trainers)):
                self.model.trainers[i].model = _model_weights_list[i]
        return path
 
+    def _save_model_artifact(self, path: str | None):
+        if path is None:
+            path = self.path
+        import torch
+        device_og = self.device
+        self.set_device("cpu")
+
+        _model_weights_list = []
+        for i in range(len(self.model.trainers)):
+            _model_weights_list.append(self.model.trainers[i].model)
+
+        os.makedirs(path, exist_ok=True)
+        torch.save(_model_weights_list, self.weights_path(path=path))
+        self.set_device(device_og)
+        self._weights_saved = True
+
+    def _load_model_artifact(self):
+        import torch
+        device = self.suggest_device_infer()
+        model_weights_list = torch.load(self.weights_path(), weights_only=False)  # nosec B614
+        for i in range(len(self.model.trainers)):
+            self.model.trainers[i].model = model_weights_list[i]
+        self.set_device(device)
+
+    def _set_device(self, device: str):
+        for i in range(len(self.model.trainers)):
+            self.model.trainers[i].set_device(device)
+
+    def get_device(self) -> str:
+        return self.model.trainers[0].device
+
     @classmethod
-    def load(cls, path: str, reset_paths=
+    def load(cls, path: str, reset_paths=True, verbose=True) -> Self:
         model: MitraModel = super().load(path=path, reset_paths=reset_paths, verbose=verbose)
 
         if model._weights_saved:
-
-
-            model_weights_list = torch.load(model.weights_path, weights_only=False)  # nosec B614
-            for i in range(len(model.model.trainers)):
-                model.model.trainers[i].model = model_weights_list[i]
+            model._load_model_artifact()
         model._weights_saved = False
         return model
 
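Saving now routes through `_save_model_artifact`, which moves the trainers to CPU, writes their weights to a separate `model.pt` via `torch.save`, and restores the original device; `load()` reattaches the weights with `_load_model_artifact()`. A reduced sketch of that separate-weights pattern, with class and helper names that are illustrative only (not AutoGluon APIs):

# Sketch of the "weights saved separately from the pickled wrapper" pattern used
# above, reduced to its essentials. Only torch.save/torch.load are real APIs here.
import os
import torch
import torch.nn as nn


class TinyArtifact:
    weights_file_name = "model.pt"

    def __init__(self, path: str):
        self.path = path
        self.net = nn.Linear(4, 1)

    def save_weights(self):
        os.makedirs(self.path, exist_ok=True)
        net, self.net = self.net, None  # detach weights before pickling the wrapper elsewhere
        torch.save(net.state_dict(), os.path.join(self.path, self.weights_file_name))
        self.net = net                  # restore in-memory state after saving

    def load_weights(self, device: str = "cpu"):
        state = torch.load(os.path.join(self.path, self.weights_file_name), weights_only=True)
        self.net = nn.Linear(4, 1)
        self.net.load_state_dict(state)
        self.net.to(device)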
@@ -375,9 +398,12 @@ class MitraModel(AbstractModel):
         return int(gpu_memory_mb * 1e6)
 
     @classmethod
-    def _class_tags(cls)
+    def _class_tags(cls):
         return {
             "can_estimate_memory_usage_static": True,
+            "can_set_device": True,
+            "set_device_on_save_to": None,
+            "set_device_on_load": False,
         }
 
     def _more_tags(self) -> dict:
autogluon/tabular/models/realmlp/realmlp_model.py

@@ -16,7 +16,7 @@ from sklearn.impute import SimpleImputer
 
 from autogluon.common.utils.pandas_utils import get_approximate_df_mem_usage
 from autogluon.common.utils.resource_utils import ResourceManager
-from autogluon.
+from autogluon.tabular.models.abstract.abstract_torch_model import AbstractTorchModel
 from autogluon.tabular import __version__
 
 logger = logging.getLogger(__name__)
@@ -34,7 +34,7 @@ def set_logger_level(logger_name: str, level: int):
 
 
 # pip install pytabkit
-class RealMLPModel(
+class RealMLPModel(AbstractTorchModel):
     """
     RealMLP is an improved multilayer perception (MLP) model
     through a bag of tricks and better default hyperparameters.
@@ -51,6 +51,7 @@ class RealMLPModel(AbstractModel):
     ag_key = "REALMLP"
     ag_name = "RealMLP"
     ag_priority = 75
+    seed_name = "random_state"
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -82,8 +83,11 @@ class RealMLPModel(AbstractModel):
             model_cls = RealMLP_TD_S_Regressor
         return model_cls
 
-    def
-        return
+    def get_device(self) -> str:
+        return self.model.device
+
+    def _set_device(self, device: str):
+        self.model.to(device)
 
     def _fit(
         self,
@@ -178,7 +182,6 @@ class RealMLPModel(AbstractModel):
         self.model = model_cls(
             n_threads=num_cpus,
             device=device,
-            random_state=self.random_seed,
             **init_kwargs,
             **hyp,
         )
autogluon/tabular/models/rf/rf_model.py

@@ -30,6 +30,7 @@ class RFModel(AbstractModel):
     ag_key = "RF"
     ag_name = "RandomForest"
     ag_priority = 80
+    seed_name = "random_state"
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -107,9 +108,6 @@ class RFModel(AbstractModel):
         for param, val in default_params.items():
             self._set_default_param_value(param, val)
 
-    def _get_random_seed_from_hyperparameters(self, hyperparameters: dict) -> int | None | str:
-        return hyperparameters.get("random_state", "N/A")
-
     # TODO: Add in documentation that Categorical default is the first index
     # TODO: enable HPO for RF models
     def _get_default_searchspace(self):
@@ -153,13 +151,13 @@ class RFModel(AbstractModel):
             hyperparameters = {}
         n_estimators_final = hyperparameters.get("n_estimators", 300)
         if isinstance(n_estimators_final, int):
-
+            n_estimators = n_estimators_final
         else:  # if search space
-
+            n_estimators = 40
         num_trees_per_estimator = cls._get_num_trees_per_estimator_static(problem_type=problem_type, num_classes=num_classes)
         bytes_per_estimator = num_trees_per_estimator * len(X) / 60000 * 1e6  # Underestimates by 3x on ExtraTrees
-
-        return
+        expected_memory_usage = int(bytes_per_estimator * n_estimators)
+        return expected_memory_usage
 
     def _validate_fit_memory_usage(self, mem_error_threshold: float = 0.5, mem_warning_threshold: float = 0.4, mem_size_threshold: int = 1e7, **kwargs):
         return super()._validate_fit_memory_usage(
@@ -208,7 +206,7 @@ class RFModel(AbstractModel):
         # FIXME: This is inefficient but sklearnex doesn't support computing oob_score after training
         params["oob_score"] = True
 
-        model = model_cls(
+        model = model_cls(**params)
 
         time_train_start = time.time()
         for i, n_estimators in enumerate(n_estimator_increments):
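For a sense of scale, the rewritten RandomForest estimate above works out as follows for an arbitrary example dataset, using the default n_estimators=300 and one tree per estimator as in a binary task (illustrative numbers only):

# Illustrative arithmetic for the rewritten RF memory estimate above.
# Dataset size is an arbitrary example; constants come from the hunk itself.
num_rows = 60_000
num_trees_per_estimator = 1      # e.g. binary classification
n_estimators = 300               # default n_estimators from the hunk

bytes_per_estimator = num_trees_per_estimator * num_rows / 60_000 * 1e6  # 1 MB
expected_memory_usage = int(bytes_per_estimator * n_estimators)          # 300 MB
print(f"expected memory usage: {expected_memory_usage / 1e6:.0f} MB")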