autogluon.tabular 1.4.1b20251014__py3-none-any.whl → 1.5.0b20251222__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. autogluon/tabular/configs/hyperparameter_configs.py +4 -0
  2. autogluon/tabular/configs/presets_configs.py +39 -2
  3. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2025.py +2 -44
  4. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_cpu_2025_12_18.py +2 -0
  5. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_gpu_2025_12_18.py +2 -0
  6. autogluon/tabular/learner/default_learner.py +1 -0
  7. autogluon/tabular/models/__init__.py +3 -1
  8. autogluon/tabular/models/abstract/__init__.py +0 -0
  9. autogluon/tabular/models/abstract/abstract_torch_model.py +148 -0
  10. autogluon/tabular/models/catboost/catboost_model.py +2 -5
  11. autogluon/tabular/models/ebm/ebm_model.py +2 -6
  12. autogluon/tabular/models/fastainn/tabular_nn_fastai.py +9 -3
  13. autogluon/tabular/models/lgb/lgb_model.py +60 -17
  14. autogluon/tabular/models/lgb/lgb_utils.py +2 -2
  15. autogluon/tabular/models/lr/lr_model.py +2 -4
  16. autogluon/tabular/models/lr/lr_preprocessing_utils.py +6 -7
  17. autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py +14 -1
  18. autogluon/tabular/models/mitra/mitra_model.py +55 -29
  19. autogluon/tabular/models/realmlp/realmlp_model.py +8 -5
  20. autogluon/tabular/models/rf/rf_model.py +6 -8
  21. autogluon/tabular/models/tabdpt/__init__.py +0 -0
  22. autogluon/tabular/models/tabdpt/tabdpt_model.py +253 -0
  23. autogluon/tabular/models/tabicl/tabicl_model.py +15 -5
  24. autogluon/tabular/models/tabm/tabm_model.py +25 -8
  25. autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py +7 -5
  26. autogluon/tabular/models/tabpfnv2/tabpfnv2_5_model.py +451 -0
  27. autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +87 -12
  28. autogluon/tabular/models/tabprep/__init__.py +0 -0
  29. autogluon/tabular/models/tabprep/prep_lgb_model.py +21 -0
  30. autogluon/tabular/models/tabprep/prep_mixin.py +220 -0
  31. autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py +3 -6
  32. autogluon/tabular/models/tabular_nn/utils/data_preprocessor.py +12 -4
  33. autogluon/tabular/models/xgboost/xgboost_model.py +3 -4
  34. autogluon/tabular/predictor/predictor.py +50 -20
  35. autogluon/tabular/registry/_ag_model_registry.py +8 -2
  36. autogluon/tabular/testing/fit_helper.py +61 -0
  37. autogluon/tabular/trainer/abstract_trainer.py +45 -9
  38. autogluon/tabular/trainer/auto_trainer.py +5 -0
  39. autogluon/tabular/version.py +1 -1
  40. autogluon.tabular-1.5.0b20251222-py3.11-nspkg.pth +1 -0
  41. {autogluon.tabular-1.4.1b20251014.dist-info → autogluon_tabular-1.5.0b20251222.dist-info}/METADATA +97 -87
  42. {autogluon.tabular-1.4.1b20251014.dist-info → autogluon_tabular-1.5.0b20251222.dist-info}/RECORD +48 -38
  43. {autogluon.tabular-1.4.1b20251014.dist-info → autogluon_tabular-1.5.0b20251222.dist-info}/WHEEL +1 -1
  44. autogluon.tabular-1.4.1b20251014-py3.9-nspkg.pth +0 -1
  45. {autogluon.tabular-1.4.1b20251014.dist-info → autogluon_tabular-1.5.0b20251222.dist-info/licenses}/LICENSE +0 -0
  46. {autogluon.tabular-1.4.1b20251014.dist-info → autogluon_tabular-1.5.0b20251222.dist-info/licenses}/NOTICE +0 -0
  47. {autogluon.tabular-1.4.1b20251014.dist-info → autogluon_tabular-1.5.0b20251222.dist-info}/namespace_packages.txt +0 -0
  48. {autogluon.tabular-1.4.1b20251014.dist-info → autogluon_tabular-1.5.0b20251222.dist-info}/top_level.txt +0 -0
  49. {autogluon.tabular-1.4.1b20251014.dist-info → autogluon_tabular-1.5.0b20251222.dist-info}/zip-safe +0 -0

autogluon/tabular/models/lgb/lgb_model.py
@@ -46,6 +46,8 @@ class LGBModel(AbstractModel):
     ag_priority_by_problem_type = MappingProxyType({
         SOFTCLASS: 100
     })
+    seed_name = "seed"
+    seed_name_alt = ["seed_value", "random_seed", "random_state"]
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -101,10 +103,46 @@ class LGBModel(AbstractModel):
         Scales linearly with the number of estimators, number of classes, and number of leaves.
         Memory usage peaks during model saving, with the peak consuming approximately 2-4x the size of the model in memory.
         """
+        data_mem_usage = get_approximate_df_mem_usage(X).sum()
+        return cls._estimate_memory_usage_common(
+            num_features=X.shape[1],
+            data_mem_usage=data_mem_usage,
+            hyperparameters=hyperparameters,
+            num_classes=num_classes,
+        )
+
+    @classmethod
+    def _estimate_memory_usage_static_lite(
+        cls,
+        num_samples: int,
+        num_features: int,
+        num_bytes_per_cell: float = 4,
+        hyperparameters: dict = None,
+        num_classes: int = 1,
+        **kwargs,
+    ) -> int:
+        data_mem_usage = num_samples * num_features * num_bytes_per_cell
+        return cls._estimate_memory_usage_common(
+            num_features=num_features,
+            data_mem_usage=data_mem_usage,
+            hyperparameters=hyperparameters,
+            num_classes=num_classes,
+        )
+
+    @classmethod
+    def _estimate_memory_usage_common(
+        cls,
+        num_features: int,
+        data_mem_usage: int | float,
+        hyperparameters: dict | None = None,
+        num_classes: int = 1,
+    ) -> int:
+        """
+        Utility method to avoid code duplication
+        """
         if hyperparameters is None:
             hyperparameters = {}
         num_classes = num_classes if num_classes else 1  # num_classes could be None after initialization if it's a regression problem
-        data_mem_usage = get_approximate_df_mem_usage(X).sum()
         data_mem_usage_bytes = data_mem_usage * 5 + data_mem_usage / 4 * num_classes  # TODO: Extremely crude approximation, can be vastly improved
 
         n_trees_per_estimator = num_classes if num_classes > 2 else 1
@@ -112,7 +150,7 @@ class LGBModel(AbstractModel):
         max_bins = hyperparameters.get("max_bins", 255)
         num_leaves = hyperparameters.get("num_leaves", 31)
         # Memory usage of histogram based on https://github.com/microsoft/LightGBM/issues/562#issuecomment-304524592
-        histogram_mem_usage_bytes = 20 * max_bins * len(X.columns) * num_leaves
+        histogram_mem_usage_bytes = 20 * max_bins * num_features * num_leaves
         histogram_mem_usage_bytes_max = hyperparameters.get("histogram_pool_size", None)
         if histogram_mem_usage_bytes_max is not None:
             histogram_mem_usage_bytes_max *= 1e6  # Convert megabytes to bytes, `histogram_pool_size` is in MB.
@@ -122,18 +160,11 @@ class LGBModel(AbstractModel):
 
         mem_size_per_estimator = n_trees_per_estimator * num_leaves * 100  # very rough estimate
         n_estimators = hyperparameters.get("num_boost_round", DEFAULT_NUM_BOOST_ROUND)
-        n_estimators_min = min(n_estimators, 1000)
-        mem_size_estimators = n_estimators_min * mem_size_per_estimator  # memory estimate after fitting up to 1000 estimators
+        n_estimators_min = min(n_estimators, 5000)
+        mem_size_estimators = n_estimators_min * mem_size_per_estimator  # memory estimate after fitting up to 5000 estimators
 
         approx_mem_size_req = data_mem_usage_bytes + histogram_mem_usage_bytes + mem_size_estimators
-        return approx_mem_size_req
-
-    def _get_random_seed_from_hyperparameters(self, hyperparameters: dict) -> int | None | str:
-        if "seed_value" in hyperparameters:
-            return hyperparameters["seed_value"]
-        if "seed" in hyperparameters:
-            return hyperparameters["seed"]
-        return "N/A"
+        return int(approx_mem_size_req)
 
     def _fit(self, X, y, X_val=None, y_val=None, time_limit=None, num_gpus=0, num_cpus=0, sample_weight=None, sample_weight_val=None, verbosity=2, **kwargs):
         try_import_lightgbm()  # raise helpful error message if LightGBM isn't installed
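
For a sense of scale, here is a standalone sketch of the arithmetic the estimator above performs, following the `_estimate_memory_usage_static_lite` path with hypothetical values (1,000,000 rows, 50 float32 features, regression, default `max_bins`/`num_leaves`); the numbers are illustrative only:

# Hypothetical worked example of the memory estimate above (values are illustrative only).
num_samples = 1_000_000
num_features = 50
num_classes = 1                       # regression / binary -> 1 tree per boosting round
num_bytes_per_cell = 4                # float32, the default assumed by _estimate_memory_usage_static_lite

data_mem_usage = num_samples * num_features * num_bytes_per_cell               # 200 MB of raw data
data_mem_usage_bytes = data_mem_usage * 5 + data_mem_usage / 4 * num_classes   # ~1.05 GB

max_bins, num_leaves = 255, 31
histogram_mem_usage_bytes = 20 * max_bins * num_features * num_leaves          # ~7.9 MB

num_boost_round = 10_000              # hypothetical config value
n_estimators_min = min(num_boost_round, 5000)
mem_size_estimators = n_estimators_min * (1 * num_leaves * 100)                # ~15.5 MB

approx_mem_size_req = int(data_mem_usage_bytes + histogram_mem_usage_bytes + mem_size_estimators)
print(f"{approx_mem_size_req / 1e9:.2f} GB")  # ~1.07 GB

Raising the estimator cap from 1000 to 5000 only changes the last term, making the estimate less likely to understate memory for configs with very large `num_boost_round`.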

@@ -292,8 +323,6 @@ class LGBModel(AbstractModel):
         elif self.problem_type == QUANTILE:
             train_params["params"]["quantile_levels"] = self.quantile_levels
 
-        train_params["params"]["seed"] = self.random_seed
-
         # Train LightGBM model:
         # Note that self.model contains a <class 'lightgbm.basic.Booster'> not a LightBGMClassifier or LightGBMRegressor object
         from lightgbm.basic import LightGBMError
@@ -378,6 +407,9 @@ class LGBModel(AbstractModel):
         X = self.preprocess(X, **kwargs)
 
         y_pred_proba = self.model.predict(X, num_threads=num_cpus)
+        return self._post_process_predictions(y_pred_proba=y_pred_proba)
+
+    def _post_process_predictions(self, y_pred_proba) -> np.ndarray:
         if self.problem_type == QUANTILE:
             # y_pred_proba is a pd.DataFrame, need to convert
             y_pred_proba = y_pred_proba.to_numpy()
@@ -430,7 +462,7 @@ class LGBModel(AbstractModel):
         self,
         X: DataFrame,
         y: Series,
-        params,
+        params: dict,
         X_val=None,
         y_val=None,
         X_test=None,
@@ -439,11 +471,14 @@ class LGBModel(AbstractModel):
         sample_weight_val=None,
         sample_weight_test=None,
         save=False,
+        init_train=None,
+        init_val=None,
+        init_test=None,
     ):
         lgb_dataset_params_keys = ["two_round"]  # Keys that are specific to lightGBM Dataset object construction.
         data_params = {key: params[key] for key in lgb_dataset_params_keys if key in params}.copy()
 
-        X = self.preprocess(X, is_train=True)
+        X = self.preprocess(X, y=y, is_train=True)
         if X_val is not None:
             X_val = self.preprocess(X_val)
         if X_test is not None:
@@ -465,7 +500,13 @@ class LGBModel(AbstractModel):
 
         # X, W_train = self.convert_to_weight(X=X)
         dataset_train = construct_dataset(
-            x=X, y=y, location=os.path.join("self.path", "datasets", "train"), params=data_params, save=save, weight=sample_weight
+            x=X,
+            y=y,
+            location=os.path.join("self.path", "datasets", "train"),
+            params=data_params,
+            save=save,
+            weight=sample_weight,
+            init_score=init_train,
         )
         # dataset_train = construct_dataset_lowest_memory(X=X, y=y, location=self.path + 'datasets/train', params=data_params)
         if X_val is not None:
@@ -478,6 +519,7 @@ class LGBModel(AbstractModel):
                 params=data_params,
                 save=save,
                 weight=sample_weight_val,
+                init_score=init_val,
             )
             # dataset_val = construct_dataset_lowest_memory(X=X_val, y=y_val, location=self.path + 'datasets/val', reference=dataset_train, params=data_params)
         else:
@@ -492,6 +534,7 @@ class LGBModel(AbstractModel):
                 params=data_params,
                 save=save,
                 weight=sample_weight_test,
+                init_score=init_test,
            )
        else:
            dataset_test = None

autogluon/tabular/models/lgb/lgb_utils.py
@@ -104,11 +104,11 @@ def softclass_lgbobj(preds, train_data):
     return grad.flatten("F"), hess.flatten("F")
 
 
-def construct_dataset(x: DataFrame, y: Series, location=None, reference=None, params=None, save=False, weight=None):
+def construct_dataset(x: DataFrame, y: Series, location=None, reference=None, params=None, save=False, weight=None, init_score=None):
     try_import_lightgbm()
     import lightgbm as lgb
 
-    dataset = lgb.Dataset(data=x, label=y, reference=reference, free_raw_data=True, params=params, weight=weight)
+    dataset = lgb.Dataset(data=x, label=y, reference=reference, free_raw_data=True, params=params, weight=weight, init_score=init_score)
 
    if save:
        assert location is not None
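
The new `init_score` argument is forwarded straight to `lightgbm.Dataset`, whose `init_score` parameter makes boosting start from per-row base scores instead of zero. A minimal standalone illustration with synthetic data (not package code); the new `init_train`/`init_val`/`init_test` arguments in `lgb_model.py` above presumably feed this parameter:

import numpy as np
import lightgbm as lgb

rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 5))
y = X[:, 0] * 2.0 + rng.normal(size=1000)

# Per-row initial scores, e.g. predictions of a previously fitted model;
# boosting then fits residuals relative to these scores rather than to zero.
init_score = np.full(1000, y.mean())

dataset = lgb.Dataset(data=X, label=y, init_score=init_score, free_raw_data=True)
booster = lgb.train({"objective": "regression", "verbosity": -1}, dataset, num_boost_round=10)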

autogluon/tabular/models/lr/lr_model.py
@@ -43,6 +43,7 @@ class LinearModel(AbstractModel):
     ag_key = "LR"
     ag_name = "LinearModel"
     ag_priority = 30
+    seed_name = "random_state"
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -162,9 +163,6 @@ class LinearModel(AbstractModel):
         for param, val in default_params.items():
             self._set_default_param_value(param, val)
 
-    def _get_random_seed_from_hyperparameters(self, hyperparameters: dict) -> int | None | str:
-        return hyperparameters.get("random_seed", "N/A")
-
     def _get_default_searchspace(self):
         return get_default_searchspace(self.problem_type)
 
@@ -218,7 +216,7 @@ class LinearModel(AbstractModel):
         total_iter = 0
         total_iter_used = 0
         total_max_iter = sum(max_iter_list)
-        model = model_cls(max_iter=max_iter_list[0], random_state=self.random_seed, **params)
+        model = model_cls(max_iter=max_iter_list[0], **params)
         early_stop = False
         for i, cur_max_iter in enumerate(max_iter_list):
             if time_left is not None and (i > 0):
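
Across this diff the per-model `_get_random_seed_from_hyperparameters` overrides are replaced by `seed_name` (and, for LightGBM, `seed_name_alt`) class attributes, and the explicit `random_state=self.random_seed` / `seed=self.random_seed` constructor arguments are dropped, which suggests the seed is now injected into the hyperparameters upstream under `seed_name`. The following is a hypothetical sketch of the kind of generic lookup such attributes enable; it is not the actual AbstractModel implementation:

# Hypothetical illustration of resolving a model's seed from its hyperparameters
# via class-level seed_name / seed_name_alt attributes. Not the real AbstractModel code.
def resolve_seed(model_cls, hyperparameters: dict):
    names = [model_cls.seed_name, *getattr(model_cls, "seed_name_alt", [])]
    for name in names:
        if name in hyperparameters:
            return hyperparameters[name]
    return "N/A"  # mirrors the default returned by the removed per-model overrides


class LGBModelStub:
    seed_name = "seed"
    seed_name_alt = ["seed_value", "random_seed", "random_state"]


print(resolve_seed(LGBModelStub, {"seed_value": 0}))  # -> 0
print(resolve_seed(LGBModelStub, {}))                 # -> "N/A"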

autogluon/tabular/models/lr/lr_preprocessing_utils.py
@@ -5,20 +5,19 @@ from autogluon.features.generators import OneHotEncoderFeatureGenerator
 
 class OheFeaturesGenerator(BaseEstimator, TransformerMixin):
     def __init__(self):
-        self._feature_names = []
-        self._encoder = None
+        pass
 
     def fit(self, X, y=None):
-        self._encoder = OneHotEncoderFeatureGenerator(max_levels=10000, verbosity=0)
-        self._encoder.fit(X)
-        self._feature_names = self._encoder.features_out
+        self.encoder_ = OneHotEncoderFeatureGenerator(max_levels=10000, verbosity=0)
+        self.encoder_.fit(X)
+        self.feature_names_ = self.encoder_.features_out
         return self
 
     def transform(self, X, y=None):
-        return self._encoder.transform_ohe(X)
+        return self.encoder_.transform_ohe(X)
 
     def get_feature_names(self):
-        return self._feature_names
+        return self.feature_names_
 
 
 class NlpDataPreprocessor(BaseEstimator, TransformerMixin):
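
The `OheFeaturesGenerator` change follows the scikit-learn estimator convention: `__init__` stores no learned state, and attributes learned during `fit` carry a trailing underscore, which lets utilities such as `sklearn.utils.validation.check_is_fitted` detect fitted instances. A small standalone illustration of the convention (synthetic data, not package code):

import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted


class UppercaseColumns(BaseEstimator, TransformerMixin):
    def __init__(self):
        pass  # no learned state in __init__, mirroring OheFeaturesGenerator

    def fit(self, X, y=None):
        # Trailing underscore marks an attribute learned during fit.
        self.columns_ = [c.upper() for c in X.columns]
        return self

    def transform(self, X, y=None):
        check_is_fitted(self)  # succeeds only once a fitted (underscore) attribute exists
        return X.set_axis(self.columns_, axis=1)


X = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
print(UppercaseColumns().fit(X).transform(X).columns.tolist())  # ['A', 'B']

Keeping `__init__` stateless also keeps `get_params`/`set_params` and estimator cloning consistent with the scikit-learn contract.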

autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py
@@ -73,6 +73,20 @@ class TrainerFinetune(BaseEstimator):
 
         self.metric = self.cfg.hyperparams['metric']
 
+    def set_device(self, device: str):
+        self.device = device
+        self.model = self.model.to(device=device, non_blocking=True)
+
+    def post_fit_optimize(self):
+        # Minimize memory usage post-fit
+        self.checkpoint = None
+        self.optimizer = None
+        self.scaler = None
+        self.scheduler_warmup = None
+        self.scheduler_reduce_on_plateau = None
+        self.loss = None
+        self.early_stopping = None
+        self.metric = None
 
     def train(self, x_train: np.ndarray, y_train: np.ndarray, x_val: np.ndarray, y_val: np.ndarray):
 
@@ -184,7 +198,6 @@ class TrainerFinetune(BaseEstimator):
 
         self.checkpoint.set_to_best(self.model)
 
-
    def evaluate(self, x_support: np.ndarray, y_support: np.ndarray, x_query: np.ndarray, y_query: np.ndarray) -> PredictionMetrics:
 
        self.model.eval()

autogluon/tabular/models/mitra/mitra_model.py
@@ -2,19 +2,21 @@ from __future__ import annotations
 
 import logging
 import os
+from pathlib import Path
 from typing import List, Optional
 
 import pandas as pd
+from typing_extensions import Self
 
 from autogluon.common.utils.resource_utils import ResourceManager
-from autogluon.core.models import AbstractModel
+from autogluon.tabular.models.abstract.abstract_torch_model import AbstractTorchModel
 from autogluon.features.generators import LabelEncoderFeatureGenerator
 from autogluon.tabular import __version__
 
 logger = logging.getLogger(__name__)
 
 
-class MitraModel(AbstractModel):
+class MitraModel(AbstractTorchModel):
     """
     Mitra is a tabular foundation model pre-trained purely on synthetic data with the goal
     of optimizing fine-tuning performance over in-context learning performance.
@@ -32,6 +34,7 @@ class MitraModel(AbstractModel):
     ag_name = "Mitra"
     weights_file_name = "model.pt"
     ag_priority = 55
+    seed_name = "seed"
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -77,9 +80,6 @@ class MitraModel(AbstractModel):
 
         return X
 
-    def _get_random_seed_from_hyperparameters(self, hyperparameters: dict) -> int | None | str:
-        return hyperparameters.get("seed", "N/A")
-
     def _fit(
         self,
         X: pd.DataFrame,
@@ -157,16 +157,13 @@ class MitraModel(AbstractModel):
         if "verbose" not in hyp:
             hyp["verbose"] = verbosity >= 3
 
-        self.model = model_cls(
-            seed=self.random_seed,
-            **hyp,
-        )
+        self.model = model_cls(**hyp)
 
         X = self.preprocess(X, is_train=True)
         if X_val is not None:
             X_val = self.preprocess(X_val)
 
-        self.model = self.model.fit(
+        model = self.model.fit(
             X=X,
             y=y,
             X_val=X_val,
@@ -174,6 +171,11 @@ class MitraModel(AbstractModel):
             time_limit=time_limit,
         )
 
+        for i in range(len(model.trainers)):
+            model.trainers[i].post_fit_optimize()
+
+        self.model = model
+
         if need_to_reset_torch_threads:
             torch.set_num_threads(torch_threads_og)
 
@@ -195,42 +197,63 @@ class MitraModel(AbstractModel):
         )
         return default_auxiliary_params
 
-    @property
-    def weights_path(self) -> str:
-        return os.path.join(self.path, self.weights_file_name)
+    def weights_path(self, path: str | None = None) -> str:
+        if path is None:
+            path = self.path
+        return str(Path(path) / self.weights_file_name)
 
     def save(self, path: str = None, verbose=True) -> str:
         _model_weights_list = None
         if self.model is not None:
+            self._save_model_artifact(path=path)
             _model_weights_list = []
             for i in range(len(self.model.trainers)):
                 _model_weights_list.append(self.model.trainers[i].model)
-                self.model.trainers[i].checkpoint = None
                 self.model.trainers[i].model = None
-                self.model.trainers[i].optimizer = None
-                self.model.trainers[i].scheduler_warmup = None
-                self.model.trainers[i].scheduler_reduce_on_plateau = None
-            self._weights_saved = True
+
         path = super().save(path=path, verbose=verbose)
         if _model_weights_list is not None:
-            import torch
-
-            os.makedirs(self.path, exist_ok=True)
-            torch.save(_model_weights_list, self.weights_path)
             for i in range(len(self.model.trainers)):
                 self.model.trainers[i].model = _model_weights_list[i]
         return path
 
+    def _save_model_artifact(self, path: str | None):
+        if path is None:
+            path = self.path
+        import torch
+        device_og = self.device
+        self.set_device("cpu")
+
+        _model_weights_list = []
+        for i in range(len(self.model.trainers)):
+            _model_weights_list.append(self.model.trainers[i].model)
+
+        os.makedirs(path, exist_ok=True)
+        torch.save(_model_weights_list, self.weights_path(path=path))
+        self.set_device(device_og)
+        self._weights_saved = True
+
+    def _load_model_artifact(self):
+        import torch
+        device = self.suggest_device_infer()
+        model_weights_list = torch.load(self.weights_path(), weights_only=False)  # nosec B614
+        for i in range(len(self.model.trainers)):
+            self.model.trainers[i].model = model_weights_list[i]
+        self.set_device(device)
+
+    def _set_device(self, device: str):
+        for i in range(len(self.model.trainers)):
+            self.model.trainers[i].set_device(device)
+
+    def get_device(self) -> str:
+        return self.model.trainers[0].device
+
     @classmethod
-    def load(cls, path: str, reset_paths=False, verbose=True):
+    def load(cls, path: str, reset_paths=True, verbose=True) -> Self:
         model: MitraModel = super().load(path=path, reset_paths=reset_paths, verbose=verbose)
 
         if model._weights_saved:
-            import torch
-
-            model_weights_list = torch.load(model.weights_path, weights_only=False)  # nosec B614
-            for i in range(len(model.model.trainers)):
-                model.model.trainers[i].model = model_weights_list[i]
+            model._load_model_artifact()
             model._weights_saved = False
         return model
 
@@ -375,9 +398,12 @@ class MitraModel(AbstractModel):
         return int(gpu_memory_mb * 1e6)
 
     @classmethod
-    def _class_tags(cls) -> dict:
+    def _class_tags(cls):
         return {
             "can_estimate_memory_usage_static": True,
+            "can_set_device": True,
+            "set_device_on_save_to": None,
+            "set_device_on_load": False,
        }
 
    def _more_tags(self) -> dict:
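
The refactored save path moves the transformer weights to CPU, serializes them to a separate `model.pt` next to the pickled model, and restores the original device afterwards, while `load` re-attaches the weights on a device chosen at load time. A standalone sketch of that general pattern in plain PyTorch (hypothetical module and paths, not the Mitra internals):

import os
import torch
import torch.nn as nn

model = nn.Linear(8, 1).to("cuda" if torch.cuda.is_available() else "cpu")
path = "/tmp/mitra_like_artifact"
os.makedirs(path, exist_ok=True)

# Save: move to CPU so the artifact is device-agnostic, then restore the original device.
device_og = next(model.parameters()).device
model.to("cpu")
torch.save([model], os.path.join(path, "model.pt"))  # a list mirrors one entry per trainer
model.to(device_og)

# Load: pick a device for inference and re-attach the weights there.
device = "cuda" if torch.cuda.is_available() else "cpu"
weights_list = torch.load(os.path.join(path, "model.pt"), weights_only=False)
restored = weights_list[0].to(device)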

autogluon/tabular/models/realmlp/realmlp_model.py
@@ -16,7 +16,7 @@ from sklearn.impute import SimpleImputer
 
 from autogluon.common.utils.pandas_utils import get_approximate_df_mem_usage
 from autogluon.common.utils.resource_utils import ResourceManager
-from autogluon.core.models import AbstractModel
+from autogluon.tabular.models.abstract.abstract_torch_model import AbstractTorchModel
 from autogluon.tabular import __version__
 
 logger = logging.getLogger(__name__)
@@ -34,7 +34,7 @@ def set_logger_level(logger_name: str, level: int):
 
 
 # pip install pytabkit
-class RealMLPModel(AbstractModel):
+class RealMLPModel(AbstractTorchModel):
     """
     RealMLP is an improved multilayer perception (MLP) model
     through a bag of tricks and better default hyperparameters.
@@ -51,6 +51,7 @@ class RealMLPModel(AbstractModel):
     ag_key = "REALMLP"
     ag_name = "RealMLP"
     ag_priority = 75
+    seed_name = "random_state"
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -82,8 +83,11 @@ class RealMLPModel(AbstractModel):
             model_cls = RealMLP_TD_S_Regressor
         return model_cls
 
-    def _get_random_seed_from_hyperparameters(self, hyperparameters: dict) -> int | None | str:
-        return hyperparameters.get("random_state", "N/A")
+    def get_device(self) -> str:
+        return self.model.device
+
+    def _set_device(self, device: str):
+        self.model.to(device)
 
     def _fit(
         self,
@@ -178,7 +182,6 @@ class RealMLPModel(AbstractModel):
         self.model = model_cls(
             n_threads=num_cpus,
             device=device,
-            random_state=self.random_seed,
            **init_kwargs,
            **hyp,
        )

autogluon/tabular/models/rf/rf_model.py
@@ -30,6 +30,7 @@ class RFModel(AbstractModel):
     ag_key = "RF"
     ag_name = "RandomForest"
     ag_priority = 80
+    seed_name = "random_state"
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -107,9 +108,6 @@ class RFModel(AbstractModel):
         for param, val in default_params.items():
             self._set_default_param_value(param, val)
 
-    def _get_random_seed_from_hyperparameters(self, hyperparameters: dict) -> int | None | str:
-        return hyperparameters.get("random_state", "N/A")
-
     # TODO: Add in documentation that Categorical default is the first index
     # TODO: enable HPO for RF models
     def _get_default_searchspace(self):
@@ -153,13 +151,13 @@ class RFModel(AbstractModel):
             hyperparameters = {}
         n_estimators_final = hyperparameters.get("n_estimators", 300)
         if isinstance(n_estimators_final, int):
-            n_estimators_minimum = min(40, n_estimators_final)
+            n_estimators = n_estimators_final
         else:  # if search space
-            n_estimators_minimum = 40
+            n_estimators = 40
         num_trees_per_estimator = cls._get_num_trees_per_estimator_static(problem_type=problem_type, num_classes=num_classes)
         bytes_per_estimator = num_trees_per_estimator * len(X) / 60000 * 1e6  # Underestimates by 3x on ExtraTrees
-        expected_min_memory_usage = int(bytes_per_estimator * n_estimators_minimum)
-        return expected_min_memory_usage
+        expected_memory_usage = int(bytes_per_estimator * n_estimators)
+        return expected_memory_usage
 
     def _validate_fit_memory_usage(self, mem_error_threshold: float = 0.5, mem_warning_threshold: float = 0.4, mem_size_threshold: int = 1e7, **kwargs):
         return super()._validate_fit_memory_usage(
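
With the 40-estimator floor removed, the static estimate now scales with the full `n_estimators`. A quick standalone check of the arithmetic for a hypothetical 100,000-row dataset with one tree per estimator (values are illustrative only):

# Hypothetical worked example of the RandomForest memory estimate above.
n_rows = 100_000
num_trees_per_estimator = 1          # e.g. regression / binary classification
n_estimators = 300                   # default used by the estimate

bytes_per_estimator = num_trees_per_estimator * n_rows / 60000 * 1e6   # ~1.67 MB per estimator
expected_memory_usage = int(bytes_per_estimator * n_estimators)        # ~500 MB

# Under the old logic this was capped at min(40, n_estimators) estimators, i.e. ~67 MB,
# which understated the memory needed to hold the final 300-tree forest.
print(f"{expected_memory_usage / 1e6:.0f} MB")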

@@ -208,7 +206,7 @@ class RFModel(AbstractModel):
             # FIXME: This is inefficient but sklearnex doesn't support computing oob_score after training
             params["oob_score"] = True
 
-        model = model_cls(random_state=self.random_seed, **params)
+        model = model_cls(**params)
 
        time_train_start = time.time()
        for i, n_estimators in enumerate(n_estimator_increments):