autogluon.tabular 1.4.1b20250916__py3-none-any.whl → 1.4.1b20251212__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of autogluon.tabular might be problematic. (The original page linked to more details here.)

Files changed (29)
  1. autogluon/tabular/models/catboost/catboost_model.py +3 -4
  2. autogluon/tabular/models/ebm/ebm_model.py +2 -6
  3. autogluon/tabular/models/fastainn/tabular_nn_fastai.py +4 -2
  4. autogluon/tabular/models/knn/knn_model.py +6 -2
  5. autogluon/tabular/models/lgb/lgb_model.py +56 -24
  6. autogluon/tabular/models/lr/lr_model.py +6 -4
  7. autogluon/tabular/models/lr/lr_preprocessing_utils.py +6 -7
  8. autogluon/tabular/models/mitra/mitra_model.py +2 -7
  9. autogluon/tabular/models/realmlp/realmlp_model.py +1 -4
  10. autogluon/tabular/models/rf/rf_model.py +10 -8
  11. autogluon/tabular/models/tabicl/tabicl_model.py +1 -4
  12. autogluon/tabular/models/tabm/tabm_model.py +76 -3
  13. autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py +7 -5
  14. autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +1 -4
  15. autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py +2 -5
  16. autogluon/tabular/models/xgboost/xgboost_model.py +8 -5
  17. autogluon/tabular/predictor/predictor.py +3 -2
  18. autogluon/tabular/testing/fit_helper.py +28 -0
  19. autogluon/tabular/version.py +1 -1
  20. autogluon.tabular-1.4.1b20251212-py3.11-nspkg.pth +1 -0
  21. {autogluon.tabular-1.4.1b20250916.dist-info → autogluon_tabular-1.4.1b20251212.dist-info}/METADATA +89 -80
  22. {autogluon.tabular-1.4.1b20250916.dist-info → autogluon_tabular-1.4.1b20251212.dist-info}/RECORD +28 -28
  23. {autogluon.tabular-1.4.1b20250916.dist-info → autogluon_tabular-1.4.1b20251212.dist-info}/WHEEL +1 -1
  24. autogluon.tabular-1.4.1b20250916-py3.9-nspkg.pth +0 -1
  25. {autogluon.tabular-1.4.1b20250916.dist-info → autogluon_tabular-1.4.1b20251212.dist-info/licenses}/LICENSE +0 -0
  26. {autogluon.tabular-1.4.1b20250916.dist-info → autogluon_tabular-1.4.1b20251212.dist-info/licenses}/NOTICE +0 -0
  27. {autogluon.tabular-1.4.1b20250916.dist-info → autogluon_tabular-1.4.1b20251212.dist-info}/namespace_packages.txt +0 -0
  28. {autogluon.tabular-1.4.1b20250916.dist-info → autogluon_tabular-1.4.1b20251212.dist-info}/top_level.txt +0 -0
  29. {autogluon.tabular-1.4.1b20250916.dist-info → autogluon_tabular-1.4.1b20251212.dist-info}/zip-safe +0 -0
@@ -39,6 +39,7 @@ class CatBoostModel(AbstractModel):
39
39
  ag_priority_by_problem_type = MappingProxyType({
40
40
  SOFTCLASS: 60
41
41
  })
42
+ seed_name = "random_seed"
42
43
 
43
44
  def __init__(self, **kwargs):
44
45
  super().__init__(**kwargs)
@@ -116,9 +117,6 @@ class CatBoostModel(AbstractModel):
116
117
  approx_mem_size_req = data_mem_usage_bytes + histogram_mem_usage_bytes + baseline_memory_bytes
117
118
  return approx_mem_size_req
118
119
 
119
- def _get_random_seed_from_hyperparameters(self, hyperparameters: dict) -> int | None | str:
120
- return hyperparameters.get("random_seed", "N/A")
121
-
122
120
  # TODO: Use Pool in preprocess, optimize bagging to do Pool.split() to avoid re-computing pool for each fold! Requires stateful + y
123
121
  # Pool is much more memory efficient, avoids copying data twice in memory
124
122
  def _fit(self, X, y, X_val=None, y_val=None, time_limit=None, num_gpus=0, num_cpus=-1, sample_weight=None, sample_weight_val=None, **kwargs):
@@ -128,7 +126,6 @@ class CatBoostModel(AbstractModel):
128
126
 
129
127
  ag_params = self._get_ag_params()
130
128
  params = self._get_model_params()
131
- params["random_seed"] = self.random_seed
132
129
 
133
130
  params["thread_count"] = num_cpus
134
131
  if self.problem_type == SOFTCLASS:
@@ -314,6 +311,8 @@ class CatBoostModel(AbstractModel):
314
311
  max_memory_iters = math.floor(available_mem * max_memory_proportion / mem_usage_per_iter)
315
312
 
316
313
  final_iters = min(default_iters, min(max_memory_iters, estimated_iters_in_time))
314
+ if final_iters < 1:
315
+ raise TimeLimitExceeded
317
316
  return final_iters
318
317
 
319
318
  def _predict_proba(self, X, **kwargs):
@@ -56,6 +56,7 @@ class EBMModel(AbstractModel):
56
56
  ag_key = "EBM"
57
57
  ag_name = "EBM"
58
58
  ag_priority = 35
59
+ seed_name = "random_state"
59
60
 
60
61
  def _fit(
61
62
  self,
@@ -89,7 +90,7 @@ class EBMModel(AbstractModel):
89
90
 
90
91
  # Init Class
91
92
  model_cls = get_class_from_problem_type(self.problem_type)
92
- self.model = model_cls(random_state=self.random_seed, **params)
93
+ self.model = model_cls(**params)
93
94
 
94
95
  # Handle validation data format for EBM
95
96
  fit_X = X
@@ -112,11 +113,6 @@ class EBMModel(AbstractModel):
112
113
  )
113
114
  self.model.fit(fit_X, fit_y, sample_weight=fit_sample_weight, bags=bags)
114
115
 
115
- def _get_random_seed_from_hyperparameters(
116
- self, hyperparameters: dict
117
- ) -> int | None | str:
118
- return hyperparameters.get("random_state", "N/A")
119
-
120
116
  def _set_default_params(self):
121
117
  default_params = get_param_baseline(problem_type=self.problem_type, num_classes=self.num_classes)
122
118
  for param, val in default_params.items():
@@ -103,6 +103,7 @@ class NNFastAiTabularModel(AbstractModel):
103
103
  ag_priority_by_problem_type = MappingProxyType({
104
104
  MULTICLASS: 95,
105
105
  })
106
+ seed_name = "random_seed"
106
107
 
107
108
  model_internals_file_name = "model-internals.pkl"
108
109
 
@@ -322,8 +323,9 @@ class NNFastAiTabularModel(AbstractModel):
322
323
  # Make deterministic
323
324
  from fastai.torch_core import set_seed
324
325
 
325
- set_seed(self.random_seed, True)
326
- dls.rng.seed(self.random_seed)
326
+ random_seed = params.pop(self.seed_name, self.default_random_seed)
327
+ set_seed(random_seed, True)
328
+ dls.rng.seed(random_seed)
327
329
 
328
330
  if self.problem_type == QUANTILE:
329
331
  dls.c = len(self.quantile_levels)
@@ -255,9 +255,13 @@ class KNNModel(AbstractModel):
255
255
  self._X_unused_index = [i for i in range(num_rows_max) if i not in idx]
256
256
  return self.model
257
257
 
258
- def _get_maximum_resources(self) -> Dict[str, Union[int, float]]:
258
+ def _get_maximum_resources(self) -> dict[str, int | float]:
259
259
  # use at most 32 cpus to avoid OpenBLAS error: https://github.com/autogluon/autogluon/issues/1020
260
- return {"num_cpus": 32}
260
+ # no GPU support
261
+ return {
262
+ "num_cpus": 32,
263
+ "num_gpus": 0,
264
+ }
261
265
 
262
266
  def _get_default_resources(self):
263
267
  # use at most 32 cpus to avoid OpenBLAS error: https://github.com/autogluon/autogluon/issues/1020
@@ -46,6 +46,8 @@ class LGBModel(AbstractModel):
46
46
  ag_priority_by_problem_type = MappingProxyType({
47
47
  SOFTCLASS: 100
48
48
  })
49
+ seed_name = "seed"
50
+ seed_name_alt = ["seed_value", "random_seed", "random_state"]
49
51
 
50
52
  def __init__(self, **kwargs):
51
53
  super().__init__(**kwargs)
@@ -128,13 +130,6 @@ class LGBModel(AbstractModel):
128
130
  approx_mem_size_req = data_mem_usage_bytes + histogram_mem_usage_bytes + mem_size_estimators
129
131
  return approx_mem_size_req
130
132
 
131
- def _get_random_seed_from_hyperparameters(self, hyperparameters: dict) -> int | None | str:
132
- if "seed_value" in hyperparameters:
133
- return hyperparameters["seed_value"]
134
- if "seed" in hyperparameters:
135
- return hyperparameters["seed"]
136
- return "N/A"
137
-
138
133
  def _fit(self, X, y, X_val=None, y_val=None, time_limit=None, num_gpus=0, num_cpus=0, sample_weight=None, sample_weight_val=None, verbosity=2, **kwargs):
139
134
  try_import_lightgbm() # raise helpful error message if LightGBM isn't installed
140
135
  start_time = time.time()
@@ -168,7 +163,7 @@ class LGBModel(AbstractModel):
168
163
  # Before enabling GPU, we should add code to detect that GPU-enabled version is installed and that a valid GPU exists.
169
164
  # GPU training heavily alters accuracy, often in a negative manner. We will have to be careful about when to use GPU.
170
165
  params["device"] = "gpu"
171
- logger.log(20, f"\tTraining {self.name} with GPU, note that this may negatively impact model quality compared to CPU training.")
166
+ logger.log(20, f"\tWarning: Training LightGBM with GPU. This may negatively impact model quality compared to CPU training.")
172
167
  logger.log(15, f"\tFitting {num_boost_round} rounds... Hyperparameters: {params}")
173
168
 
174
169
  if "num_threads" not in params:
@@ -292,8 +287,6 @@ class LGBModel(AbstractModel):
292
287
  elif self.problem_type == QUANTILE:
293
288
  train_params["params"]["quantile_levels"] = self.quantile_levels
294
289
 
295
- train_params["params"]["seed"] = self.random_seed
296
-
297
290
  # Train LightGBM model:
298
291
  # Note that self.model contains a <class 'lightgbm.basic.Booster'> not a LightBGMClassifier or LightGBMRegressor object
299
292
  from lightgbm.basic import LightGBMError
@@ -305,16 +298,28 @@ class LGBModel(AbstractModel):
305
298
  try:
306
299
  self.model = train_lgb_model(early_stopping_callback_kwargs=early_stopping_callback_kwargs, **train_params)
307
300
  except LightGBMError:
308
- if train_params["params"].get("device", "cpu") != "gpu":
301
+ if train_params["params"].get("device", "cpu") not in ["gpu", "cuda"]:
309
302
  raise
310
303
  else:
311
- logger.warning(
312
- "Warning: GPU mode might not be installed for LightGBM, GPU training raised an exception. Falling back to CPU training..."
313
- "Refer to LightGBM GPU documentation: https://github.com/Microsoft/LightGBM/tree/master/python-package#build-gpu-version"
314
- "One possible method is:"
315
- "\tpip uninstall lightgbm -y"
316
- "\tpip install lightgbm --install-option=--gpu"
317
- )
304
+ if train_params["params"]["device"] == "gpu":
305
+ logger.warning(
306
+ "Warning: GPU mode might not be installed for LightGBM, "
307
+ "GPU training raised an exception. Falling back to CPU training..."
308
+ "Refer to LightGBM GPU documentation: "
309
+ "https://github.com/Microsoft/LightGBM/tree/master/python-package#build-gpu-version"
310
+ "One possible method is:"
311
+ "\tpip uninstall lightgbm -y"
312
+ "\tpip install lightgbm --install-option=--gpu"
313
+ )
314
+ elif train_params["params"]["device"] == "cuda":
315
+ # Current blocker for using CUDA over GPU: https://github.com/microsoft/LightGBM/issues/6828
316
+ # Note that device="cuda" works if AutoGluon (and therefore LightGBM) is installed via conda.
317
+ logger.warning(
318
+ "Warning: CUDA mode might not be installed for LightGBM, "
319
+ "CUDA training raised an exception. Falling back to CPU training..."
320
+ "Refer to LightGBM CUDA documentation: "
321
+ "https://github.com/Microsoft/LightGBM/tree/master/python-package#build-cuda-version"
322
+ )
318
323
  train_params["params"]["device"] = "cpu"
319
324
  self.model = train_lgb_model(early_stopping_callback_kwargs=early_stopping_callback_kwargs, **train_params)
320
325
  retrain = False
@@ -515,17 +520,44 @@ class LGBModel(AbstractModel):
515
520
  default_auxiliary_params.update(extra_auxiliary_params)
516
521
  return default_auxiliary_params
517
522
 
518
- def _is_gpu_lgbm_installed(self):
523
+ @staticmethod
524
+ def _is_gpu_lgbm_installed():
525
+ # Taken from https://github.com/microsoft/LightGBM/issues/3939
526
+ try_import_lightgbm()
527
+ import lightgbm
528
+
529
+ rng = np.random.RandomState(42)
530
+ data = rng.rand(25, 2)
531
+ label = rng.randint(2, size=25)
532
+
533
+ try:
534
+ train_data = lightgbm.Dataset(data, label=label)
535
+ params = {
536
+ "device": "gpu",
537
+ "verbose": -1,
538
+ }
539
+ gbm = lightgbm.train(params, num_boost_round=10, train_set=train_data)
540
+ return True
541
+ except Exception as e:
542
+ return False
543
+
544
+ @staticmethod
545
+ def _is_cuda_lgbm_installed():
519
546
  # Taken from https://github.com/microsoft/LightGBM/issues/3939
520
547
  try_import_lightgbm()
521
548
  import lightgbm
522
549
 
550
+ rng = np.random.RandomState(42)
551
+ data = rng.rand(25, 2)
552
+ label = rng.randint(2, size=25)
553
+
523
554
  try:
524
- data = np.random.rand(50, 2)
525
- label = np.random.randint(2, size=50)
526
555
  train_data = lightgbm.Dataset(data, label=label)
527
- params = {"device": "gpu"}
528
- gbm = lightgbm.train(params, train_set=train_data, verbose=-1)
556
+ params = {
557
+ "device": "cuda",
558
+ "verbose": -1,
559
+ }
560
+ gbm = lightgbm.train(params, num_boost_round=10, train_set=train_data)
529
561
  return True
530
562
  except Exception as e:
531
563
  return False
@@ -534,7 +566,7 @@ class LGBModel(AbstractModel):
534
566
  minimum_resources = {
535
567
  "num_cpus": 1,
536
568
  }
537
- if is_gpu_available and self._is_gpu_lgbm_installed():
569
+ if is_gpu_available:
538
570
  minimum_resources["num_gpus"] = 0.5
539
571
  return minimum_resources
540
572
 
@@ -43,6 +43,7 @@ class LinearModel(AbstractModel):
43
43
  ag_key = "LR"
44
44
  ag_name = "LinearModel"
45
45
  ag_priority = 30
46
+ seed_name = "random_state"
46
47
 
47
48
  def __init__(self, **kwargs):
48
49
  super().__init__(**kwargs)
@@ -162,9 +163,6 @@ class LinearModel(AbstractModel):
162
163
  for param, val in default_params.items():
163
164
  self._set_default_param_value(param, val)
164
165
 
165
- def _get_random_seed_from_hyperparameters(self, hyperparameters: dict) -> int | None | str:
166
- return hyperparameters.get("random_seed", "N/A")
167
-
168
166
  def _get_default_searchspace(self):
169
167
  return get_default_searchspace(self.problem_type)
170
168
 
@@ -218,7 +216,7 @@ class LinearModel(AbstractModel):
218
216
  total_iter = 0
219
217
  total_iter_used = 0
220
218
  total_max_iter = sum(max_iter_list)
221
- model = model_cls(max_iter=max_iter_list[0], random_state=self.random_seed, **params)
219
+ model = model_cls(max_iter=max_iter_list[0], **params)
222
220
  early_stop = False
223
221
  for i, cur_max_iter in enumerate(max_iter_list):
224
222
  if time_left is not None and (i > 0):
@@ -322,6 +320,10 @@ class LinearModel(AbstractModel):
322
320
  ) -> int:
323
321
  return 4 * get_approximate_df_mem_usage(X).sum()
324
322
 
323
+ def _get_maximum_resources(self) -> dict[str, int | float]:
324
+ # no GPU support
325
+ return {"num_gpus": 0}
326
+
325
327
  @classmethod
326
328
  def supported_problem_types(cls) -> list[str] | None:
327
329
  return ["binary", "multiclass", "regression"]
@@ -5,20 +5,19 @@ from autogluon.features.generators import OneHotEncoderFeatureGenerator
5
5
 
6
6
  class OheFeaturesGenerator(BaseEstimator, TransformerMixin):
7
7
  def __init__(self):
8
- self._feature_names = []
9
- self._encoder = None
8
+ pass
10
9
 
11
10
  def fit(self, X, y=None):
12
- self._encoder = OneHotEncoderFeatureGenerator(max_levels=10000, verbosity=0)
13
- self._encoder.fit(X)
14
- self._feature_names = self._encoder.features_out
11
+ self.encoder_ = OneHotEncoderFeatureGenerator(max_levels=10000, verbosity=0)
12
+ self.encoder_.fit(X)
13
+ self.feature_names_ = self.encoder_.features_out
15
14
  return self
16
15
 
17
16
  def transform(self, X, y=None):
18
- return self._encoder.transform_ohe(X)
17
+ return self.encoder_.transform_ohe(X)
19
18
 
20
19
  def get_feature_names(self):
21
- return self._feature_names
20
+ return self.feature_names_
22
21
 
23
22
 
24
23
  class NlpDataPreprocessor(BaseEstimator, TransformerMixin):
@@ -32,6 +32,7 @@ class MitraModel(AbstractModel):
32
32
  ag_name = "Mitra"
33
33
  weights_file_name = "model.pt"
34
34
  ag_priority = 55
35
+ seed_name = "seed"
35
36
 
36
37
  def __init__(self, **kwargs):
37
38
  super().__init__(**kwargs)
@@ -77,9 +78,6 @@ class MitraModel(AbstractModel):
77
78
 
78
79
  return X
79
80
 
80
- def _get_random_seed_from_hyperparameters(self, hyperparameters: dict) -> int | None | str:
81
- return hyperparameters.get("seed", "N/A")
82
-
83
81
  def _fit(
84
82
  self,
85
83
  X: pd.DataFrame,
@@ -157,10 +155,7 @@ class MitraModel(AbstractModel):
157
155
  if "verbose" not in hyp:
158
156
  hyp["verbose"] = verbosity >= 3
159
157
 
160
- self.model = model_cls(
161
- seed=self.random_seed,
162
- **hyp,
163
- )
158
+ self.model = model_cls(**hyp)
164
159
 
165
160
  X = self.preprocess(X, is_train=True)
166
161
  if X_val is not None:
@@ -51,6 +51,7 @@ class RealMLPModel(AbstractModel):
51
51
  ag_key = "REALMLP"
52
52
  ag_name = "RealMLP"
53
53
  ag_priority = 75
54
+ seed_name = "random_state"
54
55
 
55
56
  def __init__(self, **kwargs):
56
57
  super().__init__(**kwargs)
@@ -82,9 +83,6 @@ class RealMLPModel(AbstractModel):
82
83
  model_cls = RealMLP_TD_S_Regressor
83
84
  return model_cls
84
85
 
85
- def _get_random_seed_from_hyperparameters(self, hyperparameters: dict) -> int | None | str:
86
- return hyperparameters.get("random_state", "N/A")
87
-
88
86
  def _fit(
89
87
  self,
90
88
  X: pd.DataFrame,
@@ -178,7 +176,6 @@ class RealMLPModel(AbstractModel):
178
176
  self.model = model_cls(
179
177
  n_threads=num_cpus,
180
178
  device=device,
181
- random_state=self.random_seed,
182
179
  **init_kwargs,
183
180
  **hyp,
184
181
  )
@@ -30,6 +30,7 @@ class RFModel(AbstractModel):
30
30
  ag_key = "RF"
31
31
  ag_name = "RandomForest"
32
32
  ag_priority = 80
33
+ seed_name = "random_state"
33
34
 
34
35
  def __init__(self, **kwargs):
35
36
  super().__init__(**kwargs)
@@ -107,9 +108,6 @@ class RFModel(AbstractModel):
107
108
  for param, val in default_params.items():
108
109
  self._set_default_param_value(param, val)
109
110
 
110
- def _get_random_seed_from_hyperparameters(self, hyperparameters: dict) -> int | None | str:
111
- return hyperparameters.get("random_state", "N/A")
112
-
113
111
  # TODO: Add in documentation that Categorical default is the first index
114
112
  # TODO: enable HPO for RF models
115
113
  def _get_default_searchspace(self):
@@ -153,13 +151,13 @@ class RFModel(AbstractModel):
153
151
  hyperparameters = {}
154
152
  n_estimators_final = hyperparameters.get("n_estimators", 300)
155
153
  if isinstance(n_estimators_final, int):
156
- n_estimators_minimum = min(40, n_estimators_final)
154
+ n_estimators = n_estimators_final
157
155
  else: # if search space
158
- n_estimators_minimum = 40
156
+ n_estimators = 40
159
157
  num_trees_per_estimator = cls._get_num_trees_per_estimator_static(problem_type=problem_type, num_classes=num_classes)
160
158
  bytes_per_estimator = num_trees_per_estimator * len(X) / 60000 * 1e6 # Underestimates by 3x on ExtraTrees
161
- expected_min_memory_usage = int(bytes_per_estimator * n_estimators_minimum)
162
- return expected_min_memory_usage
159
+ expected_memory_usage = int(bytes_per_estimator * n_estimators)
160
+ return expected_memory_usage
163
161
 
164
162
  def _validate_fit_memory_usage(self, mem_error_threshold: float = 0.5, mem_warning_threshold: float = 0.4, mem_size_threshold: int = 1e7, **kwargs):
165
163
  return super()._validate_fit_memory_usage(
@@ -208,7 +206,7 @@ class RFModel(AbstractModel):
208
206
  # FIXME: This is inefficient but sklearnex doesn't support computing oob_score after training
209
207
  params["oob_score"] = True
210
208
 
211
- model = model_cls(random_state=self.random_seed, **params)
209
+ model = model_cls(**params)
212
210
 
213
211
  time_train_start = time.time()
214
212
  for i, n_estimators in enumerate(n_estimator_increments):
@@ -370,6 +368,10 @@ class RFModel(AbstractModel):
370
368
 
371
369
  return self._convert_proba_to_unified_form(y_oof_pred_proba)
372
370
 
371
+ def _get_maximum_resources(self) -> dict[str, int | float]:
372
+ # no GPU support
373
+ return {"num_gpus": 0}
374
+
373
375
  def _get_default_auxiliary_params(self) -> dict:
374
376
  default_auxiliary_params = super()._get_default_auxiliary_params()
375
377
  extra_auxiliary_params = dict(
@@ -35,6 +35,7 @@ class TabICLModel(AbstractModel):
35
35
  ag_key = "TABICL"
36
36
  ag_name = "TabICL"
37
37
  ag_priority = 65
38
+ seed_name = "random_state"
38
39
 
39
40
  def get_model_cls(self):
40
41
  from tabicl import TabICLClassifier
@@ -89,7 +90,6 @@ class TabICLModel(AbstractModel):
89
90
  **hyp,
90
91
  device=device,
91
92
  n_jobs=num_cpus,
92
- random_state=self.random_seed,
93
93
  )
94
94
  X = self.preprocess(X)
95
95
  self.model = self.model.fit(
@@ -97,9 +97,6 @@ class TabICLModel(AbstractModel):
97
97
  y=y,
98
98
  )
99
99
 
100
- def _get_random_seed_from_hyperparameters(self, hyperparameters: dict) -> int | None | str:
101
- return hyperparameters.get("random_state", "N/A")
102
-
103
100
  def _get_default_auxiliary_params(self) -> dict:
104
101
  default_auxiliary_params = super()._get_default_auxiliary_params()
105
102
  default_auxiliary_params.update(
@@ -39,6 +39,7 @@ class TabMModel(AbstractModel):
39
39
  ag_key = "TABM"
40
40
  ag_name = "TabM"
41
41
  ag_priority = 85
42
+ seed_name = "random_state"
42
43
 
43
44
  def __init__(self, **kwargs):
44
45
  super().__init__(**kwargs)
@@ -48,6 +49,7 @@ class TabMModel(AbstractModel):
48
49
  self._indicator_columns = None
49
50
  self._features_bool = None
50
51
  self._bool_to_cat = None
52
+ self.device = None
51
53
 
52
54
  def _fit(
53
55
  self,
@@ -106,7 +108,6 @@ class TabMModel(AbstractModel):
106
108
  device=device,
107
109
  problem_type=self.problem_type,
108
110
  early_stopping_metric=self.stopping_metric,
109
- random_state=self.random_seed,
110
111
  **hyp,
111
112
  )
112
113
 
@@ -142,8 +143,80 @@ class TabMModel(AbstractModel):
142
143
 
143
144
  return X
144
145
 
145
- def _get_random_seed_from_hyperparameters(self, hyperparameters: dict) -> int | None | str:
146
- return hyperparameters.get("random_state", "N/A")
146
+ def save(self, path: str = None, verbose=True) -> str:
147
+ """
148
+ Need to set device to CPU to be able to load on a non-GPU environment
149
+ """
150
+ import torch
151
+
152
+ # Save on CPU to ensure the model can be loaded without GPU
153
+ if self.model is not None:
154
+ self.device = self.model.device_
155
+ device_cpu = torch.device("cpu")
156
+ self.model.model_ = self.model.model_.to(device_cpu)
157
+ self.model.device_ = device_cpu
158
+ path = super().save(path=path, verbose=verbose)
159
+ # Put the model back to the device after the save
160
+ if self.model is not None:
161
+ self.model.model_.to(self.device)
162
+ self.model.device_ = self.device
163
+
164
+ return path
165
+
166
+ @classmethod
167
+ def load(cls, path: str, reset_paths=True, verbose=True):
168
+ """
169
+ Loads the model from disk to memory.
170
+ The loaded model will be on the same device it was trained on (cuda/mps);
171
+ if the device is not available (trained on GPU, deployed on CPU), then `cpu` will be used.
172
+
173
+ Parameters
174
+ ----------
175
+ path : str
176
+ Path to the saved model, minus the file name.
177
+ This should generally be a directory path ending with a '/' character (or appropriate path separator value depending on OS).
178
+ The model file is typically located in os.path.join(path, cls.model_file_name).
179
+ reset_paths : bool, default True
180
+ Whether to reset the self.path value of the loaded model to be equal to path.
181
+ It is highly recommended to keep this value as True unless accessing the original self.path value is important.
182
+ If False, the actual valid path and self.path may differ, leading to strange behaviour and potential exceptions if the model needs to load any other files at a later time.
183
+ verbose : bool, default True
184
+ Whether to log the location of the loaded file.
185
+
186
+ Returns
187
+ -------
188
+ model : cls
189
+ Loaded model object.
190
+ """
191
+ import torch
192
+
193
+ model: TabMModel = super().load(path=path, reset_paths=reset_paths, verbose=verbose)
194
+
195
+ # Put the model on the same device it was trained on (GPU/MPS) if it is available; otherwise use CPU
196
+ if model.model is not None:
197
+ original_device_type = model.device.type
198
+ if "cuda" in original_device_type:
199
+ # cuda: nvidia GPU
200
+ device = torch.device(original_device_type if torch.cuda.is_available() else "cpu")
201
+ elif "mps" in original_device_type:
202
+ # mps: Apple Silicon
203
+ device = torch.device(original_device_type if torch.backends.mps.is_available() else "cpu")
204
+ else:
205
+ device = torch.device(original_device_type)
206
+
207
+ if verbose and (original_device_type != device.type):
208
+ logger.log(15, f"Model is trained on {original_device_type}, but the device is not available - loading on {device.type}")
209
+
210
+ model.set_device(device=device)
211
+
212
+ return model
213
+
214
+ def set_device(self, device):
215
+ self.device = device
216
+ if self.model is not None:
217
+ self.model.device_ = device
218
+ if self.model.model_ is not None:
219
+ self.model.model_ = self.model.model_.to(device)
147
220
 
148
221
  @classmethod
149
222
  def supported_problem_types(cls) -> list[str] | None:
@@ -42,6 +42,7 @@ class TabPFNMixModel(AbstractModel):
42
42
  ag_key = "TABPFNMIX"
43
43
  ag_name = "TabPFNMix"
44
44
  ag_priority = 45
45
+ seed_name = "random_state"
45
46
 
46
47
  weights_file_name = "model.pt"
47
48
 
@@ -123,6 +124,7 @@ class TabPFNMixModel(AbstractModel):
123
124
  raise AssertionError(f"Max allowed classes for the model is {max_classes}, " f"but found {self.num_classes} classes.")
124
125
 
125
126
  params = self._get_model_params()
127
+ random_state = params.pop(self.seed_name, self.default_random_seed)
126
128
  sample_rows = ag_params.get("sample_rows", None)
127
129
  sample_rows_val = ag_params.get("sample_rows_val", None)
128
130
  max_rows = ag_params.get("max_rows", None)
@@ -133,11 +135,11 @@ class TabPFNMixModel(AbstractModel):
133
135
 
134
136
  # TODO: Make sample_rows generic
135
137
  if sample_rows is not None and isinstance(sample_rows, int) and len(X) > sample_rows:
136
- X, y = self._subsample_data(X=X, y=y, num_rows=sample_rows)
138
+ X, y = self._subsample_data(X=X, y=y, num_rows=sample_rows, random_state=random_state)
137
139
 
138
140
  # TODO: Make sample_rows generic
139
141
  if X_val is not None and y_val is not None and sample_rows_val is not None and isinstance(sample_rows_val, int) and len(X_val) > sample_rows_val:
140
- X_val, y_val = self._subsample_data(X=X_val, y=y_val, num_rows=sample_rows_val)
142
+ X_val, y_val = self._subsample_data(X=X_val, y=y_val, num_rows=sample_rows_val, random_state=random_state)
141
143
 
142
144
  from ._internal.core.enums import Task
143
145
  if self.problem_type in [REGRESSION, QUANTILE]:
@@ -178,7 +180,7 @@ class TabPFNMixModel(AbstractModel):
178
180
  elif weights_path is not None:
179
181
  logger.log(15, f'\tLoading pre-trained weights from file... (weights_path="{weights_path}")')
180
182
 
181
- cfg = ConfigRun(hyperparams=params, task=task, device=device, seed=self.random_seed)
183
+ cfg = ConfigRun(hyperparams=params, task=task, device=device, seed=random_state)
182
184
 
183
185
  if cfg.hyperparams["max_epochs"] == 0 and cfg.hyperparams["n_ensembles"] != 1:
184
186
  logger.log(
@@ -242,14 +244,14 @@ class TabPFNMixModel(AbstractModel):
242
244
  return self
243
245
 
244
246
  # TODO: Make this generic by creating a generic `preprocess_train` and putting this logic prior to `_preprocess`.
245
- def _subsample_data(self, X: pd.DataFrame, y: pd.Series, num_rows: int) -> (pd.DataFrame, pd.Series):
247
+ def _subsample_data(self, X: pd.DataFrame, y: pd.Series, num_rows: int, random_state: int | None = 0) -> (pd.DataFrame, pd.Series):
246
248
  num_rows_to_drop = len(X) - num_rows
247
249
  X, _, y, _ = generate_train_test_split(
248
250
  X=X,
249
251
  y=y,
250
252
  problem_type=self.problem_type,
251
253
  test_size=num_rows_to_drop,
252
- random_state=self.random_seed,
254
+ random_state=random_state,
253
255
  min_cls_count_train=1,
254
256
  )
255
257
  return X, y
@@ -122,6 +122,7 @@ class TabPFNV2Model(AbstractModel):
122
122
  ag_key = "TABPFNV2"
123
123
  ag_name = "TabPFNv2"
124
124
  ag_priority = 105
125
+ seed_name = "random_state"
125
126
 
126
127
  def __init__(self, **kwargs):
127
128
  super().__init__(**kwargs)
@@ -201,7 +202,6 @@ class TabPFNV2Model(AbstractModel):
201
202
  X = self.preprocess(X, is_train=True)
202
203
 
203
204
  hps = self._get_model_params()
204
- hps["random_state"] = self.random_seed
205
205
  hps["device"] = device
206
206
  hps["n_jobs"] = num_cpus
207
207
  hps["categorical_features_indices"] = self._cat_indices
@@ -306,9 +306,6 @@ class TabPFNV2Model(AbstractModel):
306
306
  for param, val in default_params.items():
307
307
  self._set_default_param_value(param, val)
308
308
 
309
- def _get_random_seed_from_hyperparameters(self, hyperparameters: dict) -> int | None | str:
310
- return hyperparameters.get("random_state", "N/A")
311
-
312
309
  @classmethod
313
310
  def supported_problem_types(cls) -> list[str] | None:
314
311
  return ["binary", "multiclass", "regression"]
@@ -50,6 +50,7 @@ class TabularNeuralNetTorchModel(AbstractNeuralNetworkModel):
50
50
  ag_key = "NN_TORCH"
51
51
  ag_name = "NeuralNetTorch"
52
52
  ag_priority = 25
53
+ seed_name = "seed_value"
53
54
 
54
55
  # Constants used throughout this class:
55
56
  unique_category_str = np.nan # string used to represent missing values and unknown categories for categorical features.
@@ -164,9 +165,6 @@ class TabularNeuralNetTorchModel(AbstractNeuralNetworkModel):
164
165
 
165
166
  return processor_kwargs, optimizer_kwargs, fit_kwargs, loss_kwargs, params
166
167
 
167
- def _get_random_seed_from_hyperparameters(self, hyperparameters: dict) -> int | None | str:
168
- return hyperparameters.get("seed_value", "N/A")
169
-
170
168
  def _fit(
171
169
  self,
172
170
  X: pd.DataFrame,
@@ -194,7 +192,7 @@ class TabularNeuralNetTorchModel(AbstractNeuralNetworkModel):
194
192
 
195
193
  processor_kwargs, optimizer_kwargs, fit_kwargs, loss_kwargs, params = self._prepare_params(params=params)
196
194
 
197
- seed_value = self.random_seed
195
+ seed_value = params.pop(self.seed_name, self.default_random_seed)
198
196
 
199
197
  self._num_cpus_infer = params.pop("_num_cpus_infer", 1)
200
198
  if seed_value is not None: # Set seeds
@@ -373,7 +371,6 @@ class TabularNeuralNetTorchModel(AbstractNeuralNetworkModel):
373
371
  best_epoch = 0
374
372
  best_val_metric = -np.inf # higher = better
375
373
  best_val_update = 0
376
- val_improve_epoch = 0 # most recent epoch where validation-score strictly improved
377
374
  start_fit_time = time.time()
378
375
  if time_limit is not None:
379
376
  time_limit = time_limit - (start_fit_time - start_time)