autogluon.tabular 1.5.1b20260105__py3-none-any.whl → 1.5.1b20260116__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of autogluon.tabular might be problematic.

Files changed (135)
  1. autogluon/tabular/__init__.py +1 -0
  2. autogluon/tabular/configs/config_helper.py +18 -6
  3. autogluon/tabular/configs/feature_generator_presets.py +3 -1
  4. autogluon/tabular/configs/hyperparameter_configs.py +42 -9
  5. autogluon/tabular/configs/presets_configs.py +38 -14
  6. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2023.py +84 -14
  7. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2025.py +48 -48
  8. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_cpu_2025_12_18.py +774 -1
  9. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_gpu_2025_12_18.py +421 -1
  10. autogluon/tabular/experimental/_scikit_mixin.py +6 -2
  11. autogluon/tabular/experimental/_tabular_classifier.py +3 -1
  12. autogluon/tabular/experimental/_tabular_regressor.py +3 -1
  13. autogluon/tabular/experimental/plot_leaderboard.py +73 -19
  14. autogluon/tabular/learner/abstract_learner.py +160 -42
  15. autogluon/tabular/learner/default_learner.py +78 -22
  16. autogluon/tabular/models/__init__.py +2 -2
  17. autogluon/tabular/models/_utils/rapids_utils.py +3 -1
  18. autogluon/tabular/models/abstract/abstract_torch_model.py +2 -0
  19. autogluon/tabular/models/automm/automm_model.py +12 -3
  20. autogluon/tabular/models/automm/ft_transformer.py +5 -1
  21. autogluon/tabular/models/catboost/callbacks.py +2 -2
  22. autogluon/tabular/models/catboost/catboost_model.py +93 -29
  23. autogluon/tabular/models/catboost/catboost_softclass_utils.py +4 -1
  24. autogluon/tabular/models/catboost/catboost_utils.py +3 -1
  25. autogluon/tabular/models/ebm/ebm_model.py +8 -13
  26. autogluon/tabular/models/ebm/hyperparameters/parameters.py +1 -0
  27. autogluon/tabular/models/ebm/hyperparameters/searchspaces.py +1 -0
  28. autogluon/tabular/models/fastainn/callbacks.py +20 -3
  29. autogluon/tabular/models/fastainn/hyperparameters/searchspaces.py +11 -1
  30. autogluon/tabular/models/fastainn/quantile_helpers.py +10 -2
  31. autogluon/tabular/models/fastainn/tabular_nn_fastai.py +65 -18
  32. autogluon/tabular/models/fasttext/fasttext_model.py +3 -1
  33. autogluon/tabular/models/image_prediction/image_predictor.py +7 -2
  34. autogluon/tabular/models/knn/knn_model.py +41 -8
  35. autogluon/tabular/models/lgb/callbacks.py +32 -9
  36. autogluon/tabular/models/lgb/hyperparameters/searchspaces.py +3 -1
  37. autogluon/tabular/models/lgb/lgb_model.py +150 -34
  38. autogluon/tabular/models/lgb/lgb_utils.py +12 -4
  39. autogluon/tabular/models/lr/hyperparameters/searchspaces.py +5 -1
  40. autogluon/tabular/models/lr/lr_model.py +40 -10
  41. autogluon/tabular/models/lr/lr_rapids_model.py +22 -13
  42. autogluon/tabular/models/mitra/_internal/__init__.py +1 -1
  43. autogluon/tabular/models/mitra/_internal/config/__init__.py +1 -1
  44. autogluon/tabular/models/mitra/_internal/config/config_pretrain.py +36 -40
  45. autogluon/tabular/models/mitra/_internal/config/config_run.py +2 -14
  46. autogluon/tabular/models/mitra/_internal/config/enums.py +27 -26
  47. autogluon/tabular/models/mitra/_internal/core/__init__.py +1 -1
  48. autogluon/tabular/models/mitra/_internal/core/callbacks.py +14 -21
  49. autogluon/tabular/models/mitra/_internal/core/get_loss.py +10 -12
  50. autogluon/tabular/models/mitra/_internal/core/get_optimizer.py +17 -32
  51. autogluon/tabular/models/mitra/_internal/core/get_scheduler.py +12 -27
  52. autogluon/tabular/models/mitra/_internal/core/prediction_metrics.py +16 -21
  53. autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py +130 -111
  54. autogluon/tabular/models/mitra/_internal/data/__init__.py +1 -1
  55. autogluon/tabular/models/mitra/_internal/data/collator.py +30 -26
  56. autogluon/tabular/models/mitra/_internal/data/dataset_finetune.py +18 -26
  57. autogluon/tabular/models/mitra/_internal/data/dataset_split.py +10 -7
  58. autogluon/tabular/models/mitra/_internal/data/preprocessor.py +70 -100
  59. autogluon/tabular/models/mitra/_internal/models/__init__.py +1 -1
  60. autogluon/tabular/models/mitra/_internal/models/base.py +7 -10
  61. autogluon/tabular/models/mitra/_internal/models/embedding.py +46 -56
  62. autogluon/tabular/models/mitra/_internal/models/tab2d.py +140 -120
  63. autogluon/tabular/models/mitra/_internal/utils/__init__.py +1 -1
  64. autogluon/tabular/models/mitra/_internal/utils/set_seed.py +3 -1
  65. autogluon/tabular/models/mitra/mitra_model.py +16 -11
  66. autogluon/tabular/models/mitra/sklearn_interface.py +178 -162
  67. autogluon/tabular/models/realmlp/realmlp_model.py +28 -15
  68. autogluon/tabular/models/rf/compilers/onnx.py +1 -1
  69. autogluon/tabular/models/rf/rf_model.py +45 -12
  70. autogluon/tabular/models/rf/rf_quantile.py +4 -2
  71. autogluon/tabular/models/tabdpt/tabdpt_model.py +8 -17
  72. autogluon/tabular/models/tabicl/tabicl_model.py +8 -1
  73. autogluon/tabular/models/tabm/_tabm_internal.py +6 -4
  74. autogluon/tabular/models/tabm/rtdl_num_embeddings.py +80 -127
  75. autogluon/tabular/models/tabm/tabm_model.py +8 -4
  76. autogluon/tabular/models/tabm/tabm_reference.py +53 -85
  77. autogluon/tabular/models/tabpfnmix/_internal/core/callbacks.py +7 -16
  78. autogluon/tabular/models/tabpfnmix/_internal/core/collator.py +16 -24
  79. autogluon/tabular/models/tabpfnmix/_internal/core/dataset_split.py +5 -7
  80. autogluon/tabular/models/tabpfnmix/_internal/core/enums.py +0 -2
  81. autogluon/tabular/models/tabpfnmix/_internal/core/get_loss.py +0 -1
  82. autogluon/tabular/models/tabpfnmix/_internal/core/get_optimizer.py +7 -18
  83. autogluon/tabular/models/tabpfnmix/_internal/core/get_scheduler.py +3 -14
  84. autogluon/tabular/models/tabpfnmix/_internal/core/trainer_finetune.py +79 -64
  85. autogluon/tabular/models/tabpfnmix/_internal/core/y_transformer.py +3 -5
  86. autogluon/tabular/models/tabpfnmix/_internal/data/dataset_finetune.py +17 -30
  87. autogluon/tabular/models/tabpfnmix/_internal/data/preprocessor.py +15 -35
  88. autogluon/tabular/models/tabpfnmix/_internal/models/foundation/embedding.py +21 -38
  89. autogluon/tabular/models/tabpfnmix/_internal/models/foundation/foundation_transformer.py +33 -51
  90. autogluon/tabular/models/tabpfnmix/_internal/results/prediction_metrics.py +4 -4
  91. autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_classifier.py +32 -12
  92. autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_regressor.py +32 -13
  93. autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py +55 -19
  94. autogluon/tabular/models/tabpfnv2/tabpfnv2_5_model.py +21 -48
  95. autogluon/tabular/models/tabprep/prep_mixin.py +34 -26
  96. autogluon/tabular/models/tabular_nn/compilers/onnx.py +36 -8
  97. autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py +130 -36
  98. autogluon/tabular/models/tabular_nn/torch/tabular_torch_dataset.py +8 -4
  99. autogluon/tabular/models/tabular_nn/torch/torch_network_modules.py +26 -5
  100. autogluon/tabular/models/tabular_nn/utils/categorical_encoders.py +41 -24
  101. autogluon/tabular/models/tabular_nn/utils/data_preprocessor.py +33 -8
  102. autogluon/tabular/models/tabular_nn/utils/nn_architecture_utils.py +21 -6
  103. autogluon/tabular/models/xgboost/callbacks.py +9 -3
  104. autogluon/tabular/models/xgboost/xgboost_model.py +59 -11
  105. autogluon/tabular/models/xt/xt_model.py +1 -0
  106. autogluon/tabular/predictor/interpretable_predictor.py +3 -1
  107. autogluon/tabular/predictor/predictor.py +409 -128
  108. autogluon/tabular/registry/__init__.py +1 -1
  109. autogluon/tabular/registry/_ag_model_registry.py +4 -5
  110. autogluon/tabular/registry/_model_registry.py +1 -0
  111. autogluon/tabular/testing/fit_helper.py +55 -15
  112. autogluon/tabular/testing/generate_datasets.py +1 -1
  113. autogluon/tabular/testing/model_fit_helper.py +10 -4
  114. autogluon/tabular/trainer/abstract_trainer.py +644 -230
  115. autogluon/tabular/trainer/auto_trainer.py +19 -8
  116. autogluon/tabular/trainer/model_presets/presets.py +33 -9
  117. autogluon/tabular/trainer/model_presets/presets_distill.py +16 -2
  118. autogluon/tabular/version.py +1 -1
  119. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/METADATA +26 -26
  120. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/RECORD +127 -135
  121. autogluon/tabular/models/tabpfnv2/rfpfn/__init__.py +0 -20
  122. autogluon/tabular/models/tabpfnv2/rfpfn/configs.py +0 -40
  123. autogluon/tabular/models/tabpfnv2/rfpfn/scoring_utils.py +0 -201
  124. autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_decision_tree_tabpfn.py +0 -1464
  125. autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_random_forest_tabpfn.py +0 -747
  126. autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_compat.py +0 -863
  127. autogluon/tabular/models/tabpfnv2/rfpfn/utils.py +0 -106
  128. autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +0 -466
  129. /autogluon.tabular-1.5.1b20260105-py3.11-nspkg.pth → /autogluon.tabular-1.5.1b20260116-py3.11-nspkg.pth +0 -0
  130. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/WHEEL +0 -0
  131. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/licenses/LICENSE +0 -0
  132. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/licenses/NOTICE +0 -0
  133. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/namespace_packages.txt +0 -0
  134. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/top_level.txt +0 -0
  135. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/zip-safe +0 -0
autogluon/tabular/models/tabular_nn/utils/data_preprocessor.py

@@ -15,16 +15,28 @@ from .categorical_encoders import OneHotMergeRaresHandleUnknownEncoder, OrdinalM
 
 
 def create_preprocessor(
-    impute_strategy, max_category_levels, unique_category_str, continuous_features, skewed_features, onehot_features, embed_features, bool_features
+    impute_strategy,
+    max_category_levels,
+    unique_category_str,
+    continuous_features,
+    skewed_features,
+    onehot_features,
+    embed_features,
+    bool_features,
 ):
     """Creates sklearn ColumnTransformer that can be fit to training data to preprocess it for tabular neural network."""
     transformers = []  # order of various column transformers in this list is important!
     if continuous_features:
-        continuous_transformer = Pipeline(steps=[("imputer", SimpleImputer(strategy=impute_strategy)), ("scaler", StandardScaler())])
+        continuous_transformer = Pipeline(
+            steps=[("imputer", SimpleImputer(strategy=impute_strategy)), ("scaler", StandardScaler())]
+        )
         transformers.append(("continuous", continuous_transformer, continuous_features))
     if skewed_features:
         power_transformer = Pipeline(
-            steps=[("imputer", SimpleImputer(strategy=impute_strategy)), ("quantile", QuantileTransformer(output_distribution="normal"))]
+            steps=[
+                ("imputer", SimpleImputer(strategy=impute_strategy)),
+                ("quantile", QuantileTransformer(output_distribution="normal")),
+            ]
         )  # Or output_distribution = 'uniform'
         transformers.append(("skewed", power_transformer, skewed_features))
     if onehot_features:
@@ -39,24 +51,30 @@ def create_preprocessor(
         transformers.append(("ordinal", ordinal_transformer, embed_features))
     try:
         out = ColumnTransformer(
-            transformers=transformers, remainder="passthrough", force_int_remainder_cols=False,
+            transformers=transformers,
+            remainder="passthrough",
+            force_int_remainder_cols=False,
         )  # numeric features are processed in the same order as in numeric_features vector, so feature-names remain the same.
     except:
         # TODO: Avoid try/except once scikit-learn 1.5 is minimum
         # Needed for scikit-learn 1.4 and 1.9+, force_int_remainder_cols is deprecated in 1.7 and introduced in 1.5
         # ref: https://github.com/autogluon/autogluon/issues/5289
         out = ColumnTransformer(
-            transformers=transformers, remainder="passthrough",
+            transformers=transformers,
+            remainder="passthrough",
         )  # numeric features are processed in the same order as in numeric_features vector, so feature-names remain the same.
     return out
 
+
 def convert_df_dtype_to_str(df):
     return df.astype(str)
 
 
 def get_feature_arraycol_map(processor, max_category_levels):
     """Returns OrderedDict of feature-name -> list of column-indices in processed data array corresponding to this feature"""
-    feature_preserving_transforms = set(["continuous", "skewed", "ordinal", "bool", "remainder"])  # these transforms do not alter dimensionality of feature
+    feature_preserving_transforms = set(
+        ["continuous", "skewed", "ordinal", "bool", "remainder"]
+    )  # these transforms do not alter dimensionality of feature
     feature_arraycol_map = {}  # unordered version
     current_colindex = 0
     for transformer in processor.transformers_:
@@ -85,8 +103,15 @@ def get_feature_arraycol_map(processor, max_category_levels):
 def get_feature_type_map(feature_arraycol_map, types_of_features):
     """Returns OrderedDict of feature-name -> feature_type string (options: 'vector', 'embed')."""
     if feature_arraycol_map is None:
-        raise ValueError("Must first call get_feature_arraycol_map() to set feature_arraycol_map before calling get_feature_type_map()")
-    vector_features = types_of_features["continuous"] + types_of_features["skewed"] + types_of_features["onehot"] + types_of_features["bool"]
+        raise ValueError(
+            "Must first call get_feature_arraycol_map() to set feature_arraycol_map before calling get_feature_type_map()"
+        )
+    vector_features = (
+        types_of_features["continuous"]
+        + types_of_features["skewed"]
+        + types_of_features["onehot"]
+        + types_of_features["bool"]
+    )
     feature_type_map = OrderedDict()
     for feature_name in feature_arraycol_map:
         if feature_name in vector_features:
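The try/except around ColumnTransformer in create_preprocessor above works around a keyword that only some scikit-learn releases accept (per the diff's own comment, force_int_remainder_cols was introduced in 1.5, deprecated in 1.7, and absent again by 1.9). A minimal sketch of the same fallback pattern, assuming only scikit-learn is installed; make_preprocessor is a hypothetical stand-in, not AutoGluon code:

    # Pass a version-dependent keyword only if the installed scikit-learn accepts it.
    from sklearn.compose import ColumnTransformer
    from sklearn.preprocessing import StandardScaler

    def make_preprocessor(transformers):
        try:
            # releases that know the keyword: silence the behavior change
            return ColumnTransformer(
                transformers=transformers, remainder="passthrough", force_int_remainder_cols=False
            )
        except TypeError:
            # releases without the keyword reject it at __init__
            return ColumnTransformer(transformers=transformers, remainder="passthrough")

    preprocessor = make_preprocessor([("scale", StandardScaler(), [0, 1])])

AutoGluon's version uses a bare except for the same effect; catching TypeError is the narrower form of the check.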
autogluon/tabular/models/tabular_nn/utils/nn_architecture_utils.py

@@ -14,7 +14,8 @@ def get_embed_sizes(train_dataset, params, num_categs_per_feature):
     embed_exponent = params["embed_exponent"]
     size_factor = params["embedding_size_factor"]
     embed_dims = [
-        int(size_factor * max(2, min(max_embedding_dim, 1.6 * num_categs_per_feature[i] ** embed_exponent))) for i in range(len(num_categs_per_feature))
+        int(size_factor * max(2, min(max_embedding_dim, 1.6 * num_categs_per_feature[i] ** embed_exponent)))
+        for i in range(len(num_categs_per_feature))
     ]
     return embed_dims
 
@@ -39,11 +40,16 @@ def infer_y_range(y_vals, y_range_extend):
 def get_default_layers(problem_type, num_net_outputs, max_layer_width):
     """Default sizes for NN layers."""
     if problem_type == REGRESSION:
-        default_layer_sizes = [256, 128]  # overall network will have 4 layers. Input layer, 256-unit hidden layer, 128-unit hidden layer, output layer.
+        default_layer_sizes = [
+            256,
+            128,
+        ]  # overall network will have 4 layers. Input layer, 256-unit hidden layer, 128-unit hidden layer, output layer.
     else:
         default_sizes = [256, 128]  # will be scaled adaptively
         # base_size = max(1, min(num_net_outputs, 20)/2.0) # scale layer width based on number of classes
-        base_size = max(1, min(num_net_outputs, 100) / 50)  # TODO: Updated because it improved model quality and made training far faster
+        base_size = max(
+            1, min(num_net_outputs, 100) / 50
+        )  # TODO: Updated because it improved model quality and made training far faster
         default_layer_sizes = [defaultsize * base_size for defaultsize in default_sizes]
     layer_expansion_factor = 1  # TODO: consider scaling based on num_rows, eg: layer_expansion_factor = 2-np.exp(-max(0,train_dataset.num_examples-10000))
     return [int(min(max_layer_width, layer_expansion_factor * defaultsize)) for defaultsize in default_layer_sizes]
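The base_size scaling in get_default_layers widens the default hidden layers with the number of network outputs, saturating at 100 classes. A standalone sketch of that arithmetic; the max_layer_width default of 2056 is an assumption for illustration, and layer_expansion_factor is folded in as the constant 1 used above:

    # Worked instances of the base_size scaling shown in get_default_layers.
    def layer_sizes(num_net_outputs, max_layer_width=2056):
        base_size = max(1, min(num_net_outputs, 100) / 50)  # 1.0 up to 50 outputs, 2.0 at 100+
        return [int(min(max_layer_width, s * base_size)) for s in (256, 128)]

    print(layer_sizes(2))    # binary classification -> [256, 128]
    print(layer_sizes(50))   # 50 classes            -> [256, 128]
    print(layer_sizes(100))  # 100+ classes saturate -> [512, 256]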
@@ -51,8 +57,17 @@ def get_default_layers(problem_type, num_net_outputs, max_layer_width):
 
 def default_numeric_embed_dim(train_dataset, max_layer_width, first_layer_width):
     """Default embedding dimensionality for numeric features."""
-    vector_dim = train_dataset.dataset._data[train_dataset.vectordata_index].shape[1]  # total dimensionality of vector features
-    prop_vector_features = train_dataset.num_vector_features() / float(train_dataset.num_features)  # Fraction of features that are numeric
+    vector_dim = train_dataset.dataset._data[train_dataset.vectordata_index].shape[
+        1
+    ]  # total dimensionality of vector features
+    prop_vector_features = train_dataset.num_vector_features() / float(
+        train_dataset.num_features
+    )  # Fraction of features that are numeric
     min_numeric_embed_dim = 32
     max_numeric_embed_dim = max_layer_width
-    return int(min(max_numeric_embed_dim, max(min_numeric_embed_dim, first_layer_width * prop_vector_features * np.log10(vector_dim + 10))))
+    return int(
+        min(
+            max_numeric_embed_dim,
+            max(min_numeric_embed_dim, first_layer_width * prop_vector_features * np.log10(vector_dim + 10)),
+        )
+    )
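For intuition, the categorical embedding width in get_embed_sizes above is a clipped power law of the category count. A self-contained rendering of the formula; the parameter values (embed_exponent=0.56, size_factor=1.0, max_embedding_dim=100) are illustrative assumptions, not values read from this diff:

    # Embedding-width formula from get_embed_sizes, with assumed defaults.
    def embed_dim(num_categs, embed_exponent=0.56, size_factor=1.0, max_embedding_dim=100):
        return int(size_factor * max(2, min(max_embedding_dim, 1.6 * num_categs**embed_exponent)))

    for n in (2, 10, 100, 10_000):
        print(n, embed_dim(n))  # 2 -> 2, 10 -> 5, 100 -> 21, 10000 -> 100 (clipped)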
autogluon/tabular/models/xgboost/callbacks.py

@@ -51,8 +51,12 @@ class CustomMetricCallback(TrainingCallback):
     """
 
     def __init__(self, scorers, eval_sets, problem_type, use_error=True):
-        self.metrics = [learning_curve_func_generator(scorer, problem_type=problem_type, use_error=use_error) for scorer in scorers]
-        self.eval_sets = [(name, DMatrix(eval_set[0], label=eval_set[1]), eval_set[1]) for name, eval_set in eval_sets.items()]
+        self.metrics = [
+            learning_curve_func_generator(scorer, problem_type=problem_type, use_error=use_error) for scorer in scorers
+        ]
+        self.eval_sets = [
+            (name, DMatrix(eval_set[0], label=eval_set[1]), eval_set[1]) for name, eval_set in eval_sets.items()
+        ]
 
     def after_iteration(self, model, epoch, evals_log):
         y_preds = [model.predict(eval_set[1]) for eval_set in self.eval_sets]
@@ -155,7 +159,9 @@ class EarlyStoppingCustom(EarlyStopping):
                 logger.warning(
                     f"Warning: Early stopped XGB model prior to optimal result to avoid OOM error. Please increase available memory to avoid subpar model quality.\n"
                 )
-                logger.warning(f"Early stopping. Best iteration is: \t[{model.attr('best_iteration')}]\t{model.attr('best_score')}")
+                logger.warning(
+                    f"Early stopping. Best iteration is: \t[{model.attr('best_iteration')}]\t{model.attr('best_score')}"
+                )
                 return True
             elif self.verbose and (model_size_memory_ratio > 0.25):
                 logger.log(15, f"Available Memory: {available_mb} MB")
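CustomMetricCallback rides on XGBoost's TrainingCallback hook: after_iteration receives the live Booster each round, predicts on pre-built DMatrix eval sets, and returns False so training continues. A minimal self-contained sketch of that hook, independent of AutoGluon's learning_curve_func_generator:

    # Minimal xgboost TrainingCallback: log validation RMSE every iteration.
    import numpy as np
    import xgboost as xgb
    from sklearn.metrics import mean_squared_error

    class LogRMSE(xgb.callback.TrainingCallback):
        def __init__(self, dval, y_val):
            super().__init__()
            self.dval, self.y_val = dval, y_val

        def after_iteration(self, model, epoch, evals_log):
            rmse = mean_squared_error(self.y_val, model.predict(self.dval)) ** 0.5
            print(f"iter {epoch}: val rmse {rmse:.4f}")
            return False  # False -> keep training; True would stop early

    X, y = np.random.rand(200, 5), np.random.rand(200)
    dtrain = xgb.DMatrix(X[:150], label=y[:150])
    dval = xgb.DMatrix(X[150:])
    xgb.train({"objective": "reg:squarederror"}, dtrain, num_boost_round=5,
              callbacks=[LogRMSE(dval, y[150:])])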
autogluon/tabular/models/xgboost/xgboost_model.py

@@ -29,6 +29,7 @@ class XGBoostModel(AbstractModel):
 
     Hyperparameter options: https://xgboost.readthedocs.io/en/latest/parameter.html
     """
+
     ag_key = "XGB"
     ag_name = "XGBoost"
     ag_priority = 40
@@ -58,7 +59,9 @@
 
     # Use specialized XGBoost metric if available (fast), otherwise use custom func generator
     def get_eval_metric(self):
-        eval_metric = xgboost_utils.convert_ag_metric_to_xgbm(ag_metric_name=self.stopping_metric.name, problem_type=self.problem_type)
+        eval_metric = xgboost_utils.convert_ag_metric_to_xgbm(
+            ag_metric_name=self.stopping_metric.name, problem_type=self.problem_type
+        )
         if eval_metric is None:
             eval_metric = xgboost_utils.func_generator(metric=self.stopping_metric, problem_type=self.problem_type)
         return eval_metric
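get_eval_metric prefers a native XGBoost metric (evaluated on the library side, fast) and only falls back to a generated Python callable, which is invoked from Python every boosting round and is therefore slower. A sketch of that fallback shape; the name mapping here is illustrative, not AutoGluon's actual table:

    # Native-first metric selection; _AG_TO_XGB entries are examples only.
    _AG_TO_XGB = {"roc_auc": "auc", "root_mean_squared_error": "rmse"}

    def pick_eval_metric(ag_metric_name, custom_func):
        native = _AG_TO_XGB.get(ag_metric_name)  # None if no native equivalent
        return native if native is not None else custom_func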
@@ -76,7 +79,20 @@
 
         return X
 
-    def _fit(self, X, y, X_val=None, y_val=None, time_limit=None, num_gpus=0, num_cpus=None, sample_weight=None, sample_weight_val=None, verbosity=2, **kwargs):
+    def _fit(
+        self,
+        X,
+        y,
+        X_val=None,
+        y_val=None,
+        time_limit=None,
+        num_gpus=0,
+        num_cpus=None,
+        sample_weight=None,
+        sample_weight_val=None,
+        verbosity=2,
+        **kwargs,
+    ):
         # TODO: utilize sample_weight_val in early-stopping if provided
         start_time = time.time()
         ag_params = self._get_ag_params()
@@ -123,7 +139,9 @@
             params["eval_metric"] = eval_metric
             eval_metric_name = eval_metric.__name__ if not isinstance(eval_metric, str) else eval_metric
         else:
-            eval_metric_name = params["eval_metric"].__name__ if not isinstance(params["eval_metric"], str) else params["eval_metric"]
+            eval_metric_name = (
+                params["eval_metric"].__name__ if not isinstance(params["eval_metric"], str) else params["eval_metric"]
+            )
 
         if X_val is None:
             early_stopping_rounds = None
@@ -133,7 +151,9 @@
             eval_set["val"] = (X_val, y_val)
             early_stopping_rounds = ag_params.get("early_stop", "adaptive")
             if isinstance(early_stopping_rounds, (str, tuple, list)):
-                early_stopping_rounds = self._get_early_stopping_rounds(num_rows_train=num_rows_train, strategy=early_stopping_rounds)
+                early_stopping_rounds = self._get_early_stopping_rounds(
+                    num_rows_train=num_rows_train, strategy=early_stopping_rounds
+                )
 
         if generate_curves and eval_set is not None:
             scorers = ag_params.get("curve_metrics", [self.eval_metric])
@@ -161,7 +181,14 @@
         if eval_set is not None and "callbacks" not in params:
             callbacks = []
             if generate_curves:
-                callbacks.append(CustomMetricCallback(scorers=scorers, eval_sets=eval_set, problem_type=self.problem_type, use_error=use_curve_metric_error))
+                callbacks.append(
+                    CustomMetricCallback(
+                        scorers=scorers,
+                        eval_sets=eval_set,
+                        problem_type=self.problem_type,
+                        use_error=use_curve_metric_error,
+                    )
+                )
             if log_period is not None:
                 callbacks.append(EvaluationMonitor(period=log_period))
 
@@ -254,7 +281,13 @@
 
     def _estimate_memory_usage(self, X: pd.DataFrame, **kwargs) -> int:
         hyperparameters = self._get_model_params()
-        return self.estimate_memory_usage_static(X=X, problem_type=self.problem_type, num_classes=self.num_classes, hyperparameters=hyperparameters, **kwargs)
+        return self.estimate_memory_usage_static(
+            X=X,
+            problem_type=self.problem_type,
+            num_classes=self.num_classes,
+            hyperparameters=hyperparameters,
+            **kwargs,
+        )
 
     @classmethod
     def _estimate_memory_usage_static(
@@ -267,9 +300,13 @@
     ) -> int:
         if hyperparameters is None:
             hyperparameters = {}
-        num_classes = num_classes if num_classes else 1  # self.num_classes could be None after initialization if it's a regression problem
+        num_classes = (
+            num_classes if num_classes else 1
+        )  # self.num_classes could be None after initialization if it's a regression problem
         data_mem_usage = get_approximate_df_mem_usage(X).sum()
-        data_mem_usage_bytes = data_mem_usage * 7 + data_mem_usage / 4 * num_classes  # TODO: Extremely crude approximation, can be vastly improved
+        data_mem_usage_bytes = (
+            data_mem_usage * 7 + data_mem_usage / 4 * num_classes
+        )  # TODO: Extremely crude approximation, can be vastly improved
 
         max_bin = hyperparameters.get("max_bin", 256)
         max_depth = hyperparameters.get("max_depth", 6)
@@ -299,14 +336,25 @@
         mem_size_per_estimator = num_classes * max_depth * 500  # very rough estimate
         n_estimators = hyperparameters.get("n_estimators", 10000)
         n_estimators_min = min(n_estimators, 1000)
-        mem_size_estimators = n_estimators_min * mem_size_per_estimator  # memory estimate after fitting up to 1000 estimators
+        mem_size_estimators = (
+            n_estimators_min * mem_size_per_estimator
+        )  # memory estimate after fitting up to 1000 estimators
 
         approx_mem_size_req = data_mem_usage_bytes + histogram_mem_usage_bytes + mem_size_estimators
         return approx_mem_size_req
 
-    def _validate_fit_memory_usage(self, mem_error_threshold: float = 1.0, mem_warning_threshold: float = 0.75, mem_size_threshold: int = 1e9, **kwargs):
+    def _validate_fit_memory_usage(
+        self,
+        mem_error_threshold: float = 1.0,
+        mem_warning_threshold: float = 0.75,
+        mem_size_threshold: int = 1e9,
+        **kwargs,
+    ):
         return super()._validate_fit_memory_usage(
-            mem_error_threshold=mem_error_threshold, mem_warning_threshold=mem_warning_threshold, mem_size_threshold=mem_size_threshold, **kwargs
+            mem_error_threshold=mem_error_threshold,
+            mem_warning_threshold=mem_warning_threshold,
+            mem_size_threshold=mem_size_threshold,
+            **kwargs,
         )
 
     def get_minimum_resources(self, is_gpu_available=False):
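The memory estimate sums three crude terms: a multiple of the DataFrame's in-memory size, a histogram term, and a per-tree term capped at 1000 estimators. A back-of-the-envelope run of the two terms fully visible in this diff (the histogram formula is not shown here, so it is omitted; all input numbers are illustrative):

    # Rough arithmetic behind _estimate_memory_usage_static, example inputs.
    data_mem_usage = 100 * 2**20        # pretend the DataFrame holds ~100 MB
    num_classes, max_depth = 10, 6
    n_estimators = 10000

    data_term = data_mem_usage * 7 + data_mem_usage / 4 * num_classes
    per_tree = num_classes * max_depth * 500       # bytes, "very rough estimate"
    tree_term = min(n_estimators, 1000) * per_tree

    print(f"data term: {data_term / 2**20:.0f} MB")  # ~950 MB
    print(f"tree term: {tree_term / 2**20:.0f} MB")  # ~29 MB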
autogluon/tabular/models/xt/xt_model.py

@@ -9,6 +9,7 @@ class XTModel(RFModel):
     """
     Extra Trees model (scikit-learn): https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesClassifier.html#sklearn.ensemble.ExtraTreesClassifier
     """
+
     ag_key = "XT"
     ag_name = "ExtraTrees"
     ag_priority = 60
autogluon/tabular/predictor/interpretable_predictor.py

@@ -135,5 +135,7 @@ class InterpretableTabularPredictor(TabularPredictor):
         labels = data[self.label]
         data_transformed = self.transform_features(data=data, model=model)
         labels_transformed = self.transform_labels(labels=labels)
-        cls, columns = imodels.explain_classification_errors(data_transformed, predictions, labels_transformed, print_rules=print_rules)
+        cls, columns = imodels.explain_classification_errors(
+            data_transformed, predictions, labels_transformed, print_rules=print_rules
+        )
         return cls
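For context, the reflowed call above is this file's point of contact with the imodels library; the shape of the call (transformed features, predictions, and transformed labels in; a fitted rule-based classifier and columns out) is taken from this hunk. A hedged usage sketch, assuming imodels is installed and behaves as the call implies:

    # Usage shape inferred from the hunk above; not taken from imodels docs.
    import imodels
    import numpy as np
    import pandas as pd

    X = pd.DataFrame({"f1": np.random.rand(100), "f2": np.random.rand(100)})
    y = (X["f1"] > 0.5).astype(int).values
    preds = np.random.randint(0, 2, size=100)  # stand-in model predictions

    cls, columns = imodels.explain_classification_errors(X, preds, y, print_rules=True)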