autogluon.tabular 1.5.0b20251228__py3-none-any.whl → 1.5.1b20260116__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of autogluon.tabular might be problematic; see the registry's advisory page for details.
- autogluon/tabular/__init__.py +1 -0
- autogluon/tabular/configs/config_helper.py +18 -6
- autogluon/tabular/configs/feature_generator_presets.py +3 -1
- autogluon/tabular/configs/hyperparameter_configs.py +42 -9
- autogluon/tabular/configs/presets_configs.py +38 -14
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2023.py +84 -14
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2025.py +48 -48
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_cpu_2025_12_18.py +774 -1
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_gpu_2025_12_18.py +421 -1
- autogluon/tabular/experimental/_scikit_mixin.py +6 -2
- autogluon/tabular/experimental/_tabular_classifier.py +3 -1
- autogluon/tabular/experimental/_tabular_regressor.py +3 -1
- autogluon/tabular/experimental/plot_leaderboard.py +73 -19
- autogluon/tabular/learner/abstract_learner.py +160 -42
- autogluon/tabular/learner/default_learner.py +78 -22
- autogluon/tabular/models/__init__.py +2 -2
- autogluon/tabular/models/_utils/rapids_utils.py +3 -1
- autogluon/tabular/models/abstract/abstract_torch_model.py +2 -0
- autogluon/tabular/models/automm/automm_model.py +12 -3
- autogluon/tabular/models/automm/ft_transformer.py +5 -1
- autogluon/tabular/models/catboost/callbacks.py +2 -2
- autogluon/tabular/models/catboost/catboost_model.py +93 -29
- autogluon/tabular/models/catboost/catboost_softclass_utils.py +4 -1
- autogluon/tabular/models/catboost/catboost_utils.py +3 -1
- autogluon/tabular/models/ebm/ebm_model.py +8 -13
- autogluon/tabular/models/ebm/hyperparameters/parameters.py +1 -0
- autogluon/tabular/models/ebm/hyperparameters/searchspaces.py +1 -0
- autogluon/tabular/models/fastainn/callbacks.py +20 -3
- autogluon/tabular/models/fastainn/hyperparameters/searchspaces.py +11 -1
- autogluon/tabular/models/fastainn/quantile_helpers.py +10 -2
- autogluon/tabular/models/fastainn/tabular_nn_fastai.py +65 -18
- autogluon/tabular/models/fasttext/fasttext_model.py +3 -1
- autogluon/tabular/models/image_prediction/image_predictor.py +7 -2
- autogluon/tabular/models/knn/knn_model.py +41 -8
- autogluon/tabular/models/lgb/callbacks.py +32 -9
- autogluon/tabular/models/lgb/hyperparameters/searchspaces.py +3 -1
- autogluon/tabular/models/lgb/lgb_model.py +150 -34
- autogluon/tabular/models/lgb/lgb_utils.py +12 -4
- autogluon/tabular/models/lr/hyperparameters/searchspaces.py +5 -1
- autogluon/tabular/models/lr/lr_model.py +40 -10
- autogluon/tabular/models/lr/lr_rapids_model.py +22 -13
- autogluon/tabular/models/mitra/_internal/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/config/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/config/config_pretrain.py +36 -40
- autogluon/tabular/models/mitra/_internal/config/config_run.py +2 -14
- autogluon/tabular/models/mitra/_internal/config/enums.py +27 -26
- autogluon/tabular/models/mitra/_internal/core/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/core/callbacks.py +14 -21
- autogluon/tabular/models/mitra/_internal/core/get_loss.py +10 -12
- autogluon/tabular/models/mitra/_internal/core/get_optimizer.py +17 -32
- autogluon/tabular/models/mitra/_internal/core/get_scheduler.py +12 -27
- autogluon/tabular/models/mitra/_internal/core/prediction_metrics.py +16 -21
- autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py +130 -111
- autogluon/tabular/models/mitra/_internal/data/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/data/collator.py +30 -26
- autogluon/tabular/models/mitra/_internal/data/dataset_finetune.py +18 -26
- autogluon/tabular/models/mitra/_internal/data/dataset_split.py +10 -7
- autogluon/tabular/models/mitra/_internal/data/preprocessor.py +70 -100
- autogluon/tabular/models/mitra/_internal/models/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/models/base.py +7 -10
- autogluon/tabular/models/mitra/_internal/models/embedding.py +46 -56
- autogluon/tabular/models/mitra/_internal/models/tab2d.py +140 -120
- autogluon/tabular/models/mitra/_internal/utils/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/utils/set_seed.py +3 -1
- autogluon/tabular/models/mitra/mitra_model.py +16 -11
- autogluon/tabular/models/mitra/sklearn_interface.py +178 -162
- autogluon/tabular/models/realmlp/realmlp_model.py +28 -15
- autogluon/tabular/models/rf/compilers/onnx.py +1 -1
- autogluon/tabular/models/rf/rf_model.py +45 -12
- autogluon/tabular/models/rf/rf_quantile.py +4 -2
- autogluon/tabular/models/tabdpt/tabdpt_model.py +8 -17
- autogluon/tabular/models/tabicl/tabicl_model.py +8 -1
- autogluon/tabular/models/tabm/_tabm_internal.py +6 -4
- autogluon/tabular/models/tabm/rtdl_num_embeddings.py +80 -127
- autogluon/tabular/models/tabm/tabm_model.py +8 -4
- autogluon/tabular/models/tabm/tabm_reference.py +53 -85
- autogluon/tabular/models/tabpfnmix/_internal/core/callbacks.py +7 -16
- autogluon/tabular/models/tabpfnmix/_internal/core/collator.py +16 -24
- autogluon/tabular/models/tabpfnmix/_internal/core/dataset_split.py +5 -7
- autogluon/tabular/models/tabpfnmix/_internal/core/enums.py +0 -2
- autogluon/tabular/models/tabpfnmix/_internal/core/get_loss.py +0 -1
- autogluon/tabular/models/tabpfnmix/_internal/core/get_optimizer.py +7 -18
- autogluon/tabular/models/tabpfnmix/_internal/core/get_scheduler.py +3 -14
- autogluon/tabular/models/tabpfnmix/_internal/core/trainer_finetune.py +79 -64
- autogluon/tabular/models/tabpfnmix/_internal/core/y_transformer.py +3 -5
- autogluon/tabular/models/tabpfnmix/_internal/data/dataset_finetune.py +17 -30
- autogluon/tabular/models/tabpfnmix/_internal/data/preprocessor.py +15 -35
- autogluon/tabular/models/tabpfnmix/_internal/models/foundation/embedding.py +21 -38
- autogluon/tabular/models/tabpfnmix/_internal/models/foundation/foundation_transformer.py +33 -51
- autogluon/tabular/models/tabpfnmix/_internal/results/prediction_metrics.py +4 -4
- autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_classifier.py +32 -12
- autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_regressor.py +32 -13
- autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py +55 -19
- autogluon/tabular/models/tabpfnv2/tabpfnv2_5_model.py +21 -48
- autogluon/tabular/models/tabprep/prep_mixin.py +34 -26
- autogluon/tabular/models/tabular_nn/compilers/onnx.py +36 -8
- autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py +130 -36
- autogluon/tabular/models/tabular_nn/torch/tabular_torch_dataset.py +8 -4
- autogluon/tabular/models/tabular_nn/torch/torch_network_modules.py +26 -5
- autogluon/tabular/models/tabular_nn/utils/categorical_encoders.py +41 -24
- autogluon/tabular/models/tabular_nn/utils/data_preprocessor.py +33 -8
- autogluon/tabular/models/tabular_nn/utils/nn_architecture_utils.py +21 -6
- autogluon/tabular/models/xgboost/callbacks.py +9 -3
- autogluon/tabular/models/xgboost/xgboost_model.py +59 -11
- autogluon/tabular/models/xt/xt_model.py +1 -0
- autogluon/tabular/predictor/interpretable_predictor.py +3 -1
- autogluon/tabular/predictor/predictor.py +409 -128
- autogluon/tabular/registry/__init__.py +1 -1
- autogluon/tabular/registry/_ag_model_registry.py +4 -5
- autogluon/tabular/registry/_model_registry.py +1 -0
- autogluon/tabular/testing/fit_helper.py +55 -15
- autogluon/tabular/testing/generate_datasets.py +1 -1
- autogluon/tabular/testing/model_fit_helper.py +10 -4
- autogluon/tabular/trainer/abstract_trainer.py +644 -230
- autogluon/tabular/trainer/auto_trainer.py +19 -8
- autogluon/tabular/trainer/model_presets/presets.py +33 -9
- autogluon/tabular/trainer/model_presets/presets_distill.py +16 -2
- autogluon/tabular/version.py +1 -1
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/METADATA +26 -26
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/RECORD +127 -135
- autogluon/tabular/models/tabpfnv2/rfpfn/__init__.py +0 -20
- autogluon/tabular/models/tabpfnv2/rfpfn/configs.py +0 -40
- autogluon/tabular/models/tabpfnv2/rfpfn/scoring_utils.py +0 -201
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_decision_tree_tabpfn.py +0 -1464
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_random_forest_tabpfn.py +0 -747
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_compat.py +0 -863
- autogluon/tabular/models/tabpfnv2/rfpfn/utils.py +0 -106
- autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +0 -466
- /autogluon.tabular-1.5.0b20251228-py3.11-nspkg.pth → /autogluon.tabular-1.5.1b20260116-py3.11-nspkg.pth +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/WHEEL +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/licenses/LICENSE +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/licenses/NOTICE +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/namespace_packages.txt +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/top_level.txt +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/zip-safe +0 -0
autogluon/tabular/learner/abstract_learner.py

@@ -70,7 +70,10 @@ class AbstractTabularLearner(AbstractLearner):
         if isinstance(quantile_levels, Iterable):
             for quantile in quantile_levels:
                 if quantile <= 0.0 or quantile >= 1.0:
-                    raise ValueError(
+                    raise ValueError(
+                        "quantile values have to be non-negative and less than 1.0 (0.0 < q < 1.0). "
+                        "For example, 0.95 quantile = 95 percentile"
+                    )
             quantile_levels = np.sort(np.array(quantile_levels))
         self.quantile_levels = quantile_levels
 
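The rewrapped ValueError also spells out the accepted range. As a reference point, a minimal sketch of valid usage (synthetic data, all names illustrative):

```python
import pandas as pd
from autogluon.tabular import TabularPredictor

# Tiny synthetic frame purely for illustration.
train_data = pd.DataFrame({"x": range(100), "target": [2.0 * v for v in range(100)]})

# Every quantile level must lie strictly inside (0.0, 1.0); e.g. 0.95 = 95th percentile.
# A value such as 1.0 or -0.1 triggers the ValueError shown in the hunk above.
predictor = TabularPredictor(
    label="target",
    problem_type="quantile",
    quantile_levels=[0.05, 0.5, 0.95],
).fit(train_data)
```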
@@ -188,7 +191,11 @@ class AbstractTabularLearner(AbstractLearner):
         X = self.transform_features(X)
         y_pred_proba = self.load_trainer().predict_proba(X, model=model)
         y_pred_proba = self._post_process_predict_proba(
-            y_pred_proba=y_pred_proba,
+            y_pred_proba=y_pred_proba,
+            as_pandas=as_pandas,
+            index=X_index,
+            as_multiclass=as_multiclass,
+            inverse_transform=inverse_transform,
         )
         return y_pred_proba
 
@@ -206,11 +213,20 @@ class AbstractTabularLearner(AbstractLearner):
             decision_threshold = 0.5
         X_index = copy.deepcopy(X.index) if as_pandas else None
         y_pred_proba = self.predict_proba(
-            X=X,
+            X=X,
+            model=model,
+            as_pandas=False,
+            as_multiclass=False,
+            inverse_transform=False,
+            transform_features=transform_features,
         )
         problem_type = self.label_cleaner.problem_type_transform or self.problem_type
-        y_pred = get_pred_from_proba(
-
+        y_pred = get_pred_from_proba(
+            y_pred_proba=y_pred_proba, problem_type=problem_type, decision_threshold=decision_threshold
+        )
+        y_pred = self._post_process_predict(
+            y_pred=y_pred, as_pandas=as_pandas, index=X_index, inverse_transform=inverse_transform
+        )
         return y_pred
 
     def _post_process_predict(
@@ -242,7 +258,12 @@ class AbstractTabularLearner(AbstractLearner):
         return y_pred
 
     def _post_process_predict_proba(
-        self,
+        self,
+        y_pred_proba: np.ndarray,
+        as_pandas: bool = True,
+        index=None,
+        as_multiclass: bool = True,
+        inverse_transform: bool = True,
     ):
         """
         Given internal prediction probabilities, post-process them to vend to user.
@@ -338,7 +359,11 @@ class AbstractTabularLearner(AbstractLearner):
         # Inverse Transform labels
         for m, pred_proba in predict_proba_dict.items():
             predict_proba_dict[m] = self._post_process_predict_proba(
-                y_pred_proba=pred_proba,
+                y_pred_proba=pred_proba,
+                as_pandas=as_pandas,
+                as_multiclass=as_multiclass,
+                index=X_index,
+                inverse_transform=inverse_transform,
             )
         return predict_proba_dict
 
@@ -369,18 +394,29 @@ class AbstractTabularLearner(AbstractLearner):
         predict_dict = {}
         for m in predict_proba_dict:
             predict_dict[m] = self.get_pred_from_proba(
-                y_pred_proba=predict_proba_dict[m],
+                y_pred_proba=predict_proba_dict[m],
+                decision_threshold=decision_threshold,
+                inverse_transform=inverse_transform,
             )
         return predict_dict
 
     def get_pred_from_proba(
-        self,
+        self,
+        y_pred_proba: np.ndarray | pd.DataFrame,
+        decision_threshold: float | None = None,
+        inverse_transform: bool = True,
     ) -> np.array | pd.Series:
         if isinstance(y_pred_proba, pd.DataFrame):
-            y_pred = get_pred_from_proba_df(
+            y_pred = get_pred_from_proba_df(
+                y_pred_proba, problem_type=self.problem_type, decision_threshold=decision_threshold
+            )
         else:
-            y_pred = get_pred_from_proba(
-
+            y_pred = get_pred_from_proba(
+                y_pred_proba, problem_type=self.problem_type, decision_threshold=decision_threshold
+            )
+        y_pred = self._post_process_predict(
+            y_pred=y_pred, as_pandas=False, index=None, inverse_transform=inverse_transform
+        )
         return y_pred
 
     def _validate_fit_input(self, X: DataFrame, **kwargs):
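`get_pred_from_proba` now takes `decision_threshold` and `inverse_transform` as explicit keyword parameters. The thresholding it delegates to works roughly like this standalone sketch (not AutoGluon's exact implementation):

```python
import numpy as np

def pred_from_proba_binary(y_pred_proba: np.ndarray, decision_threshold: float = 0.5) -> np.ndarray:
    # Predict the positive class when its probability meets the threshold.
    return (y_pred_proba >= decision_threshold).astype(int)

proba = np.array([0.2, 0.55, 0.9])
print(pred_from_proba_binary(proba))                          # default 0.5 -> [0 1 1]
print(pred_from_proba_binary(proba, decision_threshold=0.8))  # stricter   -> [0 0 1]
```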
@@ -398,7 +434,9 @@ class AbstractTabularLearner(AbstractLearner):
         Ensure that the label column is present in the training data
         """
         if self.label not in X.columns:
-            raise KeyError(
+            raise KeyError(
+                f"Label column '{self.label}' is missing from training data. Training data columns: {list(X.columns)}"
+            )
 
     def _validate_sample_weight(self, X, X_val):
         if self.sample_weight is not None:
@@ -408,7 +446,9 @@ class AbstractTabularLearner(AbstractLearner):
                    prefix += " Warning: We do not recommend weight_evaluation=True with predefined sample weighting."
             else:
                 if self.sample_weight not in X.columns:
-                    raise KeyError(
+                    raise KeyError(
+                        f"sample_weight column '{self.sample_weight}' is missing from training data. Training data columns: {list(X.columns)}"
+                    )
                 weight_vals = X[self.sample_weight]
                 if weight_vals.isna().sum() > 0:
                     raise ValueError(f"Sample weights in column '{self.sample_weight}' cannot be nan")
@@ -417,8 +457,12 @@ class AbstractTabularLearner(AbstractLearner):
                 if weight_vals.min() < 0:
                     raise ValueError(f"Sample weights in column '{self.sample_weight}' must be nonnegative")
                 if self.weight_evaluation and X_val is not None and self.sample_weight not in X_val.columns:
-                    raise KeyError(
-
+                    raise KeyError(
+                        f"sample_weight column '{self.sample_weight}' cannot be missing from validation data if weight_evaluation=True"
+                    )
+                prefix = (
+                    f"Values in column '{self.sample_weight}' used as sample weights instead of predictive features."
+                )
             if self.weight_evaluation:
                 suffix = " Evaluation will report weighted metrics, so ensure same column exists in test data."
             else:
@@ -428,12 +472,18 @@ class AbstractTabularLearner(AbstractLearner):
     def _validate_groups(self, X, X_val):
         if self.groups is not None:
             if self.groups not in X.columns:
-                raise KeyError(
+                raise KeyError(
+                    f"groups column '{self.groups}' is missing from training data. Training data columns: {list(X.columns)}"
+                )
             groups_vals = X[self.groups]
             if len(groups_vals.unique()) < 2:
-                raise ValueError(
+                raise ValueError(
+                    f"Groups in column '{self.groups}' cannot have fewer than 2 unique values. Values: {list(groups_vals.unique())}"
+                )
             if X_val is not None and self.groups in X_val.columns:
-                raise KeyError(
+                raise KeyError(
+                    f"groups column '{self.groups}' cannot be in validation data. Validation data columns: {list(X_val.columns)}"
+                )
             logger.log(
                 20,
                 f"Values in column '{self.groups}' used as split folds instead of being automatically set. Bagged models will have {len(groups_vals.unique())} splits.",
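The `groups` checks above enforce that the fold column exists in the training data, has at least 2 unique values, and is absent from validation data. A minimal sketch of the corresponding public API (synthetic data, illustrative only):

```python
import pandas as pd
from autogluon.tabular import TabularPredictor

train_data = pd.DataFrame(
    {
        "x": range(12),
        "fold": [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3],  # needs >= 2 unique values
        "label": [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
    }
)

# Bagged models get one split per unique value in `fold` (4 here); passing a
# validation set that also contains `fold` would raise the KeyError above.
predictor = TabularPredictor(label="label", groups="fold").fit(train_data)
```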
@@ -534,7 +584,12 @@ class AbstractTabularLearner(AbstractLearner):
         set_refit_score_to_parent=False,
         display=False,
     ):
-        leaderboard_df = self.leaderboard(
+        leaderboard_df = self.leaderboard(
+            extra_info=extra_info,
+            refit_full=refit_full,
+            set_refit_score_to_parent=set_refit_score_to_parent,
+            display=display,
+        )
         if extra_metrics is None:
             extra_metrics = []
         if y is None:
@@ -559,14 +614,21 @@ class AbstractTabularLearner(AbstractLearner):
         all_trained_models = [m for m in all_trained_models if m in leaderboard_models]
         all_trained_models_can_infer = trainer.get_model_names(models=all_trained_models, can_infer=True)
         all_trained_models_original = all_trained_models.copy()
-        model_pred_proba_dict, pred_time_test_marginal = trainer.get_model_pred_proba_dict(
+        model_pred_proba_dict, pred_time_test_marginal = trainer.get_model_pred_proba_dict(
+            X=X, models=all_trained_models_can_infer, record_pred_time=True
+        )
 
         if compute_oracle:
             pred_probas = list(model_pred_proba_dict.values())
             ensemble_selection = EnsembleSelection(
-                ensemble_size=100,
+                ensemble_size=100,
+                problem_type=trainer.problem_type,
+                metric=self.eval_metric,
+                quantile_levels=self.quantile_levels,
             )
-            ensemble_selection.fit(
+            ensemble_selection.fit(
+                predictions=pred_probas, labels=y_internal, identifiers=None, sample_weight=w
+            )  # TODO: Only fit non-nan
 
             oracle_weights = ensemble_selection.weights_
             oracle_pred_time_start = time.time()
@@ -585,14 +647,20 @@ class AbstractTabularLearner(AbstractLearner):
                 scores[model_name] = np.nan
             else:
                 scores[model_name] = self.score_with_pred_proba(
-                    y_pred_proba_internal=y_pred_proba_internal,
+                    y_pred_proba_internal=y_pred_proba_internal,
+                    metric=self.eval_metric,
+                    decision_threshold=decision_threshold,
+                    **scoring_args,
                 )
             for metric in extra_metrics:
                 metric = get_metric(metric, self.problem_type, "leaderboard_metric")
                 if metric.name not in extra_scores:
                     extra_scores[metric.name] = {}
                 extra_scores[metric.name][model_name] = self.score_with_pred_proba(
-                    y_pred_proba_internal=y_pred_proba_internal,
+                    y_pred_proba_internal=y_pred_proba_internal,
+                    metric=metric,
+                    decision_threshold=decision_threshold,
+                    **scoring_args,
                 )
 
         if extra_scores:
@@ -629,8 +697,6 @@ class AbstractTabularLearner(AbstractLearner):
             pred_time_test[model] = None
             pred_time_test_marginal[model] = None
 
-        logger.debug("Model scores:")
-        logger.debug(str(scores))
         model_names_final = list(scores.keys())
         df = pd.DataFrame(
             data={
@@ -645,7 +711,8 @@ class AbstractTabularLearner(AbstractLearner):
 
         df_merged = pd.merge(df, leaderboard_df, on="model", how="left")
         df_merged = df_merged.sort_values(
-            by=["score_test", "pred_time_test", "score_val", "pred_time_val", "model"],
+            by=["score_test", "pred_time_test", "score_val", "pred_time_val", "model"],
+            ascending=[False, True, False, True, False],
         ).reset_index(drop=True)
         df_columns_lst = df_merged.columns.tolist()
         explicit_order = [
@@ -692,7 +759,9 @@ class AbstractTabularLearner(AbstractLearner):
         if metric.needs_pred or metric.needs_quantile:
             if self.problem_type == BINARY:
                 # Use 1 and 0, otherwise f1 can crash due to unknown pos_label.
-                y_pred = self.get_pred_from_proba(
+                y_pred = self.get_pred_from_proba(
+                    y_pred_proba_internal, decision_threshold=decision_threshold, inverse_transform=False
+                )
                 y_pred_proba = None
                 y_tmp = y_internal
             else:
@@ -777,7 +846,16 @@ class AbstractTabularLearner(AbstractLearner):
             f"\n\t Known classes: {self.class_labels}"
         )
 
-    def evaluate_predictions(
+    def evaluate_predictions(
+        self,
+        y_true,
+        y_pred,
+        sample_weight=None,
+        decision_threshold=None,
+        display=False,
+        auxiliary_metrics=True,
+        detailed_report=False,
+    ):
         """Evaluate predictions. Does not support sample weights since this method reports a variety of metrics.
         Args:
             display (bool): Should we print which metric is being used as well as performance.
@@ -868,13 +946,18 @@ class AbstractTabularLearner(AbstractLearner):
             if isinstance(aux_metric, str):
                 aux_metric = get_metric(metric=aux_metric, problem_type=self.problem_type, metric_type="aux_metric")
             if not aux_metric.needs_pred and y_pred_proba_internal is None:
-                logger.log(
+                logger.log(
+                    15, f"Skipping {aux_metric.name} because no prediction probabilities are available to score."
+                )
                 continue
 
             if aux_metric.name not in performance_dict:
                 if y_pred_proba_internal is not None:
                     score = self.score_with_pred_proba(
-                        y_pred_proba_internal=y_pred_proba_internal,
+                        y_pred_proba_internal=y_pred_proba_internal,
+                        metric=aux_metric,
+                        decision_threshold=decision_threshold,
+                        **scoring_args,
                     )
                 else:
                     score = self.score_with_pred(y_pred_internal=y_pred_internal, metric=aux_metric, **scoring_args)
@@ -885,7 +968,10 @@ class AbstractTabularLearner(AbstractLearner):
         score_eval = performance_dict[self.eval_metric.name]
         logger.log(20, f"Evaluation: {self.eval_metric.name} on test data: {score_eval}")
         if not self.eval_metric.greater_is_better_internal:
-            logger.log(
+            logger.log(
+                20,
+                f"\tNote: Scores are always higher_is_better. This metric score can be multiplied by -1 to get the metric value.",
+            )
         logger.log(20, "Evaluations on test data:")
         logger.log(20, json.dumps(performance_dict, indent=4))
 
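`evaluate_predictions` keeps its behavior; the signature is now spelled out one parameter per line. For reference, a sketch of the matching predictor-level call (synthetic data, illustrative only):

```python
import pandas as pd
from autogluon.tabular import TabularPredictor

train = pd.DataFrame({"x": range(60), "label": [i % 2 for i in range(60)]})
test = pd.DataFrame({"x": range(60, 80), "label": [i % 2 for i in range(60, 80)]})

predictor = TabularPredictor(label="label").fit(train)
y_pred = predictor.predict(test.drop(columns=["label"]))

# display toggles the log lines shown above; auxiliary_metrics adds
# accuracy/f1/etc. alongside the predictor's eval metric.
perf = predictor.evaluate_predictions(
    y_true=test["label"], y_pred=y_pred, display=True, auxiliary_metrics=True
)
print(perf)
```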
@@ -951,7 +1037,9 @@ class AbstractTabularLearner(AbstractLearner):
             if extra_metrics:
                 raise AssertionError("`extra_metrics` is only valid when data is specified.")
             trainer = self.load_trainer()
-            leaderboard = trainer.leaderboard(
+            leaderboard = trainer.leaderboard(
+                extra_info=extra_info, refit_full=refit_full, set_refit_score_to_parent=set_refit_score_to_parent
+            )
         if only_pareto_frontier:
             if "score_test" in leaderboard.columns and "pred_time_test" in leaderboard.columns:
                 score_col = "score_test"
@@ -959,7 +1047,9 @@ class AbstractTabularLearner(AbstractLearner):
             else:
                 score_col = "score_val"
                 inference_time_col = "pred_time_val"
-            leaderboard = get_leaderboard_pareto_frontier(
+            leaderboard = get_leaderboard_pareto_frontier(
+                leaderboard=leaderboard, score_col=score_col, inference_time_col=inference_time_col
+            )
         if score_format == "error":
             leaderboard.rename(
                 columns={
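The Pareto-frontier path filters the leaderboard down to models where no other model is both more accurate and faster to predict. Roughly, assuming a fitted `predictor` and a labeled `test` frame as in the earlier sketches:

```python
# Keep only models that are Pareto-optimal in (score_test, pred_time_test).
lb = predictor.leaderboard(test, only_pareto_frontier=True, display=True)
print(lb[["model", "score_test", "pred_time_test"]])
```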
@@ -988,7 +1078,15 @@ class AbstractTabularLearner(AbstractLearner):
     # features: list of feature names that feature importances are calculated for and returned, specify None to get all feature importances.
     # feature_stage: Whether to compute feature importance on raw original features ('original'), transformed features ('transformed') or on the features used by the particular model ('transformed_model').
     def get_feature_importance(
-        self,
+        self,
+        model=None,
+        X=None,
+        y=None,
+        features: list = None,
+        feature_stage="original",
+        subsample_size=5000,
+        silent=False,
+        **kwargs,
     ) -> DataFrame:
         valid_feature_stages = ["original", "transformed", "transformed_model"]
         if feature_stage not in valid_feature_stages:
@@ -1003,20 +1101,34 @@ class AbstractTabularLearner(AbstractLearner):
             X = X.drop(columns=self.ignored_columns, errors="ignore")
             unused_features = [f for f in list(X.columns) if f not in self.features]
             if len(unused_features) > 0:
-                logger.log(
+                logger.log(
+                    30,
+                    f"These features in provided data are not utilized by the predictor and will be ignored: {unused_features}",
+                )
                 X = X.drop(columns=unused_features)
 
             if feature_stage == "original":
                 return trainer._get_feature_importance_raw(
-                    model=model,
+                    model=model,
+                    X=X,
+                    y=y,
+                    features=features,
+                    subsample_size=subsample_size,
+                    transform_func=self.transform_features,
+                    silent=silent,
+                    **kwargs,
                 )
             X = self.transform_features(X)
         else:
             if feature_stage == "original":
-                raise AssertionError(
+                raise AssertionError(
+                    "Feature importance `dataset` cannot be None if `feature_stage=='original'`. A test dataset must be specified."
+                )
             y = None
         raw = feature_stage == "transformed"
-        return trainer.get_feature_importance(
+        return trainer.get_feature_importance(
+            X=X, y=y, model=model, features=features, raw=raw, subsample_size=subsample_size, silent=silent, **kwargs
+        )
 
     @staticmethod
     def _remove_nan_label_rows(X, y):
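The feature-importance plumbing is unchanged apart from wrapping; `feature_stage="original"` still requires a dataset, per the AssertionError. A sketch of the public entry point (synthetic data, illustrative only):

```python
import pandas as pd
from autogluon.tabular import TabularPredictor

# "noise" is constant, so the feature generator will likely prune it, which in
# turn exercises the unused-features warning shown in the hunk above.
train = pd.DataFrame({"x": range(60), "noise": [0] * 60, "label": [i % 2 for i in range(60)]})
test = pd.DataFrame({"x": range(60, 80), "noise": [0] * 20, "label": [i % 2 for i in range(60, 80)]})
predictor = TabularPredictor(label="label").fit(train)

# feature_stage="original" computes permutation importance on raw input
# features and therefore needs labeled data.
fi = predictor.feature_importance(data=test, feature_stage="original", subsample_size=5000)
print(fi)  # one row per feature with importance scores
```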
@@ -1029,7 +1141,9 @@ class AbstractTabularLearner(AbstractLearner):
         problem_type = self._infer_problem_type(y, silent=silent)
         if problem_type == QUANTILE:
             if self.quantile_levels is None:
-                raise AssertionError(
+                raise AssertionError(
+                    f"problem_type is inferred to be {QUANTILE}, yet quantile_levels is not specified."
+                )
         elif self.quantile_levels is not None:
             if problem_type == REGRESSION:
                 problem_type = QUANTILE
@@ -1073,7 +1187,11 @@ class AbstractTabularLearner(AbstractLearner):
     ):
         """See abstract_trainer.distill() for details."""
         if X is not None:
-            if (
+            if (
+                (self.eval_metric is not None)
+                and (self.eval_metric.name == "log_loss")
+                and (self.problem_type == MULTICLASS)
+            ):
                 X = augment_rare_classes(X, self.label, self.threshold)
             if y is None:
                 X, y = self.extract_label(X)
autogluon/tabular/learner/default_learner.py

@@ -92,16 +92,27 @@ class DefaultLearner(AbstractTabularLearner):
             num_bag_folds = len(X[self.groups].unique())
         X_og = None if infer_limit_batch_size is None else X
         logger.log(20, "Preprocessing data ...")
-        X, y, X_val, y_val, X_test, y_test, X_unlabeled, holdout_frac, num_bag_folds, groups =
-
+        X, y, X_val, y_val, X_test, y_test, X_unlabeled, holdout_frac, num_bag_folds, groups = (
+            self.general_data_processing(
+                X=X,
+                X_val=X_val,
+                X_test=X_test,
+                X_unlabeled=X_unlabeled,
+                holdout_frac=holdout_frac,
+                num_bag_folds=num_bag_folds,
+            )
         )
         if X_og is not None:
-            infer_limit = self._update_infer_limit(
+            infer_limit = self._update_infer_limit(
+                X=X_og, infer_limit_batch_size=infer_limit_batch_size, infer_limit=infer_limit
+            )
 
         self._post_X_rows = len(X)
         time_preprocessing_end = time.time()
         self._time_fit_preprocessing = time_preprocessing_end - time_preprocessing_start
-        logger.log(
+        logger.log(
+            20, f"Data preprocessing and feature engineering runtime = {round(self._time_fit_preprocessing, 2)}s ..."
+        )
         if time_limit:
             time_limit_trainer = time_limit - self._time_fit_preprocessing
         else:
@@ -152,12 +163,18 @@ class DefaultLearner(AbstractTabularLearner):
         self._time_fit_total = time_end - time_preprocessing_start
         log_throughput = ""
         if trainer.model_best is not None:
-            predict_n_time_per_row = trainer.get_model_attribute_full(
-
+            predict_n_time_per_row = trainer.get_model_attribute_full(
+                model=trainer.model_best, attribute="predict_n_time_per_row"
+            )
+            predict_n_size = trainer.get_model_attribute_full(
+                model=trainer.model_best, attribute="predict_n_size", func=min
+            )
             if predict_n_time_per_row is not None and predict_n_size is not None:
-                log_throughput = f" | Estimated inference throughput: {1/(predict_n_time_per_row if predict_n_time_per_row else np.finfo(np.float16).eps):.1f} rows/s ({int(predict_n_size)} batch size)"
+                log_throughput = f" | Estimated inference throughput: {1 / (predict_n_time_per_row if predict_n_time_per_row else np.finfo(np.float16).eps):.1f} rows/s ({int(predict_n_size)} batch size)"
         logger.log(
-            20,
+            20,
+            f"AutoGluon training complete, total runtime = {round(self._time_fit_total, 2)}s ... Best model: {trainer.model_best}"
+            f"{log_throughput}",
         )
 
     def _update_infer_limit(self, X: DataFrame, *, infer_limit_batch_size: int, infer_limit: float = None):
@@ -172,7 +189,8 @@ class DefaultLearner(AbstractTabularLearner):
         self.preprocess_1_batch_size = infer_limit_batch_size
         preprocess_1_time_log, time_unit_preprocess_1_time = convert_time_in_s_to_log_friendly(self.preprocess_1_time)
         logger.log(
-            20,
+            20,
+            f"\t{round(preprocess_1_time_log, 3)}{time_unit_preprocess_1_time}\t= Feature Preprocessing Time (1 row | {infer_limit_batch_size} batch size)",
         )
 
         if infer_limit is not None:
@@ -182,7 +200,7 @@ class DefaultLearner(AbstractTabularLearner):
 
             logger.log(
                 20,
-                f"\t\tFeature Preprocessing requires {round(self.preprocess_1_time/infer_limit*100, 2)}% "
+                f"\t\tFeature Preprocessing requires {round(self.preprocess_1_time / infer_limit * 100, 2)}% "
                 f"of the overall inference constraint ({infer_limit_log}{time_unit_infer_limit})\n"
                 f"\t\t{round(infer_limit_new_log, 3)}{time_unit_infer_limit_new} inference time budget remaining for models...",
            )
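These log lines report what share of the `infer_limit` budget preprocessing consumes. A sketch of how the budget is supplied at fit time (synthetic data, illustrative only):

```python
import pandas as pd
from autogluon.tabular import TabularPredictor

train = pd.DataFrame({"x": range(100), "label": [i % 2 for i in range(100)]})

# infer_limit is the end-to-end inference budget in seconds per row;
# infer_limit_batch_size is the batch size at which that budget is measured.
predictor = TabularPredictor(label="label").fit(
    train,
    infer_limit=0.0001,  # 0.1 ms per row
    infer_limit_batch_size=10000,
)
```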
@@ -199,7 +217,13 @@ class DefaultLearner(AbstractTabularLearner):
 
     # TODO: Add default values to X_val, X_unlabeled, holdout_frac, and num_bag_folds
     def general_data_processing(
-        self,
+        self,
+        X: DataFrame,
+        X_val: DataFrame = None,
+        X_test: DataFrame = None,
+        X_unlabeled: DataFrame = None,
+        holdout_frac: float = 1,
+        num_bag_folds: int = 0,
     ):
         """General data processing steps used for all models."""
         X = self._check_for_non_finite_values(X, name="train", is_train=True)
@@ -231,7 +255,9 @@ class DefaultLearner(AbstractTabularLearner):
         self.cleaner = Cleaner.construct(problem_type=self.problem_type, label=self.label, threshold=self.threshold)
         X = self.cleaner.fit_transform(X)  # TODO: Consider merging cleaner into label_cleaner
         X, y = self.extract_label(X)
-        self.label_cleaner = LabelCleaner.construct(
+        self.label_cleaner = LabelCleaner.construct(
+            problem_type=self.problem_type, y=y, y_uncleaned=y_uncleaned, positive_class=self._positive_class
+        )
         y = self.label_cleaner.transform(y)
         X = self.set_predefined_weights(X, y)
         X, w = extract_column(X, self.sample_weight)
@@ -240,10 +266,20 @@ class DefaultLearner(AbstractTabularLearner):
         logger.log(20, f"Train Data Class Count: {self.label_cleaner.num_classes}")
 
         X_val, y_val, w_val, holdout_frac = self._apply_cleaner_transform(
-            X=X_val,
+            X=X_val,
+            y_uncleaned=y_uncleaned,
+            holdout_frac=holdout_frac,
+            holdout_frac_og=holdout_frac_og,
+            name="val",
+            is_test=False,
         )
         X_test, y_test, w_test, _ = self._apply_cleaner_transform(
-            X=X_test,
+            X=X_test,
+            y_uncleaned=y_uncleaned,
+            holdout_frac=holdout_frac,
+            holdout_frac_og=holdout_frac_og,
+            name="test",
+            is_test=True,
         )
 
         self._original_features = list(X.columns)
@@ -281,7 +317,9 @@ class DefaultLearner(AbstractTabularLearner):
         y_unlabeled = pd.Series(np.nan, index=X_unlabeled.index) if X_unlabeled is not None else None
         y_list = [y, y_val, y_test_super, y_unlabeled]
         y_super = pd.concat(y_list, ignore_index=True)
-        X_super = self.fit_transform_features(
+        X_super = self.fit_transform_features(
+            X_super, y_super, problem_type=self.label_cleaner.problem_type_transform, eval_metric=self.eval_metric
+        )
         if not transform_with_test and X_test is not None:
             X_test = self.feature_generator.transform(X_test)
 
@@ -360,7 +398,13 @@ class DefaultLearner(AbstractTabularLearner):
         return X
 
     def _apply_cleaner_transform(
-        self,
+        self,
+        X: DataFrame,
+        y_uncleaned: Series,
+        holdout_frac: float | int,
+        holdout_frac_og: float | int,
+        name: str,
+        is_test: bool = False,
     ) -> tuple[DataFrame, Series, Series | None, float | int]:
         if X is not None and self.label in X.columns:
             y_og = X[self.label]
@@ -387,7 +431,9 @@ class DefaultLearner(AbstractTabularLearner):
             logger.warning(f"\t{name} Class Dtype: {y_og.dtype}")
             missing_classes = [c for c in val_classes if c not in train_classes]
             logger.warning(f"\tClasses missing from Training Data: {missing_classes}")
-            logger.warning(
+            logger.warning(
+                "############################################################################################################"
+            )
 
             X = None
             y = None
@@ -405,15 +451,23 @@ class DefaultLearner(AbstractTabularLearner):
         return X, y, w, holdout_frac
 
     def adjust_threshold_if_necessary(self, y, threshold, holdout_frac, num_bag_folds):
-        new_threshold, new_holdout_frac, new_num_bag_folds = self._adjust_threshold_if_necessary(
+        new_threshold, new_holdout_frac, new_num_bag_folds = self._adjust_threshold_if_necessary(
+            y, threshold, holdout_frac, num_bag_folds
+        )
         if new_threshold != threshold:
             if new_threshold < threshold:
-                logger.warning(
+                logger.warning(
+                    f"Warning: Updated label_count_threshold from {threshold} to {new_threshold} to avoid cutting too many classes."
+                )
         if new_holdout_frac != holdout_frac:
             if new_holdout_frac > holdout_frac:
-                logger.warning(
+                logger.warning(
+                    f"Warning: Updated holdout_frac from {holdout_frac} to {new_holdout_frac} to avoid cutting too many classes."
+                )
         if new_num_bag_folds != num_bag_folds:
-            logger.warning(
+            logger.warning(
+                f"Warning: Updated num_bag_folds from {num_bag_folds} to {new_num_bag_folds} to avoid cutting too many classes."
+            )
         return new_threshold, new_holdout_frac, new_num_bag_folds
 
     def _adjust_threshold_if_necessary(self, y, threshold, holdout_frac, num_bag_folds):
@@ -462,7 +516,9 @@ class DefaultLearner(AbstractTabularLearner):
     def get_info(self, include_model_info=False, include_model_failures=False, **kwargs):
         learner_info = super().get_info(**kwargs)
         trainer = self.load_trainer()
-        trainer_info = trainer.get_info(
+        trainer_info = trainer.get_info(
+            include_model_info=include_model_info, include_model_failures=include_model_failures
+        )
         learner_info.update(
             {
                 "time_fit_preprocessing": self._time_fit_preprocessing,
autogluon/tabular/models/__init__.py

@@ -1,6 +1,5 @@
 from autogluon.core.models.abstract.abstract_model import AbstractModel
 
-from .tabprep.prep_lgb_model import PrepLGBModel
 from .automm.automm_model import MultiModalPredictorModel
 from .automm.ft_transformer import FTTransformerModel
 from .catboost.catboost_model import CatBoostModel
@@ -19,6 +18,7 @@ from .imodels.imodels_models import (
 from .knn.knn_model import KNNModel
 from .lgb.lgb_model import LGBModel
 from .lr.lr_model import LinearModel
+from .mitra.mitra_model import MitraModel
 from .realmlp.realmlp_model import RealMLPModel
 from .rf.rf_model import RFModel
 from .tabdpt.tabdpt_model import TabDPTModel
@@ -26,7 +26,7 @@ from .tabicl.tabicl_model import TabICLModel
 from .tabm.tabm_model import TabMModel
 from .tabpfnmix.tabpfnmix_model import TabPFNMixModel
 from .tabpfnv2.tabpfnv2_5_model import RealTabPFNv2Model, RealTabPFNv25Model
-from .
+from .tabprep.prep_lgb_model import PrepLGBModel
 from .tabular_nn.torch.tabular_nn_torch import TabularNeuralNetTorchModel
 from .text_prediction.text_prediction_v1_model import TextPredictorModel
 from .xgboost.xgboost_model import XGBoostModel
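Net effect: the `PrepLGBModel` import moves into alphabetical position and `MitraModel` is newly re-exported at the package level. Assuming both remain public exports:

```python
# Both imports resolve from the package __init__ after this change.
from autogluon.tabular.models import MitraModel, PrepLGBModel

print(MitraModel.__name__, PrepLGBModel.__name__)
```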
autogluon/tabular/models/_utils/rapids_utils.py

@@ -16,7 +16,9 @@ class RapidsModelMixin:
 
     def _get_default_resources(self):
         num_cpus, _ = super()._get_default_resources()
-        num_gpus = min(
+        num_gpus = min(
+            ResourceManager.get_gpu_count_torch(), 1
+        )  # Use single gpu training by default. Consider revising it later.
         return num_cpus, num_gpus
 
     def get_minimum_resources(self, is_gpu_available=False) -> Dict[str, int]:
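The rewrap keeps the single-GPU cap. A standalone equivalent, assuming the `ResourceManager` helper reports the torch-visible GPU count:

```python
import torch

# Cap default training at one GPU regardless of how many are visible.
num_gpus = min(torch.cuda.device_count(), 1)
print(num_gpus)  # 0 on CPU-only hosts, 1 otherwise
```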
autogluon/tabular/models/abstract/abstract_torch_model.py

@@ -12,6 +12,7 @@ class AbstractTorchModel(AbstractModel):
     """
     .. versionadded:: 1.5.0
     """
+
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
         self.device = None
@@ -51,6 +52,7 @@ class AbstractTorchModel(AbstractModel):
     @classmethod
     def to_torch_device(cls, device: str):
         import torch
+
         return torch.device(device)
 
     def get_device(self) -> str: