autogluon.tabular 1.5.1b20260105__py3-none-any.whl → 1.5.1b20260116__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of autogluon.tabular has been flagged as potentially problematic.

Files changed (135)
  1. autogluon/tabular/__init__.py +1 -0
  2. autogluon/tabular/configs/config_helper.py +18 -6
  3. autogluon/tabular/configs/feature_generator_presets.py +3 -1
  4. autogluon/tabular/configs/hyperparameter_configs.py +42 -9
  5. autogluon/tabular/configs/presets_configs.py +38 -14
  6. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2023.py +84 -14
  7. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2025.py +48 -48
  8. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_cpu_2025_12_18.py +774 -1
  9. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_gpu_2025_12_18.py +421 -1
  10. autogluon/tabular/experimental/_scikit_mixin.py +6 -2
  11. autogluon/tabular/experimental/_tabular_classifier.py +3 -1
  12. autogluon/tabular/experimental/_tabular_regressor.py +3 -1
  13. autogluon/tabular/experimental/plot_leaderboard.py +73 -19
  14. autogluon/tabular/learner/abstract_learner.py +160 -42
  15. autogluon/tabular/learner/default_learner.py +78 -22
  16. autogluon/tabular/models/__init__.py +2 -2
  17. autogluon/tabular/models/_utils/rapids_utils.py +3 -1
  18. autogluon/tabular/models/abstract/abstract_torch_model.py +2 -0
  19. autogluon/tabular/models/automm/automm_model.py +12 -3
  20. autogluon/tabular/models/automm/ft_transformer.py +5 -1
  21. autogluon/tabular/models/catboost/callbacks.py +2 -2
  22. autogluon/tabular/models/catboost/catboost_model.py +93 -29
  23. autogluon/tabular/models/catboost/catboost_softclass_utils.py +4 -1
  24. autogluon/tabular/models/catboost/catboost_utils.py +3 -1
  25. autogluon/tabular/models/ebm/ebm_model.py +8 -13
  26. autogluon/tabular/models/ebm/hyperparameters/parameters.py +1 -0
  27. autogluon/tabular/models/ebm/hyperparameters/searchspaces.py +1 -0
  28. autogluon/tabular/models/fastainn/callbacks.py +20 -3
  29. autogluon/tabular/models/fastainn/hyperparameters/searchspaces.py +11 -1
  30. autogluon/tabular/models/fastainn/quantile_helpers.py +10 -2
  31. autogluon/tabular/models/fastainn/tabular_nn_fastai.py +65 -18
  32. autogluon/tabular/models/fasttext/fasttext_model.py +3 -1
  33. autogluon/tabular/models/image_prediction/image_predictor.py +7 -2
  34. autogluon/tabular/models/knn/knn_model.py +41 -8
  35. autogluon/tabular/models/lgb/callbacks.py +32 -9
  36. autogluon/tabular/models/lgb/hyperparameters/searchspaces.py +3 -1
  37. autogluon/tabular/models/lgb/lgb_model.py +150 -34
  38. autogluon/tabular/models/lgb/lgb_utils.py +12 -4
  39. autogluon/tabular/models/lr/hyperparameters/searchspaces.py +5 -1
  40. autogluon/tabular/models/lr/lr_model.py +40 -10
  41. autogluon/tabular/models/lr/lr_rapids_model.py +22 -13
  42. autogluon/tabular/models/mitra/_internal/__init__.py +1 -1
  43. autogluon/tabular/models/mitra/_internal/config/__init__.py +1 -1
  44. autogluon/tabular/models/mitra/_internal/config/config_pretrain.py +36 -40
  45. autogluon/tabular/models/mitra/_internal/config/config_run.py +2 -14
  46. autogluon/tabular/models/mitra/_internal/config/enums.py +27 -26
  47. autogluon/tabular/models/mitra/_internal/core/__init__.py +1 -1
  48. autogluon/tabular/models/mitra/_internal/core/callbacks.py +14 -21
  49. autogluon/tabular/models/mitra/_internal/core/get_loss.py +10 -12
  50. autogluon/tabular/models/mitra/_internal/core/get_optimizer.py +17 -32
  51. autogluon/tabular/models/mitra/_internal/core/get_scheduler.py +12 -27
  52. autogluon/tabular/models/mitra/_internal/core/prediction_metrics.py +16 -21
  53. autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py +130 -111
  54. autogluon/tabular/models/mitra/_internal/data/__init__.py +1 -1
  55. autogluon/tabular/models/mitra/_internal/data/collator.py +30 -26
  56. autogluon/tabular/models/mitra/_internal/data/dataset_finetune.py +18 -26
  57. autogluon/tabular/models/mitra/_internal/data/dataset_split.py +10 -7
  58. autogluon/tabular/models/mitra/_internal/data/preprocessor.py +70 -100
  59. autogluon/tabular/models/mitra/_internal/models/__init__.py +1 -1
  60. autogluon/tabular/models/mitra/_internal/models/base.py +7 -10
  61. autogluon/tabular/models/mitra/_internal/models/embedding.py +46 -56
  62. autogluon/tabular/models/mitra/_internal/models/tab2d.py +140 -120
  63. autogluon/tabular/models/mitra/_internal/utils/__init__.py +1 -1
  64. autogluon/tabular/models/mitra/_internal/utils/set_seed.py +3 -1
  65. autogluon/tabular/models/mitra/mitra_model.py +16 -11
  66. autogluon/tabular/models/mitra/sklearn_interface.py +178 -162
  67. autogluon/tabular/models/realmlp/realmlp_model.py +28 -15
  68. autogluon/tabular/models/rf/compilers/onnx.py +1 -1
  69. autogluon/tabular/models/rf/rf_model.py +45 -12
  70. autogluon/tabular/models/rf/rf_quantile.py +4 -2
  71. autogluon/tabular/models/tabdpt/tabdpt_model.py +8 -17
  72. autogluon/tabular/models/tabicl/tabicl_model.py +8 -1
  73. autogluon/tabular/models/tabm/_tabm_internal.py +6 -4
  74. autogluon/tabular/models/tabm/rtdl_num_embeddings.py +80 -127
  75. autogluon/tabular/models/tabm/tabm_model.py +8 -4
  76. autogluon/tabular/models/tabm/tabm_reference.py +53 -85
  77. autogluon/tabular/models/tabpfnmix/_internal/core/callbacks.py +7 -16
  78. autogluon/tabular/models/tabpfnmix/_internal/core/collator.py +16 -24
  79. autogluon/tabular/models/tabpfnmix/_internal/core/dataset_split.py +5 -7
  80. autogluon/tabular/models/tabpfnmix/_internal/core/enums.py +0 -2
  81. autogluon/tabular/models/tabpfnmix/_internal/core/get_loss.py +0 -1
  82. autogluon/tabular/models/tabpfnmix/_internal/core/get_optimizer.py +7 -18
  83. autogluon/tabular/models/tabpfnmix/_internal/core/get_scheduler.py +3 -14
  84. autogluon/tabular/models/tabpfnmix/_internal/core/trainer_finetune.py +79 -64
  85. autogluon/tabular/models/tabpfnmix/_internal/core/y_transformer.py +3 -5
  86. autogluon/tabular/models/tabpfnmix/_internal/data/dataset_finetune.py +17 -30
  87. autogluon/tabular/models/tabpfnmix/_internal/data/preprocessor.py +15 -35
  88. autogluon/tabular/models/tabpfnmix/_internal/models/foundation/embedding.py +21 -38
  89. autogluon/tabular/models/tabpfnmix/_internal/models/foundation/foundation_transformer.py +33 -51
  90. autogluon/tabular/models/tabpfnmix/_internal/results/prediction_metrics.py +4 -4
  91. autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_classifier.py +32 -12
  92. autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_regressor.py +32 -13
  93. autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py +55 -19
  94. autogluon/tabular/models/tabpfnv2/tabpfnv2_5_model.py +21 -48
  95. autogluon/tabular/models/tabprep/prep_mixin.py +34 -26
  96. autogluon/tabular/models/tabular_nn/compilers/onnx.py +36 -8
  97. autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py +130 -36
  98. autogluon/tabular/models/tabular_nn/torch/tabular_torch_dataset.py +8 -4
  99. autogluon/tabular/models/tabular_nn/torch/torch_network_modules.py +26 -5
  100. autogluon/tabular/models/tabular_nn/utils/categorical_encoders.py +41 -24
  101. autogluon/tabular/models/tabular_nn/utils/data_preprocessor.py +33 -8
  102. autogluon/tabular/models/tabular_nn/utils/nn_architecture_utils.py +21 -6
  103. autogluon/tabular/models/xgboost/callbacks.py +9 -3
  104. autogluon/tabular/models/xgboost/xgboost_model.py +59 -11
  105. autogluon/tabular/models/xt/xt_model.py +1 -0
  106. autogluon/tabular/predictor/interpretable_predictor.py +3 -1
  107. autogluon/tabular/predictor/predictor.py +409 -128
  108. autogluon/tabular/registry/__init__.py +1 -1
  109. autogluon/tabular/registry/_ag_model_registry.py +4 -5
  110. autogluon/tabular/registry/_model_registry.py +1 -0
  111. autogluon/tabular/testing/fit_helper.py +55 -15
  112. autogluon/tabular/testing/generate_datasets.py +1 -1
  113. autogluon/tabular/testing/model_fit_helper.py +10 -4
  114. autogluon/tabular/trainer/abstract_trainer.py +644 -230
  115. autogluon/tabular/trainer/auto_trainer.py +19 -8
  116. autogluon/tabular/trainer/model_presets/presets.py +33 -9
  117. autogluon/tabular/trainer/model_presets/presets_distill.py +16 -2
  118. autogluon/tabular/version.py +1 -1
  119. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/METADATA +26 -26
  120. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/RECORD +127 -135
  121. autogluon/tabular/models/tabpfnv2/rfpfn/__init__.py +0 -20
  122. autogluon/tabular/models/tabpfnv2/rfpfn/configs.py +0 -40
  123. autogluon/tabular/models/tabpfnv2/rfpfn/scoring_utils.py +0 -201
  124. autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_decision_tree_tabpfn.py +0 -1464
  125. autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_random_forest_tabpfn.py +0 -747
  126. autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_compat.py +0 -863
  127. autogluon/tabular/models/tabpfnv2/rfpfn/utils.py +0 -106
  128. autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +0 -466
  129. /autogluon.tabular-1.5.1b20260105-py3.11-nspkg.pth → /autogluon.tabular-1.5.1b20260116-py3.11-nspkg.pth +0 -0
  130. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/WHEEL +0 -0
  131. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/licenses/LICENSE +0 -0
  132. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/licenses/NOTICE +0 -0
  133. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/namespace_packages.txt +0 -0
  134. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/top_level.txt +0 -0
  135. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/zip-safe +0 -0
@@ -95,14 +95,17 @@ class NNFastAiTabularModel(AbstractModel):
     'early.stopping.min_delta': 0.0001,
     'early.stopping.patience': 10,
     """
+
     ag_key = "FASTAI"
     ag_name = "NeuralNetFastAI"
     ag_priority = 50
     # Increase priority for multiclass since neural networks
     # scale better than trees as a function of n_classes.
-    ag_priority_by_problem_type = MappingProxyType({
-        MULTICLASS: 95,
-    })
+    ag_priority_by_problem_type = MappingProxyType(
+        {
+            MULTICLASS: 95,
+        }
+    )
     seed_name = "random_seed"

     model_internals_file_name = "model-internals.pkl"
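Note: apart from the line wrapping, the substantive piece in this hunk is the read-only priority override keyed by problem type. A minimal standalone sketch of that pattern, using illustrative constants rather than AutoGluon's internals:

    from types import MappingProxyType

    MULTICLASS = "multiclass"  # assumed problem-type constant for this sketch

    ag_priority = 50
    ag_priority_by_problem_type = MappingProxyType({MULTICLASS: 95})

    def effective_priority(problem_type):
        # Fall back to the default priority when no override is registered.
        return ag_priority_by_problem_type.get(problem_type, ag_priority)

    print(effective_priority("multiclass"))  # 95
    print(effective_priority("regression"))  # 50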
@@ -136,8 +139,15 @@ class NNFastAiTabularModel(AbstractModel):

        if self.problem_type in [REGRESSION, QUANTILE] and self.y_scaler is not None:
            y_norm = pd.Series(self.y_scaler.fit_transform(y.values.reshape(-1, 1)).reshape(-1))
-            y_val_norm = pd.Series(self.y_scaler.transform(y_val.values.reshape(-1, 1)).reshape(-1)) if y_val is not None else None
-            logger.log(0, f"Training with scaled targets: {self.y_scaler} - !!! NN training metric will be different from the final results !!!")
+            y_val_norm = (
+                pd.Series(self.y_scaler.transform(y_val.values.reshape(-1, 1)).reshape(-1))
+                if y_val is not None
+                else None
+            )
+            logger.log(
+                0,
+                f"Training with scaled targets: {self.y_scaler} - !!! NN training metric will be different from the final results !!!",
+            )
        else:
            y_norm = y
            y_val_norm = y_val
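Note: the hunk above only rewraps the target-scaling branch. A minimal sketch of the underlying idea (scale regression/quantile targets for training, invert for reporting), assuming a scikit-learn StandardScaler as the y_scaler:

    import numpy as np
    import pandas as pd
    from sklearn.preprocessing import StandardScaler

    y = pd.Series([10.0, 20.0, 30.0, 40.0])
    y_scaler = StandardScaler()

    # Fit on training targets; reuse the same transform for validation targets.
    y_norm = pd.Series(y_scaler.fit_transform(y.values.reshape(-1, 1)).reshape(-1))

    # Predictions made in the scaled space are inverted before computing final metrics,
    # which is why the NN training metric differs from the reported results.
    pred_scaled = y_norm.values  # stand-in for model predictions
    pred = y_scaler.inverse_transform(pred_scaled.reshape(-1, 1)).reshape(-1)
    print(np.allclose(pred, y.values))  # True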
@@ -170,14 +180,20 @@ class NNFastAiTabularModel(AbstractModel):
            unique_vals = X[self.cont_columns].nunique()
            self.cont_columns = [c for c in self.cont_columns if unique_vals[c] > 1]
            if self.cont_columns:
-                self._cont_normalization = (np.array(X[self.cont_columns].mean()), np.array(X[self.cont_columns].std()))
+                self._cont_normalization = (
+                    np.array(X[self.cont_columns].mean()),
+                    np.array(X[self.cont_columns].std()),
+                )

        num_cat_cols_og = len(self.cat_columns)
        if self.cat_columns:
            try:
                X_stats = X[self.cat_columns].describe(include="all").T.reset_index()
                cat_cols_to_drop = list(
-                    X_stats[(X_stats["unique"] > self.params.get("max_unique_categorical_values", 10000)) | (X_stats["unique"].isna())]["index"].values
+                    X_stats[
+                        (X_stats["unique"] > self.params.get("max_unique_categorical_values", 10000))
+                        | (X_stats["unique"].isna())
+                    ]["index"].values
                )
            except:
                cat_cols_to_drop = []
@@ -187,7 +203,9 @@ class NNFastAiTabularModel(AbstractModel):
            num_cat_cols_use = len(self.cat_columns)
            logger.log(15, f"Using {num_cat_cols_use}/{num_cat_cols_og} categorical features")

-        nullable_numeric_features = self._feature_metadata.get_features(valid_raw_types=[R_FLOAT, R_DATETIME], invalid_special_types=[S_TEXT_SPECIAL])
+        nullable_numeric_features = self._feature_metadata.get_features(
+            valid_raw_types=[R_FLOAT, R_DATETIME], invalid_special_types=[S_TEXT_SPECIAL]
+        )
        self.columns_fills = dict()
        self._columns_fills_names = nullable_numeric_features
        for c in self._columns_fills_names:  # No need to do this for int features, int can't have null
@@ -227,7 +245,9 @@ class NNFastAiTabularModel(AbstractModel):
            df = df.copy()
        return df

-    def _fit(self, X, y, X_val=None, y_val=None, time_limit=None, num_cpus=None, num_gpus=0, sample_weight=None, **kwargs):
+    def _fit(
+        self, X, y, X_val=None, y_val=None, time_limit=None, num_cpus=None, num_gpus=0, sample_weight=None, **kwargs
+    ):
        try_import_fastai()
        import torch
        from fastai import torch_core
@@ -240,7 +260,10 @@ class NNFastAiTabularModel(AbstractModel):
            torch.set_num_threads(num_cpus)
        start_time = time.time()
        if sample_weight is not None:  # TODO: support
-            logger.log(15, "sample_weight not yet supported for NNFastAiTabularModel, this model will ignore them in training.")
+            logger.log(
+                15,
+                "sample_weight not yet supported for NNFastAiTabularModel, this model will ignore them in training.",
+            )

        params = self._get_model_params()
        self._num_cpus_infer = params.pop("_num_cpus_infer", 1)
@@ -341,13 +364,19 @@ class NNFastAiTabularModel(AbstractModel):

        fname = "model"
        save_callback = AgSaveModelCallback(
-            monitor=objective_func_name_to_monitor, comp=objective_optim_mode, fname=fname, best_epoch_stop=best_epoch_stop, with_opt=True
+            monitor=objective_func_name_to_monitor,
+            comp=objective_optim_mode,
+            fname=fname,
+            best_epoch_stop=best_epoch_stop,
+            with_opt=True,
        )

        if time_limit is not None:
            time_elapsed = time.time() - start_time
            time_left = time_limit - time_elapsed
-            if time_left <= time_limit * 0.7:  # if 30% of time was spent preprocessing, likely not enough time to train model
+            if (
+                time_left <= time_limit * 0.7
+            ):  # if 30% of time was spent preprocessing, likely not enough time to train model
                raise TimeLimitExceeded
        else:
            time_left = None
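Note: the guard reformatted above aborts training when preprocessing has already consumed 30% or more of the model's time budget. A standalone sketch of the same check, with a local TimeLimitExceeded stand-in for AutoGluon's exception:

    import time

    class TimeLimitExceeded(Exception):
        pass

    def remaining_budget(start_time, time_limit):
        if time_limit is None:
            return None
        time_left = time_limit - (time.time() - start_time)
        if time_left <= time_limit * 0.7:  # 30% or more already spent before training
            raise TimeLimitExceeded
        return time_left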
@@ -371,7 +400,12 @@ class NNFastAiTabularModel(AbstractModel):
        self.model.path = Path(temp_dir)

        len_val = len(X_val) if X_val is not None else 0
-        epochs = self._get_epochs_number(samples_num=len(X) + len_val, epochs=params["epochs"], batch_size=batch_size, time_left=time_left)
+        epochs = self._get_epochs_number(
+            samples_num=len(X) + len_val,
+            epochs=params["epochs"],
+            batch_size=batch_size,
+            time_left=time_left,
+        )
        if epochs == 0:
            # Stop early if there is not enough time to train a full epoch
            raise TimeLimitExceeded
@@ -474,7 +508,9 @@ class NNFastAiTabularModel(AbstractModel):
            objective_func_name = "pinball_loss"
        else:
            objective_func_name = "log_loss"
-            logger.warning(f"Metric {stopping_metric.name} is not supported by this model - using {objective_func_name} instead")
+            logger.warning(
+                f"Metric {stopping_metric.name} is not supported by this model - using {objective_func_name} instead"
+            )

        nn_metric = metrics_map.get(objective_func_name, None)

@@ -482,7 +518,11 @@ class NNFastAiTabularModel(AbstractModel):

    def __get_objective_func_to_monitor(self, objective_func_name):
        monitor_obj_func = {
-            **{k: m.name if hasattr(m, "name") else m.__name__ for k, m in self.__get_metrics_map().items() if m is not None},
+            **{
+                k: m.name if hasattr(m, "name") else m.__name__
+                for k, m in self.__get_metrics_map().items()
+                if m is not None
+            },
            "log_loss": "valid_loss",
        }
        objective_func_name_to_monitor = objective_func_name
@@ -534,13 +574,14 @@ class NNFastAiTabularModel(AbstractModel):
        self.model = __model
        # Export model
        if self._load_model:
-            save_pkl.save_with_fn(self._model_internals_path, self.model, pickle_fn=lambda m, buffer: export(m, buffer), verbose=verbose)
+            save_pkl.save_with_fn(
+                self._model_internals_path, self.model, pickle_fn=lambda m, buffer: export(m, buffer), verbose=verbose
+            )
        self._load_model = None
        return path

    @classmethod
    def load(cls, path: str, reset_paths=True, verbose=True):
-
        from fastai.learner import load_learner

        model = super().load(path, reset_paths=reset_paths, verbose=verbose)
@@ -627,7 +668,13 @@ class NNFastAiTabularModel(AbstractModel):

    def _estimate_memory_usage(self, X: pd.DataFrame, **kwargs) -> int:
        hyperparameters = self._get_model_params()
-        return self.estimate_memory_usage_static(X=X, problem_type=self.problem_type, num_classes=self.num_classes, hyperparameters=hyperparameters, **kwargs)
+        return self.estimate_memory_usage_static(
+            X=X,
+            problem_type=self.problem_type,
+            num_classes=self.num_classes,
+            hyperparameters=hyperparameters,
+            **kwargs,
+        )

    @classmethod
    def _estimate_memory_usage_static(
@@ -79,7 +79,9 @@ class FastTextModel(AbstractModel):
        params["verbose"] = 2

        if sample_weight is not None:
-            logger.log(15, "sample_weight not yet supported for FastTextModel, this model will ignore them in training.")
+            logger.log(
+                15, "sample_weight not yet supported for FastTextModel, this model will ignore them in training."
+            )
        X = self.preprocess(X)


@@ -24,6 +24,7 @@ class ImagePredictorModel(MultiModalPredictorModel):
    Additionally has special null image handling to improve performance in the presence of null images (aka image path of '')
    Note: null handling has not been compared to the built-in null handling of MultimodalPredictor yet.
    """
+
    ag_key = "AG_IMAGE_NN"
    ag_name = "ImagePredictor"

@@ -61,14 +62,18 @@ class ImagePredictorModel(MultiModalPredictorModel):
        X, y, X_val, y_val = super().preprocess_fit(X=X, y=y, X_val=X_val, y_val=y_val, **kwargs)
        X_features = list(X.columns)
        if len(X_features) != 1:
-            raise AssertionError(f"ImagePredictorModel only supports one image feature, but {len(X_features)} were given: {X_features}")
+            raise AssertionError(
+                f"ImagePredictorModel only supports one image feature, but {len(X_features)} were given: {X_features}"
+            )
        self._image_col_name = X_features[0]
        null_indices = X[self._image_col_name] == ""

        # TODO: Consider some kind of weighting of the two options so there isn't a harsh cutoff at 50
        # FIXME: What if all rows in a class are null? Will probably crash.
        if null_indices.sum() > 50:
-            self._dummy_pred_proba = self._compute_dummy_pred_proba(y[null_indices])  # FIXME: Do this one for better results
+            self._dummy_pred_proba = self._compute_dummy_pred_proba(
+                y[null_indices]
+            )  # FIXME: Do this one for better results
        else:
            # Not enough null to get a confident estimate of null label average, instead use all data average
            self._dummy_pred_proba = self._compute_dummy_pred_proba(y)
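Note: the logic here falls back to a "dummy" class-frequency prediction for rows whose image path is empty, estimated from the null rows themselves when there are more than 50 of them, otherwise from all rows. A minimal sketch of that frequency computation (illustrative helper, not AutoGluon's API):

    import pandas as pd

    def compute_dummy_pred_proba(y, num_classes):
        # Normalized class frequencies aligned to class indices 0..num_classes-1.
        counts = y.value_counts().reindex(range(num_classes), fill_value=0)
        return (counts / counts.sum()).to_numpy()

    y = pd.Series([0, 1, 1, 2, 1])
    print(compute_dummy_pred_proba(y, num_classes=3))  # [0.2 0.6 0.2]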
@@ -24,6 +24,7 @@ class KNNModel(AbstractModel):
    """
    KNearestNeighbors model (scikit-learn): https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html
    """
+
    ag_key = "KNN"
    ag_name = "KNeighbors"
    ag_priority = 100
@@ -106,11 +107,19 @@ class KNNModel(AbstractModel):
        if time_limit is None or num_rows_max <= 10000:
            self.model = self._get_model_type()(**params).fit(X, y)
        else:
-            self.model = self._fit_with_samples(X=X, y=y, model_params=params, time_limit=time_limit - (time.time() - time_start))
+            self.model = self._fit_with_samples(
+                X=X, y=y, model_params=params, time_limit=time_limit - (time.time() - time_start)
+            )

    def _estimate_memory_usage(self, X: pd.DataFrame, **kwargs) -> int:
        hyperparameters = self._get_model_params()
-        return self.estimate_memory_usage_static(X=X, problem_type=self.problem_type, num_classes=self.num_classes, hyperparameters=hyperparameters, **kwargs)
+        return self.estimate_memory_usage_static(
+            X=X,
+            problem_type=self.problem_type,
+            num_classes=self.num_classes,
+            hyperparameters=hyperparameters,
+            **kwargs,
+        )

    @classmethod
    def _estimate_memory_usage_static(
@@ -120,12 +129,23 @@ class KNNModel(AbstractModel):
        **kwargs,
    ) -> int:
        model_size_bytes = 4 * X.shape[0] * X.shape[1]  # Assuming float32 types
-        expected_final_model_size_bytes = int(model_size_bytes * 3.6)  # Roughly what can be expected of the final KNN model in memory size
+        expected_final_model_size_bytes = int(
+            model_size_bytes * 3.6
+        )  # Roughly what can be expected of the final KNN model in memory size
        return expected_final_model_size_bytes

-    def _validate_fit_memory_usage(self, mem_error_threshold: float = 0.2, mem_warning_threshold: float = 0.15, mem_size_threshold: int = 1e7, **kwargs):
+    def _validate_fit_memory_usage(
+        self,
+        mem_error_threshold: float = 0.2,
+        mem_warning_threshold: float = 0.15,
+        mem_size_threshold: int = 1e7,
+        **kwargs,
+    ):
        return super()._validate_fit_memory_usage(
-            mem_error_threshold=mem_error_threshold, mem_warning_threshold=mem_warning_threshold, mem_size_threshold=mem_size_threshold, **kwargs
+            mem_error_threshold=mem_error_threshold,
+            mem_warning_threshold=mem_warning_threshold,
+            mem_size_threshold=mem_size_threshold,
+            **kwargs,
        )

    # TODO: Won't work for RAPIDS without modification
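Note: the memory estimate in this hunk is plain arithmetic: float32 storage of the training matrix (4 bytes per value) scaled by an empirical ~3.6x factor for the fitted KNN model. As a standalone sketch:

    def estimate_knn_memory_bytes(n_rows, n_cols, overhead=3.6):
        data_bytes = 4 * n_rows * n_cols  # assuming float32 values
        return int(data_bytes * overhead)

    print(estimate_knn_memory_bytes(1_000_000, 50))  # 720000000 bytes, ~0.72 GB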
@@ -167,7 +187,17 @@ class KNNModel(AbstractModel):
        return y_oof_pred_proba

    # TODO: Consider making this fully generic and available to all models
-    def _fit_with_samples(self, X, y, model_params, time_limit, start_samples=10000, max_samples=None, sample_growth_factor=2, sample_time_growth_factor=8):
+    def _fit_with_samples(
+        self,
+        X,
+        y,
+        model_params,
+        time_limit,
+        start_samples=10000,
+        max_samples=None,
+        sample_growth_factor=2,
+        sample_time_growth_factor=8,
+    ):
        """
        Fit model with samples of the data repeatedly, gradually increasing the amount of data until time_limit is reached or all data is used.

@@ -243,11 +273,14 @@ class KNNModel(AbstractModel):
            time_limit_left = time_limit - (time_fit_end_sample - time_start)
            time_fit_sample = time_limit_left_prior - time_limit_left
            time_required_for_next = time_fit_sample * sample_time_growth_factor
-            logger.log(15, f"\t{round(time_fit_sample, 2)}s \t= Train Time (Using {samples}/{num_rows_max} rows) ({round(time_limit_left, 2)}s remaining time)")
+            logger.log(
+                15,
+                f"\t{round(time_fit_sample, 2)}s \t= Train Time (Using {samples}/{num_rows_max} rows) ({round(time_limit_left, 2)}s remaining time)",
+            )
            if time_required_for_next > time_limit_left and i != len(num_rows_samples) - 1:
                logger.log(
                    20,
-                    f"\tNot enough time to train KNN model on all training rows. Fit {samples}/{num_rows_max} rows. (Training KNN model on {num_rows_samples[i+1]} rows is expected to take {round(time_required_for_next, 2)}s)",
+                    f"\tNot enough time to train KNN model on all training rows. Fit {samples}/{num_rows_max} rows. (Training KNN model on {num_rows_samples[i + 1]} rows is expected to take {round(time_required_for_next, 2)}s)",
                )
                break
            if idx is not None:
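Note: _fit_with_samples (reformatted above) fits on progressively larger subsamples and stops once the projected cost of the next, larger fit would exceed the remaining time. A simplified sketch of that loop, with fit_fn standing in for the actual KNN fit:

    import time

    def fit_with_samples(X, y, fit_fn, time_limit, start_samples=10000,
                         sample_growth_factor=2, sample_time_growth_factor=8):
        num_rows_max = len(X)
        # Sample schedule: start_samples, 2x, 4x, ... capped at the full dataset.
        schedule, n = [], start_samples
        while n < num_rows_max:
            schedule.append(n)
            n *= sample_growth_factor
        schedule.append(num_rows_max)

        model, time_start = None, time.time()
        for i, samples in enumerate(schedule):
            t0 = time.time()
            model = fit_fn(X[:samples], y[:samples])
            time_fit_sample = time.time() - t0
            time_left = time_limit - (time.time() - time_start)
            # Assume the next fit costs ~sample_time_growth_factor times the last one.
            if i != len(schedule) - 1 and time_fit_sample * sample_time_growth_factor > time_left:
                break
        return model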
@@ -74,12 +74,15 @@ def early_stopping_custom(

    def _init(env):
        if not ignore_dart_warning:
-            enabled[0] = not any((boost_alias in env.params and env.params[boost_alias] == "dart") for boost_alias in ("boosting", "boosting_type", "boost"))
+            enabled[0] = not any(
+                (boost_alias in env.params and env.params[boost_alias] == "dart")
+                for boost_alias in ("boosting", "boosting_type", "boost")
+            )
            if not enabled[0]:
                warnings.warn("Early stopping is not available in dart mode")
                return
        if not env.evaluation_result_list:
-            raise ValueError("For early stopping, " "at least one dataset and eval metric is required for evaluation")
+            raise ValueError("For early stopping, at least one dataset and eval metric is required for evaluation")

        if verbose:
            msg = "Training until validation scores don't improve for {} rounds."
@@ -179,7 +182,9 @@ def early_stopping_custom(
        if not enabled[0]:
            return
        if train_loss_name is not None:
-            train_loss_evals = [eval for eval in env.evaluation_result_list if eval[0] == "train_set" and eval[1] == train_loss_name]
+            train_loss_evals = [
+                eval for eval in env.evaluation_result_list if eval[0] == "train_set" and eval[1] == train_loss_name
+            ]
            train_loss_val = train_loss_evals[0][2]
        else:
            train_loss_val = 0.0
@@ -194,7 +199,9 @@
                best_score_list[i] = env.evaluation_result_list
                best_trainloss[i] = train_loss_val
            if reporter is not None:  # Report current best scores for iteration, used in HPO
-                if i == indices_to_check[0]:  # TODO: documentation needs to note that we assume 0th index is the 'official' validation performance metric.
+                if (
+                    i == indices_to_check[0]
+                ):  # TODO: documentation needs to note that we assume 0th index is the 'official' validation performance metric.
                    if cmp_op[i] == gt:
                        validation_perf = score
                    else:
@@ -214,7 +221,10 @@
                logger.log(
                    15,
                    "Early stopping, best iteration is:\n[%d]\t%s"
-                    % (best_iter[i] + 1, "\t".join([_format_eval_result(x, show_stdv=False) for x in best_score_list[i]])),
+                    % (
+                        best_iter[i] + 1,
+                        "\t".join([_format_eval_result(x, show_stdv=False) for x in best_score_list[i]]),
+                    ),
                )
                raise EarlyStopException(best_iter[i], best_score_list[i])
            elif (max_diff is not None) and (abs(score - best_score[i]) > max_diff):
@@ -224,7 +234,10 @@
                logger.log(
                    15,
                    "Early stopping, best iteration is:\n[%d]\t%s"
-                    % (best_iter[i] + 1, "\t".join([_format_eval_result(x, show_stdv=False) for x in best_score_list[i]])),
+                    % (
+                        best_iter[i] + 1,
+                        "\t".join([_format_eval_result(x, show_stdv=False) for x in best_score_list[i]]),
+                    ),
                )
                raise EarlyStopException(best_iter[i], best_score_list[i])
            if env.iteration == env.end_iteration - 1:
@@ -232,7 +245,10 @@
                    logger.log(
                        15,
                        "Did not meet early stopping criterion. Best iteration is:\n[%d]\t%s"
-                        % (best_iter[i] + 1, "\t".join([_format_eval_result(x, show_stdv=False) for x in best_score_list[i]])),
+                        % (
+                            best_iter[i] + 1,
+                            "\t".join([_format_eval_result(x, show_stdv=False) for x in best_score_list[i]]),
+                        ),
                    )
                    raise EarlyStopException(best_iter[i], best_score_list[i])
            if verbose:
@@ -243,7 +259,10 @@
                logger.log(
                    20,
                    "Found manual stop file, early stopping. Best iteration is:\n[%d]\t%s"
-                    % (best_iter[i] + 1, "\t".join([_format_eval_result(x, show_stdv=False) for x in best_score_list[i]])),
+                    % (
+                        best_iter[i] + 1,
+                        "\t".join([_format_eval_result(x, show_stdv=False) for x in best_score_list[i]]),
+                    ),
                )
                raise EarlyStopException(best_iter[i], best_score_list[i])
        if time_limit:
@@ -255,7 +274,11 @@
                    20,
                    "\tRan out of time, early stopping on iteration "
                    + str(env.iteration + 1)
-                    + ". Best iteration is:\n\t[%d]\t%s" % (best_iter[i] + 1, "\t".join([_format_eval_result(x, show_stdv=False) for x in best_score_list[i]])),
+                    + ". Best iteration is:\n\t[%d]\t%s"
+                    % (
+                        best_iter[i] + 1,
+                        "\t".join([_format_eval_result(x, show_stdv=False) for x in best_score_list[i]]),
+                    ),
                )
                raise EarlyStopException(best_iter[i], best_score_list[i])

@@ -19,7 +19,9 @@ def get_searchspace_multiclass_baseline():
    params = {
        "learning_rate": space.Real(lower=5e-3, upper=0.2, default=0.05, log=True),
        "feature_fraction": space.Real(lower=0.75, upper=1.0, default=1.0),
-        "min_data_in_leaf": space.Int(lower=2, upper=60, default=20),  # TODO: Use size of dataset to set upper, if row count is small upper should be small
+        "min_data_in_leaf": space.Int(
+            lower=2, upper=60, default=20
+        ),  # TODO: Use size of dataset to set upper, if row count is small upper should be small
        "num_leaves": space.Int(
            lower=16, upper=96, default=31
        ),  # TODO: Use row count and feature count to set this, the higher feature count the higher num_leaves upper
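Note: search spaces like these are consumed by passing the Space objects as hyperparameter values and enabling tuning at fit time. A hedged usage sketch (the synthetic data and the "GBM" entry are illustrative; exact tuning behavior depends on the installed AutoGluon version):

    import numpy as np
    import pandas as pd
    from autogluon.common import space
    from autogluon.tabular import TabularPredictor

    # Tiny synthetic dataset purely for illustration.
    rng = np.random.default_rng(0)
    train_data = pd.DataFrame({"x1": rng.normal(size=200), "x2": rng.normal(size=200)})
    train_data["target"] = (train_data["x1"] + train_data["x2"] > 0).astype(int)

    hyperparameters = {
        "GBM": {
            "learning_rate": space.Real(5e-3, 0.2, default=0.05, log=True),
            "min_data_in_leaf": space.Int(2, 60, default=20),
        }
    }

    predictor = TabularPredictor(label="target").fit(
        train_data,
        hyperparameters=hyperparameters,
        hyperparameter_tune_kwargs="auto",  # run HPO over the declared search spaces
    )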