PyPI - autogluon.timeseries - Versions diffs - 1.3.2b20250712__py3-none-any.whl → 1.4.1b20251116__py3-none-any.whl - Mend

autogluon.timeseries 1.3.2b20250712__py3-none-any.whl → 1.4.1b20251116__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

autogluon/timeseries/models/autogluon_tabular/mlforecast.py CHANGED Viewed

@@ -3,7 +3,7 @@ import logging
 import math
 import time
 import warnings
-from typing import Any, Callable, Collection, Dict, List, Optional, Tuple, Type, Union
+from typing import Any, Callable, Collection, Optional, Type, Union
 import numpy as np
 import pandas as pd
@@ -13,7 +13,7 @@ import autogluon.core as ag
 from autogluon.core.models import AbstractModel as AbstractTabularModel
 from autogluon.features import AutoMLPipelineFeatureGenerator
 from autogluon.tabular.registry import ag_model_registry
-from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TIMESTAMP, TimeSeriesDataFrame
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.metrics.abstract import TimeSeriesScorer
 from autogluon.timeseries.metrics.utils import in_sample_squared_seasonal_error
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
@@ -36,7 +36,7 @@ class TabularModel(BaseEstimator):
     def __init__(self, model_class: Type[AbstractTabularModel], model_kwargs: Optional[dict] = None):
         self.model_class = model_class
         self.model_kwargs = {} if model_kwargs is None else model_kwargs
-        self.feature_pipeline = AutoMLPipelineFeatureGenerator()
+        self.feature_pipeline = AutoMLPipelineFeatureGenerator(verbosity=0)
     def fit(self, X: pd.DataFrame, y: pd.Series, X_val: pd.DataFrame, y_val: pd.Series, **kwargs):
         self.model = self.model_class(**self.model_kwargs)
@@ -68,7 +68,7 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
         path: Optional[str] = None,
         name: Optional[str] = None,
         eval_metric: Optional[Union[str, TimeSeriesScorer]] = None,
-        hyperparameters: Optional[Dict[str, Any]] = None,
+        hyperparameters: Optional[dict[str, Any]] = None,
         **kwargs,
     ):
         super().__init__(
@@ -86,12 +86,12 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
         self._sum_of_differences: int = 0  # number of time steps removed from each series by differencing
         self._max_ts_length: Optional[int] = None
         self._target_lags: np.ndarray
-        self._date_features: List[Callable]
+        self._date_features: list[Callable]
         self._mlf: MLForecast
         self._scaler: Optional[BaseTargetTransform] = None
         self._residuals_std_per_item: pd.Series
         self._train_target_median: Optional[float] = None
-        self._non_boolean_real_covariates: List[str] = []
+        self._non_boolean_real_covariates: list[str] = []
     def _initialize_transforms_and_regressor(self):
         super()._initialize_transforms_and_regressor()
@@ -99,7 +99,7 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
         self.target_scaler = None
     @property
-    def allowed_hyperparameters(self) -> List[str]:
+    def allowed_hyperparameters(self) -> list[str]:
         return super().allowed_hyperparameters + [
             "lags",
             "date_features",
@@ -117,10 +117,12 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
         known_covariates: Optional[TimeSeriesDataFrame] = None,
         is_train: bool = False,
         **kwargs,
-    ) -> Tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame]]:
+    ) -> tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame]]:
         if is_train:
             # All-NaN series are removed; partially-NaN series in train_data are handled inside _generate_train_val_dfs
-            all_nan_items = data.item_ids[data[self.target].isna().groupby(ITEMID, sort=False).all()]
+            all_nan_items = data.item_ids[
+                data[self.target].isna().groupby(TimeSeriesDataFrame.ITEMID, sort=False).all()
+            ]
             if len(all_nan_items):
                 data = data.query("item_id not in @all_nan_items")
         else:
@@ -130,32 +132,7 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
                 data[self.target] = data[self.target].fillna(value=self._train_target_median)
         return data, known_covariates
-    def _process_deprecated_hyperparameters(self, model_params: Dict[str, Any]) -> Dict[str, Any]:
-        if "tabular_hyperparameters" in model_params:
-            logger.warning(
-                f"Hyperparameter 'tabular_hyperparameters' for {self.name} is deprecated and will be removed in v1.5. "
-                "Please use 'model_name' to specify the tabular model alias and 'model_hyperparameters' "
-                "to provide the tabular model hyperparameters."
-            )
-            tabular_hyperparameters = model_params.pop("tabular_hyperparameters")
-            if len(tabular_hyperparameters) == 1:
-                # We can automatically convert the hyperparameters if only one model is used
-                model_params["model_name"] = list(tabular_hyperparameters.keys())[0]
-                model_params["model_hyperparameters"] = tabular_hyperparameters[model_params["model_name"]]
-            else:
-                raise ValueError(
-                    f"Provided 'tabular_hyperparameters' {tabular_hyperparameters} cannot be automatically converted "
-                    f"to the new 'model_name' and 'model_hyperparameters' API for {self.name}."
-                )
-        if "tabular_fit_kwargs" in model_params:
-            logger.warning(
-                f"Hyperparameters 'tabular_fit_kwargs' for {self.name} is deprecated and is ignored by the model. "
-                "Please use 'model_name' to specify the tabular model alias and 'model_hyperparameters' "
-                "to provide the tabular model hyperparameters."
-            )
-        return model_params
-    def _get_default_hyperparameters(self) -> Dict[str, Any]:
+    def _get_default_hyperparameters(self) -> dict[str, Any]:
         return {
             "max_num_items": 20_000,
             "max_num_samples": 1_000_000,
@@ -163,12 +140,12 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
             "model_hyperparameters": {},
         }
-    def _create_tabular_model(self, model_name: str, model_hyperparameters: Dict[str, Any]) -> TabularModel:
+    def _create_tabular_model(self, model_name: str, model_hyperparameters: dict[str, Any]) -> TabularModel:
         raise NotImplementedError
     def _get_mlforecast_init_args(
-        self, train_data: TimeSeriesDataFrame, model_params: Dict[str, Any]
-    ) -> Dict[str, Any]:
+        self, train_data: TimeSeriesDataFrame, model_params: dict[str, Any]
+    ) -> dict[str, Any]:
         from mlforecast.target_transforms import Differences
         from .transforms import MLForecastScaler
@@ -236,7 +213,7 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
     def _generate_train_val_dfs(
         self, data: TimeSeriesDataFrame, max_num_items: Optional[int] = None, max_num_samples: Optional[int] = None
-    ) -> Tuple[pd.DataFrame, pd.DataFrame]:
+    ) -> tuple[pd.DataFrame, pd.DataFrame]:
         # Exclude items that are too short for chosen differences - otherwise exception will be raised
         if self._sum_of_differences > 0:
             ts_lengths = data.num_timesteps_per_item()
@@ -298,18 +275,28 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
         """
         # TODO: Add support for past_covariates
         selected_columns = self.covariate_metadata.known_covariates.copy()
-        column_name_mapping = {ITEMID: MLF_ITEMID, TIMESTAMP: MLF_TIMESTAMP}
+        column_name_mapping = {TimeSeriesDataFrame.ITEMID: MLF_ITEMID, TimeSeriesDataFrame.TIMESTAMP: MLF_TIMESTAMP}
         if include_target:
             selected_columns += [self.target]
             column_name_mapping[self.target] = MLF_TARGET
         df = pd.DataFrame(data)[selected_columns].reset_index()
         if static_features is not None:
-            df = pd.merge(df, static_features, how="left", on=ITEMID, suffixes=(None, "_static_feat"))
+            df = pd.merge(
+                df, static_features, how="left", on=TimeSeriesDataFrame.ITEMID, suffixes=(None, "_static_feat")
+            )
         for col in self._non_boolean_real_covariates:
             # Normalize non-boolean features using mean_abs scaling
-            df[f"__scaled_{col}"] = df[col] / df[col].abs().groupby(df[ITEMID]).mean().reindex(df[ITEMID]).values
+            df[f"__scaled_{col}"] = (
+                df[col]
+                / df[col]
+                .abs()
+                .groupby(df[TimeSeriesDataFrame.ITEMID])
+                .mean()
+                .reindex(df[TimeSeriesDataFrame.ITEMID])
+                .values
+            )
         # Convert float64 to float32 to reduce memory usage
         float64_cols = list(df.select_dtypes(include="float64"))
@@ -338,7 +325,6 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
             if not set(train_data[col].unique()) == set([0, 1]):
                 self._non_boolean_real_covariates.append(col)
         model_params = self.get_hyperparameters()
-        model_params = self._process_deprecated_hyperparameters(model_params)
         mlforecast_init_args = self._get_mlforecast_init_args(train_data, model_params)
         assert self.freq is not None
@@ -400,16 +386,16 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
         self,
         data: TimeSeriesDataFrame,
         known_covariates: Optional[TimeSeriesDataFrame] = None,
-    ) -> Tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame], Optional[TimeSeriesDataFrame]]:
+    ) -> tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame], Optional[TimeSeriesDataFrame]]:
         """Remove series that are too short for chosen differencing from data and generate naive forecast for them.
         Returns
         -------
-        data_long : TimeSeriesDataFrame
+        data_long
             Data containing only time series that are long enough for the model to predict.
-        known_covariates_long : TimeSeriesDataFrame or None
+        known_covariates_long
             Future known covariates containing only time series that are long enough for the model to predict.
-        forecast_for_short_series : TimeSeriesDataFrame or None
+        forecast_for_short_series
             Seasonal naive forecast for short series, if there are any in the dataset.
         """
         ts_lengths = data.num_timesteps_per_item()
@@ -468,12 +454,12 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
             predictions[str(q)] = predictions["mean"] + norm.ppf(q) * std_per_timestep.to_numpy()
         return predictions
-    def _more_tags(self) -> Dict[str, Any]:
+    def _more_tags(self) -> dict[str, Any]:
         return {"allow_nan": True, "can_refit_full": True}
 class DirectTabularModel(AbstractMLForecastModel):
-    """Predict all future time series values simultaneously using tabular regression models.
+    """Predict all future time series values simultaneously using a regression model from AutoGluon-Tabular.
     A single tabular model is used to forecast all future time series values using the following features:
@@ -493,35 +479,37 @@ class DirectTabularModel(AbstractMLForecastModel):
     Other Parameters
     ----------------
-    lags : List[int], default = None
+    lags : list[int], default = None
         Lags of the target that will be used as features for predictions. If None, will be determined automatically
         based on the frequency of the data.
-    date_features : List[Union[str, Callable]], default = None
+    date_features : list[Union[str, Callable]], default = None
         Features computed from the dates. Can be pandas date attributes or functions that will take the dates as input.
         If None, will be determined automatically based on the frequency of the data.
-    differences : List[int], default = []
+    differences : list[int], default = []
         Differences to take of the target before computing the features. These are restored at the forecasting step.
         Defaults to no differencing.
     target_scaler : {"standard", "mean_abs", "min_max", "robust", None}, default = "mean_abs"
         Scaling applied to each time series. Scaling is applied after differencing.
     model_name : str, default = "GBM"
-        Name of the tabular regression model. See `autogluon.tabular.registry.ag_model_registry` or
+        Name of the tabular regression model. See ``autogluon.tabular.registry.ag_model_registry`` or
         `the documentation <https://auto.gluon.ai/stable/api/autogluon.tabular.models.html>`_ for the list of available
         tabular models.
-    model_hyperparameters : Dict[str, Any], optional
+    model_hyperparameters : dict[str, Any], optional
         Hyperparameters passed to the tabular regression model.
     max_num_items : int or None, default = 20_000
         If not None, the model will randomly select this many time series for training and validation.
     max_num_samples : int or None, default = 1_000_000
-        If not None, training dataset passed to TabularPredictor will contain at most this many rows (starting from the
-        end of each time series).
+        If not None, training dataset passed to the tabular regression model will contain at most this many rows
+        (starting from the end of each time series).
     """
+    ag_priority = 85
     @property
     def is_quantile_model(self) -> bool:
         return self.eval_metric.needs_quantile
-    def get_hyperparameters(self) -> Dict[str, Any]:
+    def get_hyperparameters(self) -> dict[str, Any]:
         model_params = super().get_hyperparameters()
         # We don't set 'target_scaler' if user already provided 'scaler' to avoid overriding the user-provided value
         if "scaler" not in model_params:
@@ -610,12 +598,14 @@ class DirectTabularModel(AbstractMLForecastModel):
                 predictions, repeated_item_ids=predictions[MLF_ITEMID], past_target=data[self.target]
             )
         predictions_tsdf: TimeSeriesDataFrame = TimeSeriesDataFrame(
-            predictions.rename(columns={MLF_ITEMID: ITEMID, MLF_TIMESTAMP: TIMESTAMP})
+            predictions.rename(
+                columns={MLF_ITEMID: TimeSeriesDataFrame.ITEMID, MLF_TIMESTAMP: TimeSeriesDataFrame.TIMESTAMP}
+            )
         )
         if forecast_for_short_series is not None:
             predictions_tsdf = pd.concat([predictions_tsdf, forecast_for_short_series])  # type: ignore
-            predictions_tsdf = predictions_tsdf.reindex(original_item_id_order, level=ITEMID)
+            predictions_tsdf = predictions_tsdf.reindex(original_item_id_order, level=TimeSeriesDataFrame.ITEMID)
         return predictions_tsdf
@@ -632,7 +622,7 @@ class DirectTabularModel(AbstractMLForecastModel):
         column_order = ["mean"] + [col for col in predictions_df.columns if col != "mean"]
         return predictions_df[column_order]
-    def _create_tabular_model(self, model_name: str, model_hyperparameters: Dict[str, Any]) -> TabularModel:
+    def _create_tabular_model(self, model_name: str, model_hyperparameters: dict[str, Any]) -> TabularModel:
         model_class = ag_model_registry.key_to_cls(model_name)
         if self.is_quantile_model:
             problem_type = ag.constants.QUANTILE
@@ -654,16 +644,16 @@ class DirectTabularModel(AbstractMLForecastModel):
 class RecursiveTabularModel(AbstractMLForecastModel):
-    """Predict future time series values one by one using TabularPredictor from AutoGluon-Tabular.
+    """Predict future time series values one by one using a regression model from AutoGluon-Tabular.
-    A single TabularPredictor is used to forecast the future time series values using the following features:
+    A single tabular regression model is used to forecast the future time series values using the following features:
     - lag features (observed time series values) based on ``freq`` of the data
     - time features (e.g., day of the week) based on the timestamp of the measurement
     - known covariates (if available)
     - static features of each item (if available)
-    TabularPredictor will always be trained with ``"regression"`` problem type, and dummy quantiles will be
+    The tabular model will always be trained with ``"regression"`` problem type, and dummy quantiles will be
     obtained by assuming that the residuals follow zero-mean normal distribution.
     Based on the `mlforecast <https://github.com/Nixtla/mlforecast>`_ library.
@@ -671,34 +661,36 @@ class RecursiveTabularModel(AbstractMLForecastModel):
     Other Parameters
     ----------------
-    lags : List[int], default = None
+    lags : list[int], default = None
         Lags of the target that will be used as features for predictions. If None, will be determined automatically
         based on the frequency of the data.
-    date_features : List[Union[str, Callable]], default = None
+    date_features : list[Union[str, Callable]], default = None
         Features computed from the dates. Can be pandas date attributes or functions that will take the dates as input.
         If None, will be determined automatically based on the frequency of the data.
-    differences : List[int], default = None
+    differences : list[int], default = None
         Differences to take of the target before computing the features. These are restored at the forecasting step.
         If None, will be set to ``[seasonal_period]``, where seasonal_period is determined based on the data frequency.
     target_scaler : {"standard", "mean_abs", "min_max", "robust", None}, default = "standard"
         Scaling applied to each time series. Scaling is applied after differencing.
-    lag_transforms : Dict[int, List[Callable]], default = None
+    lag_transforms : dict[int, list[Callable]], default = None
         Dictionary mapping lag periods to transformation functions applied to lagged target values (e.g., rolling mean).
         See `MLForecast documentation <https://nixtlaverse.nixtla.io/mlforecast/lag_transforms.html>`_ for more details.
     model_name : str, default = "GBM"
-        Name of the tabular regression model. See `autogluon.tabular.registry.ag_model_registry` or
+        Name of the tabular regression model. See ``autogluon.tabular.registry.ag_model_registry`` or
         `the documentation <https://auto.gluon.ai/stable/api/autogluon.tabular.models.html>`_ for the list of available
         tabular models.
-    model_hyperparameters : Dict[str, Any], optional
+    model_hyperparameters : dict[str, Any], optional
         Hyperparameters passed to the tabular regression model.
     max_num_items : int or None, default = 20_000
         If not None, the model will randomly select this many time series for training and validation.
     max_num_samples : int or None, default = 1_000_000
-        If not None, training dataset passed to TabularPredictor will contain at most this many rows (starting from the
-        end of each time series).
+        If not None, training dataset passed to the tabular regression model will contain at most this many rows
+        (starting from the end of each time series).
     """
-    def get_hyperparameters(self) -> Dict[str, Any]:
+    ag_priority = 90
+    def get_hyperparameters(self) -> dict[str, Any]:
         model_params = super().get_hyperparameters()
         # We don't set 'target_scaler' if user already provided 'scaler' to avoid overriding the user-provided value
         if "scaler" not in model_params:
@@ -741,18 +733,22 @@ class RecursiveTabularModel(AbstractMLForecastModel):
                 X_df=X_df,
             )
         assert isinstance(raw_predictions, pd.DataFrame)
-        raw_predictions = raw_predictions.rename(columns={MLF_ITEMID: ITEMID, MLF_TIMESTAMP: TIMESTAMP})
+        raw_predictions = raw_predictions.rename(
+            columns={MLF_ITEMID: TimeSeriesDataFrame.ITEMID, MLF_TIMESTAMP: TimeSeriesDataFrame.TIMESTAMP}
+        )
         predictions: TimeSeriesDataFrame = TimeSeriesDataFrame(
             self._add_gaussian_quantiles(
-                raw_predictions, repeated_item_ids=raw_predictions[ITEMID], past_target=data[self.target]
+                raw_predictions,
+                repeated_item_ids=raw_predictions[TimeSeriesDataFrame.ITEMID],
+                past_target=data[self.target],
             )
         )
         if forecast_for_short_series is not None:
             predictions = pd.concat([predictions, forecast_for_short_series])  # type: ignore
-        return predictions.reindex(original_item_id_order, level=ITEMID)
+        return predictions.reindex(original_item_id_order, level=TimeSeriesDataFrame.ITEMID)
-    def _create_tabular_model(self, model_name: str, model_hyperparameters: Dict[str, Any]) -> TabularModel:
+    def _create_tabular_model(self, model_name: str, model_hyperparameters: dict[str, Any]) -> TabularModel:
         model_class = ag_model_registry.key_to_cls(model_name)
         return TabularModel(
             model_class=model_class,

autogluon.timeseries 1.3.2b20250712__py3-none-any.whl → 1.4.1b20251116__py3-none-any.whl

autogluon.timeseries 1.3.2b20250712__py3-none-any.whl → 1.4.1b20251116__py3-none-any.whl