PyPI - autogluon.timeseries - Versions diffs - 0.8.3b20230817__py3-none-any.whl → 0.8.3b20230819__py3-none-any.whl - Mend

autogluon.timeseries 0.8.3b20230817__py3-none-any.whl → 0.8.3b20230819__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of autogluon.timeseries might be problematic. Click here for more details.

Files changed (23) hide show

autogluon/timeseries/dataset/ts_dataframe.py CHANGED Viewed

@@ -4,7 +4,7 @@ import copy
 import itertools
 import logging
 from collections.abc import Iterable
-from typing import Any, List, Optional, Tuple, Type
+from typing import Any, List, Optional, Tuple, Type, Union
 import numpy as np
 import pandas as pd
@@ -12,6 +12,7 @@ from joblib.parallel import Parallel, delayed
 from pandas.core.internals import ArrayManager, BlockManager
 from autogluon.common.loaders import load_pd
+from autogluon.common.utils.deprecated_utils import Deprecated
 logger = logging.getLogger(__name__)
@@ -322,7 +323,6 @@ class TimeSeriesDataFrame(pd.DataFrame):
         id_column: Optional[str] = None,
         timestamp_column: Optional[str] = None,
     ) -> pd.DataFrame:
         df = df.copy()
         if id_column is not None:
             assert id_column in df.columns, f"Column '{id_column}' not found!"
@@ -497,7 +497,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         Examples
         --------
-        >>> print(ts_dataframe)
+        >>> ts_df
                             target
         item_id timestamp
         0       2019-01-01       0
@@ -582,7 +582,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         Returns
         -------
-        ts_df: TimeSeriesDataFrame
+        ts_df : TimeSeriesDataFrame
             A new time series dataframe containing entries of the original time series between start and end timestamps.
         """
@@ -596,7 +596,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         )
     @classmethod
-    def from_pickle(cls, filepath_or_buffer: Any) -> "TimeSeriesDataFrame":
+    def from_pickle(cls, filepath_or_buffer: Any) -> TimeSeriesDataFrame:
         """Convenience method to read pickled time series data frames. If the read pickle
         file refers to a plain pandas DataFrame, it will be cast to a TimeSeriesDataFrame.
@@ -607,7 +607,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         Returns
         -------
-        ts_df: TimeSeriesDataFrame
+        ts_df : TimeSeriesDataFrame
             The pickled time series data frame.
         """
         try:
@@ -616,6 +616,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         except Exception as err:  # noqa
             raise IOError(f"Could not load pickled data set due to error: {str(err)}")
+    @Deprecated(min_version_to_warn="0.9", min_version_to_error="1.0")
     def get_reindexed_view(self, freq: str = "S") -> TimeSeriesDataFrame:
         """Returns a new TimeSeriesDataFrame object with the same underlying data and
         static features as the current data frame, except the time index is replaced by
@@ -649,7 +650,8 @@ class TimeSeriesDataFrame(pd.DataFrame):
         return df_view
-    def to_regular_index(self, freq: str) -> "TimeSeriesDataFrame":
+    @Deprecated(min_version_to_warn="0.9", min_version_to_error="1.0", new="convert_frequency")
+    def to_regular_index(self, freq: str) -> TimeSeriesDataFrame:
         """Fill the gaps in an irregularly-sampled time series with NaNs.
         Parameters
@@ -659,7 +661,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         Examples
         --------
-        >>> print(ts_dataframe)
+        >>> ts_df
                             target
         item_id timestamp
         0       2019-01-01     NaN
@@ -669,7 +671,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         1       2019-02-04     3.0
                 2019-02-07     4.0
-        >>> print(ts_dataframe.to_regular_index(freq="D"))
+        >>> ts_df.to_regular_index(freq="D")
                             target
         item_id timestamp
         0       2019-01-01     NaN
@@ -685,30 +687,9 @@ class TimeSeriesDataFrame(pd.DataFrame):
                 2019-02-07     4.0
         """
-        if self.freq is not None:
-            if self.freq != freq:
-                raise ValueError(
-                    f"TimeSeriesDataFrame already has a regular index with freq '{self.freq}' "
-                    f"that cannot be converted to the given freq '{freq}'"
-                )
-            else:
-                return self
-        filled_series = []
-        for item_id, time_series in self.groupby(level=ITEMID, sort=False):
-            time_series = time_series.droplevel(ITEMID)
-            timestamps = time_series.index
-            resampled_ts = time_series.resample(freq).asfreq()
-            if not timestamps.isin(resampled_ts.index).all():
-                raise ValueError(
-                    f"Irregularly-sampled timestamps in this TimeSeriesDataFrame are not compatible "
-                    f"with the given frequency '{freq}'"
-                )
-            filled_series.append(pd.concat({item_id: resampled_ts}, names=[ITEMID]))
+        return self.convert_frequency(freq=freq)
-        return TimeSeriesDataFrame(pd.concat(filled_series), static_features=self.static_features)
-    def fill_missing_values(self, method: str = "auto", value: float = 0.0) -> "TimeSeriesDataFrame":
+    def fill_missing_values(self, method: str = "auto", value: float = 0.0) -> TimeSeriesDataFrame:
         """Fill missing values represented by NaN.
         Parameters
@@ -726,7 +707,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         Examples
         --------
-        >>> print(ts_dataframe)
+        >>> ts_df
                             target
         item_id timestamp
         0       2019-01-01     NaN
@@ -741,7 +722,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
                 2019-02-06     NaN
                 2019-02-07     4.0
-        >>> print(ts_dataframe.fill_missing_values(method="auto"))
+        >>> ts_df.fill_missing_values(method="auto")
                             target
         item_id timestamp
         0       2019-01-01     1.0
@@ -765,7 +746,9 @@ class TimeSeriesDataFrame(pd.DataFrame):
         grouped_df = pd.DataFrame(self).groupby(level=ITEMID, sort=False, group_keys=False)
         if method == "auto":
-            filled_df = grouped_df.fillna(method="ffill").fillna(method="bfill")
+            filled_df = grouped_df.fillna(method="ffill")
+            # Fill missing values at the start of each time series with bfill
+            filled_df = filled_df.groupby(level=ITEMID, sort=False, group_keys=False).fillna(method="bfill")
         elif method in ["ffill", "pad"]:
             filled_df = grouped_df.fillna(method="ffill")
         elif method in ["bfill", "backfill"]:
@@ -782,13 +765,14 @@ class TimeSeriesDataFrame(pd.DataFrame):
             )
         return TimeSeriesDataFrame(filled_df, static_features=self.static_features)
-    def dropna(self, how: str = "any") -> "TimeSeriesDataFrame":
+    def dropna(self, how: str = "any") -> TimeSeriesDataFrame:
         """Drop rows containing NaNs.
         Parameters
         ----------
         how : {"any", "all"}, default = "any"
             Determine if row or column is removed from TimeSeriesDataFrame, when we have at least one NaN or all NaN.
             - "any" : If any NaN values are present, drop that row or column.
             - "all" : If all values are NaN, drop that row or column.
         """
@@ -864,3 +848,104 @@ class TimeSeriesDataFrame(pd.DataFrame):
                     data.static_features.index = data.static_features.index.astype(str)
                     data.static_features.index += suffix
         return train_data, test_data
+    def convert_frequency(
+        self,
+        freq: Union[str, pd.DateOffset],
+        agg_numeric: str = "mean",
+        agg_categorical: str = "first",
+        **kwargs,
+    ) -> TimeSeriesDataFrame:
+        """Convert each time series in the data frame to the given frequency.
+        This method is useful for two purposes:
+        1. Converting an irregularly-sampled time series to a regular time index.
+        2. Aggregating time series data by downsampling (e.g., convert daily sales into weekly sales)
+        Parameters
+        ----------
+        freq : Union[str, pd.DateOffset]
+            Frequency to which the data should be converted. See [pandas frequency aliases](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases)
+            for supported values.
+        agg_numeric : {"max", "min", "sum", "mean", "median", "first", "last"}, default = "mean"
+            Aggregation method applied to numeric columns.
+        agg_categorical : {"first", "last"}, default = "first"
+            Aggregation method applied to categorical columns.
+        **kwargs
+            Additional keyword arguments that will be passed to ``pandas.DataFrameGroupBy.resample``.
+        Returns
+        -------
+        ts_df : TimeSeriesDataFrame
+            A new time series dataframe with time series resampled at the new frequency. Output may contain missing
+            values represented by ``NaN`` if original data does not have information for the given period.
+        Examples
+        --------
+        Convert irregularly-sampled time series data to a regular index
+        >>> ts_df
+                            target
+        item_id timestamp
+        0       2019-01-01     NaN
+                2019-01-03     1.0
+                2019-01-06     2.0
+                2019-01-07     NaN
+        1       2019-02-04     3.0
+                2019-02-07     4.0
+        >>> ts_df.convert_frequency(freq="D")
+                            target
+        item_id timestamp
+        0       2019-01-01     NaN
+                2019-01-02     NaN
+                2019-01-03     1.0
+                2019-01-04     NaN
+                2019-01-05     NaN
+                2019-01-06     2.0
+                2019-01-07     NaN
+        1       2019-02-04     3.0
+                2019-02-05     NaN
+                2019-02-06     NaN
+                2019-02-07     4.0
+        Downsample quarterly data to yearly frequency
+        >>> ts_df
+                            target
+        item_id timestamp
+        0       2020-03-31     1.0
+                2020-06-30     2.0
+                2020-09-30     3.0
+                2020-12-31     4.0
+                2021-03-31     5.0
+                2021-06-30     6.0
+                2021-09-30     7.0
+                2021-12-31     8.0
+        >>> ts_df.convert_frequency("Y")
+                            target
+        item_id timestamp
+        0       2020-12-31     2.5
+                2021-12-31     6.5
+        >>> ts_df.convert_frequency("Y", agg_numeric="sum")
+                            target
+        item_id timestamp
+        0       2020-12-31    10.0
+                2021-12-31    26.0
+        """
+        if self.freq == pd.tseries.frequencies.to_offset(freq).freqstr:
+            return self
+        # We need to aggregate categorical columns separately because .agg("mean") deletes all non-numeric columns
+        aggregation = {}
+        for col in self.columns:
+            if pd.api.types.is_numeric_dtype(self.dtypes[col]):
+                aggregation[col] = agg_numeric
+            else:
+                aggregation[col] = agg_categorical
+        resampled_df = TimeSeriesDataFrame(
+            self.groupby(level=ITEMID, sort=False).resample(freq, level=TIMESTAMP, **kwargs).agg(aggregation)
+        )
+        resampled_df.static_features = self.static_features
+        return resampled_df

autogluon/timeseries/learner.py CHANGED Viewed

@@ -1,15 +1,13 @@
 import logging
 import time
-from typing import Any, Dict, List, Optional, Tuple, Type, Union
+from typing import Any, Dict, List, Optional, Type, Union
-import numpy as np
 import pandas as pd
 from autogluon.core.learner import AbstractLearner
-from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TimeSeriesDataFrame
+from autogluon.timeseries.dataset.ts_dataframe import TimeSeriesDataFrame
 from autogluon.timeseries.evaluator import TimeSeriesEvaluator
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
-from autogluon.timeseries.splitter import AbstractTimeSeriesSplitter, LastWindowSplitter
 from autogluon.timeseries.trainer import AbstractTimeSeriesTrainer, AutoTimeSeriesTrainer
 from autogluon.timeseries.utils.features import TimeSeriesFeatureGenerator
 from autogluon.timeseries.utils.forecast import get_forecast_horizon_index_ts_dataframe
@@ -31,7 +29,6 @@ class TimeSeriesLearner(AbstractLearner):
         eval_metric: Optional[str] = None,
         eval_metric_seasonal_period: Optional[int] = None,
         prediction_length: int = 1,
-        ignore_time_index: bool = False,
         cache_predictions: bool = True,
         **kwargs,
     ):
@@ -42,11 +39,7 @@ class TimeSeriesLearner(AbstractLearner):
         self.target = target
         self.known_covariates_names = [] if known_covariates_names is None else known_covariates_names
         self.prediction_length = prediction_length
-        self.quantile_levels = kwargs.get(
-            "quantile_levels",
-            kwargs.get("quantiles", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]),
-        )
-        self.ignore_time_index = ignore_time_index
+        self.quantile_levels = kwargs.get("quantile_levels", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
         self.cache_predictions = cache_predictions
         self.feature_generator = TimeSeriesFeatureGenerator(
@@ -160,28 +153,13 @@ class TimeSeriesLearner(AbstractLearner):
             )
         forecast_index = get_forecast_horizon_index_ts_dataframe(data, prediction_length=self.prediction_length)
-        if self.ignore_time_index:
-            logger.warning(
-                "Because `ignore_time_index=True`, the predictor will ignore the time index of `known_covariates`. "
-                "Please make sure that `known_covariates` contain only the future values of the known covariates "
-                "(and the past values are not included)."
+        try:
+            known_covariates = known_covariates.loc[forecast_index]
+        except KeyError:
+            raise ValueError(
+                f"known_covariates should include the values for prediction_length={self.prediction_length} "
+                "many time steps into the future."
             )
-            known_covariates = known_covariates.loc[forecast_index.unique(level=ITEMID)]
-            if (known_covariates.num_timesteps_per_item() < self.prediction_length).any():
-                raise ValueError(
-                    f"known_covariates should include the values for prediction_length={self.prediction_length} "
-                    "many time steps into the future."
-                )
-            known_covariates = known_covariates.slice_by_timestep(None, self.prediction_length)
-            known_covariates.index = forecast_index
-        else:
-            try:
-                known_covariates = known_covariates.loc[forecast_index]
-            except KeyError:
-                raise ValueError(
-                    f"known_covariates should include the values for prediction_length={self.prediction_length} "
-                    "many time steps into the future."
-                )
         return known_covariates
     def predict(

autogluon/timeseries/models/__init__.py CHANGED Viewed

@@ -4,9 +4,12 @@ from .local import (
     ARIMAModel,
     AutoARIMAModel,
     AutoETSModel,
+    AverageModel,
     DynamicOptimizedThetaModel,
     ETSModel,
     NaiveModel,
+    NPTSModel,
+    SeasonalAverageModel,
     SeasonalNaiveModel,
     ThetaModel,
     ThetaStatsmodelsModel,
@@ -24,6 +27,7 @@ __all__ = [
     "DirectTabularModel",
     "RecursiveTabularModel",
     "NaiveModel",
+    "NPTSModel",
     "SeasonalNaiveModel",
     "AutoETSModel",
     "AutoARIMAModel",

autogluon/timeseries/models/abstract/abstract_timeseries_model.py CHANGED Viewed

@@ -105,10 +105,7 @@ class AbstractTimeSeriesModel(AbstractModel):
         self.freq: str = freq
         self.prediction_length: int = prediction_length
-        self.quantile_levels = kwargs.get(
-            "quantile_levels",
-            kwargs.get("quantiles", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]),
-        )
+        self.quantile_levels = kwargs.get("quantile_levels", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
         self._oof_predictions: Optional[TimeSeriesDataFrame] = None
     def __repr__(self) -> str:

autogluon/timeseries/models/local/__init__.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import joblib.externals.loky
-from .naive import NaiveModel, SeasonalNaiveModel
+from .naive import AverageModel, NaiveModel, SeasonalAverageModel, SeasonalNaiveModel
+from .npts import NPTSModel
 from .statsforecast import AutoARIMAModel, AutoETSModel, DynamicOptimizedThetaModel, ThetaModel
 from .statsmodels import ARIMAModel, ETSModel, ThetaStatsmodelsModel

autogluon/timeseries/models/local/abstract_local_model.py CHANGED Viewed

@@ -34,6 +34,9 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
         Argument that can be passed to the underlying local model.
     default_n_jobs : Union[int, float]
         Default number of CPU cores used to train models. If float, this fraction of CPU cores will be used.
+    default_max_ts_length : Optional[int]
+        If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
+        This significantly speeds up fitting and usually leads to no change in accuracy.
     init_time_in_seconds : int
         Time that it takes to initialize the model in seconds (e.g., because of JIT compilation by Numba).
         If time_limit is below this number, model won't be trained.
@@ -41,6 +44,7 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
     allowed_local_model_args: List[str] = []
     default_n_jobs: Union[int, float] = AG_DEFAULT_N_JOBS
+    default_max_ts_length: Optional[int] = 2500
     init_time_in_seconds: int = 0
     def __init__(
@@ -65,7 +69,7 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
             raise ValueError(f"n_jobs must be a float between 0 and 1 or an integer (received n_jobs = {n_jobs})")
         # Default values, potentially overridden inside _fit()
         self.use_fallback_model = hyperparameters.pop("use_fallback_model", True)
-        self.max_ts_length = hyperparameters.pop("max_ts_length", 2500)
+        self.max_ts_length = hyperparameters.pop("max_ts_length", self.default_max_ts_length)
         super().__init__(
             path=path,

autogluon/timeseries/models/local/naive.py CHANGED Viewed

@@ -1,3 +1,5 @@
+from typing import Callable
 import numpy as np
 import pandas as pd
@@ -11,6 +13,13 @@ class NaiveModel(AbstractLocalModel):
     estimated from the empirical distribution of the residuals.
     As described in https://otexts.com/fpp3/prediction-intervals.html
+    Other Parameters
+    ----------------
+    n_jobs : int or float, default = 0.5
+        Number of CPU cores used to fit the models in parallel.
+        When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
+        When set to a positive integer, that many cores are used.
+        When set to -1, all CPU cores are used.
     """
     allowed_local_model_args = ["seasonal_period"]
@@ -45,6 +54,11 @@ class SeasonalNaiveModel(AbstractLocalModel):
         specified manually by providing an integer > 1.
         If seasonal_period (inferred or provided) is equal to 1, will fall back to Naive forecast.
         Seasonality will also be disabled, if the length of the time series is < seasonal_period.
+    n_jobs : int or float, default = 0.5
+        Number of CPU cores used to fit the models in parallel.
+        When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
+        When set to a positive integer, that many cores are used.
+        When set to -1, all CPU cores are used.
     """
     allowed_local_model_args = ["seasonal_period"]
@@ -60,3 +74,91 @@ class SeasonalNaiveModel(AbstractLocalModel):
             quantile_levels=self.quantile_levels,
             seasonal_period=local_model_args["seasonal_period"],
         )
+def _get_quantile_function(q: float) -> Callable:
+    """Returns a function with name "q" that computes the q'th quantile of a pandas.Series."""
+    def quantile_fn(x: pd.Series) -> pd.Series:
+        return x.quantile(q)
+    quantile_fn.__name__ = str(q)
+    return quantile_fn
+class AverageModel(AbstractLocalModel):
+    """Baseline model that sets the forecast equal to the historic average or quantile.
+    Other Parameters
+    ----------------
+    n_jobs : int or float, default = 0.5
+        Number of CPU cores used to fit the models in parallel.
+        When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
+        When set to a positive integer, that many cores are used.
+        When set to -1, all CPU cores are used.
+    max_ts_length : Optional[int], default = None
+        If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
+        This significantly speeds up fitting and usually leads to no change in accuracy.
+    """
+    allowed_local_model_args = ["seasonal_period"]
+    default_max_ts_length = None
+    def _predict_with_local_model(
+        self,
+        time_series: pd.Series,
+        local_model_args: dict,
+    ) -> pd.DataFrame:
+        agg_functions = ["mean"] + [_get_quantile_function(q) for q in self.quantile_levels]
+        stats_marginal = time_series.agg(agg_functions)
+        stats_repeated = np.tile(stats_marginal.values, [self.prediction_length, 1])
+        return pd.DataFrame(stats_repeated, columns=stats_marginal.index)
+class SeasonalAverageModel(AbstractLocalModel):
+    """Baseline model that sets the forecast equal to the historic average or quantile in the same season.
+    Other Parameters
+    ----------------
+    seasonal_period : int or None, default = None
+        Number of time steps in a complete seasonal cycle for seasonal models. For example, 7 for daily data with a
+        weekly cycle or 12 for monthly data with an annual cycle.
+        When set to None, seasonal_period will be inferred from the frequency of the training data. Can also be
+        specified manually by providing an integer > 1.
+        If seasonal_period (inferred or provided) is equal to 1, the forecast is equivalent to that of the Average model.
+        Seasonality will also be disabled, if the length of the time series is < seasonal_period.
+    n_jobs : int or float, default = 0.5
+        Number of CPU cores used to fit the models in parallel.
+        When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
+        When set to a positive integer, that many cores are used.
+        When set to -1, all CPU cores are used.
+    max_ts_length : Optional[int], default = None
+        If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
+        This significantly speeds up fitting and usually leads to no change in accuracy.
+    """
+    allowed_local_model_args = ["seasonal_period"]
+    default_max_ts_length = None
+    def _predict_with_local_model(
+        self,
+        time_series: pd.Series,
+        local_model_args: dict,
+    ) -> pd.DataFrame:
+        seasonal_period = local_model_args["seasonal_period"]
+        agg_functions = ["mean"] + [_get_quantile_function(q) for q in self.quantile_levels]
+        # Compute mean & quantiles for each season
+        ts_df = time_series.reset_index(drop=True).to_frame()
+        ts_df["season"] = ts_df.index % seasonal_period
+        stats_per_season = ts_df.groupby("season")[self.target].agg(agg_functions)
+        next_season = ts_df["season"].iloc[-1] + 1
+        season_in_forecast_horizon = np.arange(next_season, next_season + self.prediction_length) % seasonal_period
+        result = stats_per_season.reindex(season_in_forecast_horizon)
+        if np.any(result.isna().values):
+            # Use statistics over all timesteps to fill values for seasons that are missing from training data
+            stats_marginal = time_series.agg(agg_functions)
+            result = result.fillna(stats_marginal)
+        return result

autogluon/timeseries/models/local/npts.py ADDED Viewed

@@ -0,0 +1,59 @@
+import pandas as pd
+from autogluon.timeseries.models.local.abstract_local_model import AbstractLocalModel
+class NPTSModel(AbstractLocalModel):
+    """Non-Parametric Time Series Forecaster.
+    This model is especially well suited for forecasting sparse or intermittent time series with many zero values.
+    Based on `gluonts.model.npts.NPTSPredictor <https://ts.gluon.ai/stable/api/gluonts/gluonts.model.npts.html>`_.
+    See GluonTS documentation for more information about the model.
+    Other Parameters
+    ----------------
+    kernel_type : {"exponential", "uniform"}, default = "exponential"
+        Kernel used by the model.
+    exp_kernel_weights : float, default = 1.0
+        Scaling factor used in the exponential kernel.
+    use_seasonal_model : bool, default = True
+        Whether to use the seasonal variant of the model.
+    n_jobs : int or float, default = 0.5
+        Number of CPU cores used to fit the models in parallel.
+        When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
+        When set to a positive integer, that many cores are used.
+        When set to -1, all CPU cores are used.
+    max_ts_length : Optional[int], default = 2500
+        If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
+        This significantly speeds up fitting and usually leads to no change in accuracy.
+    """
+    allowed_local_model_args = [
+        "kernel_type",
+        "exp_kernel_weights",
+        "use_seasonal_model",
+        "seasonal_period",
+    ]
+    def _predict_with_local_model(
+        self,
+        time_series: pd.Series,
+        local_model_args: dict,
+    ) -> pd.DataFrame:
+        from gluonts.model.npts import NPTSPredictor
+        local_model_args.pop("seasonal_period")
+        predictor = NPTSPredictor(
+            freq=self.freq,
+            prediction_length=self.prediction_length,
+            **local_model_args,
+        )
+        ts = time_series.copy(deep=False)
+        ts.index = ts.index.to_period()
+        forecast = predictor.predict_time_series(ts, num_samples=100)
+        forecast_dict = {"mean": forecast.mean}
+        for q in self.quantile_levels:
+            forecast_dict[str(q)] = forecast.quantile(q)
+        return pd.DataFrame(forecast_dict)

autogluon/timeseries/models/local/statsforecast.py CHANGED Viewed

@@ -255,8 +255,6 @@ class ThetaModel(AbstractStatsForecastModel):
         This significantly speeds up fitting and usually leads to no change in accuracy.
     """
-    max_ts_length = 3000
     allowed_local_model_args = [
         "decomposition_type",
         "seasonal_period",

autogluon/timeseries/models/presets.py CHANGED Viewed

@@ -4,7 +4,6 @@ import re
 from collections import defaultdict
 from typing import Any, Dict, List, Optional, Type, Union
-import autogluon.timeseries as agts
 from autogluon.common import space
 from autogluon.core import constants
@@ -12,14 +11,17 @@ from . import (
     ARIMAModel,
     AutoARIMAModel,
     AutoETSModel,
+    AverageModel,
     DeepARModel,
     DirectTabularModel,
     DLinearModel,
     DynamicOptimizedThetaModel,
     ETSModel,
     NaiveModel,
+    NPTSModel,
     PatchTSTModel,
     RecursiveTabularModel,
+    SeasonalAverageModel,
     SeasonalNaiveModel,
     SimpleFeedForwardModel,
     TemporalFusionTransformerModel,
@@ -42,11 +44,14 @@ MODEL_TYPES = dict(
     TemporalFusionTransformer=TemporalFusionTransformerModel,
     RecursiveTabular=RecursiveTabularModel,
     DirectTabular=DirectTabularModel,
+    Average=AverageModel,
+    SeasonalAverage=SeasonalAverageModel,
     Naive=NaiveModel,
     SeasonalNaive=SeasonalNaiveModel,
     AutoETS=AutoETSModel,
     AutoARIMA=AutoARIMAModel,
     DynamicOptimizedTheta=DynamicOptimizedThetaModel,
+    NPTS=NPTSModel,
     Theta=ThetaModel,
     ARIMA=ARIMAModel,
     ETS=ETSModel,

autogluon/timeseries/predictor.py CHANGED Viewed

@@ -2,12 +2,12 @@ import logging
 import os
 import pprint
 import time
-import warnings
 from typing import Any, Dict, List, Optional, Type, Union
 import pandas as pd
 import pytorch_lightning as pl
+from autogluon.common.utils.deprecated_utils import Deprecated_args
 from autogluon.common.utils.log_utils import set_logger_verbosity
 from autogluon.common.utils.utils import check_saved_predictor_version, setup_outputdir
 from autogluon.core.utils.decorators import apply_presets
@@ -27,9 +27,9 @@ SUPPORTED_FREQUENCIES = {"D", "W", "M", "Q", "A", "Y", "H", "T", "min", "S"}
 class TimeSeriesPredictor:
     """AutoGluon ``TimeSeriesPredictor`` predicts future values of multiple related time series.
-    ``TimeSeriesPredictor`` provides probabilistic (distributional) multi-step-ahead forecasts for univariate time
-    series. The forecast includes both the mean (i.e., conditional expectation of future values given the past), as
-    well as the quantiles of the forecast distribution, indicating the range of possible future outcomes.
+    ``TimeSeriesPredictor`` provides probabilistic (quantile) multi-step-ahead forecasts for univariate time series.
+    The forecast includes both the mean (i.e., conditional expectation of future values given the past), as well as the
+    quantiles of the forecast distribution, indicating the range of possible future outcomes.
     ``TimeSeriesPredictor`` fits both "global" deep learning models that are shared across all time series
     (e.g., DeepAR, Transformer), as well as "local" statistical models that are fit to each individual time series
@@ -47,6 +47,18 @@ class TimeSeriesPredictor:
         The forecast horizon, i.e., how many time steps into the future the models should be trained to predict.
         For example, if time series contain daily observations, setting ``prediction_length = 3`` will train
         models that predict up to 3 days into the future from the most recent observation.
+    freq : str, optional
+        Frequency of the time series data (see `pandas documentation <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_
+        for available frequencies). For example, ``"D"`` for daily data or ``"H"`` for hourly data.
+        By default, the predictor will attempt to automatically infer the frequency from the data. This argument should
+        only be set in two cases:
+        1. The time series data has irregular timestamps, so frequency cannot be inferred automatically.
+        2. You would like to resample the original data at a different frequency (for example, convert hourly measurements into daily measurements).
+        If ``freq`` is provided when creating the predictor, all data passed to the predictor will be automatically
+        resampled at this frequency.
     eval_metric : str, default = "mean_wQuantileLoss"
         Metric by which predictions will be ultimately evaluated on future test data. AutoGluon tunes hyperparameters
         in order to improve this metric on validation data, and ranks models (on validation data) according to this
@@ -62,7 +74,7 @@ class TimeSeriesPredictor:
         For more information about these metrics, see https://docs.aws.amazon.com/forecast/latest/dg/metrics.html.
     eval_metric_seasonal_period : int, optional
         Seasonal period used to compute the mean absolute scaled error (MASE) evaluation metric. This parameter is only
-        used if ``eval_metric="MASE"`. See https://en.wikipedia.org/wiki/Mean_absolute_scaled_error for more details.
+        used if ``eval_metric="MASE"``. See https://en.wikipedia.org/wiki/Mean_absolute_scaled_error for more details.
         Defaults to ``None``, in which case the seasonal period is computed based on the data frequency.
     known_covariates_names: List[str], optional
         Names of the covariates that are known in advance for all time steps in the forecast horizon. These are also
@@ -79,7 +91,6 @@ class TimeSeriesPredictor:
     quantile_levels : List[float], optional
         List of increasing decimals that specifies which quantiles should be estimated when making distributional
         forecasts. Defaults to ``[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]``.
-        Can alternatively be provided with the keyword argument ``quantiles``.
     path : str, optional
         Path to the directory where models and intermediate outputs will be saved. Defaults to a timestamped folder
         ``AutogluonModels/ag-[TIMESTAMP]`` that will be created in the working directory.
@@ -89,10 +100,6 @@ class TimeSeriesPredictor:
         If using ``logging``, you can alternatively control amount of information printed via ``logger.setLevel(L)``,
         where ``L`` ranges from 0 to 50 (Note: higher values of ``L`` correspond to fewer print statements, opposite
         of verbosity levels).
-    ignore_time_index : bool, default = False
-        If True, the predictor will ignore the datetime indexes during both training and testing, and will replace
-        the data indexes with dummy timestamps in second frequency. In this case, the forecast output time indexes will
-        be arbitrary values, and seasonality will be turned off for local models.
     cache_predictions : bool, default = True
         If True, the predictor will cache and reuse the predictions made by individual models whenever
         :meth:`~autogluon.timeseries.TimeSeriesPredictor.predict`, :meth:`~autogluon.timeseries.TimeSeriesPredictor.leaderboard`,
@@ -106,29 +113,28 @@ class TimeSeriesPredictor:
     predictor_file_name = "predictor.pkl"
     _predictor_version_file_name = "__version__"
+    @Deprecated_args(min_version_to_warn="0.9", min_version_to_error="1.0", ignore_time_index=None)
     def __init__(
         self,
         target: Optional[str] = None,
         known_covariates_names: Optional[List[str]] = None,
         prediction_length: int = 1,
+        freq: str = None,
         eval_metric: Optional[str] = None,
         eval_metric_seasonal_period: Optional[int] = None,
         path: Optional[str] = None,
         verbosity: int = 2,
         quantile_levels: Optional[List[float]] = None,
-        ignore_time_index: bool = False,
         cache_predictions: bool = True,
-        learner_type: Type[AbstractLearner] = TimeSeriesLearner,
+        learner_type: Optional[Type[AbstractLearner]] = None,
         learner_kwargs: Optional[dict] = None,
         label: Optional[str] = None,
-        quantiles: Optional[List[float]] = None,
-        validation_splitter: Optional[Any] = None,
+        ignore_time_index: bool = False,
     ):
         self.verbosity = verbosity
         set_logger_verbosity(self.verbosity, logger=logger)
         self.path = setup_outputdir(path)
-        self.ignore_time_index = ignore_time_index
         self.cache_predictions = cache_predictions
         if target is not None and label is not None:
             raise ValueError("Both `label` and `target` are specified. Please specify at most one of these arguments.")
@@ -147,23 +153,19 @@ class TimeSeriesPredictor:
         self.known_covariates_names = known_covariates_names
         self.prediction_length = prediction_length
+        self.freq = freq
+        if self.freq is not None:
+            # Standardize frequency string (e.g., "min" -> "T", "Y" -> "A-DEC")
+            std_freq = pd.tseries.frequencies.to_offset(self.freq).freqstr
+            if std_freq != str(self.freq):
+                logger.info(f"Frequency '{self.freq}' stored as '{std_freq}'")
+            self.freq = std_freq
         self.eval_metric = eval_metric
         self.eval_metric_seasonal_period = eval_metric_seasonal_period
         if quantile_levels is None:
             quantile_levels = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
         self.quantile_levels = sorted(quantile_levels)
-        if validation_splitter is not None:
-            warnings.warn(
-                "`validation_splitter` argument has been deprecated as of v0.8.0. "
-                "Please use the `num_val_windows` argument of `TimeSeriesPredictor.fit` instead."
-            )
-        if quantiles is not None:
-            warnings.warn(
-                "`quantiles` argument has been deprecated as of v0.8.0. "
-                "Please use the `quantile_levels` argument instead."
-            )
         if learner_kwargs is None:
             learner_kwargs = {}
         learner_kwargs = learner_kwargs.copy()
@@ -176,10 +178,12 @@ class TimeSeriesPredictor:
                 known_covariates_names=self.known_covariates_names,
                 prediction_length=self.prediction_length,
                 quantile_levels=self.quantile_levels,
-                ignore_time_index=ignore_time_index,
                 cache_predictions=self.cache_predictions,
             )
         )
+        # Using `TimeSeriesLearner` as default argument breaks doc generation with Sphinx
+        if learner_type is None:
+            learner_type = TimeSeriesLearner
         self._learner: AbstractLearner = learner_type(**learner_kwargs)
         self._learner_type = type(self._learner)
@@ -187,57 +191,109 @@ class TimeSeriesPredictor:
     def _trainer(self) -> AbstractTimeSeriesTrainer:
         return self._learner.load_trainer()  # noqa
-    def _check_and_prepare_data_frame(self, df: Union[TimeSeriesDataFrame, pd.DataFrame]) -> TimeSeriesDataFrame:
-        """Ensure that TimeSeriesDataFrame has a frequency, or replace its time index with a dummy if
-        ``self.ignore_time_index`` is True.
-        """
-        if df is None:
-            return df
-        if not isinstance(df, TimeSeriesDataFrame):
-            if isinstance(df, pd.DataFrame):
-                try:
-                    df = TimeSeriesDataFrame(df)
-                except:
-                    raise ValueError(
-                        f"Provided data of type {type(df)} cannot be automatically converted to a TimeSeriesDataFrame."
-                    )
-            else:
+    def _to_data_frame(
+        self,
+        data: Union[TimeSeriesDataFrame, pd.DataFrame, str],
+        name: str = "data",
+    ) -> "TimeSeriesDataFrame":
+        if isinstance(data, TimeSeriesDataFrame):
+            return data
+        elif isinstance(data, (pd.DataFrame, str)):
+            try:
+                data = TimeSeriesDataFrame(data)
+            except:
                 raise ValueError(
-                    f"Please provide data in TimeSeriesDataFrame format (received an object of type {type(df)})."
+                    f"Provided {name} of type {type(data)} cannot be automatically converted to a TimeSeriesDataFrame."
                 )
-        if self.ignore_time_index:
-            df = df.get_reindexed_view(freq="S")
+            return data
+        else:
+            raise TypeError(
+                f"{name} must be a TimeSeriesDataFrame or pandas.DataFrame or string (path to data) "
+                f"but received an object of type {type(data)}."
+            )
+    def _check_and_prepare_data_frame(
+        self,
+        data: Union[TimeSeriesDataFrame, pd.DataFrame, str],
+        name: str = "data",
+    ) -> TimeSeriesDataFrame:
+        """Ensure that TimeSeriesDataFrame has a sorted index, valid frequency, and contains no missing values.
+        If self.freq is None, then self.freq of the predictor will be set to the frequency of the data.
+        Parameters
+        ----------
+        data : Union[TimeSeriesDataFrame, pd.DataFrame, str]
+            Data as a data frame or path to file storing the data.
+        name : str
+            Name of the data that will be used in log messages (e.g., 'train_data', 'tuning_data', or 'data').
+        Returns
+        -------
+        df : TimeSeriesDataFrame
+            Preprocessed data in TimeSeriesDataFrame format.
+        """
+        df = self._to_data_frame(data, name=name)
         # MultiIndex.is_monotonic_increasing checks if index is sorted by ["item_id", "timestamp"]
         if not df.index.is_monotonic_increasing:
             df = df.sort_index()
             df._cached_freq = None  # in case frequency was incorrectly cached as IRREGULAR_TIME_INDEX_FREQSTR
-        if df.freq is None:
-            raise ValueError(
-                "Frequency not provided and cannot be inferred. This is often due to the "
-                "time index of the data being irregularly sampled. Please ensure that the "
-                "data set used has a uniform time index, or create the `TimeSeriesPredictor` "
-                "setting `ignore_time_index=True`."
-            )
-        # Check if frequency is supported
+        # Ensure that data has a regular frequency that matches the predictor frequency
+        if self.freq is None:
+            if df.freq is None:
+                raise ValueError(
+                    f"Frequency of {name} is not provided and cannot be inferred. Please set the expected data "
+                    f"frequency when creating the predictor with `TimeSeriesPredictor(freq=...)` or ensure that "
+                    f"the data has a regular time index with `{name}.to_regular_index(freq=...)`"
+                )
+            else:
+                self.freq = df.freq
+                logger.info(f"Inferred data frequency: {df.freq}")
+        else:
+            if df.freq != self.freq:
+                logger.warning(f"{name} with frequency '{df.freq}' has been resampled to frequency '{self.freq}'.")
+                df = df.convert_frequency(freq=self.freq)
+        # TODO: Add support for all pandas frequencies
         offset = pd.tseries.frequencies.to_offset(df.freq)
         norm_freq_str = offset.name.split("-")[0]
         if norm_freq_str not in SUPPORTED_FREQUENCIES:
-            warnings.warn(
-                f"Detected frequency '{norm_freq_str}' is not supported by TimeSeriesPredictor. This may lead to some "
+            logger.warning(
+                f"Frequency '{norm_freq_str}' is not supported by TimeSeriesPredictor. This may lead to some "
                 f"models not working as intended. "
                 f"Please convert the timestamps to one of the supported frequencies: {SUPPORTED_FREQUENCIES}. "
                 f"See https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases for details."
             )
+        # Fill missing values
         if df.isna().values.any():
-            raise ValueError(
-                "TimeSeriesPredictor does not yet support missing values. "
-                "Please make sure that the provided data contains no NaNs."
+            # FIXME: Do not automatically fill NaNs here, handle missing values at the level of individual models.
+            # FIXME: Current solution leads to incorrect metric computation if missing values are present
+            logger.warning(
+                f"{name} contains missing values represented by NaN. "
+                f"They have been filled by carrying forward the last valid observation."
             )
+            df = df.fill_missing_values()
+            if df.isna().values.any():
+                raise ValueError(f"Some time series in {name} consist completely of NaN values. Please remove them.")
+        # Ensure that time series are long enough
         if (df.num_timesteps_per_item() <= 2).any():
-            # Time series with length <= 2 make frequency inference impossible
-            raise ValueError("Detected time series with length <= 2 in data. Please remove them from the dataset.")
+            # FIXME: Gracefully handle short time series: Ignore time series with length <= 2 in train_data,
+            # FIXME: otherwise generate naive forecast for short time series
+            raise ValueError(f"Detected time series with length <= 2 in {name}. Please remove them from the dataset.")
         return df
+    def _check_data_for_evaluation(self, data: TimeSeriesDataFrame, name: str = "data"):
+        """Make sure that provided evaluation data includes both historic and future time series values."""
+        if data.num_timesteps_per_item().min() <= self.prediction_length:
+            raise ValueError(
+                f"Cannot reserve last prediction_length={self.prediction_length} time steps for evaluation in some "
+                f"time series in {name}. Please make sure that {name} includes both historic and future data, and that"
+                f"all time series have length > prediction_length (at least {self.prediction_length + 1})"
+            )
     def _validate_num_val_windows(
         self,
         train_data: TimeSeriesDataFrame,
@@ -283,8 +339,8 @@ class TimeSeriesPredictor:
     @apply_presets(TIMESERIES_PRESETS_CONFIGS)
     def fit(
         self,
-        train_data: Union[TimeSeriesDataFrame, pd.DataFrame],
-        tuning_data: Optional[Union[TimeSeriesDataFrame, pd.DataFrame]] = None,
+        train_data: Union[TimeSeriesDataFrame, pd.DataFrame, str],
+        tuning_data: Optional[Union[TimeSeriesDataFrame, pd.DataFrame, str]] = None,
         time_limit: Optional[int] = None,
         presets: Optional[str] = None,
         hyperparameters: Dict[Union[str, Type], Any] = None,
@@ -300,7 +356,7 @@ class TimeSeriesPredictor:
         Parameters
         ----------
-        train_data : Union[TimeSeriesDataFrame, pd.DataFrame]
+        train_data : Union[TimeSeriesDataFrame, pd.DataFrame, str]
             Training data in the :class:`~autogluon.timeseries.TimeSeriesDataFrame` format. For best performance, all
             time series should have length ``> 2 * prediction_length``.
@@ -323,7 +379,7 @@ class TimeSeriesPredictor:
             If provided data is an instance of pandas DataFrame, AutoGluon will attempt to automatically convert it
             to a ``TimeSeriesDataFrame``.
-        tuning_data : Union[TimeSeriesDataFrame, pd.DataFrame], optional
+        tuning_data : Union[TimeSeriesDataFrame, pd.DataFrame, str], optional
             Data reserved for model selection and hyperparameter tuning, rather than training individual models. Also
             used to compute the validation scores. Note that only the last ``prediction_length`` time steps of each
             time series are used for computing the validation score.
@@ -479,9 +535,6 @@ class TimeSeriesPredictor:
         if hyperparameters is None:
             hyperparameters = "default"
-        train_data = self._check_and_prepare_data_frame(train_data)
-        tuning_data = self._check_and_prepare_data_frame(tuning_data)
         if verbosity is None:
             verbosity = self.verbosity
         set_logger_verbosity(verbosity)
@@ -489,12 +542,15 @@ class TimeSeriesPredictor:
         fit_args = dict(
             prediction_length=self.prediction_length,
             target=self.target,
+            eval_metric=self.eval_metric,
+            quantile_levels=self.quantile_levels,
+            freq=self.freq,
             time_limit=time_limit,
-            evaluation_metric=self.eval_metric,
             hyperparameters=hyperparameters,
             hyperparameter_tune_kwargs=hyperparameter_tune_kwargs,
             excluded_model_types=excluded_model_types,
             num_val_windows=num_val_windows,
+            refit_full=refit_full,
             enable_ensemble=enable_ensemble,
             random_seed=random_seed,
             verbosity=verbosity,
@@ -504,13 +560,17 @@ class TimeSeriesPredictor:
         if presets is not None:
             logger.info(f"Setting presets to: {presets}")
         logger.info("Fitting with arguments:")
-        logger.info(f"{pprint.pformat(fit_args)}")
+        logger.info(f"{pprint.pformat(fit_args)}\n")
+        train_data = self._check_and_prepare_data_frame(train_data, name="train_data")
         logger.info(
             f"Provided training data set with {len(train_data)} rows, {train_data.num_items} items (item = single time series). "
             f"Average time series length is {len(train_data) / train_data.num_items:.1f}. "
-            f"Data frequency is '{train_data.freq}'."
         )
         if tuning_data is not None:
+            tuning_data = self._check_and_prepare_data_frame(tuning_data, name="tuning_data")
+            self._check_data_for_evaluation(tuning_data, name="tuning_data")
             logger.info(
                 f"Provided tuning data set with {len(tuning_data)} rows, {tuning_data.num_items} items. "
                 f"Average time series length is {len(tuning_data) / tuning_data.num_items:.1f}."
@@ -552,8 +612,8 @@ class TimeSeriesPredictor:
     def predict(
         self,
-        data: Union[TimeSeriesDataFrame, pd.DataFrame],
-        known_covariates: Optional[TimeSeriesDataFrame] = None,
+        data: Union[TimeSeriesDataFrame, pd.DataFrame, str],
+        known_covariates: Optional[Union[TimeSeriesDataFrame, pd.DataFrame, str]] = None,
         model: Optional[str] = None,
         use_cache: bool = True,
         random_seed: Optional[int] = 123,
@@ -562,7 +622,7 @@ class TimeSeriesPredictor:
         Parameters
         ----------
-        data : Union[TimeSeriesDataFrame, pd.DataFrame]
+        data : Union[TimeSeriesDataFrame, pd.DataFrame, str]
             Time series data to forecast with.
             If ``known_covariates_names`` were specified when creating the predictor, ``data`` must include the columns
@@ -573,7 +633,7 @@ class TimeSeriesPredictor:
             If provided data is an instance of pandas DataFrame, AutoGluon will attempt to automatically convert it
             to a ``TimeSeriesDataFrame``.
-        known_covariates : TimeSeriesDataFrame, optional
+        known_covariates : Union[TimeSeriesDataFrame, pd.DataFrame, str], optional
             If ``known_covariates_names`` were specified when creating the predictor, it is necessary to provide the
             values of the known covariates for each time series during the forecast horizon. That is:
@@ -625,16 +685,18 @@ class TimeSeriesPredictor:
         # Don't use data.item_ids in case data is not a TimeSeriesDataFrame
         original_item_id_order = data.reset_index()[ITEMID].unique()
         data = self._check_and_prepare_data_frame(data)
+        if known_covariates is not None:
+            known_covariates = self._to_data_frame(known_covariates)
         predictions = self._learner.predict(data, known_covariates=known_covariates, model=model, use_cache=use_cache)
         return predictions.reindex(original_item_id_order, level=ITEMID)
-    def evaluate(self, data: Union[TimeSeriesDataFrame, pd.DataFrame], **kwargs):
+    def evaluate(self, data: Union[TimeSeriesDataFrame, pd.DataFrame, str], **kwargs):
         """Evaluate the performance for given dataset, computing the score determined by ``self.eval_metric``
         on the given data set, and with the same ``prediction_length`` used when training models.
         Parameters
         ----------
-        data : Union[TimeSeriesDataFrame, pd.DataFrame]
+        data : Union[TimeSeriesDataFrame, pd.DataFrame, str]
             The data to evaluate the best model on. The last ``prediction_length`` time steps of the data set, for each
             item, will be held out for prediction and forecast accuracy will be calculated on these time steps.
@@ -665,9 +727,10 @@ class TimeSeriesPredictor:
             will have their signs flipped to obey this convention. For example, negative MAPE values will be reported.
         """
         data = self._check_and_prepare_data_frame(data)
+        self._check_data_for_evaluation(data)
         return self._learner.score(data, **kwargs)
-    def score(self, data: Union[TimeSeriesDataFrame, pd.DataFrame], **kwargs):
+    def score(self, data: Union[TimeSeriesDataFrame, pd.DataFrame, str], **kwargs):
         """See, :meth:`~autogluon.timeseries.TimeSeriesPredictor.evaluate`."""
         return self.evaluate(data, **kwargs)
@@ -753,7 +816,7 @@ class TimeSeriesPredictor:
     def leaderboard(
         self,
-        data: Optional[Union[TimeSeriesDataFrame, pd.DataFrame]] = None,
+        data: Optional[Union[TimeSeriesDataFrame, pd.DataFrame, str]] = None,
         silent: bool = False,
         use_cache: bool = True,
     ) -> pd.DataFrame:
@@ -776,7 +839,7 @@ class TimeSeriesPredictor:
         Parameters
         ----------
-        data : Union[TimeSeriesDataFrame, pd.DataFrame], optional
+        data : Union[TimeSeriesDataFrame, pd.DataFrame, str], optional
             dataset used for additional evaluation. If not provided, the validation set used during training will be
             used.
@@ -801,7 +864,9 @@ class TimeSeriesPredictor:
             The leaderboard containing information on all models and in order of best model to worst in terms of
             test performance.
         """
-        data = self._check_and_prepare_data_frame(data)
+        if data is not None:
+            data = self._check_and_prepare_data_frame(data)
+            self._check_data_for_evaluation(data)
         leaderboard = self._learner.leaderboard(data, use_cache=use_cache)
         if not silent:
             with pd.option_context("display.max_rows", None, "display.max_columns", None, "display.width", 1000):

autogluon/timeseries/trainer/abstract_trainer.py CHANGED Viewed

@@ -264,10 +264,7 @@ class AbstractTimeSeriesTrainer(SimpleAbstractTrainer):
         super().__init__(path=path, save_data=save_data, low_memory=True, **kwargs)
         self.prediction_length = prediction_length
-        self.quantile_levels = kwargs.get(
-            "quantile_levels",
-            kwargs.get("quantiles", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]),
-        )
+        self.quantile_levels = kwargs.get("quantile_levels", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
         self.target = kwargs.get("target", "target")
         self.metadata = kwargs.get("metadata", CovariateMetadata())
         self.is_data_saved = False

autogluon/timeseries/trainer/auto_trainer.py CHANGED Viewed

@@ -22,7 +22,7 @@ class AutoTimeSeriesTrainer(AbstractTimeSeriesTrainer):
             freq=kwargs.get("freq"),
             hyperparameters=hyperparameters,
             hyperparameter_tune=hyperparameter_tune,
-            quantiles=quantile_levels,
+            quantile_levels=quantile_levels,
             all_assigned_names=self._get_banned_model_names(),
             target=self.target,
             metadata=self.metadata,

autogluon/timeseries/version.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """This is the autogluon version file."""
-__version__ = '0.8.3b20230817'
+__version__ = '0.8.3b20230819'
 __lite__ = False

{autogluon.timeseries-0.8.3b20230817.dist-info → autogluon.timeseries-0.8.3b20230819.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: autogluon.timeseries
-Version: 0.8.3b20230817
+Version: 0.8.3b20230819
 Summary: AutoML for Image, Text, and Tabular Data
 Home-page: https://github.com/autogluon/autogluon
 Author: AutoGluon Community
@@ -46,9 +46,9 @@ Requires-Dist: statsforecast <1.5,>=1.4.0
 Requires-Dist: mlforecast <0.7.4,>=0.7.0
 Requires-Dist: tqdm <5,>=4.38
 Requires-Dist: ujson <6,>=5
-Requires-Dist: autogluon.core[raytune] ==0.8.3b20230817
-Requires-Dist: autogluon.common ==0.8.3b20230817
-Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost] ==0.8.3b20230817
+Requires-Dist: autogluon.core[raytune] ==0.8.3b20230819
+Requires-Dist: autogluon.common ==0.8.3b20230819
+Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost] ==0.8.3b20230819
 Provides-Extra: all
 Provides-Extra: tests
 Requires-Dist: pytest ; extra == 'tests'

{autogluon.timeseries-0.8.3b20230817.dist-info → autogluon.timeseries-0.8.3b20230819.dist-info}/RECORD RENAMED Viewed

@@ -1,18 +1,18 @@
-autogluon.timeseries-0.8.3b20230817-py3.8-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
+autogluon.timeseries-0.8.3b20230819-py3.8-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
 autogluon/timeseries/__init__.py,sha256=oGfAdHmGz9zGFH53Q4zDL42CavzjqFaWTgkx_vg17QM,370
 autogluon/timeseries/evaluator.py,sha256=AWjqItDZA2tPexQ1e5S3IWTMNL4K-_Bcig6SUzDRkxY,11293
-autogluon/timeseries/learner.py,sha256=vhFt4y3J5q8UUD8Xf9C5VuWI60RngM7kXY0soYld2Dg,10323
-autogluon/timeseries/predictor.py,sha256=_FDHoKKlAwqG6qZB0KYNs0FdJE3zzr-cyQMqzaDBghI,49176
+autogluon/timeseries/learner.py,sha256=nt1z7BmL1f0lRcWTg0CFhK77bCbhqyla3LVs7FIIAdI,9090
+autogluon/timeseries/predictor.py,sha256=041AjOVnQ1PxjVULyybAZ8O4Hfnt-wCvSb3fTvWrrpU,52588
 autogluon/timeseries/splitter.py,sha256=s5S3CeJxcUfZrl7PSXjzubE06bgB8J8uUT8EywSwtYQ,9252
-autogluon/timeseries/version.py,sha256=42pIiXfodX5wHiRjy6ECpjN5JI46Reymvz5FV8O3Hjg,90
+autogluon/timeseries/version.py,sha256=UihvW4WkBxZ767FhaGBcD5FR7QEZDQR-1aUraFUtRAM,90
 autogluon/timeseries/configs/__init__.py,sha256=BTtHIPCYeGjqgOcvqb8qPD4VNX-ICKOg6wnkew1cPOE,98
 autogluon/timeseries/configs/presets_configs.py,sha256=mX0V5zajWWArVforLvbyr6W-JMsQBp2AkBqlWVP2Zuw,640
 autogluon/timeseries/dataset/__init__.py,sha256=UvnhAN5tjgxXTHoZMQDy64YMDj4Xxa68yY7NP4vAw0o,81
-autogluon/timeseries/dataset/ts_dataframe.py,sha256=zN_sKK10wymlGiFnaBx6ocnoUvOVX113DquduMdlNVg,37845
-autogluon/timeseries/models/__init__.py,sha256=dBYglymYNKgSdBEGqUybkVWL6B13eTDOrNwR9herycw,848
-autogluon/timeseries/models/presets.py,sha256=_q8Rbl3E2-_whi18-FcauZS4iQMoqrmejk1BZ23OavI,11317
+autogluon/timeseries/dataset/ts_dataframe.py,sha256=sHzsmnjED-3t3KSmAKSDuwNbWklxT-jcHMPfei8nNm4,41046
+autogluon/timeseries/models/__init__.py,sha256=eRXcHY5Fc3MRs-AMqQL8HNOHZtVDW1h43XB9zJGOZrg,924
+autogluon/timeseries/models/presets.py,sha256=us6ZpA3UL-NsOXMn_1hnfsd3fTloVEO8JAHjnw_2DZM,11428
 autogluon/timeseries/models/abstract/__init__.py,sha256=wvDsQAZIV0N3AwBeMaGItoQ82trEfnT-nol2AAOIxBg,102
-autogluon/timeseries/models/abstract/abstract_timeseries_model.py,sha256=K4_N6T2nfM1cOSDLGYDMscMbO7tiOuu4B4v3cMv1360,19492
+autogluon/timeseries/models/abstract/abstract_timeseries_model.py,sha256=cM5Wegh3ZrlzCL81EUimMV2aehpdyePJH0jif5jQAuo,19432
 autogluon/timeseries/models/abstract/model_trial.py,sha256=f840EF-PSj_j_u1DGVzSD3Z1kCXdOSRLcbn_LJSpw5g,3734
 autogluon/timeseries/models/autogluon_tabular/__init__.py,sha256=ZidrVDHL5x_k9F8lcGrg9Y-soLljsBlrsSeT5FIkn1E,163
 autogluon/timeseries/models/autogluon_tabular/direct_tabular.py,sha256=0-PAot_vu_BQEMtBHl8Zbky7mqahiWHIfOzsvfJ_VeE,17338
@@ -25,26 +25,27 @@ autogluon/timeseries/models/gluonts/__init__.py,sha256=AeycIz-Y-ZQhBxCQdqWKbbhCF
 autogluon/timeseries/models/gluonts/abstract_gluonts.py,sha256=K67s2eBk-PQX_RAt-iMmtxo-8kFTB1DTPFS8meyZOX8,21710
 autogluon/timeseries/models/gluonts/torch/__init__.py,sha256=DJqh-hYrxjRQHOc3GsaQE2Oa4wccedYW2NcSFC-W6rI,260
 autogluon/timeseries/models/gluonts/torch/models.py,sha256=7QNCLQuO52vrxxYSiEiRu3fTU9o5L7XClVQsdIhAVu4,12397
-autogluon/timeseries/models/local/__init__.py,sha256=-l8ib8W0jRho28Ll70Amj0-FE8pGqjebh_pQdrCue_s,572
-autogluon/timeseries/models/local/abstract_local_model.py,sha256=mLB9riUvJABpaV68lwTQnbNk2FXTreciELiN-8wTCSw,9326
-autogluon/timeseries/models/local/naive.py,sha256=oxSATbj7VKUB81WzYZExXxPipIVjWud3fsTlv3ZYwpU,2451
-autogluon/timeseries/models/local/statsforecast.py,sha256=HHpYSFIwndSL85cVvLVpsD2nlIr2EOq5RnjjUkrh-ic,11112
+autogluon/timeseries/models/local/__init__.py,sha256=ibQQmwvZNVjTpKGoKrLdBYO_pvkAeO4yG8qVJ4JFPfE,636
+autogluon/timeseries/models/local/abstract_local_model.py,sha256=xiK7ObhBuqJ1g85PSed8fG1j5kg7IXjLGbqs72HqRzM,9644
+autogluon/timeseries/models/local/naive.py,sha256=9b80zUccHfGv6pg33mppwTcSJgq4JF4CqTQ7SWq48Hk,7243
+autogluon/timeseries/models/local/npts.py,sha256=_bHwWDEnI8zoZ2KQQyF59BIom6VHc0sJgVjPDRGHSFY,2313
+autogluon/timeseries/models/local/statsforecast.py,sha256=FETPDwC5PYw5nFx13rjpYOiTHSmz2l241ot_TdRNtQQ,11086
 autogluon/timeseries/models/local/statsmodels.py,sha256=WdhUxmjmBpoWx6XRmTTWmPxTY8VSzxdDe-G38PgXDIQ,15975
 autogluon/timeseries/models/multi_window/__init__.py,sha256=Bq7AT2Jxdd4WNqmjTdzeqgNiwn1NCyWp4tBIWaM-zfI,60
 autogluon/timeseries/models/multi_window/multi_window_model.py,sha256=2eu9vph6pazrKvycEKEq8zgTzkIe_G7JcrgC8LKxCWE,8995
 autogluon/timeseries/trainer/__init__.py,sha256=lxiOT-Gc6BEnr_yWQqra85kEngeM_wtH2SCaRbmC_qE,170
-autogluon/timeseries/trainer/abstract_trainer.py,sha256=ovZ7U-xhaMTTMLw1p51mEXRs9XpXDJ3_EOF6XzhoRhE,48122
-autogluon/timeseries/trainer/auto_trainer.py,sha256=wABPY75dd8k7JE5AhdiIDIzhOTLFDwiruV4AtpqGB0I,3063
+autogluon/timeseries/trainer/abstract_trainer.py,sha256=_ef5d4oX8StLjd0uATd76bbFtxhFeYRE3YIG5J46L-w,48062
+autogluon/timeseries/trainer/auto_trainer.py,sha256=d_JDMxnEoDHqMIDvmz8qGe7AF2stmwe7IxF8V8qrFwU,3069
 autogluon/timeseries/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autogluon/timeseries/utils/features.py,sha256=QPESzJwMZlsnt_woQ4_I42MOlVT1VcPKk1fTeltPYLU,8270
 autogluon/timeseries/utils/forecast.py,sha256=ouOHcQEppD1ry-9buQ4plmyFK3GPef01gEQE7u2HzcI,1544
 autogluon/timeseries/utils/seasonality.py,sha256=p9mtahWOtDhHUjeGECUJA0VAKeLkZGZbj070dEqMTJQ,652
 autogluon/timeseries/utils/warning_filters.py,sha256=Xg9wuTaj-xRKVzdV43oHPbzrfCv_oWRHVrlB7S15rwc,2198
-autogluon.timeseries-0.8.3b20230817.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
-autogluon.timeseries-0.8.3b20230817.dist-info/METADATA,sha256=IlsS4rppTAdAUdr0OC-_RUJTy0ngbElLTNpyxTUzhxc,12682
-autogluon.timeseries-0.8.3b20230817.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
-autogluon.timeseries-0.8.3b20230817.dist-info/WHEEL,sha256=5sUXSg9e4bi7lTLOHcm6QEYwO5TIF1TNbTSVFVjcJcc,92
-autogluon.timeseries-0.8.3b20230817.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
-autogluon.timeseries-0.8.3b20230817.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
-autogluon.timeseries-0.8.3b20230817.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-autogluon.timeseries-0.8.3b20230817.dist-info/RECORD,,
+autogluon.timeseries-0.8.3b20230819.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
+autogluon.timeseries-0.8.3b20230819.dist-info/METADATA,sha256=J0gTh3-Gx3vx1LHhbpWVVFHx11vlxGfBGsyyYGVkA88,12682
+autogluon.timeseries-0.8.3b20230819.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
+autogluon.timeseries-0.8.3b20230819.dist-info/WHEEL,sha256=5sUXSg9e4bi7lTLOHcm6QEYwO5TIF1TNbTSVFVjcJcc,92
+autogluon.timeseries-0.8.3b20230819.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
+autogluon.timeseries-0.8.3b20230819.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
+autogluon.timeseries-0.8.3b20230819.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+autogluon.timeseries-0.8.3b20230819.dist-info/RECORD,,

/autogluon.timeseries-0.8.3b20230817-py3.8-nspkg.pth → /autogluon.timeseries-0.8.3b20230819-py3.8-nspkg.pth RENAMED Viewed

File without changes

{autogluon.timeseries-0.8.3b20230817.dist-info → autogluon.timeseries-0.8.3b20230819.dist-info}/LICENSE RENAMED Viewed

File without changes

{autogluon.timeseries-0.8.3b20230817.dist-info → autogluon.timeseries-0.8.3b20230819.dist-info}/NOTICE RENAMED Viewed

File without changes

{autogluon.timeseries-0.8.3b20230817.dist-info → autogluon.timeseries-0.8.3b20230819.dist-info}/WHEEL RENAMED Viewed

File without changes

{autogluon.timeseries-0.8.3b20230817.dist-info → autogluon.timeseries-0.8.3b20230819.dist-info}/namespace_packages.txt RENAMED Viewed

File without changes

{autogluon.timeseries-0.8.3b20230817.dist-info → autogluon.timeseries-0.8.3b20230819.dist-info}/top_level.txt RENAMED Viewed

File without changes

{autogluon.timeseries-0.8.3b20230817.dist-info → autogluon.timeseries-0.8.3b20230819.dist-info}/zip-safe RENAMED Viewed

File without changes

autogluon.timeseries 0.8.3b20230817__py3-none-any.whl → 0.8.3b20230819__py3-none-any.whl

Potentially problematic release.

autogluon.timeseries 0.8.3b20230817__py3-none-any.whl → 0.8.3b20230819__py3-none-any.whl