PyPI - autogluon.timeseries - Versions diffs - 0.8.3b20230817__tar.gz → 0.8.3b20230819__tar.gz - Mend

autogluon.timeseries 0.8.3b20230817__tar.gz → 0.8.3b20230819__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of autogluon.timeseries might be problematic. Click here for more details.

Files changed (53) hide show

{autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: autogluon.timeseries
-Version: 0.8.3b20230817
+Version: 0.8.3b20230819
 Summary: AutoML for Image, Text, and Tabular Data
 Home-page: https://github.com/autogluon/autogluon
 Author: AutoGluon Community

{autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/dataset/ts_dataframe.py RENAMED Viewed

@@ -4,7 +4,7 @@ import copy
 import itertools
 import logging
 from collections.abc import Iterable
-from typing import Any, List, Optional, Tuple, Type
+from typing import Any, List, Optional, Tuple, Type, Union
 import numpy as np
 import pandas as pd
@@ -12,6 +12,7 @@ from joblib.parallel import Parallel, delayed
 from pandas.core.internals import ArrayManager, BlockManager
 from autogluon.common.loaders import load_pd
+from autogluon.common.utils.deprecated_utils import Deprecated
 logger = logging.getLogger(__name__)
@@ -322,7 +323,6 @@ class TimeSeriesDataFrame(pd.DataFrame):
         id_column: Optional[str] = None,
         timestamp_column: Optional[str] = None,
     ) -> pd.DataFrame:
         df = df.copy()
         if id_column is not None:
             assert id_column in df.columns, f"Column '{id_column}' not found!"
@@ -497,7 +497,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         Examples
         --------
-        >>> print(ts_dataframe)
+        >>> ts_df
                             target
         item_id timestamp
         0       2019-01-01       0
@@ -582,7 +582,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         Returns
         -------
-        ts_df: TimeSeriesDataFrame
+        ts_df : TimeSeriesDataFrame
             A new time series dataframe containing entries of the original time series between start and end timestamps.
         """
@@ -596,7 +596,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         )
     @classmethod
-    def from_pickle(cls, filepath_or_buffer: Any) -> "TimeSeriesDataFrame":
+    def from_pickle(cls, filepath_or_buffer: Any) -> TimeSeriesDataFrame:
         """Convenience method to read pickled time series data frames. If the read pickle
         file refers to a plain pandas DataFrame, it will be cast to a TimeSeriesDataFrame.
@@ -607,7 +607,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         Returns
         -------
-        ts_df: TimeSeriesDataFrame
+        ts_df : TimeSeriesDataFrame
             The pickled time series data frame.
         """
         try:
@@ -616,6 +616,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         except Exception as err:  # noqa
             raise IOError(f"Could not load pickled data set due to error: {str(err)}")
+    @Deprecated(min_version_to_warn="0.9", min_version_to_error="1.0")
     def get_reindexed_view(self, freq: str = "S") -> TimeSeriesDataFrame:
         """Returns a new TimeSeriesDataFrame object with the same underlying data and
         static features as the current data frame, except the time index is replaced by
@@ -649,7 +650,8 @@ class TimeSeriesDataFrame(pd.DataFrame):
         return df_view
-    def to_regular_index(self, freq: str) -> "TimeSeriesDataFrame":
+    @Deprecated(min_version_to_warn="0.9", min_version_to_error="1.0", new="convert_frequency")
+    def to_regular_index(self, freq: str) -> TimeSeriesDataFrame:
         """Fill the gaps in an irregularly-sampled time series with NaNs.
         Parameters
@@ -659,7 +661,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         Examples
         --------
-        >>> print(ts_dataframe)
+        >>> ts_df
                             target
         item_id timestamp
         0       2019-01-01     NaN
@@ -669,7 +671,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         1       2019-02-04     3.0
                 2019-02-07     4.0
-        >>> print(ts_dataframe.to_regular_index(freq="D"))
+        >>> ts_df.to_regular_index(freq="D")
                             target
         item_id timestamp
         0       2019-01-01     NaN
@@ -685,30 +687,9 @@ class TimeSeriesDataFrame(pd.DataFrame):
                 2019-02-07     4.0
         """
-        if self.freq is not None:
-            if self.freq != freq:
-                raise ValueError(
-                    f"TimeSeriesDataFrame already has a regular index with freq '{self.freq}' "
-                    f"that cannot be converted to the given freq '{freq}'"
-                )
-            else:
-                return self
-        filled_series = []
-        for item_id, time_series in self.groupby(level=ITEMID, sort=False):
-            time_series = time_series.droplevel(ITEMID)
-            timestamps = time_series.index
-            resampled_ts = time_series.resample(freq).asfreq()
-            if not timestamps.isin(resampled_ts.index).all():
-                raise ValueError(
-                    f"Irregularly-sampled timestamps in this TimeSeriesDataFrame are not compatible "
-                    f"with the given frequency '{freq}'"
-                )
-            filled_series.append(pd.concat({item_id: resampled_ts}, names=[ITEMID]))
+        return self.convert_frequency(freq=freq)
-        return TimeSeriesDataFrame(pd.concat(filled_series), static_features=self.static_features)
-    def fill_missing_values(self, method: str = "auto", value: float = 0.0) -> "TimeSeriesDataFrame":
+    def fill_missing_values(self, method: str = "auto", value: float = 0.0) -> TimeSeriesDataFrame:
         """Fill missing values represented by NaN.
         Parameters
@@ -726,7 +707,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         Examples
         --------
-        >>> print(ts_dataframe)
+        >>> ts_df
                             target
         item_id timestamp
         0       2019-01-01     NaN
@@ -741,7 +722,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
                 2019-02-06     NaN
                 2019-02-07     4.0
-        >>> print(ts_dataframe.fill_missing_values(method="auto"))
+        >>> ts_df.fill_missing_values(method="auto")
                             target
         item_id timestamp
         0       2019-01-01     1.0
@@ -765,7 +746,9 @@ class TimeSeriesDataFrame(pd.DataFrame):
         grouped_df = pd.DataFrame(self).groupby(level=ITEMID, sort=False, group_keys=False)
         if method == "auto":
-            filled_df = grouped_df.fillna(method="ffill").fillna(method="bfill")
+            filled_df = grouped_df.fillna(method="ffill")
+            # Fill missing values at the start of each time series with bfill
+            filled_df = filled_df.groupby(level=ITEMID, sort=False, group_keys=False).fillna(method="bfill")
         elif method in ["ffill", "pad"]:
             filled_df = grouped_df.fillna(method="ffill")
         elif method in ["bfill", "backfill"]:
@@ -782,13 +765,14 @@ class TimeSeriesDataFrame(pd.DataFrame):
             )
         return TimeSeriesDataFrame(filled_df, static_features=self.static_features)
-    def dropna(self, how: str = "any") -> "TimeSeriesDataFrame":
+    def dropna(self, how: str = "any") -> TimeSeriesDataFrame:
         """Drop rows containing NaNs.
         Parameters
         ----------
         how : {"any", "all"}, default = "any"
             Determine if row or column is removed from TimeSeriesDataFrame, when we have at least one NaN or all NaN.
             - "any" : If any NaN values are present, drop that row or column.
             - "all" : If all values are NaN, drop that row or column.
         """
@@ -864,3 +848,104 @@ class TimeSeriesDataFrame(pd.DataFrame):
                     data.static_features.index = data.static_features.index.astype(str)
                     data.static_features.index += suffix
         return train_data, test_data
+    def convert_frequency(
+        self,
+        freq: Union[str, pd.DateOffset],
+        agg_numeric: str = "mean",
+        agg_categorical: str = "first",
+        **kwargs,
+    ) -> TimeSeriesDataFrame:
+        """Convert each time series in the data frame to the given frequency.
+        This method is useful for two purposes:
+        1. Converting an irregularly-sampled time series to a regular time index.
+        2. Aggregating time series data by downsampling (e.g., convert daily sales into weekly sales)
+        Parameters
+        ----------
+        freq : Union[str, pd.DateOffset]
+            Frequency to which the data should be converted. See [pandas frequency aliases](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases)
+            for supported values.
+        agg_numeric : {"max", "min", "sum", "mean", "median", "first", "last"}, default = "mean"
+            Aggregation method applied to numeric columns.
+        agg_categorical : {"first", "last"}, default = "first"
+            Aggregation method applied to categorical columns.
+        **kwargs
+            Additional keyword arguments that will be passed to ``pandas.DataFrameGroupBy.resample``.
+        Returns
+        -------
+        ts_df : TimeSeriesDataFrame
+            A new time series dataframe with time series resampled at the new frequency. Output may contain missing
+            values represented by ``NaN`` if original data does not have information for the given period.
+        Examples
+        --------
+        Convert irregularly-sampled time series data to a regular index
+        >>> ts_df
+                            target
+        item_id timestamp
+        0       2019-01-01     NaN
+                2019-01-03     1.0
+                2019-01-06     2.0
+                2019-01-07     NaN
+        1       2019-02-04     3.0
+                2019-02-07     4.0
+        >>> ts_df.convert_frequency(freq="D")
+                            target
+        item_id timestamp
+        0       2019-01-01     NaN
+                2019-01-02     NaN
+                2019-01-03     1.0
+                2019-01-04     NaN
+                2019-01-05     NaN
+                2019-01-06     2.0
+                2019-01-07     NaN
+        1       2019-02-04     3.0
+                2019-02-05     NaN
+                2019-02-06     NaN
+                2019-02-07     4.0
+        Downsample quarterly data to yearly frequency
+        >>> ts_df
+                            target
+        item_id timestamp
+        0       2020-03-31     1.0
+                2020-06-30     2.0
+                2020-09-30     3.0
+                2020-12-31     4.0
+                2021-03-31     5.0
+                2021-06-30     6.0
+                2021-09-30     7.0
+                2021-12-31     8.0
+        >>> ts_df.convert_frequency("Y")
+                            target
+        item_id timestamp
+        0       2020-12-31     2.5
+                2021-12-31     6.5
+        >>> ts_df.convert_frequency("Y", agg_numeric="sum")
+                            target
+        item_id timestamp
+        0       2020-12-31    10.0
+                2021-12-31    26.0
+        """
+        if self.freq == pd.tseries.frequencies.to_offset(freq).freqstr:
+            return self
+        # We need to aggregate categorical columns separately because .agg("mean") deletes all non-numeric columns
+        aggregation = {}
+        for col in self.columns:
+            if pd.api.types.is_numeric_dtype(self.dtypes[col]):
+                aggregation[col] = agg_numeric
+            else:
+                aggregation[col] = agg_categorical
+        resampled_df = TimeSeriesDataFrame(
+            self.groupby(level=ITEMID, sort=False).resample(freq, level=TIMESTAMP, **kwargs).agg(aggregation)
+        )
+        resampled_df.static_features = self.static_features
+        return resampled_df

{autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/learner.py RENAMED Viewed

@@ -1,15 +1,13 @@
 import logging
 import time
-from typing import Any, Dict, List, Optional, Tuple, Type, Union
+from typing import Any, Dict, List, Optional, Type, Union
-import numpy as np
 import pandas as pd
 from autogluon.core.learner import AbstractLearner
-from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TimeSeriesDataFrame
+from autogluon.timeseries.dataset.ts_dataframe import TimeSeriesDataFrame
 from autogluon.timeseries.evaluator import TimeSeriesEvaluator
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
-from autogluon.timeseries.splitter import AbstractTimeSeriesSplitter, LastWindowSplitter
 from autogluon.timeseries.trainer import AbstractTimeSeriesTrainer, AutoTimeSeriesTrainer
 from autogluon.timeseries.utils.features import TimeSeriesFeatureGenerator
 from autogluon.timeseries.utils.forecast import get_forecast_horizon_index_ts_dataframe
@@ -31,7 +29,6 @@ class TimeSeriesLearner(AbstractLearner):
         eval_metric: Optional[str] = None,
         eval_metric_seasonal_period: Optional[int] = None,
         prediction_length: int = 1,
-        ignore_time_index: bool = False,
         cache_predictions: bool = True,
         **kwargs,
     ):
@@ -42,11 +39,7 @@ class TimeSeriesLearner(AbstractLearner):
         self.target = target
         self.known_covariates_names = [] if known_covariates_names is None else known_covariates_names
         self.prediction_length = prediction_length
-        self.quantile_levels = kwargs.get(
-            "quantile_levels",
-            kwargs.get("quantiles", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]),
-        )
-        self.ignore_time_index = ignore_time_index
+        self.quantile_levels = kwargs.get("quantile_levels", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
         self.cache_predictions = cache_predictions
         self.feature_generator = TimeSeriesFeatureGenerator(
@@ -160,28 +153,13 @@ class TimeSeriesLearner(AbstractLearner):
             )
         forecast_index = get_forecast_horizon_index_ts_dataframe(data, prediction_length=self.prediction_length)
-        if self.ignore_time_index:
-            logger.warning(
-                "Because `ignore_time_index=True`, the predictor will ignore the time index of `known_covariates`. "
-                "Please make sure that `known_covariates` contain only the future values of the known covariates "
-                "(and the past values are not included)."
+        try:
+            known_covariates = known_covariates.loc[forecast_index]
+        except KeyError:
+            raise ValueError(
+                f"known_covariates should include the values for prediction_length={self.prediction_length} "
+                "many time steps into the future."
             )
-            known_covariates = known_covariates.loc[forecast_index.unique(level=ITEMID)]
-            if (known_covariates.num_timesteps_per_item() < self.prediction_length).any():
-                raise ValueError(
-                    f"known_covariates should include the values for prediction_length={self.prediction_length} "
-                    "many time steps into the future."
-                )
-            known_covariates = known_covariates.slice_by_timestep(None, self.prediction_length)
-            known_covariates.index = forecast_index
-        else:
-            try:
-                known_covariates = known_covariates.loc[forecast_index]
-            except KeyError:
-                raise ValueError(
-                    f"known_covariates should include the values for prediction_length={self.prediction_length} "
-                    "many time steps into the future."
-                )
         return known_covariates
     def predict(

{autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/__init__.py RENAMED Viewed

@@ -4,9 +4,12 @@ from .local import (
     ARIMAModel,
     AutoARIMAModel,
     AutoETSModel,
+    AverageModel,
     DynamicOptimizedThetaModel,
     ETSModel,
     NaiveModel,
+    NPTSModel,
+    SeasonalAverageModel,
     SeasonalNaiveModel,
     ThetaModel,
     ThetaStatsmodelsModel,
@@ -24,6 +27,7 @@ __all__ = [
     "DirectTabularModel",
     "RecursiveTabularModel",
     "NaiveModel",
+    "NPTSModel",
     "SeasonalNaiveModel",
     "AutoETSModel",
     "AutoARIMAModel",

{autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/abstract/abstract_timeseries_model.py RENAMED Viewed

@@ -105,10 +105,7 @@ class AbstractTimeSeriesModel(AbstractModel):
         self.freq: str = freq
         self.prediction_length: int = prediction_length
-        self.quantile_levels = kwargs.get(
-            "quantile_levels",
-            kwargs.get("quantiles", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]),
-        )
+        self.quantile_levels = kwargs.get("quantile_levels", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
         self._oof_predictions: Optional[TimeSeriesDataFrame] = None
     def __repr__(self) -> str:

{autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/local/__init__.py RENAMED Viewed

@@ -1,6 +1,7 @@
 import joblib.externals.loky
-from .naive import NaiveModel, SeasonalNaiveModel
+from .naive import AverageModel, NaiveModel, SeasonalAverageModel, SeasonalNaiveModel
+from .npts import NPTSModel
 from .statsforecast import AutoARIMAModel, AutoETSModel, DynamicOptimizedThetaModel, ThetaModel
 from .statsmodels import ARIMAModel, ETSModel, ThetaStatsmodelsModel

{autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/local/abstract_local_model.py RENAMED Viewed

@@ -34,6 +34,9 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
         Argument that can be passed to the underlying local model.
     default_n_jobs : Union[int, float]
         Default number of CPU cores used to train models. If float, this fraction of CPU cores will be used.
+    default_max_ts_length : Optional[int]
+        If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
+        This significantly speeds up fitting and usually leads to no change in accuracy.
     init_time_in_seconds : int
         Time that it takes to initialize the model in seconds (e.g., because of JIT compilation by Numba).
         If time_limit is below this number, model won't be trained.
@@ -41,6 +44,7 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
     allowed_local_model_args: List[str] = []
     default_n_jobs: Union[int, float] = AG_DEFAULT_N_JOBS
+    default_max_ts_length: Optional[int] = 2500
     init_time_in_seconds: int = 0
     def __init__(
@@ -65,7 +69,7 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
             raise ValueError(f"n_jobs must be a float between 0 and 1 or an integer (received n_jobs = {n_jobs})")
         # Default values, potentially overridden inside _fit()
         self.use_fallback_model = hyperparameters.pop("use_fallback_model", True)
-        self.max_ts_length = hyperparameters.pop("max_ts_length", 2500)
+        self.max_ts_length = hyperparameters.pop("max_ts_length", self.default_max_ts_length)
         super().__init__(
             path=path,

autogluon.timeseries-0.8.3b20230819/src/autogluon/timeseries/models/local/naive.py ADDED Viewed

@@ -0,0 +1,164 @@
+from typing import Callable
+import numpy as np
+import pandas as pd
+from autogluon.timeseries.models.local.abstract_local_model import AbstractLocalModel, seasonal_naive_forecast
+class NaiveModel(AbstractLocalModel):
+    """Baseline model that sets the forecast equal to the last observed value.
+    Quantiles are obtained by assuming that the residuals follow zero-mean normal distribution, scale of which is
+    estimated from the empirical distribution of the residuals.
+    As described in https://otexts.com/fpp3/prediction-intervals.html
+    Other Parameters
+    ----------------
+    n_jobs : int or float, default = 0.5
+        Number of CPU cores used to fit the models in parallel.
+        When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
+        When set to a positive integer, that many cores are used.
+        When set to -1, all CPU cores are used.
+    """
+    allowed_local_model_args = ["seasonal_period"]
+    def _predict_with_local_model(
+        self,
+        time_series: pd.Series,
+        local_model_args: dict,
+    ) -> pd.DataFrame:
+        return seasonal_naive_forecast(
+            target=time_series.values.ravel(),
+            prediction_length=self.prediction_length,
+            quantile_levels=self.quantile_levels,
+            seasonal_period=1,
+        )
+class SeasonalNaiveModel(AbstractLocalModel):
+    """Baseline model that sets the forecast equal to the last observed value from the same season.
+    Quantiles are obtained by assuming that the residuals follow zero-mean normal distribution, scale of which is
+    estimated from the empirical distribution of the residuals.
+    As described in https://otexts.com/fpp3/prediction-intervals.html
+    Other Parameters
+    ----------------
+    seasonal_period : int or None, default = None
+        Number of time steps in a complete seasonal cycle for seasonal models. For example, 7 for daily data with a
+        weekly cycle or 12 for monthly data with an annual cycle.
+        When set to None, seasonal_period will be inferred from the frequency of the training data. Can also be
+        specified manually by providing an integer > 1.
+        If seasonal_period (inferred or provided) is equal to 1, will fall back to Naive forecast.
+        Seasonality will also be disabled, if the length of the time series is < seasonal_period.
+    n_jobs : int or float, default = 0.5
+        Number of CPU cores used to fit the models in parallel.
+        When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
+        When set to a positive integer, that many cores are used.
+        When set to -1, all CPU cores are used.
+    """
+    allowed_local_model_args = ["seasonal_period"]
+    def _predict_with_local_model(
+        self,
+        time_series: np.ndarray,
+        local_model_args: dict,
+    ) -> pd.DataFrame:
+        return seasonal_naive_forecast(
+            target=time_series.values.ravel(),
+            prediction_length=self.prediction_length,
+            quantile_levels=self.quantile_levels,
+            seasonal_period=local_model_args["seasonal_period"],
+        )
+def _get_quantile_function(q: float) -> Callable:
+    """Returns a function with name "q" that computes the q'th quantile of a pandas.Series."""
+    def quantile_fn(x: pd.Series) -> pd.Series:
+        return x.quantile(q)
+    quantile_fn.__name__ = str(q)
+    return quantile_fn
+class AverageModel(AbstractLocalModel):
+    """Baseline model that sets the forecast equal to the historic average or quantile.
+    Other Parameters
+    ----------------
+    n_jobs : int or float, default = 0.5
+        Number of CPU cores used to fit the models in parallel.
+        When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
+        When set to a positive integer, that many cores are used.
+        When set to -1, all CPU cores are used.
+    max_ts_length : Optional[int], default = None
+        If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
+        This significantly speeds up fitting and usually leads to no change in accuracy.
+    """
+    allowed_local_model_args = ["seasonal_period"]
+    default_max_ts_length = None
+    def _predict_with_local_model(
+        self,
+        time_series: pd.Series,
+        local_model_args: dict,
+    ) -> pd.DataFrame:
+        agg_functions = ["mean"] + [_get_quantile_function(q) for q in self.quantile_levels]
+        stats_marginal = time_series.agg(agg_functions)
+        stats_repeated = np.tile(stats_marginal.values, [self.prediction_length, 1])
+        return pd.DataFrame(stats_repeated, columns=stats_marginal.index)
+class SeasonalAverageModel(AbstractLocalModel):
+    """Baseline model that sets the forecast equal to the historic average or quantile in the same season.
+    Other Parameters
+    ----------------
+    seasonal_period : int or None, default = None
+        Number of time steps in a complete seasonal cycle for seasonal models. For example, 7 for daily data with a
+        weekly cycle or 12 for monthly data with an annual cycle.
+        When set to None, seasonal_period will be inferred from the frequency of the training data. Can also be
+        specified manually by providing an integer > 1.
+        If seasonal_period (inferred or provided) is equal to 1, will fall back to Naive forecast.
+        Seasonality will also be disabled, if the length of the time series is < seasonal_period.
+    n_jobs : int or float, default = 0.5
+        Number of CPU cores used to fit the models in parallel.
+        When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
+        When set to a positive integer, that many cores are used.
+        When set to -1, all CPU cores are used.
+    max_ts_length : Optional[int], default = None
+        If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
+        This significantly speeds up fitting and usually leads to no change in accuracy.
+    """
+    allowed_local_model_args = ["seasonal_period"]
+    default_max_ts_length = None
+    def _predict_with_local_model(
+        self,
+        time_series: pd.Series,
+        local_model_args: dict,
+    ) -> pd.DataFrame:
+        seasonal_period = local_model_args["seasonal_period"]
+        agg_functions = ["mean"] + [_get_quantile_function(q) for q in self.quantile_levels]
+        # Compute mean & quantiles for each season
+        ts_df = time_series.reset_index(drop=True).to_frame()
+        ts_df["season"] = ts_df.index % seasonal_period
+        stats_per_season = ts_df.groupby("season")[self.target].agg(agg_functions)
+        next_season = ts_df["season"].iloc[-1] + 1
+        season_in_forecast_horizon = np.arange(next_season, next_season + self.prediction_length) % seasonal_period
+        result = stats_per_season.reindex(season_in_forecast_horizon)
+        if np.any(result.isna().values):
+            # Use statistics over all timesteps to fill values for seasons that are missing from training data
+            stats_marginal = time_series.agg(agg_functions)
+            result = result.fillna(stats_marginal)
+        return result

autogluon.timeseries-0.8.3b20230819/src/autogluon/timeseries/models/local/npts.py ADDED Viewed

@@ -0,0 +1,59 @@
+import pandas as pd
+from autogluon.timeseries.models.local.abstract_local_model import AbstractLocalModel
+class NPTSModel(AbstractLocalModel):
+    """Non-Parametric Time Series Forecaster.
+    This model is especially well suited for forecasting sparse or intermittent time series with many zero values.
+    Based on `gluonts.model.npts.NPTSPredictor <https://ts.gluon.ai/stable/api/gluonts/gluonts.model.npts.html>`_.
+    See GluonTS documentation for more information about the model.
+    Other Parameters
+    ----------------
+    kernel_type : {"exponential", "uniform"}, default = "exponential"
+        Kernel used by the model.
+    exp_kernel_weights : float, default = 1.0
+        Scaling factor used in the exponential kernel.
+    use_seasonal_model : bool, default = True
+        Whether to use the seasonal variant of the model.
+    n_jobs : int or float, default = 0.5
+        Number of CPU cores used to fit the models in parallel.
+        When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
+        When set to a positive integer, that many cores are used.
+        When set to -1, all CPU cores are used.
+    max_ts_length : Optional[int], default = 2500
+        If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
+        This significantly speeds up fitting and usually leads to no change in accuracy.
+    """
+    allowed_local_model_args = [
+        "kernel_type",
+        "exp_kernel_weights",
+        "use_seasonal_model",
+        "seasonal_period",
+    ]
+    def _predict_with_local_model(
+        self,
+        time_series: pd.Series,
+        local_model_args: dict,
+    ) -> pd.DataFrame:
+        from gluonts.model.npts import NPTSPredictor
+        local_model_args.pop("seasonal_period")
+        predictor = NPTSPredictor(
+            freq=self.freq,
+            prediction_length=self.prediction_length,
+            **local_model_args,
+        )
+        ts = time_series.copy(deep=False)
+        ts.index = ts.index.to_period()
+        forecast = predictor.predict_time_series(ts, num_samples=100)
+        forecast_dict = {"mean": forecast.mean}
+        for q in self.quantile_levels:
+            forecast_dict[str(q)] = forecast.quantile(q)
+        return pd.DataFrame(forecast_dict)

{autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/local/statsforecast.py RENAMED Viewed

@@ -255,8 +255,6 @@ class ThetaModel(AbstractStatsForecastModel):
         This significantly speeds up fitting and usually leads to no change in accuracy.
     """
-    max_ts_length = 3000
     allowed_local_model_args = [
         "decomposition_type",
         "seasonal_period",

autogluon.timeseries 0.8.3b20230817__tar.gz → 0.8.3b20230819__tar.gz

Potentially problematic release.

autogluon.timeseries 0.8.3b20230817__tar.gz → 0.8.3b20230819__tar.gz