autogluon.timeseries 1.0.1b20240304__py3-none-any.whl → 1.4.1b20251210__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/timeseries/configs/__init__.py +3 -2
- autogluon/timeseries/configs/hyperparameter_presets.py +62 -0
- autogluon/timeseries/configs/predictor_presets.py +84 -0
- autogluon/timeseries/dataset/ts_dataframe.py +339 -186
- autogluon/timeseries/learner.py +192 -60
- autogluon/timeseries/metrics/__init__.py +55 -11
- autogluon/timeseries/metrics/abstract.py +96 -25
- autogluon/timeseries/metrics/point.py +186 -39
- autogluon/timeseries/metrics/quantile.py +47 -20
- autogluon/timeseries/metrics/utils.py +6 -6
- autogluon/timeseries/models/__init__.py +13 -7
- autogluon/timeseries/models/abstract/__init__.py +2 -2
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +533 -273
- autogluon/timeseries/models/abstract/model_trial.py +10 -10
- autogluon/timeseries/models/abstract/tunable.py +189 -0
- autogluon/timeseries/models/autogluon_tabular/__init__.py +2 -0
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +369 -215
- autogluon/timeseries/models/autogluon_tabular/per_step.py +513 -0
- autogluon/timeseries/models/autogluon_tabular/transforms.py +67 -0
- autogluon/timeseries/models/autogluon_tabular/utils.py +3 -51
- autogluon/timeseries/models/chronos/__init__.py +4 -0
- autogluon/timeseries/models/chronos/chronos2.py +361 -0
- autogluon/timeseries/models/chronos/model.py +738 -0
- autogluon/timeseries/models/chronos/utils.py +369 -0
- autogluon/timeseries/models/ensemble/__init__.py +35 -2
- autogluon/timeseries/models/ensemble/{abstract_timeseries_ensemble.py → abstract.py} +50 -26
- autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
- autogluon/timeseries/models/ensemble/array_based/abstract.py +236 -0
- autogluon/timeseries/models/ensemble/array_based/models.py +73 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +167 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
- autogluon/timeseries/models/ensemble/ensemble_selection.py +167 -0
- autogluon/timeseries/models/ensemble/per_item_greedy.py +162 -0
- autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +40 -0
- autogluon/timeseries/models/ensemble/weighted/basic.py +78 -0
- autogluon/timeseries/models/ensemble/weighted/greedy.py +57 -0
- autogluon/timeseries/models/gluonts/__init__.py +3 -1
- autogluon/timeseries/models/gluonts/abstract.py +583 -0
- autogluon/timeseries/models/gluonts/dataset.py +109 -0
- autogluon/timeseries/models/gluonts/{torch/models.py → models.py} +185 -44
- autogluon/timeseries/models/local/__init__.py +1 -10
- autogluon/timeseries/models/local/abstract_local_model.py +150 -97
- autogluon/timeseries/models/local/naive.py +31 -23
- autogluon/timeseries/models/local/npts.py +6 -2
- autogluon/timeseries/models/local/statsforecast.py +99 -112
- autogluon/timeseries/models/multi_window/multi_window_model.py +99 -40
- autogluon/timeseries/models/registry.py +64 -0
- autogluon/timeseries/models/toto/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
- autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
- autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
- autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
- autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
- autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
- autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
- autogluon/timeseries/models/toto/dataloader.py +108 -0
- autogluon/timeseries/models/toto/hf_pretrained_model.py +118 -0
- autogluon/timeseries/models/toto/model.py +236 -0
- autogluon/timeseries/predictor.py +826 -305
- autogluon/timeseries/regressor.py +253 -0
- autogluon/timeseries/splitter.py +10 -31
- autogluon/timeseries/trainer/__init__.py +2 -3
- autogluon/timeseries/trainer/ensemble_composer.py +439 -0
- autogluon/timeseries/trainer/model_set_builder.py +256 -0
- autogluon/timeseries/trainer/prediction_cache.py +149 -0
- autogluon/timeseries/trainer/trainer.py +1298 -0
- autogluon/timeseries/trainer/utils.py +17 -0
- autogluon/timeseries/transforms/__init__.py +2 -0
- autogluon/timeseries/transforms/covariate_scaler.py +164 -0
- autogluon/timeseries/transforms/target_scaler.py +149 -0
- autogluon/timeseries/utils/constants.py +10 -0
- autogluon/timeseries/utils/datetime/base.py +38 -20
- autogluon/timeseries/utils/datetime/lags.py +18 -16
- autogluon/timeseries/utils/datetime/seasonality.py +14 -14
- autogluon/timeseries/utils/datetime/time_features.py +17 -14
- autogluon/timeseries/utils/features.py +317 -53
- autogluon/timeseries/utils/forecast.py +31 -17
- autogluon/timeseries/utils/timer.py +173 -0
- autogluon/timeseries/utils/warning_filters.py +44 -6
- autogluon/timeseries/version.py +2 -1
- autogluon.timeseries-1.4.1b20251210-py3.11-nspkg.pth +1 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/METADATA +71 -47
- autogluon_timeseries-1.4.1b20251210.dist-info/RECORD +103 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/WHEEL +1 -1
- autogluon/timeseries/configs/presets_configs.py +0 -11
- autogluon/timeseries/evaluator.py +0 -6
- autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -170
- autogluon/timeseries/models/gluonts/abstract_gluonts.py +0 -550
- autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
- autogluon/timeseries/models/presets.py +0 -325
- autogluon/timeseries/trainer/abstract_trainer.py +0 -1144
- autogluon/timeseries/trainer/auto_trainer.py +0 -74
- autogluon.timeseries-1.0.1b20240304-py3.8-nspkg.pth +0 -1
- autogluon.timeseries-1.0.1b20240304.dist-info/RECORD +0 -58
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/LICENSE +0 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/NOTICE +0 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/zip-safe +0 -0
autogluon/timeseries/models/gluonts/dataset.py (new file)
@@ -0,0 +1,109 @@
+from typing import Any, Iterator, Type
+
+import numpy as np
+import pandas as pd
+from gluonts.dataset.common import Dataset as GluonTSDataset
+from gluonts.dataset.field_names import FieldName
+
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
+from autogluon.timeseries.utils.datetime import norm_freq_str
+
+
+class SimpleGluonTSDataset(GluonTSDataset):
+    """Wrapper for TimeSeriesDataFrame that is compatible with the GluonTS Dataset API."""
+
+    def __init__(
+        self,
+        target_df: TimeSeriesDataFrame,
+        freq: str,
+        target_column: str = "target",
+        feat_static_cat: np.ndarray | None = None,
+        feat_static_real: np.ndarray | None = None,
+        feat_dynamic_cat: np.ndarray | None = None,
+        feat_dynamic_real: np.ndarray | None = None,
+        past_feat_dynamic_cat: np.ndarray | None = None,
+        past_feat_dynamic_real: np.ndarray | None = None,
+        includes_future: bool = False,
+        prediction_length: int | None = None,
+    ):
+        assert target_df is not None
+        # Convert TimeSeriesDataFrame to pd.Series for faster processing
+        self.target_array = target_df[target_column].to_numpy(np.float32)
+        self.feat_static_cat = self._astype(feat_static_cat, dtype=np.int64)
+        self.feat_static_real = self._astype(feat_static_real, dtype=np.float32)
+        self.feat_dynamic_cat = self._astype(feat_dynamic_cat, dtype=np.int64)
+        self.feat_dynamic_real = self._astype(feat_dynamic_real, dtype=np.float32)
+        self.past_feat_dynamic_cat = self._astype(past_feat_dynamic_cat, dtype=np.int64)
+        self.past_feat_dynamic_real = self._astype(past_feat_dynamic_real, dtype=np.float32)
+        self.freq = self._get_freq_for_period(freq)
+
+        # Necessary to compute indptr for known_covariates at prediction time
+        self.includes_future = includes_future
+        self.prediction_length = prediction_length
+
+        # Replace inefficient groupby ITEMID with indptr that stores start:end of each time series
+        self.item_ids = target_df.item_ids
+        self.indptr = target_df.get_indptr()
+        self.start_timestamps = target_df.index[self.indptr[:-1]].to_frame(index=False)[TimeSeriesDataFrame.TIMESTAMP]
+        assert len(self.item_ids) == len(self.start_timestamps)
+
+    @staticmethod
+    def _astype(array: np.ndarray | None, dtype: Type[np.generic]) -> np.ndarray | None:
+        if array is None:
+            return None
+        else:
+            return array.astype(dtype)
+
+    @staticmethod
+    def _get_freq_for_period(freq: str) -> str:
+        """Convert freq to format compatible with pd.Period.
+
+        For example, ME freq must be converted to M when creating a pd.Period.
+        """
+        offset = pd.tseries.frequencies.to_offset(freq)
+        assert offset is not None
+        freq_name = norm_freq_str(offset)
+        if freq_name == "SME":
+            # Replace unsupported frequency "SME" with "2W"
+            return "2W"
+        elif freq_name == "bh":
+            # Replace unsupported frequency "bh" with dummy value "Y"
+            return "Y"
+        else:
+            freq_name_for_period = {"YE": "Y", "QE": "Q", "ME": "M"}.get(freq_name, freq_name)
+            return f"{offset.n}{freq_name_for_period}"
+
+    def __len__(self):
+        return len(self.indptr) - 1  # noqa
+
+    def __iter__(self) -> Iterator[dict[str, Any]]:
+        for j in range(len(self.indptr) - 1):
+            start_idx = self.indptr[j]
+            end_idx = self.indptr[j + 1]
+            # GluonTS expects item_id to be a string
+            ts = {
+                FieldName.ITEM_ID: str(self.item_ids[j]),
+                FieldName.START: pd.Period(self.start_timestamps.iloc[j], freq=self.freq),
+                FieldName.TARGET: self.target_array[start_idx:end_idx],
+            }
+            if self.feat_static_cat is not None:
+                ts[FieldName.FEAT_STATIC_CAT] = self.feat_static_cat[j]
+            if self.feat_static_real is not None:
+                ts[FieldName.FEAT_STATIC_REAL] = self.feat_static_real[j]
+            if self.past_feat_dynamic_cat is not None:
+                ts[FieldName.PAST_FEAT_DYNAMIC_CAT] = self.past_feat_dynamic_cat[start_idx:end_idx].T
+            if self.past_feat_dynamic_real is not None:
+                ts[FieldName.PAST_FEAT_DYNAMIC_REAL] = self.past_feat_dynamic_real[start_idx:end_idx].T
+
+            # Dynamic features that may extend into the future
+            if self.includes_future:
+                assert self.prediction_length is not None, (
+                    "Prediction length must be provided if includes_future is True"
+                )
+                start_idx = start_idx + j * self.prediction_length
+                end_idx = end_idx + (j + 1) * self.prediction_length
+                if self.feat_dynamic_cat is not None:
+                    ts[FieldName.FEAT_DYNAMIC_CAT] = self.feat_dynamic_cat[start_idx:end_idx].T
+                if self.feat_dynamic_real is not None:
+                    ts[FieldName.FEAT_DYNAMIC_REAL] = self.feat_dynamic_real[start_idx:end_idx].T
+            yield ts
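The `__init__` above swaps a per-item `groupby` for a CSR-style index-pointer array: `get_indptr()` returns boundaries such that series `j` occupies rows `indptr[j]:indptr[j + 1]` of the flat target array, which is exactly what `__iter__` slices on. A minimal, self-contained sketch of the idea (illustrative only, not part of the diff):

    # CSR-style "indptr" trick: precompute run boundaries once instead of
    # grouping rows by item_id on every iteration.
    import numpy as np

    item_ids = np.array(["A", "A", "A", "B", "B"])
    target = np.array([1.0, 2.0, 3.0, 10.0, 20.0], dtype=np.float32)

    # Indices where item_id changes, framed by 0 and the total length: [0, 3, 5]
    breaks = np.flatnonzero(item_ids[1:] != item_ids[:-1]) + 1
    indptr = np.concatenate(([0], breaks, [len(item_ids)]))

    for j in range(len(indptr) - 1):
        start, end = indptr[j], indptr[j + 1]
        print(item_ids[start], target[start:end])
    # A [1. 2. 3.]
    # B [10. 20.]

Each slice is O(1) bookkeeping plus a view into the flat array, which is why the comment in the diff calls the groupby it replaces inefficient.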
autogluon/timeseries/models/gluonts/{torch/models.py → models.py}
@@ -3,28 +3,21 @@ Module including wrappers for PyTorch implementations of models in GluonTS
 """
 
 import logging
-from typing import Any,
+from typing import Any, Type
 
 from gluonts.model.estimator import Estimator as GluonTSEstimator
 
-from autogluon.timeseries.models.gluonts.abstract_gluonts import AbstractGluonTSModel
 from autogluon.timeseries.utils.datetime import (
     get_lags_for_frequency,
     get_seasonality,
     get_time_features_for_frequency,
 )
 
+from .abstract import AbstractGluonTSModel
+
 # NOTE: We avoid imports for torch and lightning.pytorch at the top level and hide them inside class methods.
 # This is done to skip these imports during multiprocessing (which may cause bugs)
 
-# FIXME: introduces cpflows dependency. We exclude this model until a future release.
-# from gluonts.torch.model.mqf2 import MQF2MultiHorizonEstimator
-
-# FIXME: DeepNPTS does not implement the GluonTS PyTorch API, and does not use
-# PyTorch Lightning. We exclude this model until a future release.
-# from gluonts.torch.model.deep_npts import DeepNPTSEstimator
-
-
 logger = logging.getLogger(__name__)
 
 
@@ -61,10 +54,13 @@ class DeepARModel(AbstractGluonTSModel):
     embedding_dimension : int, optional
         Dimension of the embeddings for categorical features
         (if None, defaults to [min(50, (cat+1)//2) for cat in cardinality])
-
-
+    max_cat_cardinality : int, default = 100
+        Maximum number of dimensions to use when one-hot-encoding categorical known_covariates.
+    distr_output : gluonts.torch.distributions.Output, default = StudentTOutput()
+        Distribution output object that defines how the model output is converted to a forecast, and how the loss is computed.
     scaling: bool, default = True
-
+        If True, mean absolute scaling will be applied to each *context window* during training & prediction.
+        Note that this is different from the ``target_scaler`` that is applied to the *entire time series*.
     max_epochs : int, default = 100
         Number of epochs the model will be trained for
     batch_size : int, default = 64
@@ -83,20 +79,25 @@ class DeepARModel(AbstractGluonTSModel):
         If True, ``lightning_logs`` directory will NOT be removed after the model finished training.
     """
 
-
+    # TODO: Replace "scaling: bool" with "window_scaler": {"mean_abs", None} for consistency?
+
+    ag_priority = 40
+
+    _supports_known_covariates = True
+    _supports_static_features = True
 
     def _get_estimator_class(self) -> Type[GluonTSEstimator]:
         from gluonts.torch.model.deepar import DeepAREstimator
 
         return DeepAREstimator
 
-    def _get_estimator_init_args(self) ->
+    def _get_estimator_init_args(self) -> dict[str, Any]:
         init_kwargs = super()._get_estimator_init_args()
         init_kwargs["num_feat_static_cat"] = self.num_feat_static_cat
         init_kwargs["num_feat_static_real"] = self.num_feat_static_real
         init_kwargs["cardinality"] = self.feat_static_cat_cardinality
         init_kwargs["num_feat_dynamic_real"] = self.num_feat_dynamic_real
-        init_kwargs.setdefault("lags_seq", get_lags_for_frequency(self.freq))
+        init_kwargs.setdefault("lags_seq", get_lags_for_frequency(self.freq))  # type: ignore
         init_kwargs.setdefault("time_features", get_time_features_for_frequency(self.freq))
         return init_kwargs
 
@@ -112,14 +113,15 @@ class SimpleFeedForwardModel(AbstractGluonTSModel):
     ----------------
     context_length : int, default = max(10, 2 * prediction_length)
         Number of time units that condition the predictions
-    hidden_dimensions:
+    hidden_dimensions: list[int], default = [20, 20]
         Size of hidden layers in the feedforward network
-    distr_output : gluonts.torch.distributions.
-        Distribution to
+    distr_output : gluonts.torch.distributions.Output, default = StudentTOutput()
+        Distribution output object that defines how the model output is converted to a forecast, and how the loss is computed.
     batch_normalization : bool, default = False
         Whether to use batch normalization
     mean_scaling : bool, default = True
-
+        If True, mean absolute scaling will be applied to each *context window* during training & prediction.
+        Note that this is different from the ``target_scaler`` that is applied to the *entire time series*.
     max_epochs : int, default = 100
         Number of epochs the model will be trained for
     batch_size : int, default = 64
@@ -138,6 +140,8 @@ class SimpleFeedForwardModel(AbstractGluonTSModel):
         If True, ``lightning_logs`` directory will NOT be removed after the model finished training.
     """
 
+    ag_priority = 10
+
     def _get_estimator_class(self) -> Type[GluonTSEstimator]:
         from gluonts.torch.model.simple_feedforward import SimpleFeedForwardEstimator
 
@@ -162,6 +166,8 @@ class TemporalFusionTransformerModel(AbstractGluonTSModel):
     ----------------
     context_length : int, default = max(64, 2 * prediction_length)
         Number of past values used for prediction.
+    distr_output : gluonts.torch.distributions.Output, default = QuantileOutput()
+        Distribution output object that defines how the model output is converted to a forecast, and how the loss is computed.
     disable_static_features : bool, default = False
         If True, static features won't be used by the model even if they are present in the dataset.
         If False, static features will be used by the model if they are present in the dataset.
@@ -197,19 +203,25 @@ class TemporalFusionTransformerModel(AbstractGluonTSModel):
         If True, ``lightning_logs`` directory will NOT be removed after the model finished training.
     """
 
-
-
+    ag_priority = 45
+    ag_model_aliases = ["TFT"]
 
-
-
-
+    _supports_known_covariates = True
+    _supports_past_covariates = True
+    _supports_cat_covariates = True
+    _supports_static_features = True
 
     def _get_estimator_class(self) -> Type[GluonTSEstimator]:
         from gluonts.torch.model.tft import TemporalFusionTransformerEstimator
 
         return TemporalFusionTransformerEstimator
 
-    def
+    def _get_default_hyperparameters(self):
+        return super()._get_default_hyperparameters() | {
+            "context_length": min(512, max(64, 2 * self.prediction_length)),
+        }
+
+    def _get_estimator_init_args(self) -> dict[str, Any]:
         init_kwargs = super()._get_estimator_init_args()
         if self.num_feat_dynamic_real > 0:
             init_kwargs["dynamic_dims"] = [self.num_feat_dynamic_real]
@@ -219,7 +231,16 @@ class TemporalFusionTransformerModel(AbstractGluonTSModel):
             init_kwargs["static_dims"] = [self.num_feat_static_real]
         if len(self.feat_static_cat_cardinality):
             init_kwargs["static_cardinalities"] = self.feat_static_cat_cardinality
+        if len(self.feat_dynamic_cat_cardinality):
+            init_kwargs["dynamic_cardinalities"] = self.feat_dynamic_cat_cardinality
+        if len(self.past_feat_dynamic_cat_cardinality):
+            init_kwargs["past_dynamic_cardinalities"] = self.past_feat_dynamic_cat_cardinality
+
         init_kwargs.setdefault("time_features", get_time_features_for_frequency(self.freq))
+
+        # 'distr_output' and 'quantiles' shouldn't be included at the same time (otherwise an exception will be raised)
+        if "distr_output" in init_kwargs:
+            init_kwargs.pop("quantiles", None)
         return init_kwargs
 
 
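The docstrings in the hunks above introduce `distr_output` for the GluonTS-backed models, and the last hunk shows that a user-supplied `distr_output` causes TFT's default `quantiles` argument to be dropped, since the two are mutually exclusive. A hedged sketch of overriding it (illustrative, not part of the diff; `NormalOutput` is a class in `gluonts.torch.distributions`, and the `"DeepAR"` key assumes AutoGluon's convention of naming models by dropping the `Model` suffix):

    # Swap the default StudentTOutput for a Gaussian head on DeepAR.
    from gluonts.torch.distributions import NormalOutput

    hyperparameters = {
        "DeepAR": {"distr_output": NormalOutput()},
    }
    # This dict would then be passed to TimeSeriesPredictor.fit(..., hyperparameters=...).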
@@ -241,10 +262,13 @@ class DLinearModel(AbstractGluonTSModel):
         Number of time units that condition the predictions
     hidden_dimension: int, default = 20
         Size of hidden layers in the feedforward network
-    distr_output : gluonts.torch.distributions.
-        Distribution to
+    distr_output : gluonts.torch.distributions.Output, default = StudentTOutput()
+        Distribution output object that defines how the model output is converted to a forecast, and how the loss is computed.
     scaling : {"mean", "std", None}, default = "mean"
-        Scaling applied to
+        Scaling applied to each *context window* during training & prediction.
+        One of ``"mean"`` (mean absolute scaling), ``"std"`` (standardization), ``None`` (no scaling).
+
+        Note that this is different from the ``target_scaler`` that is applied to the *entire time series*.
     max_epochs : int, default = 100
         Number of epochs the model will be trained for
     batch_size : int, default = 64
@@ -265,9 +289,12 @@ class DLinearModel(AbstractGluonTSModel):
         If True, ``lightning_logs`` directory will NOT be removed after the model finished training.
     """
 
-
-
-
+    ag_priority = 10
+
+    def _get_default_hyperparameters(self):
+        return super()._get_default_hyperparameters() | {
+            "context_length": 96,
+        }
 
     def _get_estimator_class(self) -> Type[GluonTSEstimator]:
         from gluonts.torch.model.d_linear import DLinearEstimator
@@ -301,10 +328,13 @@ class PatchTSTModel(AbstractGluonTSModel):
         Number of attention heads in the Transformer encoder which must divide d_model.
     num_encoder_layers : int, default = 2
         Number of layers in the Transformer encoder.
-    distr_output : gluonts.torch.distributions.
-        Distribution to
+    distr_output : gluonts.torch.distributions.Output, default = StudentTOutput()
+        Distribution output object that defines how the model output is converted to a forecast, and how the loss is computed.
     scaling : {"mean", "std", None}, default = "mean"
-        Scaling applied to
+        Scaling applied to each *context window* during training & prediction.
+        One of ``"mean"`` (mean absolute scaling), ``"std"`` (standardization), ``None`` (no scaling).
+
+        Note that this is different from the ``target_scaler`` that is applied to the *entire time series*.
     max_epochs : int, default = 100
         Number of epochs the model will be trained for
     batch_size : int, default = 64
@@ -319,18 +349,21 @@ class PatchTSTModel(AbstractGluonTSModel):
         If True, ``lightning_logs`` directory will NOT be removed after the model finished training.
     """
 
-
-
-
+    ag_priority = 30
+
+    _supports_known_covariates = True
 
     def _get_estimator_class(self) -> Type[GluonTSEstimator]:
         from gluonts.torch.model.patch_tst import PatchTSTEstimator
 
         return PatchTSTEstimator
 
-    def
+    def _get_default_hyperparameters(self):
+        return super()._get_default_hyperparameters() | {"context_length": 96, "patch_len": 16}
+
+    def _get_estimator_init_args(self) -> dict[str, Any]:
         init_kwargs = super()._get_estimator_init_args()
-        init_kwargs
+        init_kwargs["num_feat_dynamic_real"] = self.num_feat_dynamic_real
         return init_kwargs
 
 
@@ -372,6 +405,8 @@ class WaveNetModel(AbstractGluonTSModel):
         If True, logarithm of the scale of the past data will be used as an additional static feature.
     negative_data : bool, default = True
         Flag indicating whether the time series take negative values.
+    max_cat_cardinality : int, default = 100
+        Maximum number of dimensions to use when one-hot-encoding categorical known_covariates.
     max_epochs : int, default = 100
         Number of epochs the model will be trained for
     batch_size : int, default = 64
@@ -392,7 +427,10 @@ class WaveNetModel(AbstractGluonTSModel):
         If True, ``lightning_logs`` directory will NOT be removed after the model finished training.
     """
 
-
+    ag_priority = 25
+
+    _supports_known_covariates = True
+    _supports_static_features = True
     default_num_samples: int = 100
 
     def _get_estimator_class(self) -> Type[GluonTSEstimator]:
@@ -400,7 +438,7 @@
 
         return WaveNetEstimator
 
-    def _get_estimator_init_args(self) ->
+    def _get_estimator_init_args(self) -> dict[str, Any]:
         init_kwargs = super()._get_estimator_init_args()
         init_kwargs["num_feat_static_cat"] = self.num_feat_static_cat
         init_kwargs["num_feat_static_real"] = self.num_feat_static_real
@@ -410,6 +448,109 @@
         init_kwargs.setdefault("seasonality", get_seasonality(self.freq))
         init_kwargs.setdefault("time_features", get_time_features_for_frequency(self.freq))
         init_kwargs.setdefault("num_parallel_samples", self.default_num_samples)
-
-
+        return init_kwargs
+
+
+class TiDEModel(AbstractGluonTSModel):
+    """Time series dense encoder model from [Das2023]_.
+
+    Based on `gluonts.torch.model.tide.TiDEEstimator <https://ts.gluon.ai/stable/api/gluonts/gluonts.torch.model.tide.html>`_.
+    See GluonTS documentation for additional hyperparameters.
+
+
+    References
+    ----------
+    .. [Das2023] Das, Abhimanyu, et al.
+        "Long-term Forecasting with TiDE: Time-series Dense Encoder."
+        Transactions of Machine Learning Research. 2023.
+
+    Other Parameters
+    ----------------
+    context_length : int, default = max(64, 2 * prediction_length)
+        Number of past values used for prediction.
+    disable_static_features : bool, default = False
+        If True, static features won't be used by the model even if they are present in the dataset.
+        If False, static features will be used by the model if they are present in the dataset.
+    disable_known_covariates : bool, default = False
+        If True, known covariates won't be used by the model even if they are present in the dataset.
+        If False, known covariates will be used by the model if they are present in the dataset.
+    feat_proj_hidden_dim : int, default = 4
+        Size of the feature projection layer.
+    encoder_hidden_dim : int, default = 64
+        Size of the dense encoder layer.
+    decoder_hidden_dim : int, default = 64
+        Size of the dense decoder layer.
+    temporal_hidden_dim : int, default = 64
+        Size of the temporal decoder layer.
+    distr_hidden_dim : int, default = 64
+        Size of the distribution projection layer.
+    num_layers_encoder : int, default = 2
+        Number of layers in dense encoder.
+    num_layers_decoder : int, default = 2
+        Number of layers in dense decoder.
+    decoder_output_dim : int, default = 16
+        Output size of the dense decoder.
+    dropout_rate : float, default = 0.2
+        Dropout regularization parameter.
+    num_feat_dynamic_proj : int, default = 2
+        Output size of feature projection layer.
+    embedding_dimension : int, default = [16] * num_feat_static_cat
+        Dimension of the embeddings for categorical features
+    layer_norm : bool, default = True
+        Should layer normalization be enabled?
+    scaling : {"mean", "std", None}, default = "mean"
+        Scaling applied to each *context window* during training & prediction.
+        One of ``"mean"`` (mean absolute scaling), ``"std"`` (standardization), ``None`` (no scaling).
+
+        Note that this is different from the ``target_scaler`` that is applied to the *entire time series*.
+    max_epochs : int, default = 100
+        Number of epochs the model will be trained for
+    batch_size : int, default = 256
+        Size of batches used during training
+    predict_batch_size : int, default = 500
+        Size of batches used during prediction.
+    num_batches_per_epoch : int, default = 50
+        Number of batches processed every epoch
+    lr : float, default = 1e-4,
+        Learning rate used during training
+    trainer_kwargs : dict, optional
+        Optional keyword arguments passed to ``lightning.Trainer``.
+    early_stopping_patience : int or None, default = 20
+        Early stop training if the validation loss doesn't improve for this many epochs.
+    keep_lightning_logs : bool, default = False
+        If True, ``lightning_logs`` directory will NOT be removed after the model finished training.
+    """
+
+    ag_priority = 30
+
+    _supports_known_covariates = True
+    _supports_static_features = True
+
+    def _get_estimator_class(self) -> Type[GluonTSEstimator]:
+        from gluonts.torch.model.tide import TiDEEstimator
+
+        return TiDEEstimator
+
+    def _get_default_hyperparameters(self):
+        return super()._get_default_hyperparameters() | {
+            "context_length": min(512, max(64, 2 * self.prediction_length)),
+            "encoder_hidden_dim": 64,
+            "decoder_hidden_dim": 64,
+            "temporal_hidden_dim": 64,
+            "distr_hidden_dim": 64,
+            "num_layers_encoder": 2,
+            "num_layers_decoder": 2,
+            "decoder_output_dim": 16,
+            "dropout_rate": 0.2,
+            "layer_norm": True,
+            "lr": 1e-4,
+            "batch_size": 256,
+        }
+
+    def _get_estimator_init_args(self) -> dict[str, Any]:
+        init_kwargs = super()._get_estimator_init_args()
+        init_kwargs["num_feat_static_cat"] = self.num_feat_static_cat
+        init_kwargs["num_feat_static_real"] = self.num_feat_static_real
+        init_kwargs["cardinality"] = self.feat_static_cat_cardinality
+        init_kwargs["num_feat_dynamic_real"] = self.num_feat_dynamic_real
         return init_kwargs
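The hunk above adds `TiDEModel`, a new GluonTS-backed model, together with its default hyperparameters. A hedged usage sketch (illustrative, not part of the diff; the `"TiDE"` model key is assumed from the class name by AutoGluon's convention of dropping the `Model` suffix, while `max_epochs` and `context_length` come from the docstring above):

    import numpy as np
    import pandas as pd
    from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

    # Two synthetic hourly series in long format.
    df = pd.DataFrame({
        "item_id": np.repeat(["A", "B"], 200),
        "timestamp": np.tile(pd.date_range("2024-01-01", periods=200, freq="h"), 2),
        "target": np.sin(np.arange(400) / 10.0),
    })
    train_data = TimeSeriesDataFrame.from_data_frame(df, id_column="item_id", timestamp_column="timestamp")

    predictor = TimeSeriesPredictor(prediction_length=24).fit(
        train_data,
        hyperparameters={"TiDE": {"max_epochs": 1, "context_length": 96}},
    )
    predictions = predictor.predict(train_data)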
autogluon/timeseries/models/local/__init__.py
@@ -1,5 +1,3 @@
-import joblib.externals.loky
-
 from .naive import AverageModel, NaiveModel, SeasonalAverageModel, SeasonalNaiveModel
 from .npts import NPTSModel
 from .statsforecast import (
@@ -8,17 +6,10 @@ from .statsforecast import (
     AutoARIMAModel,
     AutoCESModel,
     AutoETSModel,
-
-    CrostonOptimizedModel,
-    CrostonSBAModel,
+    CrostonModel,
     DynamicOptimizedThetaModel,
     ETSModel,
     IMAPAModel,
     ThetaModel,
     ZeroModel,
 )
-
-# By default, joblib w/ loky backend kills processes that take >300MB of RAM assuming that this is caused by a memory
-# leak. This leads to problems for some memory-hungry models like AutoARIMA/Theta.
-# This monkey patch removes this undesired behavior
-joblib.externals.loky.process_executor._MAX_MEMORY_LEAK_SIZE = int(3e10)