autogluon.timeseries 1.0.1b20240327.tar.gz → 1.0.1b20240403.tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of autogluon.timeseries might be problematic.

Files changed (62)
  1. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/PKG-INFO +2 -2
  2. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/dataset/ts_dataframe.py +11 -3
  3. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/learner.py +28 -1
  4. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/abstract/abstract_timeseries_model.py +33 -3
  5. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/autogluon_tabular/mlforecast.py +25 -3
  6. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/chronos/model.py +60 -11
  7. autogluon.timeseries-1.0.1b20240327/src/autogluon/timeseries/models/chronos/chronos.py → autogluon.timeseries-1.0.1b20240403/src/autogluon/timeseries/models/chronos/pipeline.py +80 -19
  8. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/gluonts/abstract_gluonts.py +3 -2
  9. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/local/abstract_local_model.py +67 -22
  10. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/local/naive.py +18 -14
  11. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/local/npts.py +3 -0
  12. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/local/statsforecast.py +2 -0
  13. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/multi_window/multi_window_model.py +8 -1
  14. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/predictor.py +77 -40
  15. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/trainer/abstract_trainer.py +70 -18
  16. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/utils/features.py +62 -4
  17. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/version.py +1 -1
  18. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon.timeseries.egg-info/PKG-INFO +2 -2
  19. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon.timeseries.egg-info/SOURCES.txt +1 -1
  20. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon.timeseries.egg-info/requires.txt +4 -4
  21. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/setup.cfg +0 -0
  22. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/setup.py +0 -0
  23. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/__init__.py +0 -0
  24. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/configs/__init__.py +0 -0
  25. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/configs/presets_configs.py +0 -0
  26. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/dataset/__init__.py +0 -0
  27. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/evaluator.py +0 -0
  28. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/metrics/__init__.py +0 -0
  29. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/metrics/abstract.py +0 -0
  30. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/metrics/point.py +0 -0
  31. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/metrics/quantile.py +0 -0
  32. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/metrics/utils.py +0 -0
  33. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/__init__.py +0 -0
  34. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/abstract/__init__.py +0 -0
  35. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/abstract/model_trial.py +0 -0
  36. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/autogluon_tabular/__init__.py +0 -0
  37. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/autogluon_tabular/utils.py +0 -0
  38. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/chronos/__init__.py +0 -0
  39. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/ensemble/__init__.py +0 -0
  40. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -0
  41. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -0
  42. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/gluonts/__init__.py +0 -0
  43. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
  44. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/gluonts/torch/models.py +0 -0
  45. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/local/__init__.py +0 -0
  46. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/multi_window/__init__.py +0 -0
  47. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/presets.py +0 -0
  48. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/splitter.py +0 -0
  49. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/trainer/__init__.py +0 -0
  50. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/trainer/auto_trainer.py +0 -0
  51. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/utils/__init__.py +0 -0
  52. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/utils/datetime/__init__.py +0 -0
  53. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/utils/datetime/base.py +0 -0
  54. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/utils/datetime/lags.py +0 -0
  55. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/utils/datetime/seasonality.py +0 -0
  56. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/utils/datetime/time_features.py +0 -0
  57. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/utils/forecast.py +0 -0
  58. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/utils/warning_filters.py +0 -0
  59. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon.timeseries.egg-info/dependency_links.txt +0 -0
  60. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon.timeseries.egg-info/namespace_packages.txt +0 -0
  61. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon.timeseries.egg-info/top_level.txt +0 -0
  62. {autogluon.timeseries-1.0.1b20240327 → autogluon.timeseries-1.0.1b20240403}/src/autogluon.timeseries.egg-info/zip-safe +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: autogluon.timeseries
-Version: 1.0.1b20240327
+Version: 1.0.1b20240403
 Summary: AutoML for Image, Text, and Tabular Data
 Home-page: https://github.com/autogluon/autogluon
 Author: AutoGluon Community
@@ -26,7 +26,7 @@ Description:
 [![Continuous Integration](https://github.com/autogluon/autogluon/actions/workflows/continuous_integration.yml/badge.svg)](https://github.com/autogluon/autogluon/actions/workflows/continuous_integration.yml)
 [![Platform Tests](https://github.com/autogluon/autogluon/actions/workflows/platform_tests-command.yml/badge.svg?event=schedule)](https://github.com/autogluon/autogluon/actions/workflows/platform_tests-command.yml)
 
-[Install Instructions](https://auto.gluon.ai/stable/install.html) | [Documentation](https://auto.gluon.ai/stable/index.html) | [Release Notes](https://auto.gluon.ai/stable/whats_new/index.html)
+[Installation](https://auto.gluon.ai/stable/install.html) | [Documentation](https://auto.gluon.ai/stable/index.html) | [Release Notes](https://auto.gluon.ai/stable/whats_new/index.html)
 
 AutoGluon automates machine learning tasks enabling you to easily achieve strong predictive performance in your applications. With just a few lines of code, you can train and deploy high-accuracy machine learning and deep learning models on image, text, time series, and tabular data.
 </div>
src/autogluon/timeseries/dataset/ts_dataframe.py
@@ -765,11 +765,19 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
                 "(for example, using the `convert_frequency` method)."
             )
 
-        grouped_df = pd.DataFrame(self).groupby(level=ITEMID, sort=False, group_keys=False)
+        # Convert to pd.DataFrame for faster processing
+        df = pd.DataFrame(self)
+
+        # Skip filling if there are no NaNs
+        if not df.isna().any(axis=None):
+            return self
+
+        grouped_df = df.groupby(level=ITEMID, sort=False, group_keys=False)
         if method == "auto":
             filled_df = grouped_df.ffill()
-            # Fill missing values at the start of each time series with bfill
-            filled_df = filled_df.groupby(level=ITEMID, sort=False, group_keys=False).bfill()
+            # If necessary, fill missing values at the start of each time series with bfill
+            if filled_df.isna().any(axis=None):
+                filled_df = filled_df.groupby(level=ITEMID, sort=False, group_keys=False).bfill()
         elif method in ["ffill", "pad"]:
             filled_df = grouped_df.ffill()
         elif method in ["bfill", "backfill"]:
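The net effect of this change: `fill_missing_values` now returns immediately when the frame contains no NaNs, and the second `bfill` pass only runs when leading gaps survive the `ffill`. A minimal usage sketch with made-up item ids and values:

```python
import numpy as np
import pandas as pd
from autogluon.timeseries import TimeSeriesDataFrame

ts_df = TimeSeriesDataFrame.from_data_frame(
    pd.DataFrame(
        {
            "item_id": ["A"] * 4 + ["B"] * 4,
            "timestamp": list(pd.date_range("2024-01-01", periods=4)) * 2,
            # "A" has a leading NaN (needs bfill) and an interior NaN (handled by ffill)
            "target": [np.nan, 1.0, np.nan, 3.0, 5.0, 6.0, 7.0, 8.0],
        }
    )
)
filled = ts_df.fill_missing_values(method="auto")  # ffill, then bfill only if NaNs remain
assert not filled["target"].isna().any()
filled.fill_missing_values()  # no NaNs left, so this now returns without any groupby work
```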
src/autogluon/timeseries/learner.py
@@ -1,7 +1,7 @@
 import logging
 import reprlib
 import time
-from typing import Any, Dict, List, Optional, Type, Union
+from typing import Any, Dict, List, Literal, Optional, Type, Union
 
 import pandas as pd
 
@@ -228,5 +228,32 @@ class TimeSeriesLearner(AbstractLearner):
         learner_info.pop("random_state", None)
         return learner_info
 
+    def persist_trainer(
+        self, models: Union[Literal["all", "best"], List[str]] = "all", with_ancestors: bool = False
+    ) -> List[str]:
+        """Loads models and trainer in memory so that they don't have to be
+        loaded during predictions
+
+        Returns
+        -------
+        list_of_models : List[str]
+            List of models persisted in memory
+        """
+        self.trainer = self.load_trainer()
+        return self.trainer.persist(models, with_ancestors=with_ancestors)
+
+    def unpersist_trainer(self) -> List[str]:
+        """Unloads models and trainer from memory. Models will have to be reloaded from disk
+        when predicting.
+
+        Returns
+        -------
+        list_of_models : List[str]
+            List of models removed from memory
+        """
+        unpersisted_models = self.load_trainer().unpersist()
+        self.trainer = None
+        return unpersisted_models
+
     def refit_full(self, model: str = "all") -> Dict[str, str]:
         return self.load_trainer().refit_full(model=model)
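These learner methods back the new persistence workflow. A hedged sketch of how they might be driven, using the predictor's internal `_learner` handle (the public predictor-level wrappers added in predictor.py are not shown in this excerpt, and `train_data` is a placeholder `TimeSeriesDataFrame`):

```python
from autogluon.timeseries import TimeSeriesPredictor

predictor = TimeSeriesPredictor(prediction_length=24).fit(train_data)

# Keep the trainer and the best model resident in memory for low-latency serving
persisted = predictor._learner.persist_trainer(models="best")
print(persisted)  # names of the models now held in memory

# ... many predictor.predict(...) calls without reloading models from disk ...

predictor._learner.unpersist_trainer()  # release memory; later predictions reload from disk
```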
src/autogluon/timeseries/models/abstract/abstract_timeseries_model.py
@@ -201,7 +201,9 @@ class AbstractTimeSeriesModel(AbstractModel):
         }
         return info
 
-    def fit(self, **kwargs) -> "AbstractTimeSeriesModel":
+    def fit(
+        self, train_data: TimeSeriesDataFrame, val_data: Optional[TimeSeriesDataFrame] = None, **kwargs
+    ) -> "AbstractTimeSeriesModel":
         """Fit timeseries model.
 
         Models should not override the `fit` method, but instead override the `_fit` method which
@@ -235,7 +237,10 @@
         model: AbstractTimeSeriesModel
             The fitted model object
         """
-        return super().fit(**kwargs)
+        train_data = self.preprocess(train_data, is_train=True)
+        if self._get_tags()["can_use_val_data"] and val_data is not None:
+            val_data = self.preprocess(val_data, is_train=False)
+        return super().fit(train_data=train_data, val_data=val_data, **kwargs)
 
     def _fit(
         self,
@@ -290,6 +295,7 @@
             data is given as a separate forecast item in the dictionary, keyed by the `item_id`s
             of input items.
         """
+        data = self.preprocess(data, is_train=False)
         predictions = self._predict(data=data, known_covariates=known_covariates, **kwargs)
         logger.debug(f"Predicting with model {self.name}")
         # "0.5" might be missing from the quantiles if self is a wrapper (MultiWindowBacktestingModel or ensemble)
@@ -415,6 +421,13 @@
         hpo_executor.register_resources(self, k_fold=1, **kwargs)
         return self._hyperparameter_tune(hpo_executor=hpo_executor, **kwargs)
 
+    def persist(self) -> "AbstractTimeSeriesModel":
+        """Ask the model to persist its assets in memory, i.e., to predict with low latency. In practice
+        this is used for pretrained models that have to lazy-load model parameters to device memory at
+        prediction time.
+        """
+        return self
+
     def _hyperparameter_tune(
         self,
         train_data: TimeSeriesDataFrame,
@@ -481,7 +494,7 @@
 
         return hpo_models, analysis
 
-    def preprocess(self, data: Any, **kwargs) -> Any:
+    def preprocess(self, data: TimeSeriesDataFrame, is_train: bool = False, **kwargs) -> Any:
         return data
 
     def get_memory_size(self, **kwargs) -> Optional[int]:
@@ -499,3 +512,20 @@
             return {}
         else:
             return self._user_params.copy()
+
+    def _more_tags(self) -> dict:
+        """Encode model properties using tags, similar to sklearn & autogluon.tabular.
+
+        For more details, see `autogluon.core.models.abstract.AbstractModel._get_tags()` and https://scikit-learn.org/stable/_sources/developers/develop.rst.txt.
+
+        List of currently supported tags:
+        - allow_nan: Can the model handle data with missing values represented by np.nan?
+        - can_refit_full: Does it make sense to retrain the model without validation data?
+          See `autogluon.core.models.abstract._tags._DEFAULT_TAGS` for more details.
+        - can_use_val_data: Can the model use val_data if it's provided to model.fit()?
+        """
+        return {
+            "allow_nan": False,
+            "can_refit_full": False,
+            "can_use_val_data": False,
+        }
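Subclasses opt in to these behaviors by overriding `_more_tags`, exactly as the MLForecast, Chronos and GluonTS hunks below do. A sketch of a hypothetical custom model (a real subclass would also implement `_fit` and `_predict`):

```python
from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel

class MyCustomModel(AbstractTimeSeriesModel):
    def _more_tags(self) -> dict:
        # allow_nan=True: fit/predict pass data through without NaN imputation
        # can_use_val_data=True: fit() preprocesses and forwards val_data to _fit()
        return {"allow_nan": True, "can_use_val_data": True}
```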
src/autogluon/timeseries/models/autogluon_tabular/mlforecast.py
@@ -85,6 +85,21 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
         self._scaler: Optional[BaseTargetTransform] = None
         self._residuals_std_per_item: Optional[pd.Series] = None
         self._avg_residuals_std: Optional[float] = None
+        self._train_target_median: Optional[float] = None
+
+    def preprocess(self, data: TimeSeriesDataFrame, is_train: bool = False, **kwargs) -> Any:
+        if is_train:
+            # All-NaN series are removed; partially-NaN series in train_data are handled inside _generate_train_val_dfs
+            all_nan_items = data.item_ids[data[self.target].isna().groupby(ITEMID, sort=False).all()]
+            if len(all_nan_items):
+                data = data.query("item_id not in @all_nan_items")
+            return data
+        else:
+            data = data.fill_missing_values()
+            # Fill time series consisting of all NaNs with the median of target in train_data
+            if data.isna().any(axis=None):
+                data[self.target] = data[self.target].fillna(value=self._train_target_median)
+            return data
 
     def _get_extra_tabular_init_kwargs(self) -> dict:
         raise NotImplementedError
@@ -98,8 +113,6 @@
         return model_params
 
     def _get_mlforecast_init_args(self, train_data: TimeSeriesDataFrame, model_params: dict) -> dict:
-        # TODO: Support lag generation for all pandas frequencies
-        # TODO: Support date_feature generation for all pandas frequencies
         from mlforecast.target_transforms import Differences
 
         from .utils import MeanAbsScaler, StandardScaler
@@ -181,6 +194,10 @@
             items_to_keep = data.item_ids.to_series().sample(n=int(max_num_items))  # noqa: F841
             data = data.query("item_id in @items_to_keep")
 
+        # MLForecast.preprocess does not support missing values, but we will exclude them later from the training set
+        missing_entries = data.index[data[self.target].isna()]
+        data = data.fill_missing_values()
+
         num_items = data.num_items
         mlforecast_df = self._to_mlforecast_df(data, data.static_features)
 
@@ -197,6 +214,10 @@
 
         df = self._mask_df(df)
 
+        # We remove originally missing values filled via imputation from the training set
+        if len(missing_entries):
+            df = df.set_index(["unique_id", "ds"]).drop(missing_entries, errors="ignore").reset_index()
+
         if max_num_samples is not None and len(df) > max_num_samples:
             df = df.sample(n=max_num_samples)
 
@@ -246,6 +267,7 @@
 
         self._check_fit_params()
         fit_start_time = time.time()
+        self._train_target_median = train_data[self.target].median()
         # TabularEstimator is passed to MLForecast later to include tuning_data
         model_params = self._get_model_params()
 
@@ -355,7 +377,7 @@
         return predictions
 
     def _more_tags(self) -> dict:
-        return {"can_refit_full": True}
+        return {"allow_nan": True, "can_refit_full": True}
 
 
 class DirectTabularModel(AbstractMLForecastModel):
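Note that the train-time branch of `preprocess` only drops series whose target is entirely NaN; partially observed series are kept, and the rows imputed for them are excluded again inside `_generate_train_val_dfs`. A pandas-only illustration of the all-NaN filter, with a toy index and values:

```python
import numpy as np
import pandas as pd

target = pd.Series(
    [np.nan, np.nan, 1.0, np.nan],
    index=pd.MultiIndex.from_tuples(
        [("A", 0), ("A", 1), ("B", 0), ("B", 1)], names=["item_id", "timestamp"]
    ),
)
all_nan = target.isna().groupby(level="item_id", sort=False).all()
print(all_nan[all_nan].index.tolist())  # ['A'] -> only the fully-missing item is dropped
```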
src/autogluon/timeseries/models/chronos/model.py
@@ -18,11 +18,29 @@ logger = logging.getLogger(__name__)
 MODEL_CONFIGS = {
     "amazon/chronos-t5-tiny": {
         "num_gpus": 0,  # minimum number of required GPUs
+        "default_torch_dtype": "auto",
+        "default_batch_size": 16,
+    },
+    "amazon/chronos-t5-mini": {
+        "num_gpus": 0,
+        "default_torch_dtype": "auto",
+        "default_batch_size": 16,
+    },
+    "amazon/chronos-t5-small": {
+        "num_gpus": 1,
+        "default_torch_dtype": "bfloat16",
+        "default_batch_size": 16,
+    },
+    "amazon/chronos-t5-base": {
+        "num_gpus": 1,
+        "default_torch_dtype": "bfloat16",
+        "default_batch_size": 16,
+    },
+    "amazon/chronos-t5-large": {
+        "num_gpus": 1,
+        "default_torch_dtype": "bfloat16",
+        "default_batch_size": 8,
     },
-    "amazon/chronos-t5-mini": {"num_gpus": 0},
-    "amazon/chronos-t5-small": {"num_gpus": 1},
-    "amazon/chronos-t5-base": {"num_gpus": 1},
-    "amazon/chronos-t5-large": {"num_gpus": 1},
 }
 
 
@@ -124,7 +142,6 @@ class ChronosModel(AbstractTimeSeriesModel):
 
     # default number of samples for prediction
     default_num_samples: int = 20
-    default_batch_size: int = 16
     default_model_path = "amazon/chronos-t5-small"
     maximum_context_length = 512
 
@@ -149,7 +166,7 @@
         self.device = hyperparameters.get("device")
 
         # if the model requires a GPU, set the torch dtype to bfloat16
-        self.torch_dtype = hyperparameters.get("torch_dtype", "auto" if self.min_num_gpus == 0 else "bfloat16")
+        self.torch_dtype = hyperparameters.get("torch_dtype", self.default_torch_dtype)
 
         self.data_loader_num_workers = hyperparameters.get("data_loader_num_workers", 0)
         self.optimization_strategy: Optional[Literal["onnx", "openvino"]] = hyperparameters.get(
@@ -200,8 +217,32 @@
         return torch.cuda.is_available()
 
     @property
-    def min_num_gpus(self):
-        return MODEL_CONFIGS.get(self.model_path, {}).get("num_gpus", 0)
+    def ag_default_config(self) -> Dict[str, Any]:
+        """The default configuration of the model used by AutoGluon if the model is one of those
+        defined in MODEL_CONFIGS. For now, these are the ``amazon/chronos-t5-*`` family of models.
+        """
+        return MODEL_CONFIGS.get(self.model_path, {})
+
+    @property
+    def min_num_gpus(self) -> int:
+        """Minimum number of GPUs required for the model. For models not defined in AutoGluon,
+        this value defaults to 0.
+        """
+        return self.ag_default_config.get("num_gpus", 0)
+
+    @property
+    def default_batch_size(self) -> int:
+        """Default batch size used for the model. For models not defined in AutoGluon, this value
+        defaults to 8.
+        """
+        return self.ag_default_config.get("default_batch_size", 8)
+
+    @property
+    def default_torch_dtype(self) -> Any:
+        """Default torch data type used for the model. For models not defined in AutoGluon, this value
+        defaults to "auto".
+        """
+        return self.ag_default_config.get("default_torch_dtype", "auto")
 
     def get_minimum_resources(self, is_gpu_available: bool = False) -> Dict[str, Union[int, float]]:
         minimum_resources = {"num_cpus": 1}
@@ -211,7 +252,7 @@
         return minimum_resources
 
     def load_model_pipeline(self, context_length: Optional[int] = None):
-        from .chronos import OptimizedChronosPipeline
+        from .pipeline import OptimizedChronosPipeline
 
         gpu_available = self._is_gpu_available()
 
@@ -234,6 +275,10 @@
 
         self.model_pipeline = pipeline
 
+    def persist(self) -> "ChronosModel":
+        self.load_model_pipeline(context_length=self.context_length or self.maximum_context_length)
+        return self
+
     def _fit(
         self,
         train_data: TimeSeriesDataFrame,
@@ -283,8 +328,9 @@
         with warning_filter(all_warnings=True):
             import torch
 
-            # load model pipeline to device memory
-            self.load_model_pipeline(context_length=context_length)
+            if self.model_pipeline is None:
+                # load model pipeline to device memory
+                self.load_model_pipeline(context_length=context_length)
 
             self.model_pipeline.model.eval()
             with torch.inference_mode():
@@ -317,3 +363,6 @@
         )
 
         return TimeSeriesDataFrame(df)
+
+    def _more_tags(self) -> Dict:
+        return {"allow_nan": True}
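Since `ag_default_config` is a plain dict lookup, any model path outside `MODEL_CONFIGS` (for example, a hypothetical fine-tuned checkpoint) transparently falls back to the hard-coded defaults:

```python
from autogluon.timeseries.models.chronos.model import MODEL_CONFIGS

config = MODEL_CONFIGS.get("my-org/chronos-finetuned", {})  # hypothetical model path
print(config.get("num_gpus", 0))                  # 0
print(config.get("default_batch_size", 8))        # 8
print(config.get("default_torch_dtype", "auto"))  # "auto"
```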
autogluon.timeseries-1.0.1b20240327/src/autogluon/timeseries/models/chronos/chronos.py → autogluon.timeseries-1.0.1b20240403/src/autogluon/timeseries/models/chronos/pipeline.py
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # Original Source: https://github.com/amazon-science/chronos-forecasting
-# Author: Lorenzo Stella <stellalo@amazon.com>
+# Authors: Lorenzo Stella <stellalo@amazon.com>, Abdul Fatir Ansari <ansarnd@amazon.com>
 
 import logging
 import warnings
@@ -18,6 +18,9 @@ from autogluon.timeseries.utils.warning_filters import set_loggers_level
 logger = logging.getLogger(__name__)
 
 
+__all__ = ["ChronosConfig", "ChronosPipeline", "OptimizedChronosPipeline"]
+
+
 @dataclass
 class ChronosConfig:
     """
@@ -81,14 +84,14 @@ class ChronosTokenizer:
             A boolean tensor, same shape as ``token_ids``, indicating
             which input observations are not ``torch.nan`` (i.e. not
             missing nor padding).
-        decoding_context
+        tokenizer_state
             An object that will be passed to ``output_transform``.
             Contains the relevant context to decode output samples into
             real values, such as location and scale parameters.
         """
         raise NotImplementedError()
 
-    def output_transform(self, samples: torch.Tensor, decoding_context: Any) -> torch.Tensor:
+    def output_transform(self, samples: torch.Tensor, tokenizer_state: Any) -> torch.Tensor:
         """
         Turn a batch of sample token IDs into real values.
 
@@ -97,7 +100,7 @@
         samples
             A tensor of integers, shaped (batch_size, num_samples, time_length),
             containing token IDs of sample trajectories.
-        decoding_context
+        tokenizer_state
             An object returned by ``input_transform`` containing
             relevant context to decode samples, such as location and scale.
             The nature of this depends on the specific tokenizer.
@@ -132,13 +135,6 @@ class MeanScaleUniformBins(ChronosTokenizer):
 
         if length > self.config.context_length:
             context = context[..., -self.config.context_length :]
-        elif length < self.config.context_length:
-            padding_size = (
-                *context.shape[:-1],
-                self.config.context_length - length,
-            )
-            padding = torch.full(size=padding_size, fill_value=torch.nan)
-            context = torch.concat((padding, context), dim=-1)
 
         attention_mask = ~torch.isnan(context)
         scale = torch.nansum(torch.abs(context) * attention_mask, dim=-1) / torch.nansum(attention_mask, dim=-1)
@@ -191,7 +187,36 @@ class ChronosPretrainedModel(nn.Module):
         super().__init__()
         self.config = config
         self.model = model
-        self.device = model.device
+
+    @property
+    def device(self):
+        return self.model.device
+
+    def encode(
+        self,
+        input_ids: torch.Tensor,
+        attention_mask: torch.Tensor,
+    ):
+        """
+        Extract the encoder embedding for the given token sequences.
+
+        Parameters
+        ----------
+        input_ids
+            Tensor of indices of input sequence tokens in the vocabulary
+            with shape (batch_size, sequence_length).
+        attention_mask
+            A mask tensor of the same shape as input_ids to avoid attending
+            on padding or missing tokens.
+
+        Returns
+        -------
+        embedding
+            A tensor of encoder embeddings with shape
+            (batch_size, sequence_length, d_model).
+        """
+        assert self.config.model_type == "seq2seq", "Encoder embeddings are only supported for encoder-decoder models"
+        return self.model.encoder(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state
 
     def forward(
         self,
@@ -288,6 +313,48 @@ class ChronosPipeline:
         self.tokenizer = tokenizer
         self.model = model
 
+    def _prepare_and_validate_context(self, context: Union[torch.Tensor, List[torch.Tensor]]):
+        if isinstance(context, list):
+            context = left_pad_and_stack_1D(context)
+        assert isinstance(context, torch.Tensor)
+        if context.ndim == 1:
+            context = context.unsqueeze(0)
+        assert context.ndim == 2
+
+        return context
+
+    @torch.no_grad()
+    def embed(self, context: Union[torch.Tensor, List[torch.Tensor]]) -> Tuple[torch.Tensor, Any]:
+        """
+        Get encoder embeddings for the given time series.
+
+        Parameters
+        ----------
+        context
+            Input series. This is either a 1D tensor, or a list
+            of 1D tensors, or a 2D tensor whose first dimension
+            is batch. In the latter case, use left-padding with
+            ``torch.nan`` to align series of different lengths.
+
+        Returns
+        -------
+        embeddings, tokenizer_state
+            A tuple of two tensors: the encoder embeddings and the tokenizer_state,
+            e.g., the scale of the time series in the case of mean scaling.
+            The encoder embeddings are shaped (batch_size, context_length, d_model)
+            or (batch_size, context_length + 1, d_model), where context_length
+            is the size of the context along the time axis if a 2D tensor was provided
+            or the length of the longest time series, if a list of 1D tensors was
+            provided, and the extra 1 is for EOS.
+        """
+        context = self._prepare_and_validate_context(context=context)
+        token_ids, attention_mask, tokenizer_state = self.tokenizer.input_transform(context)
+        embeddings = self.model.encode(
+            input_ids=token_ids.to(self.model.device),
+            attention_mask=attention_mask.to(self.model.device),
+        ).cpu()
+        return embeddings, tokenizer_state
+
     def predict(
         self,
         context: Union[torch.Tensor, List[torch.Tensor]],
@@ -335,13 +402,7 @@
             Tensor of sample forecasts, of shape
             (batch_size, num_samples, prediction_length).
         """
-        if isinstance(context, list):
-            context = left_pad_and_stack_1D(context)
-        assert isinstance(context, torch.Tensor)
-        if context.ndim == 1:
-            context = context.unsqueeze(0)
-        assert context.ndim == 2
-
+        context = self._prepare_and_validate_context(context=context)
         if prediction_length is None:
             prediction_length = self.model.config.prediction_length
src/autogluon/timeseries/models/gluonts/abstract_gluonts.py
@@ -328,8 +328,6 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
 
         if self.num_feat_static_real > 0:
             feat_static_real = time_series_df.static_features[self.metadata.static_features_real]
-            if feat_static_real.isna().values.any():
-                feat_static_real = feat_static_real.fillna(feat_static_real.mean())
         else:
             feat_static_real = None
 
@@ -548,3 +546,6 @@
 
         forecast_df.index = forecast_index
         return TimeSeriesDataFrame(forecast_df)
+
+    def _more_tags(self) -> dict:
+        return {"allow_nan": True, "can_use_val_data": True}
src/autogluon/timeseries/models/local/abstract_local_model.py
@@ -1,7 +1,7 @@
 import logging
 import time
 from multiprocessing import TimeoutError, cpu_count
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 import numpy as np
 import pandas as pd
@@ -85,6 +85,12 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
         self._local_model_args: Dict[str, Any] = None
         self._seasonal_period: Optional[int] = None
         self.time_limit: Optional[float] = None
+        self._dummy_forecast: Optional[pd.DataFrame] = None
+
+    def preprocess(self, data: TimeSeriesDataFrame, is_train: bool = False, **kwargs) -> Any:
+        if not self._get_tags()["allow_nan"]:
+            data = data.fill_missing_values()
+        return data
 
     def _fit(self, train_data: TimeSeriesDataFrame, time_limit: Optional[int] = None, **kwargs):
         self._check_fit_params()
@@ -115,8 +121,16 @@
 
         self._local_model_args = self._update_local_model_args(local_model_args=local_model_args)
         self.time_limit = time_limit
+
+        self._dummy_forecast = self._get_dummy_forecast(train_data)
         return self
 
+    def _get_dummy_forecast(self, train_data: TimeSeriesDataFrame) -> pd.DataFrame:
+        agg_functions = ["mean"] + [get_quantile_function(q) for q in self.quantile_levels]
+        stats_marginal = train_data[self.target].agg(agg_functions)
+        stats_repeated = np.tile(stats_marginal.values, [self.prediction_length, 1])
+        return pd.DataFrame(stats_repeated, columns=stats_marginal.index)
+
     def _update_local_model_args(self, local_model_args: Dict[str, Any]) -> Dict[str, Any]:
         return local_model_args
 
@@ -164,25 +178,30 @@
     def _predict_wrapper(self, time_series: pd.Series, end_time: Optional[float] = None) -> Tuple[pd.DataFrame, bool]:
         if end_time is not None and time.time() >= end_time:
             raise TimeLimitExceeded
-        try:
-            result = self._predict_with_local_model(
-                time_series=time_series,
-                local_model_args=self._local_model_args.copy(),
-            )
-            if not np.isfinite(result.values).all():
-                raise RuntimeError("Forecast contains NaN or Inf values.")
-            model_failed = False
-        except Exception:
-            if self.use_fallback_model:
-                result = seasonal_naive_forecast(
-                    target=time_series.values.ravel(),
-                    prediction_length=self.prediction_length,
-                    quantile_levels=self.quantile_levels,
-                    seasonal_period=self._seasonal_period,
-                )
-                model_failed = True
-            else:
-                raise
+
+        if time_series.isna().all():
+            result = self._dummy_forecast.copy()
+            model_failed = True
+        else:
+            try:
+                result = self._predict_with_local_model(
+                    time_series=time_series,
+                    local_model_args=self._local_model_args.copy(),
+                )
+                if not np.isfinite(result.values).all():
+                    raise RuntimeError("Forecast contains NaN or Inf values.")
+                model_failed = False
+            except Exception:
+                if self.use_fallback_model:
+                    result = seasonal_naive_forecast(
+                        target=time_series.values.ravel(),
+                        prediction_length=self.prediction_length,
+                        quantile_levels=self.quantile_levels,
+                        seasonal_period=self._seasonal_period,
+                    )
+                    model_failed = True
+                else:
+                    raise
         return result, model_failed
 
     def _predict_with_local_model(
@@ -197,25 +216,51 @@ def seasonal_naive_forecast(
     target: np.ndarray, prediction_length: int, quantile_levels: List[float], seasonal_period: int
 ) -> pd.DataFrame:
     """Generate seasonal naive forecast, predicting the last observed value from the same period."""
+
+    def numpy_ffill(arr: np.ndarray) -> np.ndarray:
+        """Fast implementation of forward fill in numpy."""
+        idx = np.arange(len(arr))
+        mask = np.isnan(arr)
+        idx[mask] = 0
+        return arr[np.maximum.accumulate(idx)]
+
     forecast = {}
+    # Convert to float64 since std computation can be unstable in float32
+    target = target.astype(np.float64)
     # At least seasonal_period + 2 values are required to compute sigma for seasonal naive
     if len(target) > seasonal_period + 1 and seasonal_period > 1:
+        if np.isnan(target[-(seasonal_period + 2) :]).any():
+            target = numpy_ffill(target)
+
         indices = [len(target) - seasonal_period + k % seasonal_period for k in range(prediction_length)]
         forecast["mean"] = target[indices]
         residuals = target[seasonal_period:] - target[:-seasonal_period]
 
-        sigma = np.sqrt(np.mean(np.square(residuals)))
+        sigma = np.sqrt(np.nanmean(np.square(residuals)))
         num_full_seasons = np.arange(1, prediction_length + 1) // seasonal_period
         sigma_per_timestep = sigma * np.sqrt(num_full_seasons + 1)
     else:
         # Fall back to naive forecast
-        forecast["mean"] = np.full(shape=[prediction_length], fill_value=target[-1])
+        last_observed_value = target[np.isfinite(target)][-1]
+        forecast["mean"] = np.full(shape=[prediction_length], fill_value=last_observed_value)
         residuals = target[1:] - target[:-1]
 
-        sigma = np.sqrt(np.mean(np.square(residuals)))
+        sigma = np.sqrt(np.nanmean(np.square(residuals)))
+        if np.isnan(sigma):  # happens if there are no two consecutive non-nan observations
+            sigma = 0.0
         sigma_per_timestep = sigma * np.sqrt(np.arange(1, prediction_length + 1))
 
     for q in quantile_levels:
         forecast[str(q)] = forecast["mean"] + norm.ppf(q) * sigma_per_timestep
 
     return pd.DataFrame(forecast)
+
+
+def get_quantile_function(q: float) -> Callable:
+    """Returns a function with name "q" that computes the q'th quantile of a pandas.Series."""
+
+    def quantile_fn(x: pd.Series) -> pd.Series:
+        return x.quantile(q)
+
+    quantile_fn.__name__ = str(q)
+    return quantile_fn
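A quick check of the new helper: the closure's `__name__` becomes the aggregation label, which is how `_get_dummy_forecast` ends up with columns named after the quantile levels:

```python
import pandas as pd
from autogluon.timeseries.models.local.abstract_local_model import get_quantile_function

s = pd.Series([1.0, 2.0, 3.0, 4.0])
q90 = get_quantile_function(0.9)
print(q90.__name__)          # "0.9"
print(s.agg(["mean", q90]))  # index labels: "mean" and "0.9"
```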