PyPI - autogluon.timeseries - Versions diffs - 1.0.1b20240407__tar.gz → 1.1.0__tar.gz - Mend

autogluon.timeseries 1.0.1b20240407__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of autogluon.timeseries might be problematic. Click here for more details.

Files changed (64) hide show

{autogluon.timeseries-1.0.1b20240407 → autogluon.timeseries-1.1.0}/PKG-INFO RENAMED Viewed

@@ -1,7 +1,7 @@
 Metadata-Version: 2.1
 Name: autogluon.timeseries
-Version: 1.0.1b20240407
-Summary: AutoML for Image, Text, and Tabular Data
+Version: 1.1.0
+Summary: Fast and Accurate ML in 3 Lines of Code
 Home-page: https://github.com/autogluon/autogluon
 Author: AutoGluon Community
 License: Apache-2.0
@@ -14,7 +14,7 @@ Description:
         <div align="center">
         <img src="https://user-images.githubusercontent.com/16392542/77208906-224aa500-6aba-11ea-96bd-e81806074030.png" width="350">
-        ## AutoML for Image, Text, Time Series, and Tabular Data
+        ## Fast and Accurate ML in 3 Lines of Code
         [![Latest Release](https://img.shields.io/github/v/release/autogluon/autogluon)](https://github.com/autogluon/autogluon/releases)
         [![Conda Forge](https://img.shields.io/conda/vn/conda-forge/autogluon.svg)](https://anaconda.org/conda-forge/autogluon)
@@ -107,7 +107,7 @@ Description:
         This library is licensed under the Apache 2.0 License.
 Platform: UNKNOWN
-Classifier: Development Status :: 4 - Beta
+Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Education
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Science/Research

{autogluon.timeseries-1.0.1b20240407 → autogluon.timeseries-1.1.0}/setup.py RENAMED Viewed

@@ -30,8 +30,7 @@ install_requires = [
     "pytorch_lightning",  # version range defined in `core/_setup_utils.py`
     "transformers[sentencepiece]",  # version range defined in `core/_setup_utils.py`
     "accelerate",  # version range defined in `core/_setup_utils.py`
-    "statsmodels>=0.13.0,<0.15",
-    "gluonts>=0.14.0,<0.15",
+    "gluonts>=0.14.0,<0.14.4",  # 0.14.4 caps pandas<2.2
     "networkx",  # version range defined in `core/_setup_utils.py`
     # TODO: update statsforecast to v1.5.0 - resolve antlr4-python3-runtime dependency clash with multimodal
     "statsforecast>=1.4.0,<1.5",
@@ -56,16 +55,16 @@ extras_require = {
         "black~=23.0",
     ],
     "chronos-openvino": [  # for faster CPU inference in pretrained models with OpenVINO
-        "optimum[openvino,nncf]>=1.17,<1.18",
+        "optimum-intel[openvino,nncf]>=1.15,<1.17",
+        "optimum[openvino,nncf]>=1.17,<1.19",
     ],
     "chronos-onnx": [  # for faster CPU inference in pretrained models with ONNX
-        "optimum[onnxruntime]>=1.17,<1.18",
+        "optimum[onnxruntime]>=1.17,<1.19",
     ],
 }
-extras_require["all"] = list(
-    set.union(*(set(extras_require[extra]) for extra in ["chronos-onnx", "chronos-openvino"]))
-)
+# TODO: add openvino back to "all" after dependency versions are relaxed
+extras_require["all"] = list(set.union(*(set(extras_require[extra]) for extra in ["chronos-onnx"])))
 install_requires = ag.get_dependency_version_ranges(install_requires)

{autogluon.timeseries-1.0.1b20240407 → autogluon.timeseries-1.1.0}/src/autogluon/timeseries/configs/presets_configs.py RENAMED Viewed

@@ -33,13 +33,13 @@ TIMESERIES_PRESETS_CONFIGS = dict(
     chronos_ensemble={
         "hyperparameters": {
             "Chronos": {"model_path": "small"},
-            **get_default_hps("default"),
+            **get_default_hps("light_inference"),
         }
     },
     chronos_large_ensemble={
         "hyperparameters": {
             "Chronos": {"model_path": "large", "batch_size": 8},
-            **get_default_hps("default"),
+            **get_default_hps("light_inference"),
         }
     },
 )

{autogluon.timeseries-1.0.1b20240407 → autogluon.timeseries-1.1.0}/src/autogluon/timeseries/dataset/ts_dataframe.py RENAMED Viewed

@@ -134,7 +134,7 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
     ----------
     freq : str
         A pandas-compatible string describing the frequency of the time series. For example ``"D"`` for daily data,
-        ``"H"`` for hourly data, etc. This attribute is determined automatically based on the timestamps. For the full
+        ``"h"`` for hourly data, etc. This attribute is determined automatically based on the timestamps. For the full
         list of possible values, see `pandas documentation <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_.
     num_items : int
         Number of items (time series) in the data set.
@@ -759,12 +759,6 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
                 2019-02-07     4.0
         """
-        if self.freq is None:
-            raise ValueError(
-                "Please make sure that all time series have a regular index before calling `fill_missing_values`"
-                "(for example, using the `convert_frequency` method)."
-            )
         # Convert to pd.DataFrame for faster processing
         df = pd.DataFrame(self)
@@ -772,6 +766,12 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
         if not df.isna().any(axis=None):
             return self
+        if not self.index.is_monotonic_increasing:
+            logger.warning(
+                "Trying to fill missing values in an unsorted dataframe. "
+                "It is highly recommended to call `ts_df.sort_index()` before calling `ts_df.fill_missing_values()`"
+            )
         grouped_df = df.groupby(level=ITEMID, sort=False, group_keys=False)
         if method == "auto":
             filled_df = grouped_df.ffill()
@@ -961,12 +961,12 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
                 2021-06-30     6.0
                 2021-09-30     7.0
                 2021-12-31     8.0
-        >>> ts_df.convert_frequency("Y")
+        >>> ts_df.convert_frequency("YE")
                             target
         item_id timestamp
         0       2020-12-31     2.5
                 2021-12-31     6.5
-        >>> ts_df.convert_frequency("Y", agg_numeric="sum")
+        >>> ts_df.convert_frequency("YE", agg_numeric="sum")
                             target
         item_id timestamp
         0       2020-12-31    10.0

{autogluon.timeseries-1.0.1b20240407 → autogluon.timeseries-1.1.0}/src/autogluon/timeseries/learner.py RENAMED Viewed

@@ -43,6 +43,7 @@ class TimeSeriesLearner(AbstractLearner):
         self.prediction_length = prediction_length
         self.quantile_levels = kwargs.get("quantile_levels", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
         self.cache_predictions = cache_predictions
+        self.freq: Optional[str] = None
         self.feature_generator = TimeSeriesFeatureGenerator(
             target=self.target, known_covariates_names=self.known_covariates_names
@@ -87,6 +88,8 @@ class TimeSeriesLearner(AbstractLearner):
         if val_data is not None:
             val_data = self.feature_generator.transform(val_data, data_frame_name="tuning_data")
+        self.freq = train_data.freq
         trainer_init_kwargs = kwargs.copy()
         trainer_init_kwargs.update(
             dict(
@@ -155,7 +158,9 @@ class TimeSeriesLearner(AbstractLearner):
                 f"known_covariates are missing information for the following item_ids: {reprlib.repr(missing_item_ids.to_list())}."
             )
-        forecast_index = get_forecast_horizon_index_ts_dataframe(data, prediction_length=self.prediction_length)
+        forecast_index = get_forecast_horizon_index_ts_dataframe(
+            data, prediction_length=self.prediction_length, freq=self.freq
+        )
         try:
             known_covariates = known_covariates.loc[forecast_index]
         except KeyError:
@@ -245,8 +250,8 @@ class TimeSeriesLearner(AbstractLearner):
                     raise ValueError(f"Feature {fn} not found in covariate metadata or the dataset.")
         if len(set(features)) < len(features):
-            logger.warning(
-                "Duplicate feature names provided to compute feature importance. This will lead to unexpected behavior. "
+            raise ValueError(
+                "Duplicate feature names provided to compute feature importance. "
                 "Please provide unique feature names across both static features and covariates."
             )

{autogluon.timeseries-1.0.1b20240407 → autogluon.timeseries-1.1.0}/src/autogluon/timeseries/models/abstract/abstract_timeseries_model.py RENAMED Viewed

@@ -31,7 +31,7 @@ class AbstractTimeSeriesModel(AbstractModel):
         If None, a new unique time-stamped directory is chosen.
     freq: str
         Frequency string (cf. gluonts frequency strings) describing the frequency
-        of the time series data. For example, "H" for hourly or "D" for daily data.
+        of the time series data. For example, "h" for hourly or "D" for daily data.
     prediction_length: int
         Length of the prediction horizon, i.e., the number of time steps the model
         is fit to forecast.
@@ -373,13 +373,14 @@ class AbstractTimeSeriesModel(AbstractModel):
         val_data: TimeSeriesDataFrame,
         store_val_score: bool = False,
         store_predict_time: bool = False,
+        **predict_kwargs,
     ) -> None:
         """Compute val_score, predict_time and cache out-of-fold (OOF) predictions."""
         past_data, known_covariates = val_data.get_model_inputs_for_scoring(
             prediction_length=self.prediction_length, known_covariates_names=self.metadata.known_covariates
         )
         predict_start_time = time.time()
-        oof_predictions = self.predict(past_data, known_covariates=known_covariates)
+        oof_predictions = self.predict(past_data, known_covariates=known_covariates, **predict_kwargs)
         self._oof_predictions = [oof_predictions]
         if store_predict_time:
             self.predict_time = time.time() - predict_start_time

{autogluon.timeseries-1.0.1b20240407 → autogluon.timeseries-1.1.0}/src/autogluon/timeseries/models/autogluon_tabular/mlforecast.py RENAMED Viewed

@@ -252,6 +252,15 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
         if static_features is not None:
             df = pd.merge(df, static_features, how="left", on=ITEMID, suffixes=(None, "_static_feat"))
+        for col in self.metadata.known_covariates_real:
+            # Normalize non-boolean features using mean_abs scaling
+            if not df[col].isin([0, 1]).all():
+                df[f"__scaled_{col}"] = df[col] / df[col].abs().groupby(df[ITEMID]).mean().reindex(df[ITEMID]).values
+        # Convert float64 to float32 to reduce memory usage
+        float64_cols = list(df.select_dtypes(include="float64"))
+        df[float64_cols] = df[float64_cols].astype("float32")
         # We assume that df is sorted by 'unique_id' inside `TimeSeriesPredictor._check_and_prepare_data_frame`
         return df.rename(columns=column_name_mapping)
@@ -332,7 +341,7 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
             Seasonal naive forecast for short series, if there are any in the dataset.
         """
         ts_lengths = data.num_timesteps_per_item()
-        short_series = ts_lengths.index[ts_lengths <= self._sum_of_differences]
+        short_series = ts_lengths.index[ts_lengths <= self._sum_of_differences + 1]
         if len(short_series) > 0:
             logger.warning(
                 f"Warning: {len(short_series)} time series ({len(short_series) / len(ts_lengths):.1%}) are shorter "
@@ -474,7 +483,7 @@ class DirectTabularModel(AbstractMLForecastModel):
         if known_covariates is not None:
             data_future = known_covariates.copy()
         else:
-            future_index = get_forecast_horizon_index_ts_dataframe(data, self.prediction_length)
+            future_index = get_forecast_horizon_index_ts_dataframe(data, self.prediction_length, freq=self.freq)
             data_future = pd.DataFrame(columns=[self.target], index=future_index, dtype="float32")
         # MLForecast raises exception of target contains NaN. We use inf as placeholder, replace them by NaN afterwards
         data_future[self.target] = float("inf")
@@ -606,7 +615,7 @@ class RecursiveTabularModel(AbstractMLForecastModel):
         if self._max_ts_length is not None:
             new_df = self._shorten_all_series(new_df, self._max_ts_length)
         if known_covariates is None:
-            future_index = get_forecast_horizon_index_ts_dataframe(data, self.prediction_length)
+            future_index = get_forecast_horizon_index_ts_dataframe(data, self.prediction_length, freq=self.freq)
             known_covariates = pd.DataFrame(columns=[self.target], index=future_index, dtype="float32")
         X_df = self._to_mlforecast_df(known_covariates, data.static_features, include_target=False)
         # If both covariates & static features are missing, set X_df = None to avoid exception from MLForecast

{autogluon.timeseries-1.0.1b20240407 → autogluon.timeseries-1.1.0}/src/autogluon/timeseries/models/chronos/model.py RENAMED Viewed

@@ -53,42 +53,6 @@ MODEL_ALIASES = {
 }
-class ChronosInferenceDataset:
-    """A container for time series datasets that implements the ``torch.utils.data.Dataset`` interface"""
-    def __init__(
-        self,
-        target_df: TimeSeriesDataFrame,
-        context_length: int,
-        target_column: str = "target",
-    ):
-        assert context_length > 0
-        self.context_length = context_length
-        self.target_array = target_df[target_column].to_numpy(dtype=np.float32)
-        self.freq = target_df.freq
-        # store pointer to start:end of each time series
-        cum_sizes = target_df.num_timesteps_per_item().values.cumsum()
-        self.indptr = np.append(0, cum_sizes).astype(np.int32)
-    def __len__(self):
-        return len(self.indptr) - 1  # noqa
-    def _get_context(self, a: np.ndarray, pad_value=np.nan):
-        a = a[-self.context_length :]
-        pad_size = self.context_length - len(a)
-        if pad_size > 0:
-            pad = np.full(shape=(pad_size,), fill_value=pad_value)
-            a = np.concatenate((pad, a))
-        return a
-    def __getitem__(self, idx) -> np.ndarray:
-        start_idx = self.indptr[idx]
-        end_idx = self.indptr[idx + 1]
-        return self._get_context(self.target_array[start_idx:end_idx])
 class ChronosModel(AbstractTimeSeriesModel):
     """Chronos pretrained time series forecasting models, based on the original
     `ChronosModel <https://github.com/amazon-science/chronos-forecasting>`_ implementation.
@@ -196,6 +160,7 @@ class ChronosModel(AbstractTimeSeriesModel):
         )
         self.model_pipeline: Optional[Any] = None  # of type OptimizedChronosPipeline
+        self.time_limit: Optional[float] = None
     def save(self, path: str = None, verbose: bool = True) -> str:
         pipeline = self.model_pipeline
@@ -288,14 +253,16 @@ class ChronosModel(AbstractTimeSeriesModel):
         **kwargs,
     ) -> None:
         self._check_fit_params()
+        self.time_limit = time_limit
     def _get_inference_data_loader(
         self,
         data: TimeSeriesDataFrame,
         context_length: int,
         num_workers: int = 0,
+        time_limit: Optional[float] = None,
     ):
-        import torch
+        from .utils import ChronosInferenceDataLoader, ChronosInferenceDataset, timeout_callback
         chronos_dataset = ChronosInferenceDataset(
             target_df=data,
@@ -303,11 +270,12 @@ class ChronosModel(AbstractTimeSeriesModel):
             context_length=context_length,
         )
-        return torch.utils.data.DataLoader(
+        return ChronosInferenceDataLoader(
             chronos_dataset,
             batch_size=self.batch_size,
             shuffle=False,
             num_workers=num_workers,
+            on_batch=timeout_callback(seconds=time_limit),
         )
     def _predict(
@@ -333,6 +301,12 @@ class ChronosModel(AbstractTimeSeriesModel):
                 # load model pipeline to device memory
                 self.load_model_pipeline(context_length=context_length)
+            inference_data_loader = self._get_inference_data_loader(
+                data=data,
+                num_workers=self.data_loader_num_workers,
+                context_length=context_length,
+                time_limit=kwargs.get("time_limit"),
+            )
             self.model_pipeline.model.eval()
             with torch.inference_mode():
                 prediction_samples = [
@@ -345,11 +319,7 @@ class ChronosModel(AbstractTimeSeriesModel):
                     .detach()
                     .cpu()
                     .numpy()
-                    for batch in self._get_inference_data_loader(
-                        data=data,
-                        num_workers=self.data_loader_num_workers,
-                        context_length=context_length,
-                    )
+                    for batch in inference_data_loader
                 ]
         samples = np.concatenate(prediction_samples, axis=0).swapaxes(1, 2).reshape(-1, self.num_samples)
@@ -360,10 +330,23 @@ class ChronosModel(AbstractTimeSeriesModel):
         df = pd.DataFrame(
             np.concatenate([mean, quantiles], axis=1),
             columns=["mean"] + [str(q) for q in self.quantile_levels],
-            index=get_forecast_horizon_index_ts_dataframe(data, self.prediction_length),
+            index=get_forecast_horizon_index_ts_dataframe(data, self.prediction_length, freq=self.freq),
         )
         return TimeSeriesDataFrame(df)
     def _more_tags(self) -> Dict:
         return {"allow_nan": True}
+    def score_and_cache_oof(
+        self,
+        val_data: TimeSeriesDataFrame,
+        store_val_score: bool = False,
+        store_predict_time: bool = False,
+        **predict_kwargs,
+    ) -> None:
+        # All computation happens during inference, so we provide the time_limit at prediction time
+        # TODO: Once custom predict_kwargs is allowed, make sure that `time_limit` is not among the keys
+        super().score_and_cache_oof(
+            val_data, store_val_score, store_predict_time, time_limit=self.time_limit, **predict_kwargs
+        )

autogluon.timeseries-1.1.0/src/autogluon/timeseries/models/chronos/utils.py ADDED Viewed

@@ -0,0 +1,66 @@
+import time
+from typing import Callable, Optional
+import numpy as np
+import torch
+from autogluon.core.utils.exceptions import TimeLimitExceeded
+from autogluon.timeseries.dataset.ts_dataframe import TimeSeriesDataFrame
+class ChronosInferenceDataset:
+    """A container for time series datasets that implements the ``torch.utils.data.Dataset`` interface"""
+    def __init__(
+        self,
+        target_df: TimeSeriesDataFrame,
+        context_length: int,
+        target_column: str = "target",
+    ):
+        assert context_length > 0
+        self.context_length = context_length
+        self.target_array = target_df[target_column].to_numpy(dtype=np.float32)
+        self.freq = target_df.freq
+        # store pointer to start:end of each time series
+        cum_sizes = target_df.num_timesteps_per_item().values.cumsum()
+        self.indptr = np.append(0, cum_sizes).astype(np.int32)
+    def __len__(self):
+        return len(self.indptr) - 1  # noqa
+    def _get_context(self, a: np.ndarray, pad_value=np.nan):
+        a = a[-self.context_length :]
+        pad_size = self.context_length - len(a)
+        if pad_size > 0:
+            pad = np.full(shape=(pad_size,), fill_value=pad_value)
+            a = np.concatenate((pad, a))
+        return a
+    def __getitem__(self, idx) -> np.ndarray:
+        start_idx = self.indptr[idx]
+        end_idx = self.indptr[idx + 1]
+        return self._get_context(self.target_array[start_idx:end_idx])
+class ChronosInferenceDataLoader(torch.utils.data.DataLoader):
+    def __init__(self, *args, **kwargs):
+        self.callback: Callable = kwargs.pop("on_batch", lambda: None)
+        super().__init__(*args, **kwargs)
+    def __iter__(self):
+        for item in super().__iter__():
+            yield item
+            self.callback()
+def timeout_callback(seconds: Optional[float]) -> Callable:
+    """Return a callback object that raises an exception if time limit is exceeded."""
+    start_time = time.time()
+    def callback() -> None:
+        if seconds is not None and time.time() - start_time > seconds:
+            raise TimeLimitExceeded
+    return callback

{autogluon.timeseries-1.0.1b20240407 → autogluon.timeseries-1.1.0}/src/autogluon/timeseries/models/gluonts/abstract_gluonts.py RENAMED Viewed

@@ -15,7 +15,6 @@ from gluonts.dataset.field_names import FieldName
 from gluonts.model.estimator import Estimator as GluonTSEstimator
 from gluonts.model.forecast import Forecast, QuantileForecast, SampleForecast
 from gluonts.model.predictor import Predictor as GluonTSPredictor
-from pandas.tseries.frequencies import to_offset
 from sklearn.compose import ColumnTransformer
 from sklearn.preprocessing import QuantileTransformer, StandardScaler
@@ -37,15 +36,13 @@ logger = logging.getLogger(__name__)
 gts_logger = logging.getLogger(gluonts.__name__)
-GLUONTS_SUPPORTED_OFFSETS = ["Y", "Q", "M", "W", "D", "B", "H", "T", "min", "S"]
 class SimpleGluonTSDataset(GluonTSDataset):
     """Wrapper for TimeSeriesDataFrame that is compatible with the GluonTS Dataset API."""
     def __init__(
         self,
         target_df: TimeSeriesDataFrame,
+        freq: str,
         target_column: str = "target",
         feat_static_cat: Optional[np.ndarray] = None,
         feat_static_real: Optional[np.ndarray] = None,
@@ -57,7 +54,6 @@ class SimpleGluonTSDataset(GluonTSDataset):
         prediction_length: int = None,
     ):
         assert target_df is not None
-        assert target_df.freq, "Initializing GluonTS data sets without freq is not allowed"
         # Convert TimeSeriesDataFrame to pd.Series for faster processing
         self.target_array = target_df[target_column].to_numpy(np.float32)
         self.feat_static_cat = self._astype(feat_static_cat, dtype=np.int64)
@@ -66,7 +62,7 @@ class SimpleGluonTSDataset(GluonTSDataset):
         self.feat_dynamic_real = self._astype(feat_dynamic_real, dtype=np.float32)
         self.past_feat_dynamic_cat = self._astype(past_feat_dynamic_cat, dtype=np.int64)
         self.past_feat_dynamic_real = self._astype(past_feat_dynamic_real, dtype=np.float32)
-        self.freq = self._to_gluonts_freq(target_df.freq)
+        self.freq = self._get_freq_for_period(freq)
         # Necessary to compute indptr for known_covariates at prediction time
         self.includes_future = includes_future
@@ -89,19 +85,22 @@ class SimpleGluonTSDataset(GluonTSDataset):
             return array.astype(dtype)
     @staticmethod
-    def _to_gluonts_freq(freq: str) -> str:
-        # FIXME: GluonTS expects a frequency string, but only supports a limited number of such strings
-        # for feature generation. If the frequency string doesn't match or is not provided, it raises an exception.
-        # Here we bypass this by issuing a default "yearly" frequency, tricking it into not producing
-        # any lags or features.
-        pd_offset = to_offset(freq)
-        # normalize freq str to handle peculiarities such as W-SUN
-        offset_base_alias = norm_freq_str(pd_offset)
-        if offset_base_alias not in GLUONTS_SUPPORTED_OFFSETS:
-            return "A"
+    def _get_freq_for_period(freq: str) -> str:
+        """Convert freq to format compatible with pd.Period.
+        For example, ME freq must be converted to M when creating a pd.Period.
+        """
+        offset = pd.tseries.frequencies.to_offset(freq)
+        freq_name = norm_freq_str(offset)
+        if freq_name == "SME":
+            # Replace unsupported frequency "SME" with "2W"
+            return "2W"
+        elif freq_name == "bh":
+            # Replace unsupported frequency "bh" with dummy value "Y"
+            return "Y"
         else:
-            return f"{pd_offset.n}{offset_base_alias}"
+            freq_name_for_period = {"YE": "Y", "QE": "Q", "ME": "M"}.get(freq_name, freq_name)
+            return f"{offset.n}{freq_name_for_period}"
     def __len__(self):
         return len(self.indptr) - 1  # noqa
@@ -161,6 +160,8 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
     """
     gluonts_model_path = "gluon_ts"
+    # we pass dummy freq compatible with pandas 2.1 & 2.2 to GluonTS models
+    _dummy_gluonts_freq = "D"
     # default number of samples for prediction
     default_num_samples: int = 250
     supports_cat_covariates: bool = False
@@ -234,13 +235,6 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
     def _deferred_init_params_aux(self, dataset: TimeSeriesDataFrame) -> None:
         """Update GluonTS specific parameters with information available only at training time."""
-        self.freq = dataset.freq or self.freq
-        if not self.freq:
-            raise ValueError(
-                "Dataset frequency not provided in the dataset, fit arguments or "
-                "during initialization. Please provide a `freq` string to `fit`."
-            )
         model_params = self._get_model_params()
         disable_static_features = model_params.get("disable_static_features", False)
         if not disable_static_features:
@@ -371,7 +365,7 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
         init_args.setdefault("early_stopping_patience", 20)
         init_args.update(
             dict(
-                freq=self.freq,
+                freq=self._dummy_gluonts_freq,
                 prediction_length=self.prediction_length,
                 quantiles=self.quantile_levels,
                 callbacks=self.callbacks,
@@ -502,6 +496,7 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
             return SimpleGluonTSDataset(
                 target_df=time_series_df[[self.target]],
+                freq=self.freq,
                 target_column=self.target,
                 feat_static_cat=feat_static_cat,
                 feat_static_real=feat_static_real,
@@ -592,7 +587,7 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
             predicted_targets = self._predict_gluonts_forecasts(data, known_covariates=known_covariates, **kwargs)
             df = self._gluonts_forecasts_to_data_frame(
                 predicted_targets,
-                forecast_index=get_forecast_horizon_index_ts_dataframe(data, self.prediction_length),
+                forecast_index=get_forecast_horizon_index_ts_dataframe(data, self.prediction_length, freq=self.freq),
             )
         return df

{autogluon.timeseries-1.0.1b20240407 → autogluon.timeseries-1.1.0}/src/autogluon/timeseries/models/gluonts/torch/models.py RENAMED Viewed

@@ -423,6 +423,4 @@ class WaveNetModel(AbstractGluonTSModel):
         init_kwargs.setdefault("seasonality", get_seasonality(self.freq))
         init_kwargs.setdefault("time_features", get_time_features_for_frequency(self.freq))
         init_kwargs.setdefault("num_parallel_samples", self.default_num_samples)
-        # WaveNet model fails if an unsupported frequency such as "SM" is provided. We provide a dummy freq instead
-        init_kwargs["freq"] = "H"
         return init_kwargs

{autogluon.timeseries-1.0.1b20240407 → autogluon.timeseries-1.1.0}/src/autogluon/timeseries/models/local/abstract_local_model.py RENAMED Viewed

@@ -144,9 +144,10 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
         # timeout ensures that no individual job takes longer than time_limit
         # TODO: a job started late may still exceed time_limit - how to prevent that?
-        timeout = None if self.n_jobs == 1 else self.time_limit
+        time_limit = kwargs.get("time_limit")
+        timeout = None if self.n_jobs == 1 else time_limit
         # end_time ensures that no new jobs are started after time_limit is exceeded
-        end_time = None if self.time_limit is None else time.time() + self.time_limit
+        end_time = None if time_limit is None else time.time() + time_limit
         executor = Parallel(self.n_jobs, timeout=timeout)
         try:
@@ -165,23 +166,28 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
                 f"({fraction_failed_models:.1%}). Fallback model SeasonalNaive was used for these time series."
             )
         predictions_df = pd.concat([pred for pred, _ in predictions_with_flags])
-        predictions_df.index = get_forecast_horizon_index_ts_dataframe(data, self.prediction_length)
+        predictions_df.index = get_forecast_horizon_index_ts_dataframe(data, self.prediction_length, freq=self.freq)
         return TimeSeriesDataFrame(predictions_df)
     def score_and_cache_oof(
-        self, val_data: TimeSeriesDataFrame, store_val_score: bool = False, store_predict_time: bool = False
+        self,
+        val_data: TimeSeriesDataFrame,
+        store_val_score: bool = False,
+        store_predict_time: bool = False,
+        **predict_kwargs,
     ) -> None:
-        super().score_and_cache_oof(val_data, store_val_score, store_predict_time)
-        # Remove time_limit for future predictions
-        self.time_limit = None
+        # All computation happens during inference, so we provide the time_limit at prediction time
+        super().score_and_cache_oof(
+            val_data, store_val_score, store_predict_time, time_limit=self.time_limit, **predict_kwargs
+        )
     def _predict_wrapper(self, time_series: pd.Series, end_time: Optional[float] = None) -> Tuple[pd.DataFrame, bool]:
         if end_time is not None and time.time() >= end_time:
             raise TimeLimitExceeded
+        model_failed = False
         if time_series.isna().all():
             result = self._dummy_forecast.copy()
-            model_failed = True
         else:
             try:
                 result = self._predict_with_local_model(
@@ -190,7 +196,6 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
                 )
                 if not np.isfinite(result.values).all():
                     raise RuntimeError("Forecast contains NaN or Inf values.")
-                model_failed = False
             except Exception:
                 if self.use_fallback_model:
                     result = seasonal_naive_forecast(
@@ -225,8 +230,6 @@ def seasonal_naive_forecast(
         return arr[np.maximum.accumulate(idx)]
     forecast = {}
-    # Convert to float64 since std computation can be unstable in float32
-    target = target.astype(np.float64)
     # At least seasonal_period + 2 values are required to compute sigma for seasonal naive
     if len(target) > seasonal_period + 1 and seasonal_period > 1:
         if np.isnan(target[-(seasonal_period + 2) :]).any():

{autogluon.timeseries-1.0.1b20240407 → autogluon.timeseries-1.1.0}/src/autogluon/timeseries/models/multi_window/multi_window_model.py RENAMED Viewed

@@ -189,6 +189,7 @@ class MultiWindowBacktestingModel(AbstractTimeSeriesModel):
         val_data: TimeSeriesDataFrame,
         store_val_score: bool = False,
         store_predict_time: bool = False,
+        **predict_kwargs,
     ) -> None:
         # self.val_score, self.predict_time, self._oof_predictions already saved during _fit()
         assert self._oof_predictions is not None

autogluon.timeseries 1.0.1b20240407__tar.gz → 1.1.0__tar.gz

Potentially problematic release.

autogluon.timeseries 1.0.1b20240407__tar.gz → 1.1.0__tar.gz