PyPI - autogluon.timeseries - Versions diffs - 1.0.1b20240304__py3-none-any.whl → 1.4.1b20251210__py3-none-any.whl - Mend

autogluon.timeseries 1.0.1b20240304__py3-none-any.whl → 1.4.1b20251210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of autogluon.timeseries might be problematic. Click here for more details.

Files changed (108) hide show

autogluon/timeseries/models/local/abstract_local_model.py CHANGED Viewed

@@ -1,27 +1,24 @@
 import logging
 import time
-from multiprocessing import TimeoutError, cpu_count
-from typing import Any, Dict, List, Optional, Tuple, Union
+from multiprocessing import TimeoutError
+from typing import Any, Callable
 import numpy as np
 import pandas as pd
-from joblib import Parallel, delayed
+from joblib import Parallel, cpu_count, delayed
 from scipy.stats import norm
 from autogluon.core.utils.exceptions import TimeLimitExceeded
-from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TimeSeriesDataFrame
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
+from autogluon.timeseries.metrics import TimeSeriesScorer
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
+from autogluon.timeseries.utils.constants import AG_DEFAULT_N_JOBS
 from autogluon.timeseries.utils.datetime import get_seasonality
-from autogluon.timeseries.utils.forecast import get_forecast_horizon_index_ts_dataframe
 from autogluon.timeseries.utils.warning_filters import warning_filter
 logger = logging.getLogger(__name__)
-# We use the same default n_jobs across AG-TS to ensure that Joblib reuses the process pool
-AG_DEFAULT_N_JOBS = max(int(cpu_count() * 0.5), 1)
 class AbstractLocalModel(AbstractTimeSeriesModel):
     """Abstract class for local forecasting models that are trained separately for each time series.
@@ -29,49 +26,31 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
     Attributes
     ----------
-    allowed_local_model_args : List[str]
+    allowed_local_model_args
         Argument that can be passed to the underlying local model.
-    default_n_jobs : Union[int, float]
-        Default number of CPU cores used to train models. If float, this fraction of CPU cores will be used.
-    default_max_ts_length : Optional[int]
+    default_max_ts_length
         If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
         This significantly speeds up fitting and usually leads to no change in accuracy.
-    init_time_in_seconds : int
+    init_time_in_seconds
         Time that it takes to initialize the model in seconds (e.g., because of JIT compilation by Numba).
         If time_limit is below this number, model won't be trained.
     """
-    allowed_local_model_args: List[str] = []
-    default_n_jobs: Union[int, float] = AG_DEFAULT_N_JOBS
-    default_max_ts_length: Optional[int] = 2500
+    allowed_local_model_args: list[str] = []
+    default_max_ts_length: int | None = 2500
+    default_max_time_limit_ratio = 1.0
     init_time_in_seconds: int = 0
     def __init__(
         self,
-        freq: Optional[str] = None,
+        freq: str | None = None,
         prediction_length: int = 1,
-        path: Optional[str] = None,
-        name: Optional[str] = None,
-        eval_metric: str = None,
-        hyperparameters: Dict[str, Any] = None,
+        path: str | None = None,
+        name: str | None = None,
+        eval_metric: str | TimeSeriesScorer | None = None,
+        hyperparameters: dict[str, Any] | None = None,
         **kwargs,  # noqa
     ):
-        if hyperparameters is None:
-            hyperparameters = {}
-        else:
-            hyperparameters = hyperparameters.copy()
-        # TODO: Replace with 'num_cpus' argument passed to fit (after predictor API is changed)
-        n_jobs = hyperparameters.pop("n_jobs", self.default_n_jobs)
-        if isinstance(n_jobs, float) and 0 < n_jobs <= 1:
-            self.n_jobs = max(int(cpu_count() * n_jobs), 1)
-        elif isinstance(n_jobs, int):
-            self.n_jobs = n_jobs
-        else:
-            raise ValueError(f"n_jobs must be a float between 0 and 1 or an integer (received n_jobs = {n_jobs})")
-        # Default values, potentially overridden inside _fit()
-        self.use_fallback_model = hyperparameters.pop("use_fallback_model", True)
-        self.max_ts_length = hyperparameters.pop("max_ts_length", self.default_max_ts_length)
         super().__init__(
             path=path,
             freq=freq,
@@ -82,63 +61,107 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
             **kwargs,
         )
-        self._local_model_args: Dict[str, Any] = None
-        self._seasonal_period: Optional[int] = None
-        self.time_limit: Optional[float] = None
+        self._local_model_args: dict[str, Any]
+        self._seasonal_period: int
+        self._dummy_forecast: pd.DataFrame
+    @property
+    def allowed_hyperparameters(self) -> list[str]:
+        return (
+            super().allowed_hyperparameters
+            + ["use_fallback_model", "max_ts_length", "n_jobs"]
+            + self.allowed_local_model_args
+        )
+    def preprocess(
+        self,
+        data: TimeSeriesDataFrame,
+        known_covariates: TimeSeriesDataFrame | None = None,
+        is_train: bool = False,
+        **kwargs,
+    ) -> tuple[TimeSeriesDataFrame, TimeSeriesDataFrame | None]:
+        if not self._get_tags()["allow_nan"]:
+            data = data.fill_missing_values()
+        return data, known_covariates
+    def _get_default_hyperparameters(self) -> dict:
+        return {
+            "n_jobs": AG_DEFAULT_N_JOBS,
+            "use_fallback_model": True,
+            "max_ts_length": self.default_max_ts_length,
+        }
+    @staticmethod
+    def _compute_n_jobs(n_jobs: int | float) -> int:
+        if isinstance(n_jobs, float) and 0 < n_jobs <= 1:
+            return max(int(cpu_count() * n_jobs), 1)
+        elif isinstance(n_jobs, int):
+            return n_jobs
+        else:
+            raise ValueError(f"n_jobs must be a float between 0 and 1 or an integer (received n_jobs = {n_jobs})")
-    def _fit(self, train_data: TimeSeriesDataFrame, time_limit: Optional[int] = None, **kwargs):
+    def _fit(self, train_data: TimeSeriesDataFrame, time_limit: int | None = None, **kwargs):
         self._check_fit_params()
         if time_limit is not None and time_limit < self.init_time_in_seconds:
             raise TimeLimitExceeded
-        # Initialize parameters passed to each local model
-        raw_local_model_args = self._get_model_params().copy()
-        unused_local_model_args = []
         local_model_args = {}
-        for key, value in raw_local_model_args.items():
+        for key, value in self.get_hyperparameters().items():
             if key in self.allowed_local_model_args:
                 local_model_args[key] = value
-            else:
-                unused_local_model_args.append(key)
-        if len(unused_local_model_args):
-            logger.warning(
-                f"{self.name} ignores following hyperparameters: {unused_local_model_args}. "
-                f"See the docstring of {self.name} for the list of supported hyperparameters."
-            )
+        self._log_unused_hyperparameters(extra_allowed_hyperparameters=self.allowed_local_model_args)
         if "seasonal_period" not in local_model_args or local_model_args["seasonal_period"] is None:
-            local_model_args["seasonal_period"] = get_seasonality(train_data.freq)
+            local_model_args["seasonal_period"] = get_seasonality(self.freq)
         self._seasonal_period = local_model_args["seasonal_period"]
         self._local_model_args = self._update_local_model_args(local_model_args=local_model_args)
-        self.time_limit = time_limit
+        self._dummy_forecast = self._get_dummy_forecast(train_data)
         return self
-    def _update_local_model_args(self, local_model_args: Dict[str, Any]) -> Dict[str, Any]:
+    def _get_dummy_forecast(self, train_data: TimeSeriesDataFrame, max_num_rows: int = 20_000) -> pd.DataFrame:
+        agg_functions = ["mean"] + [get_quantile_function(q) for q in self.quantile_levels]
+        target_series = train_data[self.target]
+        if len(target_series) > max_num_rows:
+            target_series = target_series.sample(max_num_rows, replace=True)
+        stats_marginal = target_series.agg(agg_functions)
+        stats_repeated = np.tile(stats_marginal.values, [self.prediction_length, 1])
+        return pd.DataFrame(stats_repeated, columns=stats_marginal.index)
+    def _update_local_model_args(self, local_model_args: dict[str, Any]) -> dict[str, Any]:
         return local_model_args
     def _predict(self, data: TimeSeriesDataFrame, **kwargs) -> TimeSeriesDataFrame:
-        if self.max_ts_length is not None:
-            logger.debug(f"Shortening all time series to at most {self.max_ts_length}")
-            data = data.groupby(level=ITEMID, sort=False).tail(self.max_ts_length)
+        model_params = self.get_hyperparameters()
+        max_ts_length = model_params["max_ts_length"]
+        if max_ts_length is not None:
+            logger.debug(f"Shortening all time series to at most {max_ts_length}")
+            data = data.slice_by_timestep(-max_ts_length, None)
-        df = pd.DataFrame(data).reset_index(level=ITEMID)
-        all_series = (ts for _, ts in df.groupby(by=ITEMID, as_index=False, sort=False)[self.target])
+        indptr = data.get_indptr()
+        target_series = data[self.target].droplevel(level=TimeSeriesDataFrame.ITEMID)
+        all_series = (target_series[indptr[i] : indptr[i + 1]] for i in range(len(indptr) - 1))
         # timeout ensures that no individual job takes longer than time_limit
         # TODO: a job started late may still exceed time_limit - how to prevent that?
-        timeout = None if self.n_jobs == 1 else self.time_limit
+        time_limit = kwargs.get("time_limit")
+        # TODO: Take into account num_cpus once the TimeSeriesPredictor API is updated
+        n_jobs = self._compute_n_jobs(model_params["n_jobs"])
+        timeout = None if n_jobs == 1 else time_limit
         # end_time ensures that no new jobs are started after time_limit is exceeded
-        end_time = None if self.time_limit is None else time.time() + self.time_limit
-        executor = Parallel(self.n_jobs, timeout=timeout)
+        end_time = None if time_limit is None else time.time() + time_limit
+        executor = Parallel(n_jobs=n_jobs, timeout=timeout)
         try:
             with warning_filter():
                 predictions_with_flags = executor(
-                    delayed(self._predict_wrapper)(ts, end_time=end_time) for ts in all_series
+                    delayed(self._predict_wrapper)(
+                        ts, use_fallback_model=model_params["use_fallback_model"], end_time=end_time
+                    )
+                    for ts in all_series
                 )
         except TimeoutError:
             raise TimeLimitExceeded
@@ -151,38 +174,40 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
                 f"({fraction_failed_models:.1%}). Fallback model SeasonalNaive was used for these time series."
             )
         predictions_df = pd.concat([pred for pred, _ in predictions_with_flags])
-        predictions_df.index = get_forecast_horizon_index_ts_dataframe(data, self.prediction_length)
+        predictions_df.index = self.get_forecast_horizon_index(data)
         return TimeSeriesDataFrame(predictions_df)
-    def score_and_cache_oof(
-        self, val_data: TimeSeriesDataFrame, store_val_score: bool = False, store_predict_time: bool = False
-    ) -> None:
-        super().score_and_cache_oof(val_data, store_val_score, store_predict_time)
-        # Remove time_limit for future predictions
-        self.time_limit = None
-    def _predict_wrapper(self, time_series: pd.Series, end_time: Optional[float] = None) -> Tuple[pd.DataFrame, bool]:
+    def _predict_wrapper(
+        self,
+        time_series: pd.Series,
+        use_fallback_model: bool,
+        end_time: float | None = None,
+    ) -> tuple[pd.DataFrame, bool]:
         if end_time is not None and time.time() >= end_time:
             raise TimeLimitExceeded
-        try:
-            result = self._predict_with_local_model(
-                time_series=time_series,
-                local_model_args=self._local_model_args.copy(),
-            )
-            if not np.isfinite(result.values).all():
-                raise RuntimeError("Forecast contains NaN or Inf values.")
-            model_failed = False
-        except Exception:
-            if self.use_fallback_model:
-                result = seasonal_naive_forecast(
-                    target=time_series.values.ravel(),
-                    prediction_length=self.prediction_length,
-                    quantile_levels=self.quantile_levels,
-                    seasonal_period=self._seasonal_period,
+        model_failed = False
+        if time_series.isna().all():
+            result = self._dummy_forecast.copy()
+        else:
+            try:
+                result = self._predict_with_local_model(
+                    time_series=time_series,
+                    local_model_args=self._local_model_args.copy(),
                 )
-                model_failed = True
-            else:
-                raise
+                if not np.isfinite(result.values).all():
+                    raise RuntimeError("Forecast contains NaN or Inf values.")
+            except Exception:
+                if use_fallback_model:
+                    result = seasonal_naive_forecast(
+                        target=time_series.values.ravel(),
+                        prediction_length=self.prediction_length,
+                        quantile_levels=self.quantile_levels,
+                        seasonal_period=self._seasonal_period,
+                    )
+                    model_failed = True
+                else:
+                    raise
         return result, model_failed
     def _predict_with_local_model(
@@ -194,28 +219,56 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
 def seasonal_naive_forecast(
-    target: np.ndarray, prediction_length: int, quantile_levels: List[float], seasonal_period: int
+    target: np.ndarray, prediction_length: int, quantile_levels: list[float], seasonal_period: int
 ) -> pd.DataFrame:
     """Generate seasonal naive forecast, predicting the last observed value from the same period."""
+    def numpy_fillna(arr: np.ndarray) -> np.ndarray:
+        """Fast implementation of forward fill + avg fill in numpy."""
+        # First apply forward fill
+        idx = np.arange(len(arr))
+        mask = np.isnan(arr)
+        idx[mask] = 0
+        arr_filled = arr[np.maximum.accumulate(idx)]
+        # Leading NaNs are filled with the mean
+        arr_filled[np.isnan(arr_filled)] = np.nanmean(arr_filled)
+        return arr_filled
     forecast = {}
     # At least seasonal_period + 2 values are required to compute sigma for seasonal naive
     if len(target) > seasonal_period + 1 and seasonal_period > 1:
+        if np.isnan(target[-(seasonal_period + 2) :]).any():
+            target = numpy_fillna(target)
         indices = [len(target) - seasonal_period + k % seasonal_period for k in range(prediction_length)]
         forecast["mean"] = target[indices]
         residuals = target[seasonal_period:] - target[:-seasonal_period]
-        sigma = np.sqrt(np.mean(np.square(residuals)))
+        sigma = np.sqrt(np.nanmean(np.square(residuals)))
         num_full_seasons = np.arange(1, prediction_length + 1) // seasonal_period
         sigma_per_timestep = sigma * np.sqrt(num_full_seasons + 1)
     else:
         # Fall back to naive forecast
-        forecast["mean"] = np.full(shape=[prediction_length], fill_value=target[-1])
+        last_observed_value = target[np.isfinite(target)][-1]
+        forecast["mean"] = np.full(shape=[prediction_length], fill_value=last_observed_value)
         residuals = target[1:] - target[:-1]
-        sigma = np.sqrt(np.mean(np.square(residuals)))
+        sigma = np.sqrt(np.nanmean(np.square(residuals)))
+        if np.isnan(sigma):  # happens if there are no two consecutive non-nan observations
+            sigma = 0.0
         sigma_per_timestep = sigma * np.sqrt(np.arange(1, prediction_length + 1))
     for q in quantile_levels:
         forecast[str(q)] = forecast["mean"] + norm.ppf(q) * sigma_per_timestep
     return pd.DataFrame(forecast)
+def get_quantile_function(q: float) -> Callable:
+    """Returns a function with name "q" that computes the q'th quantile of a pandas.Series."""
+    def quantile_fn(x: pd.Series) -> pd.Series:
+        return x.quantile(q)
+    quantile_fn.__name__ = str(q)
+    return quantile_fn

autogluon/timeseries/models/local/naive.py CHANGED Viewed

@@ -1,9 +1,11 @@
-from typing import Callable
 import numpy as np
 import pandas as pd
-from autogluon.timeseries.models.local.abstract_local_model import AbstractLocalModel, seasonal_naive_forecast
+from autogluon.timeseries.models.local.abstract_local_model import (
+    AbstractLocalModel,
+    get_quantile_function,
+    seasonal_naive_forecast,
+)
 class NaiveModel(AbstractLocalModel):
@@ -15,13 +17,14 @@ class NaiveModel(AbstractLocalModel):
     Other Parameters
     ----------------
-    n_jobs : int or float, default = 0.5
+    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
         When set to -1, all CPU cores are used.
     """
+    ag_priority = 100
     allowed_local_model_args = ["seasonal_period"]
     def _predict_with_local_model(
@@ -36,6 +39,9 @@ class NaiveModel(AbstractLocalModel):
             seasonal_period=1,
         )
+    def _more_tags(self) -> dict:
+        return {"allow_nan": True}
 class SeasonalNaiveModel(AbstractLocalModel):
     """Baseline model that sets the forecast equal to the last observed value from the same season.
@@ -54,18 +60,19 @@ class SeasonalNaiveModel(AbstractLocalModel):
         specified manually by providing an integer > 1.
         If seasonal_period (inferred or provided) is equal to 1, will fall back to Naive forecast.
         Seasonality will also be disabled, if the length of the time series is < seasonal_period.
-    n_jobs : int or float, default = 0.5
+    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
         When set to -1, all CPU cores are used.
     """
+    ag_priority = 100
     allowed_local_model_args = ["seasonal_period"]
     def _predict_with_local_model(
         self,
-        time_series: np.ndarray,
+        time_series: pd.Series,
         local_model_args: dict,
     ) -> pd.DataFrame:
         return seasonal_naive_forecast(
@@ -75,32 +82,26 @@ class SeasonalNaiveModel(AbstractLocalModel):
             seasonal_period=local_model_args["seasonal_period"],
         )
-def _get_quantile_function(q: float) -> Callable:
-    """Returns a function with name "q" that computes the q'th quantile of a pandas.Series."""
-    def quantile_fn(x: pd.Series) -> pd.Series:
-        return x.quantile(q)
-    quantile_fn.__name__ = str(q)
-    return quantile_fn
+    def _more_tags(self) -> dict:
+        return {"allow_nan": True}
 class AverageModel(AbstractLocalModel):
-    """Baseline model that sets the forecast equal to the historic average or quantile.
+    """Baseline model that sets the forecast equal to the historical average or quantile.
     Other Parameters
     ----------------
-    n_jobs : int or float, default = 0.5
+    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
         When set to -1, all CPU cores are used.
-    max_ts_length : Optional[int], default = None
+    max_ts_length : int | None, default = None
         If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
         This significantly speeds up fitting and usually leads to no change in accuracy.
     """
+    ag_priority = 100
     allowed_local_model_args = ["seasonal_period"]
     default_max_ts_length = None
@@ -109,14 +110,17 @@ class AverageModel(AbstractLocalModel):
         time_series: pd.Series,
         local_model_args: dict,
     ) -> pd.DataFrame:
-        agg_functions = ["mean"] + [_get_quantile_function(q) for q in self.quantile_levels]
+        agg_functions = ["mean"] + [get_quantile_function(q) for q in self.quantile_levels]
         stats_marginal = time_series.agg(agg_functions)
         stats_repeated = np.tile(stats_marginal.values, [self.prediction_length, 1])
         return pd.DataFrame(stats_repeated, columns=stats_marginal.index)
+    def _more_tags(self) -> dict:
+        return {"allow_nan": True}
 class SeasonalAverageModel(AbstractLocalModel):
-    """Baseline model that sets the forecast equal to the historic average or quantile in the same season.
+    """Baseline model that sets the forecast equal to the historical average or quantile in the same season.
     Other Parameters
     ----------------
@@ -127,16 +131,17 @@ class SeasonalAverageModel(AbstractLocalModel):
         specified manually by providing an integer > 1.
         If seasonal_period (inferred or provided) is equal to 1, will fall back to Naive forecast.
         Seasonality will also be disabled, if the length of the time series is < seasonal_period.
-    n_jobs : int or float, default = 0.5
+    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
         When set to -1, all CPU cores are used.
-    max_ts_length : Optional[int], default = None
+    max_ts_length : int | None, default = None
         If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
         This significantly speeds up fitting and usually leads to no change in accuracy.
     """
+    ag_priority = 100
     allowed_local_model_args = ["seasonal_period"]
     default_max_ts_length = None
@@ -146,7 +151,7 @@ class SeasonalAverageModel(AbstractLocalModel):
         local_model_args: dict,
     ) -> pd.DataFrame:
         seasonal_period = local_model_args["seasonal_period"]
-        agg_functions = ["mean"] + [_get_quantile_function(q) for q in self.quantile_levels]
+        agg_functions = ["mean"] + [get_quantile_function(q) for q in self.quantile_levels]
         # Compute mean & quantiles for each season
         ts_df = time_series.reset_index(drop=True).to_frame()
@@ -162,3 +167,6 @@ class SeasonalAverageModel(AbstractLocalModel):
             stats_marginal = time_series.agg(agg_functions)
             result = result.fillna(stats_marginal)
         return result
+    def _more_tags(self) -> dict:
+        return {"allow_nan": True}

autogluon/timeseries/models/local/npts.py CHANGED Viewed

@@ -26,16 +26,17 @@ class NPTSModel(AbstractLocalModel):
         Number of samples generated by the forecast.
     num_default_time_features : int, default = 1
         Number of time features used by seasonal model.
-    n_jobs : int or float, default = 0.5
+    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
         When set to -1, all CPU cores are used.
-    max_ts_length : Optional[int], default = 2500
+    max_ts_length : int | None, default = 2500
         If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
         This significantly speeds up fitting and usually leads to no change in accuracy.
     """
+    ag_priority = 80
     allowed_local_model_args = [
         "kernel_type",
         "exp_kernel_weights",
@@ -88,3 +89,6 @@ class NPTSModel(AbstractLocalModel):
         for q in self.quantile_levels:
             forecast_dict[str(q)] = forecast.quantile(q)
         return pd.DataFrame(forecast_dict)
+    def _more_tags(self) -> dict:
+        return {"allow_nan": True}

autogluon.timeseries 1.0.1b20240304__py3-none-any.whl → 1.4.1b20251210__py3-none-any.whl

Potentially problematic release.

autogluon.timeseries 1.0.1b20240304__py3-none-any.whl → 1.4.1b20251210__py3-none-any.whl