autogluon.timeseries 1.4.1b20251115-py3-none-any.whl → 1.5.0b20251221-py3-none-any.whl
This diff compares two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Potentially problematic release: this version of autogluon.timeseries might be problematic.
- autogluon/timeseries/configs/hyperparameter_presets.py +13 -28
- autogluon/timeseries/configs/predictor_presets.py +23 -39
- autogluon/timeseries/dataset/ts_dataframe.py +32 -34
- autogluon/timeseries/learner.py +67 -33
- autogluon/timeseries/metrics/__init__.py +4 -4
- autogluon/timeseries/metrics/abstract.py +8 -8
- autogluon/timeseries/metrics/point.py +9 -9
- autogluon/timeseries/metrics/quantile.py +4 -4
- autogluon/timeseries/models/__init__.py +2 -1
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +52 -50
- autogluon/timeseries/models/abstract/model_trial.py +2 -1
- autogluon/timeseries/models/abstract/tunable.py +8 -8
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +30 -26
- autogluon/timeseries/models/autogluon_tabular/per_step.py +13 -11
- autogluon/timeseries/models/autogluon_tabular/transforms.py +2 -2
- autogluon/timeseries/models/chronos/__init__.py +2 -1
- autogluon/timeseries/models/chronos/chronos2.py +395 -0
- autogluon/timeseries/models/chronos/model.py +30 -25
- autogluon/timeseries/models/chronos/utils.py +5 -5
- autogluon/timeseries/models/ensemble/__init__.py +17 -10
- autogluon/timeseries/models/ensemble/abstract.py +13 -9
- autogluon/timeseries/models/ensemble/array_based/__init__.py +2 -2
- autogluon/timeseries/models/ensemble/array_based/abstract.py +24 -31
- autogluon/timeseries/models/ensemble/array_based/models.py +146 -11
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +2 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +6 -5
- autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +186 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +44 -83
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +21 -55
- autogluon/timeseries/models/ensemble/ensemble_selection.py +167 -0
- autogluon/timeseries/models/ensemble/per_item_greedy.py +172 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +7 -3
- autogluon/timeseries/models/ensemble/weighted/basic.py +26 -13
- autogluon/timeseries/models/ensemble/weighted/greedy.py +21 -144
- autogluon/timeseries/models/gluonts/abstract.py +30 -29
- autogluon/timeseries/models/gluonts/dataset.py +9 -9
- autogluon/timeseries/models/gluonts/models.py +0 -7
- autogluon/timeseries/models/local/__init__.py +0 -7
- autogluon/timeseries/models/local/abstract_local_model.py +13 -16
- autogluon/timeseries/models/local/naive.py +2 -2
- autogluon/timeseries/models/local/npts.py +7 -1
- autogluon/timeseries/models/local/statsforecast.py +13 -13
- autogluon/timeseries/models/multi_window/multi_window_model.py +38 -23
- autogluon/timeseries/models/registry.py +3 -4
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +3 -4
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +6 -6
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +4 -9
- autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +2 -3
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +10 -10
- autogluon/timeseries/models/toto/_internal/dataset.py +2 -2
- autogluon/timeseries/models/toto/_internal/forecaster.py +8 -8
- autogluon/timeseries/models/toto/dataloader.py +4 -4
- autogluon/timeseries/models/toto/hf_pretrained_model.py +97 -16
- autogluon/timeseries/models/toto/model.py +30 -17
- autogluon/timeseries/predictor.py +531 -136
- autogluon/timeseries/regressor.py +18 -23
- autogluon/timeseries/splitter.py +2 -2
- autogluon/timeseries/trainer/ensemble_composer.py +323 -129
- autogluon/timeseries/trainer/model_set_builder.py +9 -9
- autogluon/timeseries/trainer/prediction_cache.py +16 -16
- autogluon/timeseries/trainer/trainer.py +235 -145
- autogluon/timeseries/trainer/utils.py +3 -4
- autogluon/timeseries/transforms/covariate_scaler.py +7 -7
- autogluon/timeseries/transforms/target_scaler.py +8 -8
- autogluon/timeseries/utils/constants.py +10 -0
- autogluon/timeseries/utils/datetime/lags.py +1 -3
- autogluon/timeseries/utils/datetime/seasonality.py +1 -3
- autogluon/timeseries/utils/features.py +22 -9
- autogluon/timeseries/utils/forecast.py +1 -2
- autogluon/timeseries/utils/timer.py +173 -0
- autogluon/timeseries/version.py +1 -1
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/METADATA +23 -21
- autogluon_timeseries-1.5.0b20251221.dist-info/RECORD +103 -0
- autogluon_timeseries-1.4.1b20251115.dist-info/RECORD +0 -96
- /autogluon.timeseries-1.4.1b20251115-py3.9-nspkg.pth → /autogluon.timeseries-1.5.0b20251221-py3.11-nspkg.pth +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/WHEEL +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/licenses/LICENSE +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/licenses/NOTICE +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/namespace_packages.txt +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/top_level.txt +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/zip-safe +0 -0
autogluon/timeseries/models/ensemble/weighted/basic.py

@@ -1,4 +1,4 @@
-from typing import Any
+from typing import Any
 
 import numpy as np
 
@@ -8,14 +8,20 @@ from .abstract import AbstractWeightedTimeSeriesEnsembleModel
 
 
 class SimpleAverageEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
-    """
+    """Simple ensemble that assigns equal weights to all base models for uniform averaging.
+
+    This ensemble computes predictions as the arithmetic mean of all base model forecasts,
+    giving each model equal influence. Simple averaging is robust and often performs well when base
+    models have similar accuracy levels or when validation data is insufficient to reliably
+    estimate performance differences.
+    """
 
     def _fit(
         self,
         predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
         data_per_window: list[TimeSeriesDataFrame],
-        model_scores:
-        time_limit:
+        model_scores: dict[str, float] | None = None,
+        time_limit: float | None = None,
     ):
         self.model_to_weight = {}
         num_models = len(predictions_per_window)
@@ -24,16 +30,23 @@ class SimpleAverageEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
 
 
 class PerformanceWeightedEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
-    """
-
+    """Performance-based weighted ensemble that assigns weights proportional to validation scores.
+
+    This ensemble computes model weights based on their validation performance, giving higher
+    weights to better-performing models. The weighting scheme transforms validation scores
+    (higher is better) into ensemble weights using configurable transformation functions.
+
+    .. warning::
+        This ensemble method is deprecated and may be removed in a future version.
 
     Other Parameters
     ----------------
-    weight_scheme: Literal["sq", "inv", "
+    weight_scheme : Literal["sq", "inv", "sqrt"], default = "sqrt"
         Method used to compute the weights as a function of the validation scores.
-
-        - "
-        - "
+
+        - "sqrt" computes weights in proportion to ``sqrt(1 / S)``. This is the default.
+        - "inv" computes weights in proportion to ``(1 / S)``.
+        - "sq" computes the weights in proportion to ``(1 / S)^2`` as outlined in [PC2020]_.
 
     References
     ----------
@@ -49,12 +62,12 @@ class PerformanceWeightedEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
         self,
         predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
         data_per_window: list[TimeSeriesDataFrame],
-        model_scores:
-        time_limit:
+        model_scores: dict[str, float] | None = None,
+        time_limit: float | None = None,
     ):
         assert model_scores is not None
 
-        weight_scheme = self.
+        weight_scheme = self.get_hyperparameter("weight_scheme")
 
         # drop NaNs
         model_scores = {k: v for k, v in model_scores.items() if np.isfinite(v)}
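The weight schemes above map a validation score S into an unnormalized weight (sqrt(1 / S), 1 / S, or (1 / S)^2) that is then normalized across models. A minimal sketch of that mapping, assuming S is a positive error-like value where lower is better (the function name and normalization step are illustrative, not the actual implementation in PerformanceWeightedEnsemble._fit):

import numpy as np

def scores_to_weights(model_errors: dict[str, float], weight_scheme: str = "sqrt") -> dict[str, float]:
    # Transform each positive validation error S into an unnormalized weight
    transform = {
        "sqrt": lambda s: np.sqrt(1.0 / s),
        "inv": lambda s: 1.0 / s,
        "sq": lambda s: (1.0 / s) ** 2,
    }[weight_scheme]
    raw = {name: transform(s) for name, s in model_errors.items()}
    total = sum(raw.values())
    # Normalize so the ensemble weights sum to one
    return {name: w / total for name, w in raw.items()}

# Example: "sq" concentrates weight on the better model more aggressively than "sqrt"
print(scores_to_weights({"DeepAR": 0.10, "ETS": 0.20}, weight_scheme="sq"))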
autogluon/timeseries/models/ensemble/weighted/greedy.py

@@ -1,156 +1,38 @@
-import copy
 import logging
 import pprint
-from typing import Any
+from typing import Any
 
-import numpy as np
-
-import autogluon.core as ag
-from autogluon.core.models.greedy_ensemble.ensemble_selection import EnsembleSelection
 from autogluon.timeseries import TimeSeriesDataFrame
-from autogluon.timeseries.metrics import TimeSeriesScorer
-from autogluon.timeseries.utils.datetime import get_seasonality
 
+from ..ensemble_selection import fit_time_series_ensemble_selection
 from .abstract import AbstractWeightedTimeSeriesEnsembleModel
 
 logger = logging.getLogger(__name__)
 
 
-class
-
-
-        ensemble_size: int,
-        metric: TimeSeriesScorer,
-        problem_type: str = ag.constants.QUANTILE,
-        sorted_initialization: bool = False,
-        bagging: bool = False,
-        tie_breaker: str = "random",
-        random_state: Optional[np.random.RandomState] = None,
-        prediction_length: int = 1,
-        target: str = "target",
-        **kwargs,
-    ):
-        super().__init__(
-            ensemble_size=ensemble_size,
-            metric=metric,  # type: ignore
-            problem_type=problem_type,
-            sorted_initialization=sorted_initialization,
-            bagging=bagging,
-            tie_breaker=tie_breaker,
-            random_state=random_state,
-            **kwargs,
-        )
-        self.prediction_length = prediction_length
-        self.target = target
-        self.metric: TimeSeriesScorer
-
-        self.dummy_pred_per_window = []
-        self.scorer_per_window = []
-
-        self.dummy_pred_per_window: Optional[list[TimeSeriesDataFrame]]
-        self.scorer_per_window: Optional[list[TimeSeriesScorer]]
-        self.data_future_per_window: Optional[list[TimeSeriesDataFrame]]
-
-    def fit(  # type: ignore
-        self,
-        predictions: list[list[TimeSeriesDataFrame]],
-        labels: list[TimeSeriesDataFrame],
-        time_limit: Optional[float] = None,
-    ):
-        return super().fit(
-            predictions=predictions,  # type: ignore
-            labels=labels,  # type: ignore
-            time_limit=time_limit,
-        )
-
-    def _fit(  # type: ignore
-        self,
-        predictions: list[list[TimeSeriesDataFrame]],
-        labels: list[TimeSeriesDataFrame],
-        time_limit: Optional[float] = None,
-        sample_weight: Optional[list[float]] = None,
-    ):
-        # Stack predictions for each model into a 3d tensor of shape [num_val_windows, num_rows, num_cols]
-        stacked_predictions = [np.stack(preds) for preds in predictions]
-
-        self.dummy_pred_per_window = []
-        self.scorer_per_window = []
-        self.data_future_per_window = []
-
-        seasonal_period = self.metric.seasonal_period
-        if seasonal_period is None:
-            seasonal_period = get_seasonality(labels[0].freq)
-
-        for window_idx, data in enumerate(labels):
-            dummy_pred = copy.deepcopy(predictions[0][window_idx])
-            # This should never happen; sanity check to make sure that all predictions have the same index
-            assert all(dummy_pred.index.equals(pred[window_idx].index) for pred in predictions)
-            assert all(dummy_pred.columns.equals(pred[window_idx].columns) for pred in predictions)
-
-            self.dummy_pred_per_window.append(dummy_pred)
-
-            scorer = copy.deepcopy(self.metric)
-            # Split the observed time series once to avoid repeated computations inside the evaluator
-            data_past = data.slice_by_timestep(None, -self.prediction_length)
-            data_future = data.slice_by_timestep(-self.prediction_length, None)
-            scorer.save_past_metrics(data_past, target=self.target, seasonal_period=seasonal_period)
-            self.scorer_per_window.append(scorer)
-            self.data_future_per_window.append(data_future)
-
-        super()._fit(
-            predictions=stacked_predictions,
-            labels=data_future,  # type: ignore
-            time_limit=time_limit,
-        )
-        self.dummy_pred_per_window = None
-        self.evaluator_per_window = None
-        self.data_future_per_window = None
-
-    def _calculate_regret(  # type: ignore
-        self,
-        y_true,
-        y_pred_proba,
-        metric: TimeSeriesScorer,
-        sample_weight=None,
-    ):
-        # Compute average score across all validation windows
-        total_score = 0.0
-
-        assert self.data_future_per_window is not None
-        assert self.dummy_pred_per_window is not None
-        assert self.scorer_per_window is not None
-
-        for window_idx, data_future in enumerate(self.data_future_per_window):
-            dummy_pred = self.dummy_pred_per_window[window_idx]
-            dummy_pred[list(dummy_pred.columns)] = y_pred_proba[window_idx]
-            # We use scorer.compute_metric instead of scorer.score to avoid repeated calls to scorer.save_past_metrics
-            metric_value = self.scorer_per_window[window_idx].compute_metric(
-                data_future,
-                dummy_pred,
-                target=self.target,
-            )
-            total_score += metric.sign * metric_value
-        avg_score = total_score / len(self.data_future_per_window)
-        # score: higher is better, regret: lower is better, so we flip the sign
-        return -avg_score
+class GreedyEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
+    """Greedy ensemble selection algorithm that iteratively builds an ensemble by selecting models with
+    replacement.
 
+    Also known as ``WeightedEnsemble`` for backward compatibility.
 
-class
-
-
+    This class implements the Ensemble Selection algorithm by Caruana et al. [Car2004]_, which starts
+    with an empty ensemble and repeatedly adds the model that most improves the ensemble's validation
+    performance. Models can be selected multiple times, allowing the algorithm to assign higher effective
+    weights to better-performing models.
 
     Other Parameters
     ----------------
-    ensemble_size: int, default = 100
+    ensemble_size : int, default = 100
        Number of models (with replacement) to include in the ensemble.
 
    References
    ----------
-    .. [
+    .. [Car2004] Caruana, Rich, et al. "Ensemble selection from libraries of models."
       Proceedings of the twenty-first international conference on Machine learning. 2004.
    """
 
-    def __init__(self, name:
+    def __init__(self, name: str | None = None, **kwargs):
        if name is None:
            # FIXME: the name here is kept for backward compatibility. it will be called
            # GreedyEnsemble in v1.4 once ensemble choices are exposed
@@ -164,24 +46,19 @@ class GreedyEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
         self,
         predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
         data_per_window: list[TimeSeriesDataFrame],
-        model_scores:
-        time_limit:
+        model_scores: dict[str, float] | None = None,
+        time_limit: float | None = None,
     ):
-
-
-
+        model_to_weight = fit_time_series_ensemble_selection(
+            data_per_window=data_per_window,
+            predictions_per_window=predictions_per_window,
+            ensemble_size=self.get_hyperparameter("ensemble_size"),
+            eval_metric=self.eval_metric,
             prediction_length=self.prediction_length,
             target=self.target,
-        )
-        ensemble_selection.fit(
-            predictions=list(predictions_per_window.values()),
-            labels=data_per_window,
             time_limit=time_limit,
         )
-        self.model_to_weight = {}
-        for model_name, weight in zip(predictions_per_window.keys(), ensemble_selection.weights_):
-            if weight != 0:
-                self.model_to_weight[model_name] = weight
+        self.model_to_weight = {model: weight for model, weight in model_to_weight.items() if weight > 0}
 
         weights_for_printing = {model: round(float(weight), 2) for model, weight in self.model_to_weight.items()}
         logger.info(f"\tEnsemble weights: {pprint.pformat(weights_for_printing, width=200)}")
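The new GreedyEnsemble delegates the actual optimization to fit_time_series_ensemble_selection, defined in the newly added ensemble_selection.py. The core idea of Caruana-style selection with replacement can be sketched in a few lines; this toy version uses MSE on flat arrays as a stand-in for the time series metrics and validation windows the real implementation works with:

import numpy as np

def greedy_selection_weights(preds: np.ndarray, y_true: np.ndarray, ensemble_size: int = 100) -> np.ndarray:
    # preds: [num_models, num_points]; select models with replacement, minimizing MSE
    num_models = preds.shape[0]
    counts = np.zeros(num_models)
    running_sum = np.zeros_like(y_true, dtype=float)
    for step in range(1, ensemble_size + 1):
        # Ensemble forecast if each candidate model were added one more time
        candidates = (running_sum + preds) / step
        errors = ((candidates - y_true) ** 2).mean(axis=1)
        best = int(np.argmin(errors))
        counts[best] += 1
        running_sum += preds[best]
    # Selection counts become the effective ensemble weights
    return counts / counts.sum()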
autogluon/timeseries/models/gluonts/abstract.py

@@ -3,7 +3,7 @@ import os
 import shutil
 from datetime import timedelta
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable,
+from typing import TYPE_CHECKING, Any, Callable, Type, cast, overload
 
 import gluonts
 import gluonts.core.settings
@@ -72,12 +72,12 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
 
     def __init__(
         self,
-        freq:
+        freq: str | None = None,
         prediction_length: int = 1,
-        path:
-        name:
-        eval_metric:
-        hyperparameters:
+        path: str | None = None,
+        name: str | None = None,
+        eval_metric: str | None = None,
+        hyperparameters: dict[str, Any] | None = None,
         **kwargs,  # noqa
     ):
         super().__init__(
@@ -89,9 +89,9 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
             hyperparameters=hyperparameters,
             **kwargs,
         )
-        self.gts_predictor:
-        self._ohe_generator_known:
-        self._ohe_generator_past:
+        self.gts_predictor: GluonTSPredictor | None = None
+        self._ohe_generator_known: OneHotEncoder | None = None
+        self._ohe_generator_past: OneHotEncoder | None = None
         self.callbacks = []
         # Following attributes may be overridden during fit() based on train_data & model parameters
         self.num_feat_static_cat = 0
@@ -105,7 +105,7 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
         self.past_feat_dynamic_cat_cardinality: list[int] = []
         self.negative_data = True
 
-    def save(self, path:
+    def save(self, path: str | None = None, verbose: bool = True) -> str:
         # we flush callbacks instance variable if it has been set. it can keep weak references which breaks training
         self.callbacks = []
         # The GluonTS predictor is serialized using custom logic
@@ -153,18 +153,17 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
             assert dataset.static_features is not None, (
                 "Static features must be provided if num_feat_static_cat > 0"
             )
-
-            self.feat_static_cat_cardinality = feat_static_cat.nunique().tolist()
+            self.feat_static_cat_cardinality = list(self.covariate_metadata.static_cat_cardinality.values())
 
         disable_known_covariates = model_params.get("disable_known_covariates", False)
         if not disable_known_covariates and self.supports_known_covariates:
             self.num_feat_dynamic_cat = len(self.covariate_metadata.known_covariates_cat)
             self.num_feat_dynamic_real = len(self.covariate_metadata.known_covariates_real)
             if self.num_feat_dynamic_cat > 0:
-                feat_dynamic_cat = dataset[self.covariate_metadata.known_covariates_cat]
                 if self.supports_cat_covariates:
-                    self.feat_dynamic_cat_cardinality =
+                    self.feat_dynamic_cat_cardinality = list(self.covariate_metadata.known_cat_cardinality.values())
                 else:
+                    feat_dynamic_cat = dataset[self.covariate_metadata.known_covariates_cat]
                     # If model doesn't support categorical covariates, convert them to real via one hot encoding
                     self._ohe_generator_known = OneHotEncoder(
                         max_levels=model_params.get("max_cat_cardinality", 100),
@@ -180,10 +179,12 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
             self.num_past_feat_dynamic_cat = len(self.covariate_metadata.past_covariates_cat)
             self.num_past_feat_dynamic_real = len(self.covariate_metadata.past_covariates_real)
             if self.num_past_feat_dynamic_cat > 0:
-                past_feat_dynamic_cat = dataset[self.covariate_metadata.past_covariates_cat]
                 if self.supports_cat_covariates:
-                    self.past_feat_dynamic_cat_cardinality =
+                    self.past_feat_dynamic_cat_cardinality = list(
+                        self.covariate_metadata.past_cat_cardinality.values()
+                    )
                 else:
+                    past_feat_dynamic_cat = dataset[self.covariate_metadata.past_covariates_cat]
                     # If model doesn't support categorical covariates, convert them to real via one hot encoding
                     self._ohe_generator_past = OneHotEncoder(
                         max_levels=model_params.get("max_cat_cardinality", 100),
@@ -277,8 +278,8 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
 
         return torch.cuda.is_available()
 
-    def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str,
-        minimum_resources: dict[str,
+    def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str, int | float]:
+        minimum_resources: dict[str, int | float] = {"num_cpus": 1}
         # if GPU is available, we train with 1 GPU per trial
         if is_gpu_available:
             minimum_resources["num_gpus"] = 1
@@ -289,8 +290,8 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
     @overload
     def _to_gluonts_dataset(self, time_series_df: TimeSeriesDataFrame, known_covariates=None) -> GluonTSDataset: ...
     def _to_gluonts_dataset(
-        self, time_series_df:
-    ) ->
+        self, time_series_df: TimeSeriesDataFrame | None, known_covariates: TimeSeriesDataFrame | None = None
+    ) -> GluonTSDataset | None:
         if time_series_df is not None:
             # TODO: Preprocess real-valued features with StdScaler?
             if self.num_feat_static_cat > 0:
@@ -388,10 +389,10 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
     def _fit(
         self,
         train_data: TimeSeriesDataFrame,
-        val_data:
-        time_limit:
-        num_cpus:
-        num_gpus:
+        val_data: TimeSeriesDataFrame | None = None,
+        time_limit: float | None = None,
+        num_cpus: int | None = None,
+        num_gpus: int | None = None,
         verbosity: int = 2,
         **kwargs,
     ) -> None:
@@ -438,8 +439,8 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
 
     def _get_callbacks(
         self,
-        time_limit:
-        early_stopping_patience:
+        time_limit: float | None,
+        early_stopping_patience: int | None = None,
     ) -> list[Callable]:
         """Retrieve a list of callback objects for the GluonTS trainer"""
         from lightning.pytorch.callbacks import EarlyStopping, Timer
@@ -454,7 +455,7 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
     def _predict(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates:
+        known_covariates: TimeSeriesDataFrame | None = None,
         **kwargs,
     ) -> TimeSeriesDataFrame:
         if self.gts_predictor is None:
@@ -471,8 +472,8 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
     def _predict_gluonts_forecasts(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates:
-        num_samples:
+        known_covariates: TimeSeriesDataFrame | None = None,
+        num_samples: int | None = None,
     ) -> list[Forecast]:
         assert self.gts_predictor is not None, "GluonTS models must be fit before predicting."
         gts_data = self._to_gluonts_dataset(data, known_covariates=known_covariates)
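When a model does not support categorical covariates, the hunks above fall back to one-hot encoding them into real-valued features, capping cardinality with max_cat_cardinality. A rough sketch of that idea in plain pandas (the actual code uses autogluon's OneHotEncoder feature generator with max_levels; the helper below is illustrative):

import pandas as pd

def one_hot_with_cap(df: pd.DataFrame, max_levels: int = 100) -> pd.DataFrame:
    # Replace each categorical column with 0/1 indicators, bucketing rare levels together
    encoded = []
    for col in df.columns:
        top = df[col].value_counts().index[:max_levels]
        capped = df[col].where(df[col].isin(top), other="_other_")
        encoded.append(pd.get_dummies(capped, prefix=col, dtype="float32"))
    return pd.concat(encoded, axis=1)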
autogluon/timeseries/models/gluonts/dataset.py

@@ -1,4 +1,4 @@
-from typing import Any, Iterator,
+from typing import Any, Iterator, Type
 
 import numpy as np
 import pandas as pd
@@ -17,14 +17,14 @@ class SimpleGluonTSDataset(GluonTSDataset):
         target_df: TimeSeriesDataFrame,
         freq: str,
         target_column: str = "target",
-        feat_static_cat:
-        feat_static_real:
-        feat_dynamic_cat:
-        feat_dynamic_real:
-        past_feat_dynamic_cat:
-        past_feat_dynamic_real:
+        feat_static_cat: np.ndarray | None = None,
+        feat_static_real: np.ndarray | None = None,
+        feat_dynamic_cat: np.ndarray | None = None,
+        feat_dynamic_real: np.ndarray | None = None,
+        past_feat_dynamic_cat: np.ndarray | None = None,
+        past_feat_dynamic_real: np.ndarray | None = None,
         includes_future: bool = False,
-        prediction_length:
+        prediction_length: int | None = None,
     ):
         assert target_df is not None
         # Convert TimeSeriesDataFrame to pd.Series for faster processing
@@ -48,7 +48,7 @@ class SimpleGluonTSDataset(GluonTSDataset):
         assert len(self.item_ids) == len(self.start_timestamps)
 
     @staticmethod
-    def _astype(array:
+    def _astype(array: np.ndarray | None, dtype: Type[np.generic]) -> np.ndarray | None:
         if array is None:
             return None
         else:
autogluon/timeseries/models/gluonts/models.py

@@ -41,10 +41,8 @@ class DeepARModel(AbstractGluonTSModel):
         Number of steps to unroll the RNN for before computing predictions
     disable_static_features : bool, default = False
         If True, static features won't be used by the model even if they are present in the dataset.
-        If False, static features will be used by the model if they are present in the dataset.
     disable_known_covariates : bool, default = False
         If True, known covariates won't be used by the model even if they are present in the dataset.
-        If False, known covariates will be used by the model if they are present in the dataset.
     num_layers : int, default = 2
         Number of RNN layers
     hidden_size : int, default = 40
@@ -170,13 +168,10 @@ class TemporalFusionTransformerModel(AbstractGluonTSModel):
         Distribution output object that defines how the model output is converted to a forecast, and how the loss is computed.
     disable_static_features : bool, default = False
         If True, static features won't be used by the model even if they are present in the dataset.
-        If False, static features will be used by the model if they are present in the dataset.
     disable_known_covariates : bool, default = False
         If True, known covariates won't be used by the model even if they are present in the dataset.
-        If False, known covariates will be used by the model if they are present in the dataset.
     disable_past_covariates : bool, default = False
         If True, past covariates won't be used by the model even if they are present in the dataset.
-        If False, past covariates will be used by the model if they are present in the dataset.
     hidden_dim : int, default = 32
         Size of the LSTM & transformer hidden states.
     variable_dim : int, default = 32
@@ -470,10 +465,8 @@ class TiDEModel(AbstractGluonTSModel):
         Number of past values used for prediction.
     disable_static_features : bool, default = False
         If True, static features won't be used by the model even if they are present in the dataset.
-        If False, static features will be used by the model if they are present in the dataset.
     disable_known_covariates : bool, default = False
         If True, known covariates won't be used by the model even if they are present in the dataset.
-        If False, known covariates will be used by the model if they are present in the dataset.
     feat_proj_hidden_dim : int, default = 4
         Size of the feature projection layer.
     encoder_hidden_dim : int, default = 64
autogluon/timeseries/models/local/__init__.py

@@ -1,5 +1,3 @@
-import joblib.externals.loky
-
 from .naive import AverageModel, NaiveModel, SeasonalAverageModel, SeasonalNaiveModel
 from .npts import NPTSModel
 from .statsforecast import (
@@ -15,8 +13,3 @@ from .statsforecast import (
     ThetaModel,
     ZeroModel,
 )
-
-# By default, joblib w/ loky backend kills processes that take >300MB of RAM assuming that this is caused by a memory
-# leak. This leads to problems for some memory-hungry models like AutoARIMA/Theta.
-# This monkey patch removes this undesired behavior
-joblib.externals.loky.process_executor._MAX_MEMORY_LEAK_SIZE = int(3e10)
autogluon/timeseries/models/local/abstract_local_model.py

@@ -1,7 +1,7 @@
 import logging
 import time
 from multiprocessing import TimeoutError
-from typing import Any, Callable
+from typing import Any, Callable
 
 import numpy as np
 import pandas as pd
@@ -12,16 +12,13 @@ from autogluon.core.utils.exceptions import TimeLimitExceeded
 from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.metrics import TimeSeriesScorer
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
+from autogluon.timeseries.utils.constants import AG_DEFAULT_N_JOBS
 from autogluon.timeseries.utils.datetime import get_seasonality
 from autogluon.timeseries.utils.warning_filters import warning_filter
 
 logger = logging.getLogger(__name__)
 
 
-# We use the same default n_jobs across AG-TS to ensure that Joblib reuses the process pool
-AG_DEFAULT_N_JOBS = max(cpu_count(only_physical_cores=True), 1)
-
-
 class AbstractLocalModel(AbstractTimeSeriesModel):
     """Abstract class for local forecasting models that are trained separately for each time series.
 
@@ -40,18 +37,18 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
     """
 
     allowed_local_model_args: list[str] = []
-    default_max_ts_length:
+    default_max_ts_length: int | None = 2500
     default_max_time_limit_ratio = 1.0
     init_time_in_seconds: int = 0
 
     def __init__(
         self,
-        freq:
+        freq: str | None = None,
         prediction_length: int = 1,
-        path:
-        name:
-        eval_metric:
-        hyperparameters:
+        path: str | None = None,
+        name: str | None = None,
+        eval_metric: str | TimeSeriesScorer | None = None,
+        hyperparameters: dict[str, Any] | None = None,
         **kwargs,  # noqa
     ):
         super().__init__(
@@ -79,10 +76,10 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
     def preprocess(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates:
+        known_covariates: TimeSeriesDataFrame | None = None,
         is_train: bool = False,
         **kwargs,
-    ) -> tuple[TimeSeriesDataFrame,
+    ) -> tuple[TimeSeriesDataFrame, TimeSeriesDataFrame | None]:
         if not self._get_tags()["allow_nan"]:
             data = data.fill_missing_values()
         return data, known_covariates
@@ -95,7 +92,7 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
         }
 
     @staticmethod
-    def _compute_n_jobs(n_jobs:
+    def _compute_n_jobs(n_jobs: int | float) -> int:
         if isinstance(n_jobs, float) and 0 < n_jobs <= 1:
             return max(int(cpu_count() * n_jobs), 1)
         elif isinstance(n_jobs, int):
@@ -103,7 +100,7 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
         else:
             raise ValueError(f"n_jobs must be a float between 0 and 1 or an integer (received n_jobs = {n_jobs})")
 
-    def _fit(self, train_data: TimeSeriesDataFrame, time_limit:
+    def _fit(self, train_data: TimeSeriesDataFrame, time_limit: int | None = None, **kwargs):
         self._check_fit_params()
 
         if time_limit is not None and time_limit < self.init_time_in_seconds:
@@ -184,7 +181,7 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
         self,
         time_series: pd.Series,
         use_fallback_model: bool,
-        end_time:
+        end_time: float | None = None,
     ) -> tuple[pd.DataFrame, bool]:
         if end_time is not None and time.time() >= end_time:
             raise TimeLimitExceeded
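_compute_n_jobs accepts either a fraction of the available cores or an absolute count; the model docstrings below additionally state that -1 means all cores. A sketch of the full mapping (the hunk above only shows the float branch and the error case; the negative-integer branch here is an assumption following the usual joblib convention):

from joblib import cpu_count

def compute_n_jobs(n_jobs: int | float) -> int:
    if isinstance(n_jobs, float) and 0 < n_jobs <= 1:
        # Fraction of cores, e.g. 0.5 -> half the CPUs, but always at least 1
        return max(int(cpu_count() * n_jobs), 1)
    elif isinstance(n_jobs, int):
        # Assumed convention: -1 -> all cores, -2 -> all but one, positive -> as given
        return cpu_count() + 1 + n_jobs if n_jobs < 0 else n_jobs
    raise ValueError(f"n_jobs must be a float between 0 and 1 or an integer (received n_jobs = {n_jobs})")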
autogluon/timeseries/models/local/naive.py

@@ -96,7 +96,7 @@ class AverageModel(AbstractLocalModel):
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
         When set to -1, all CPU cores are used.
-    max_ts_length :
+    max_ts_length : int | None, default = None
         If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
         This significantly speeds up fitting and usually leads to no change in accuracy.
     """
@@ -136,7 +136,7 @@ class SeasonalAverageModel(AbstractLocalModel):
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
         When set to -1, all CPU cores are used.
-    max_ts_length :
+    max_ts_length : int | None, default = None
         If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
         This significantly speeds up fitting and usually leads to no change in accuracy.
     """
autogluon/timeseries/models/local/npts.py

@@ -31,7 +31,7 @@ class NPTSModel(AbstractLocalModel):
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
         When set to -1, all CPU cores are used.
-    max_ts_length :
+    max_ts_length : int | None, default = 2500
         If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
         This significantly speeds up fitting and usually leads to no change in accuracy.
     """
@@ -59,6 +59,11 @@
     ) -> pd.DataFrame:
         from gluonts.model.npts import NPTSPredictor
 
+        # NPTS model is non-deterministic due to sampling. Set seed for reproducibility in parallel processes
+        # and restore original state to avoid side effects when running with n_jobs=1
+        original_random_state = np.random.get_state()
+        np.random.seed(123)
+
         local_model_args.pop("seasonal_period")
         num_samples = local_model_args.pop("num_samples")
         num_default_time_features = local_model_args.pop("num_default_time_features")
@@ -88,6 +93,7 @@
         forecast_dict = {"mean": forecast.mean}
         for q in self.quantile_levels:
             forecast_dict[str(q)] = forecast.quantile(q)
+        np.random.set_state(original_random_state)
         return pd.DataFrame(forecast_dict)
 
     def _more_tags(self) -> dict:
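The seed handling added above (save the global NumPy RNG state, seed deterministically, restore afterwards) keeps NPTS sampling reproducible without leaking the fixed seed into the caller's process. The same pattern can be packaged as a context manager; a sketch, not part of the diff:

import contextlib
import numpy as np

@contextlib.contextmanager
def fixed_numpy_seed(seed: int = 123):
    # Pin the global RNG for reproducible sampling, then restore the caller's state
    original_state = np.random.get_state()
    np.random.seed(seed)
    try:
        yield
    finally:
        np.random.set_state(original_state)

# Forecasts sampled inside the block are reproducible; code running afterwards is unaffected
with fixed_numpy_seed():
    samples = np.random.normal(size=5)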