PyPI - autogluon.timeseries - Versions diffs - 1.1.2b20241021__py3-none-any.whl → 1.1.2b20241023__py3-none-any.whl - Mend

autogluon.timeseries 1.1.2b20241021__py3-none-any.whl → 1.1.2b20241023__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

autogluon/timeseries/learner.py CHANGED Viewed

@@ -84,7 +84,7 @@ class TimeSeriesLearner(AbstractLearner):
         self._time_limit = time_limit
         time_start = time.time()
-        train_data = self.feature_generator.fit_transform(train_data, data_frame_name="train_data")
+        train_data = self.feature_generator.fit_transform(train_data)
         if val_data is not None:
             val_data = self.feature_generator.transform(val_data, data_frame_name="tuning_data")

autogluon/timeseries/models/__init__.py CHANGED Viewed

@@ -16,9 +16,7 @@ from .local import (
     AutoCESModel,
     AutoETSModel,
     AverageModel,
-    CrostonClassicModel,
-    CrostonOptimizedModel,
-    CrostonSBAModel,
+    CrostonModel,
     DynamicOptimizedThetaModel,
     ETSModel,
     IMAPAModel,
@@ -37,9 +35,7 @@ __all__ = [
     "AutoCESModel",
     "AutoETSModel",
     "AverageModel",
-    "CrostonClassicModel",
-    "CrostonSBAModel",
-    "CrostonOptimizedModel",
+    "CrostonModel",
     "DLinearModel",
     "DeepARModel",
     "DirectTabularModel",

autogluon/timeseries/models/local/__init__.py CHANGED Viewed

@@ -8,9 +8,7 @@ from .statsforecast import (
     AutoARIMAModel,
     AutoCESModel,
     AutoETSModel,
-    CrostonClassicModel,
-    CrostonOptimizedModel,
-    CrostonSBAModel,
+    CrostonModel,
     DynamicOptimizedThetaModel,
     ETSModel,
     IMAPAModel,

autogluon/timeseries/models/local/statsforecast.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import logging
-from typing import Any, Dict, Type
+from typing import Any, Dict, Optional, Type
 import numpy as np
 import pandas as pd
@@ -19,11 +19,13 @@ class AbstractStatsForecastModel(AbstractLocalModel):
         local_model_args["season_length"] = seasonal_period
         return local_model_args
-    def _get_model_type(self) -> Type:
+    def _get_model_type(self, variant: Optional[str] = None) -> Type:
         raise NotImplementedError
     def _get_local_model(self, local_model_args: Dict):
-        model_type = self._get_model_type()
+        local_model_args = local_model_args.copy()
+        variant = local_model_args.pop("variant", None)
+        model_type = self._get_model_type(variant)
         return model_type(**local_model_args)
     def _get_point_forecast(
@@ -154,7 +156,7 @@ class AutoARIMAModel(AbstractProbabilisticStatsForecastModel):
         local_model_args.setdefault("allowmean", True)
         return local_model_args
-    def _get_model_type(self):
+    def _get_model_type(self, variant: Optional[str] = None):
         from statsforecast.models import AutoARIMA
         return AutoARIMA
@@ -222,7 +224,7 @@ class ARIMAModel(AbstractProbabilisticStatsForecastModel):
         local_model_args.setdefault("order", (1, 1, 1))
         return local_model_args
-    def _get_model_type(self):
+    def _get_model_type(self, variant: Optional[str] = None):
         from statsforecast.models import ARIMA
         return ARIMA
@@ -265,7 +267,7 @@ class AutoETSModel(AbstractProbabilisticStatsForecastModel):
         "seasonal_period",
     ]
-    def _get_model_type(self):
+    def _get_model_type(self, variant: Optional[str] = None):
         from statsforecast.models import AutoETS
         return AutoETS
@@ -365,7 +367,7 @@ class DynamicOptimizedThetaModel(AbstractProbabilisticStatsForecastModel):
         "seasonal_period",
     ]
-    def _get_model_type(self):
+    def _get_model_type(self, variant: Optional[str] = None):
         from statsforecast.models import DynamicOptimizedTheta
         return DynamicOptimizedTheta
@@ -409,7 +411,7 @@ class ThetaModel(AbstractProbabilisticStatsForecastModel):
         "seasonal_period",
     ]
-    def _get_model_type(self):
+    def _get_model_type(self, variant: Optional[str] = None):
         from statsforecast.models import Theta
         return Theta
@@ -529,7 +531,7 @@ class AutoCESModel(AbstractProbabilisticStatsForecastModel):
         "seasonal_period",
     ]
-    def _get_model_type(self):
+    def _get_model_type(self, variant: Optional[str] = None):
         from statsforecast.models import AutoCES
         return AutoCES
@@ -591,58 +593,32 @@ class ADIDAModel(AbstractStatsForecastIntermittentDemandModel):
         This significantly speeds up fitting and usually leads to no change in accuracy.
     """
-    def _get_model_type(self):
+    def _get_model_type(self, variant: Optional[str] = None):
         from statsforecast.models import ADIDA
         return ADIDA
-class CrostonSBAModel(AbstractStatsForecastIntermittentDemandModel):
-    """Intermittent demand forecasting model using Croston's model with the Syntetos-Boylan
-    bias correction approach [SyntetosBoylan2001]_.
-    Based on `statsforecast.models.CrostonSBA <https://nixtla.mintlify.app/statsforecast/docs/models/crostonsba.html>`_.
+class CrostonModel(AbstractStatsForecastIntermittentDemandModel):
+    """Intermittent demand forecasting model using Croston's model from [Croston1972]_ and [SyntetosBoylan2001]_.
     References
     ----------
+    .. [Croston1972] Croston, John D. "Forecasting and stock control for intermittent demands." Journal of
+        the Operational Research Society 23.3 (1972): 289-303.
     .. [SyntetosBoylan2001] Syntetos, Aris A., and John E. Boylan. "On the bias of intermittent
         demand estimates." International journal of production economics 71.1-3 (2001): 457-466.
     Other Parameters
     ----------------
-    n_jobs : int or float, default = 0.5
-        Number of CPU cores used to fit the models in parallel.
-        When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
-        When set to a positive integer, that many cores are used.
-        When set to -1, all CPU cores are used.
-    max_ts_length : int, default = 2500
-        If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
-        This significantly speeds up fitting and usually leads to no change in accuracy.
-    """
-    def _get_model_type(self):
-        from statsforecast.models import CrostonSBA
+    variant : {"SBA", "classic", "optimized"}, default = "SBA"
+        Variant of the Croston model that is used. Available options:
-        return CrostonSBA
+        - `"classic"` - variant of the Croston method where the smoothing parameter is fixed to 0.1 (based on `statsforecast.models.CrostonClassic <https://nixtla.mintlify.app/statsforecast/docs/models/crostonclassic.html>`_)
+        - `"SBA"` - variant of the Croston method based on Syntetos-Boylan Approximation (based on `statsforecast.models.CrostonSBA <https://nixtla.mintlify.app/statsforecast/docs/models/crostonsba.html>`_)
+        - `"optimized"` - variant of the Croston method where the smoothing parameter is optimized (based on `statsforecast.models.CrostonOptimized <https://nixtla.mintlify.app/statsforecast/docs/models/crostonoptimized.html>`_)
-class CrostonOptimizedModel(AbstractStatsForecastIntermittentDemandModel):
-    """Intermittent demand forecasting model using Croston's model where the smoothing parameter
-    is optimized [Croston1972]_.
-    Based on `statsforecast.models.CrostonOptimized <https://nixtla.mintlify.app/statsforecast/docs/models/crostonoptimized.html>`_.
-    References
-    ----------
-    .. [Croston1972] Croston, John D. "Forecasting and stock control for intermittent demands." Journal of
-        the Operational Research Society 23.3 (1972): 289-303.
-    Other Parameters
-    ----------------
     n_jobs : int or float, default = 0.5
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
@@ -653,41 +629,30 @@ class CrostonOptimizedModel(AbstractStatsForecastIntermittentDemandModel):
         This significantly speeds up fitting and usually leads to no change in accuracy.
     """
-    def _get_model_type(self):
-        from statsforecast.models import CrostonOptimized
-        return CrostonOptimized
-class CrostonClassicModel(AbstractStatsForecastIntermittentDemandModel):
-    """Intermittent demand forecasting model using Croston's model where the smoothing parameter
-    is fixed to 0.1 [Croston1972]_.
-    Based on `statsforecast.models.CrostonClassic <https://nixtla.mintlify.app/statsforecast/docs/models/crostonclassic.html>`_.
-    References
-    ----------
-    .. [Croston1972] Croston, John D. "Forecasting and stock control for intermittent demands." Journal of
-        the Operational Research Society 23.3 (1972): 289-303.
+    allowed_local_model_args = [
+        "variant",
+    ]
+    def _get_model_type(self, variant: Optional[str] = None):
+        from statsforecast.models import CrostonClassic, CrostonOptimized, CrostonSBA
-    Other Parameters
-    ----------------
-    n_jobs : int or float, default = 0.5
-        Number of CPU cores used to fit the models in parallel.
-        When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
-        When set to a positive integer, that many cores are used.
-        When set to -1, all CPU cores are used.
-    max_ts_length : int, default = 2500
-        If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
-        This significantly speeds up fitting and usually leads to no change in accuracy.
-    """
+        model_variants = {
+            "classic": CrostonClassic,
+            "sba": CrostonSBA,
+            "optimized": CrostonOptimized,
+        }
-    def _get_model_type(self):
-        from statsforecast.models import CrostonClassic
+        if not isinstance(variant, str) or variant.lower() not in model_variants:
+            raise ValueError(
+                f"Invalid model variant '{variant}'. Available Croston model variants: {list(model_variants)}"
+            )
+        else:
+            return model_variants[variant.lower()]
-        return CrostonClassic
+    def _update_local_model_args(self, local_model_args: dict) -> dict:
+        local_model_args = super()._update_local_model_args(local_model_args)
+        local_model_args.setdefault("variant", "SBA")
+        return local_model_args
 class IMAPAModel(AbstractStatsForecastIntermittentDemandModel):
@@ -716,7 +681,7 @@ class IMAPAModel(AbstractStatsForecastIntermittentDemandModel):
         This significantly speeds up fitting and usually leads to no change in accuracy.
     """
-    def _get_model_type(self):
+    def _get_model_type(self, variant: Optional[str] = None):
         from statsforecast.models import IMAPA
         return IMAPA
@@ -738,7 +703,7 @@ class ZeroModel(AbstractStatsForecastIntermittentDemandModel):
         This significantly speeds up fitting and usually leads to no change in accuracy.
     """
-    def _get_model_type(self):
+    def _get_model_type(self, variant: Optional[str] = None):
         # ZeroModel does not depend on a StatsForecast implementation
         raise NotImplementedError

autogluon/timeseries/models/presets.py CHANGED Viewed

@@ -16,7 +16,7 @@ from . import (
     AutoETSModel,
     AverageModel,
     ChronosModel,
-    CrostonSBAModel,
+    CrostonModel,
     DeepARModel,
     DirectTabularModel,
     DLinearModel,
@@ -68,7 +68,8 @@ MODEL_TYPES = dict(
     ETS=ETSModel,
     ARIMA=ARIMAModel,
     ADIDA=ADIDAModel,
-    CrostonSBA=CrostonSBAModel,
+    Croston=CrostonModel,
+    CrostonSBA=CrostonModel,  # Alias for backward compatibility
     IMAPA=IMAPAModel,
     Chronos=ChronosModel,
 )
@@ -85,7 +86,8 @@ DEFAULT_MODEL_PRIORITY = dict(
     # All local models are grouped together to make sure that joblib parallel pool is reused
     NPTS=80,
     ETS=80,
-    CrostonSBA=80,
+    CrostonSBA=80,  # Alias for backward compatibility
+    Croston=80,
     Theta=75,
     DynamicOptimizedTheta=75,
     AutoETS=70,
@@ -141,7 +143,7 @@ def get_default_hps(key):
         },
         "default": {
             "SeasonalNaive": {},
-            "CrostonSBA": {},
+            "Croston": {},
             "AutoETS": {},
             "AutoARIMA": {},
             "NPTS": {},

autogluon/timeseries/predictor.py CHANGED Viewed

@@ -291,7 +291,9 @@ class TimeSeriesPredictor(TimeSeriesPredictorDeprecatedMixin):
             Preprocessed data in TimeSeriesDataFrame format.
         """
         df = self._to_data_frame(data, name=name)
-        df = df.astype({self.target: "float64"})
+        if not pd.api.types.is_numeric_dtype(df[self.target]):
+            raise ValueError(f"Target column {name}['{self.target}'] has a non-numeric dtype {df[self.target].dtype}")
+        df[self.target] = df[self.target].astype("float64")
         # MultiIndex.is_monotonic_increasing checks if index is sorted by ["item_id", "timestamp"]
         if not df.index.is_monotonic_increasing:
             df = df.sort_index()
@@ -442,12 +444,15 @@ class TimeSeriesPredictor(TimeSeriesPredictorDeprecatedMixin):
             Columns of ``train_data`` except ``target`` and those listed in ``known_covariates_names`` will be
             interpreted as ``past_covariates`` - covariates that are known only in the past.
-            If ``train_data`` has static features (i.e., ``train_data.static_features`` is a pandas DataFrame), the
-            predictor will interpret columns with ``int`` and ``float`` dtypes as continuous (real-valued) features,
-            columns with ``object`` and ``str`` dtypes as categorical features, and will ignore the rest of columns.
+            If ``train_data`` contains covariates or static features, they will be interpreted as follows:
-            For example, to ensure that column "store_id" with dtype ``int`` is interpreted as a category,
-            we need to change its type to ``category``::
+                * columns with ``int``, ``bool`` and ``float`` dtypes are interpreted as continuous (real-valued) features
+                * columns with ``object``, ``str`` and ``category`` dtypes are interpreted as categorical features
+                * columns with other dtypes are ignored
+            To ensure that the column type is interpreted correctly, please convert it to one of the above dtypes.
+            For example, to ensure that column "store_id" with dtype ``int`` is interpreted as a category, change
+            its dtype to ``category``::
                 data.static_features["store_id"] = data.static_features["store_id"].astype("category")
@@ -497,7 +502,7 @@ class TimeSeriesPredictor(TimeSeriesPredictorDeprecatedMixin):
               and ``DirectTabular``. These models are fast to train but may not be very accurate.
             - ``"medium_quality"``: all models mentioned above + deep learning model ``TemporalFusionTransformer``. Default setting that produces good forecasts
               with reasonable training time.
-            - ``"high_quality"``: All ML models available in AutoGluon + additional statistical models (``NPTS``, ``AutoETS``, ``AutoARIMA``, ``CrostonSBA``,
+            - ``"high_quality"``: All ML models available in AutoGluon + additional statistical models (``NPTS``, ``AutoETS``, ``AutoARIMA``, ``Croston``,
               ``DynamicOptimizedTheta``). Much more accurate than ``medium_quality``, but takes longer to train.
             - ``"best_quality"``: Same models as in ``"high_quality"``, but performs validation with multiple backtests. Usually better than ``high_quality``, but takes even longer to train.

autogluon/timeseries/utils/features.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import logging
 import reprlib
+import time
 from dataclasses import dataclass, field
 from typing import Any, List, Literal, Optional, Tuple
@@ -73,7 +74,7 @@ class ContinuousAndCategoricalFeatureGenerator(PipelineFeatureGenerator):
     Imputes missing categorical features with the most frequent value in the training set.
     """
-    def __init__(self, verbosity: int = 0, minimum_cat_count=2, float_dtype: str = "float64", **kwargs):
+    def __init__(self, verbosity: int = 0, minimum_cat_count=2, **kwargs):
         generators = [
             CategoryFeatureGenerator(minimum_cat_count=minimum_cat_count, fillna="mode"),
             IdentityFeatureGenerator(infer_features_in_args={"valid_raw_types": [R_INT, R_FLOAT]}),
@@ -84,34 +85,28 @@ class ContinuousAndCategoricalFeatureGenerator(PipelineFeatureGenerator):
             pre_generators=[AsTypeFeatureGenerator(convert_bool=False)],
             pre_enforce_types=False,
             pre_drop_useless=False,
+            post_drop_duplicates=True,
+            reset_index=False,
             verbosity=verbosity,
             **kwargs,
         )
-        self.float_dtype = float_dtype
-    def _convert_numerical_columns_to_float(self, df: pd.DataFrame) -> pd.DataFrame:
-        """Convert the dtype of all numerical (float or int) columns to the given float dtype."""
-        numeric_columns = [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])]
-        return df.astype({col: self.float_dtype for col in numeric_columns})
     def transform(self, X: pd.DataFrame, *args, **kwargs) -> pd.DataFrame:
-        if isinstance(X, TimeSeriesDataFrame):
-            X = pd.DataFrame(X)
-        return self._convert_numerical_columns_to_float(super().transform(X, *args, **kwargs))
+        return super().transform(X, *args, **kwargs)
     def fit_transform(self, X: pd.DataFrame, *args, **kwargs) -> pd.DataFrame:
         # PipelineFeatureGenerator does not use transform() inside fit_transform(), so we need to override both methods
-        if isinstance(X, TimeSeriesDataFrame):
-            X = pd.DataFrame(X)
-        transformed = self._convert_numerical_columns_to_float(super().fit_transform(X, *args, **kwargs))
+        transformed = super().fit_transform(X, *args, **kwargs)
         # Ignore the '__dummy__' feature generated by PipelineFeatureGenerator if none of the features are informative
-        return transformed.drop(columns=["__dummy__"], errors="ignore")
+        if "__dummy__" in transformed.columns:
+            transformed.drop(columns=["__dummy__"], inplace=True)
+        return transformed
 class TimeSeriesFeatureGenerator:
     """Takes care of preprocessing for static_features and past/known covariates.
-    All covariates & static features are converted into either float64 or categorical dtype.
+    All covariates & static features are converted into either float or categorical dtype.
     Missing values in the target column are left as-is but missing values in static features & covariates are imputed.
     Imputation logic is as follows:
@@ -119,20 +114,38 @@ class TimeSeriesFeatureGenerator:
     2. For real static features, we impute missing values with the median of the training set.
     3. For real covariates (past, known), we ffill + bfill within each time series. If for some time series all
         covariate values are missing, we fill them with the median of the training set.
+    Parameters
+    ----------
+    target : str
+        Name of the target column.
+    known_covariates_names : List[str]
+        Columns that contain covariates that are known into the future.
+    float_dtype : str, default = "float32"
+        Numpy float dtype to which all numeric columns (float, int, bool) will be converted both in static & dynamic dfs.
+    num_samples : int or None, default = 20_000
+        Number of rows sampled from the training dataset to speed up computation of the median (used later for imputation).
+        If set to `None`, median will be computed using all rows.
     """
-    def __init__(self, target: str, known_covariates_names: List[str], float_dtype: str = "float64"):
+    def __init__(
+        self,
+        target: str,
+        known_covariates_names: List[str],
+        float_dtype: str = "float32",
+        num_samples: Optional[int] = 20_000,
+    ):
         self.target = target
         self.float_dtype = float_dtype
+        self.num_samples = num_samples
         self._is_fit = False
         self.known_covariates_names = list(known_covariates_names)
         self.past_covariates_names = []
-        self.known_covariates_pipeline = ContinuousAndCategoricalFeatureGenerator(float_dtype=float_dtype)
-        self.past_covariates_pipeline = ContinuousAndCategoricalFeatureGenerator(float_dtype=float_dtype)
+        self.known_covariates_pipeline = ContinuousAndCategoricalFeatureGenerator()
+        self.past_covariates_pipeline = ContinuousAndCategoricalFeatureGenerator()
         # Cat features with cat_count=1 are fine in static_features since they are repeated for all time steps in a TS
-        self.static_feature_pipeline = ContinuousAndCategoricalFeatureGenerator(
-            minimum_cat_count=1, float_dtype=float_dtype
-        )
+        self.static_feature_pipeline = ContinuousAndCategoricalFeatureGenerator(minimum_cat_count=1)
         self.covariate_metadata: CovariateMetadata = None
         self._train_covariates_real_median: Optional[pd.Series] = None
         self._train_static_real_median: Optional[pd.Series] = None
@@ -142,8 +155,12 @@ class TimeSeriesFeatureGenerator:
         return [self.target] + list(self.known_covariates_names) + list(self.past_covariates_names)
     def fit(self, data: TimeSeriesDataFrame) -> None:
+        self.fit_transform(data)
+    def fit_transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
         assert not self._is_fit, f"{self.__class__.__name__} has already been fit"
+        start_time = time.monotonic()
         self.past_covariates_names = []
         for column in data.columns:
             if column != self.target and column not in self.known_covariates_names:
@@ -153,23 +170,33 @@ class TimeSeriesFeatureGenerator:
             data, required_column_names=self.required_column_names, data_frame_name="train_data"
         )
+        # Convert to a pd.DataFrame and remove index for faster processing
+        df = pd.DataFrame(data)
+        index = df.index
+        df.reset_index(drop=True, inplace=True)
+        df = self._convert_numeric_to_float_dtype(df)
+        dfs_to_concat = [df[[self.target]]]
         logger.info("\nProvided data contains following columns:")
         logger.info(f"\ttarget: '{self.target}'")
         if len(self.known_covariates_names) > 0:
-            known_covariates_df = self.known_covariates_pipeline.fit_transform(data[self.known_covariates_names])
+            known_covariates_df = self.known_covariates_pipeline.fit_transform(df[self.known_covariates_names])
             logger.info("\tknown_covariates:")
             known_covariates_cat, known_covariates_real = self._detect_and_log_column_types(known_covariates_df)
             self.known_covariates_names = self.known_covariates_pipeline.features_in
+            dfs_to_concat.append(known_covariates_df)
         else:
             known_covariates_cat = []
             known_covariates_real = []
         if len(self.past_covariates_names) > 0:
-            past_covariates_df = self.past_covariates_pipeline.fit_transform(data[self.past_covariates_names])
+            past_covariates_df = self.past_covariates_pipeline.fit_transform(df[self.past_covariates_names])
             logger.info("\tpast_covariates:")
             past_covariates_cat, past_covariates_real = self._detect_and_log_column_types(past_covariates_df)
             self.past_covariates_names = self.past_covariates_pipeline.features_in
+            dfs_to_concat.append(past_covariates_df)
         else:
             past_covariates_cat = []
             past_covariates_real = []
@@ -179,7 +206,9 @@ class TimeSeriesFeatureGenerator:
         )
         if data.static_features is not None:
-            static_features_df = self.static_feature_pipeline.fit_transform(data.static_features)
+            static_features_df = self.static_feature_pipeline.fit_transform(
+                self._convert_numeric_to_float_dtype(data.static_features)
+            )
             logger.info("\tstatic_features:")
             static_features_cat, static_features_real = self._detect_and_log_column_types(static_features_df)
             ignored_static_features = data.static_features.columns.difference(self.static_feature_pipeline.features_in)
@@ -188,6 +217,7 @@ class TimeSeriesFeatureGenerator:
             static_features_cat = []
             static_features_real = []
             ignored_static_features = []
+            static_features_df = None
         if len(ignored_covariates) > 0 or len(ignored_static_features) > 0:
             logger.info("\nAutoGluon will ignore following non-numeric/non-informative columns:")
@@ -209,9 +239,47 @@ class TimeSeriesFeatureGenerator:
             static_features_cat=static_features_cat,
             static_features_real=static_features_real,
         )
-        self._train_covariates_real_median = data[self.covariate_metadata.covariates_real].median()
+        # Median of real-valued covariates will be used for missing value imputation
+        if self.num_samples is not None and len(df) > self.num_samples:
+            df = df.sample(n=self.num_samples, replace=True)
+        self._train_covariates_real_median = df[self.covariate_metadata.covariates_real].median()
+        self.fit_time = time.monotonic() - start_time
         self._is_fit = True
+        df_out = self._concat_dfs(dfs_to_concat)
+        df_out.index = index
+        ts_df = TimeSeriesDataFrame(df_out, static_features=self._impute_static_features(static_features_df))
+        return self._impute_covariates(ts_df, column_names=self.covariate_metadata.covariates_real)
+    @staticmethod
+    def _concat_dfs(dfs_to_concat: List[pd.DataFrame]) -> pd.DataFrame:
+        if len(dfs_to_concat) == 1:
+            return dfs_to_concat[0]
+        else:
+            return pd.concat(dfs_to_concat, axis=1, copy=False)
+    def _impute_covariates(self, ts_df: TimeSeriesDataFrame, column_names: List[str]) -> TimeSeriesDataFrame:
+        """Impute missing values in selected columns with ffill, bfill, and median imputation."""
+        if len(column_names) > 0:
+            # ffill + bfill covariates that have at least some observed values
+            covariates_real = ts_df[column_names].fill_missing_values()
+            # If for some items covariates consist completely of NaNs, fill them with median of training data
+            if np.isnan(covariates_real.to_numpy()).any():
+                covariates_real.fillna(self._train_covariates_real_median, inplace=True)
+            ts_df[column_names] = covariates_real
+        return ts_df
+    def _impute_static_features(self, static_df: Optional[pd.DataFrame]) -> Optional[pd.DataFrame]:
+        """Impute missing values in static features using the median."""
+        static_real_names = self.covariate_metadata.static_features_real
+        if static_df is not None and static_real_names:
+            static_real = static_df[static_real_names]
+            if np.isnan(static_real.to_numpy()).any():
+                static_df[static_real_names] = static_real.fillna(self._train_static_real_median)
+        return static_df
     def transform(self, data: TimeSeriesDataFrame, data_frame_name: str = "data") -> TimeSeriesDataFrame:
         """Transform static features and past/known covariates.
@@ -224,38 +292,32 @@ class TimeSeriesFeatureGenerator:
         self._check_required_columns_are_present(
             data, required_column_names=self.required_column_names, data_frame_name=data_frame_name
         )
-        dfs = [data[[self.target]]]
+        # Convert to a pd.DataFrame and remove index for faster processing
+        df = pd.DataFrame(data)
+        index = df.index
+        df.reset_index(drop=True, inplace=True)
+        dfs_to_concat = [df[[self.target]]]
         if len(self.known_covariates_names) > 0:
-            dfs.append(self.known_covariates_pipeline.transform(data[self.known_covariates_names]))
+            known_covariates_df = self.known_covariates_pipeline.transform(df[self.known_covariates_names])
+            dfs_to_concat.append(known_covariates_df)
         if len(self.past_covariates_names) > 0:
-            dfs.append(self.past_covariates_pipeline.transform(data[self.past_covariates_names]))
+            past_covariates_df = self.past_covariates_pipeline.transform(df[self.past_covariates_names])
+            dfs_to_concat.append(past_covariates_df)
         if self.static_feature_pipeline.is_fit():
             if data.static_features is None:
                 raise ValueError(f"Provided {data_frame_name} must contain static_features")
-            static_features = self.static_feature_pipeline.transform(data.static_features)
-            static_real_names = self.covariate_metadata.static_features_real
-            # Fill missing static_features_real with the median of the training set
-            if static_real_names and static_features[static_real_names].isna().any(axis=None):
-                static_features[static_real_names] = static_features[static_real_names].fillna(
-                    self._train_static_real_median
-                )
+            static_features_df = self.static_feature_pipeline.transform(data.static_features)
         else:
-            static_features = None
-        ts_df = TimeSeriesDataFrame(pd.concat(dfs, axis=1), static_features=static_features)
-        covariates_names = self.covariate_metadata.covariates
-        if len(covariates_names) > 0:
-            # ffill + bfill covariates that have at least some observed values
-            ts_df[covariates_names] = ts_df[covariates_names].fill_missing_values()
-            # If for some items covariates consist completely of NaNs, fill them with median of training data
-            if ts_df[covariates_names].isna().any(axis=None):
-                ts_df[covariates_names] = ts_df[covariates_names].fillna(self._train_covariates_real_median)
+            static_features_df = None
-        return ts_df
+        df_out = self._concat_dfs(dfs_to_concat)
+        df_out.index = index
+        ts_df = TimeSeriesDataFrame(df_out, static_features=self._impute_static_features(static_features_df))
+        return self._impute_covariates(ts_df, column_names=self.covariate_metadata.covariates_real)
     def transform_future_known_covariates(
         self, known_covariates: Optional[TimeSeriesDataFrame]
@@ -266,20 +328,15 @@ class TimeSeriesFeatureGenerator:
             self._check_required_columns_are_present(
                 known_covariates, required_column_names=self.known_covariates_names, data_frame_name="known_covariates"
             )
-            known_covariates = TimeSeriesDataFrame(self.known_covariates_pipeline.transform(known_covariates))
-            # ffill + bfill covariates that have at least some observed values
-            known_covariates = known_covariates.fill_missing_values()
-            # If for some items covariates consist completely of NaNs, fill them with median of training data
-            if known_covariates.isna().any(axis=None):
-                known_covariates = known_covariates.fillna(self._train_covariates_real_median)
-            return known_covariates
+            known_covariates = TimeSeriesDataFrame(
+                self.known_covariates_pipeline.transform(pd.DataFrame(known_covariates))
+            )
+            return self._impute_covariates(
+                known_covariates, column_names=self.covariate_metadata.known_covariates_real
+            )
         else:
             return None
-    def fit_transform(self, data: TimeSeriesDataFrame, data_frame_name: str = "data") -> TimeSeriesDataFrame:
-        self.fit(data)
-        return self.transform(data, data_frame_name=data_frame_name)
     @staticmethod
     def _detect_and_log_column_types(transformed_df: pd.DataFrame) -> Tuple[List[str], List[str]]:
         """Log & return names of categorical and real-valued columns in the DataFrame."""
@@ -305,6 +362,15 @@ class TimeSeriesFeatureGenerator:
                 f"{len(missing_columns)} columns are missing from {data_frame_name}: {reprlib.repr(missing_columns.to_list())}"
             )
+    def _convert_numeric_to_float_dtype(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Convert the dtype of all numeric (float, int or bool) columns to self.float_dtype."""
+        numeric_columns = [
+            col for col, dtype in df.dtypes.items() if pd.api.types.is_numeric_dtype(dtype) and col != self.target
+        ]
+        if len(numeric_columns) > 0:
+            df = df.astype({col: self.float_dtype for col in numeric_columns}, copy=False)
+        return df
 class AbstractFeatureImportanceTransform:
     """Abstract class for transforms that replace a given feature with dummy or shuffled values,

autogluon/timeseries/version.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """This is the autogluon version file."""
-__version__ = '1.1.2b20241021'
+__version__ = '1.1.2b20241023'
 __lite__ = False

{autogluon.timeseries-1.1.2b20241021.dist-info → autogluon.timeseries-1.1.2b20241023.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: autogluon.timeseries
-Version: 1.1.2b20241021
+Version: 1.1.2b20241023
 Summary: Fast and Accurate ML in 3 Lines of Code
 Home-page: https://github.com/autogluon/autogluon
 Author: AutoGluon Community
@@ -52,9 +52,9 @@ Requires-Dist: fugue>=0.9.0
 Requires-Dist: tqdm<5,>=4.38
 Requires-Dist: orjson~=3.9
 Requires-Dist: tensorboard<3,>=2.9
-Requires-Dist: autogluon.core[raytune]==1.1.2b20241021
-Requires-Dist: autogluon.common==1.1.2b20241021
-Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost]==1.1.2b20241021
+Requires-Dist: autogluon.core[raytune]==1.1.2b20241023
+Requires-Dist: autogluon.common==1.1.2b20241023
+Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost]==1.1.2b20241023
 Provides-Extra: all
 Requires-Dist: optimum[onnxruntime]<1.19,>=1.17; extra == "all"
 Provides-Extra: chronos-onnx

{autogluon.timeseries-1.1.2b20241021.dist-info → autogluon.timeseries-1.1.2b20241023.dist-info}/RECORD RENAMED Viewed

@@ -1,10 +1,10 @@
-autogluon.timeseries-1.1.2b20241021-py3.8-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
+autogluon.timeseries-1.1.2b20241023-py3.8-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
 autogluon/timeseries/__init__.py,sha256=_CrLLc1fkjen7UzWoO0Os8WZoHOgvZbHKy46I8v_4k4,304
 autogluon/timeseries/evaluator.py,sha256=l642tYfTHsl8WVIq_vV6qhgAFVFr9UuZD7gLra3A_Kc,250
-autogluon/timeseries/learner.py,sha256=NXhftyqMD8Bl1QHIBN82UKP0UlCV_ACughZqkmMf4oY,14043
-autogluon/timeseries/predictor.py,sha256=BUjFX5I_tgim9oo43cRgLBAxKY1JfduKtHQxY8BPC-Y,84561
+autogluon/timeseries/learner.py,sha256=3dUxI-U6TGfNtRQUzWTvBIo1GKeXYOhxIX_q7Fed9eA,14013
+autogluon/timeseries/predictor.py,sha256=1U9ic89B_JEHyzgKSu2-TN4XY9GmA8F1C77_eUBpQlI,84911
 autogluon/timeseries/splitter.py,sha256=eghGwAAN2_cxGk5aJBILgjGWtLzjxJcytMy49gg_q18,3061
-autogluon/timeseries/version.py,sha256=ZopU-NYOc_JfGqp3WtLoNfYh3BkvThJwgZcwgCKXWk0,90
+autogluon/timeseries/version.py,sha256=zNXzLcd2xHl1327Vj6HlIYZP0k8bxYq-NVGTSmrdkOc,90
 autogluon/timeseries/configs/__init__.py,sha256=BTtHIPCYeGjqgOcvqb8qPD4VNX-ICKOg6wnkew1cPOE,98
 autogluon/timeseries/configs/presets_configs.py,sha256=94-yL9teDHKs2irWjP3kpewI7FE1ChYCgEgz9XHJ6gc,1965
 autogluon/timeseries/dataset/__init__.py,sha256=UvnhAN5tjgxXTHoZMQDy64YMDj4Xxa68yY7NP4vAw0o,81
@@ -14,8 +14,8 @@ autogluon/timeseries/metrics/abstract.py,sha256=9xCFQ3NaR1C0hn01M7oBd72a_CiNV-w6
 autogluon/timeseries/metrics/point.py,sha256=xy8sKrBbuxZ7yTW21TDPayKnEj2FBj1AEseJxUdneqE,13399
 autogluon/timeseries/metrics/quantile.py,sha256=owMbOAJYwVyzdRkrJpuCGUXk937GU843QndCZyp5n9Y,3967
 autogluon/timeseries/metrics/utils.py,sha256=eJ63TCR-UwbeJ1c2Qm7B2q-8B3sFthPgiooEccrf2Kc,912
-autogluon/timeseries/models/__init__.py,sha256=WKV7DIpJkrwEj0cUfscESp67Ydap9hAqaNTYvgi2EIA,1303
-autogluon/timeseries/models/presets.py,sha256=7ORBU-7fCwwYlpXaWCXEfNx0pss3mvB6KGSsQ1kyw2k,11673
+autogluon/timeseries/models/__init__.py,sha256=MYD9JJ-wUDE5B6jW6E6LU2eXQ6vflfQBvqQJkdzJa3A,1189
+autogluon/timeseries/models/presets.py,sha256=ujNt_hft_5eNkh-Wj_Na9GBdBmI-JdnBnOEHq8X0qXc,11778
 autogluon/timeseries/models/abstract/__init__.py,sha256=wvDsQAZIV0N3AwBeMaGItoQ82trEfnT-nol2AAOIxBg,102
 autogluon/timeseries/models/abstract/abstract_timeseries_model.py,sha256=siy-OW4zflN61-pnuhvYawDvchm3zXb1ta8HUDLxhWY,24793
 autogluon/timeseries/models/abstract/model_trial.py,sha256=ENPg_7nsdxIvaNM0o0UShZ3x8jFlRmwRc5m0fGPC0TM,3720
@@ -34,11 +34,11 @@ autogluon/timeseries/models/gluonts/__init__.py,sha256=asC1PTj4j9xMbilvk1IT1juln
 autogluon/timeseries/models/gluonts/abstract_gluonts.py,sha256=QRGCLN9ZMw5zCgO5hNAOjHqp17zGn1-Uy0d7VEhYtlQ,34021
 autogluon/timeseries/models/gluonts/torch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autogluon/timeseries/models/gluonts/torch/models.py,sha256=85MWDXPwDncGwLijkm-K1tS-05LvGq4Xl-WbbIcYCO8,24906
-autogluon/timeseries/models/local/__init__.py,sha256=JyckWWgMG1BTIWJqFTW6e1O-eb0LPPOwtXwmb1ErohQ,756
+autogluon/timeseries/models/local/__init__.py,sha256=e2UImoJhmj70E148IIObv90C_bHxgyLNk6YsS4p7pfs,701
 autogluon/timeseries/models/local/abstract_local_model.py,sha256=af3GFfUIGnVNzzZJ-WI61lw83lDFfgB0AfGxmkb-t_4,12226
 autogluon/timeseries/models/local/naive.py,sha256=iwRcFMFmJKPWPbD9TWaIUS51oav69F_VAp6-jb_5SUE,7249
 autogluon/timeseries/models/local/npts.py,sha256=Bp74doKnfpGE8ywP4FWOCI_RwRMsmgocYDfGtq764DA,4143
-autogluon/timeseries/models/local/statsforecast.py,sha256=79swW7g7bn1CmuGY79i7r0uj0QZr6WLIfH_x3p1FTDA,32742
+autogluon/timeseries/models/local/statsforecast.py,sha256=C05waZQ4c2Ewm7FfARkVFWLRk_k0XvgYsQi74tHk_1U,32226
 autogluon/timeseries/models/multi_window/__init__.py,sha256=Bq7AT2Jxdd4WNqmjTdzeqgNiwn1NCyWp4tBIWaM-zfI,60
 autogluon/timeseries/models/multi_window/multi_window_model.py,sha256=EAXzoQo96zTPNz9BTYDmV1878OVKb9F6h39y386N3zU,11740
 autogluon/timeseries/trainer/__init__.py,sha256=lxiOT-Gc6BEnr_yWQqra85kEngeM_wtH2SCaRbmC_qE,170
@@ -47,7 +47,7 @@ autogluon/timeseries/trainer/auto_trainer.py,sha256=psJFZBwWWPlLjNwAgvO4OUJXsRW1
 autogluon/timeseries/transforms/__init__.py,sha256=lzDavxdgGIz5m_DmSpNa9ewNU9Evndam3YXfOEk6kwY,174
 autogluon/timeseries/transforms/scaler.py,sha256=30JrAnZwj58ntes-YP1H_XmeVLGtFepjWnRzPQQ-t4k,5352
 autogluon/timeseries/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-autogluon/timeseries/utils/features.py,sha256=hEir-2lU8fvHjt5r_LG9tLZEk5wNdRdeLRE7qF5z3_Y,19585
+autogluon/timeseries/utils/features.py,sha256=VvBQzaymSSzxI9khtcXbpir-qo1NWHe51O7F6ynyh_s,21943
 autogluon/timeseries/utils/forecast.py,sha256=p0WKM9Q0nLAwwmCgYZI1zi9mCOWXWJfllEt2lPRQl4M,1882
 autogluon/timeseries/utils/warning_filters.py,sha256=HMXNDo9jOUdf9wvyp-Db55xTq_Ctj6uso7qPhngoJPQ,1964
 autogluon/timeseries/utils/datetime/__init__.py,sha256=bTMR8jLh1LW55vHjbOr1zvWRMF_PqbvxpS-cUcNIDWI,173
@@ -55,11 +55,11 @@ autogluon/timeseries/utils/datetime/base.py,sha256=3NdsH3NDq4cVAOSoy3XpaNixyNlbj
 autogluon/timeseries/utils/datetime/lags.py,sha256=GoLtvcZ8oKb3QkoBJ9E59LSPLOP7Qjxrr2UmMSZgjyw,5909
 autogluon/timeseries/utils/datetime/seasonality.py,sha256=h_4w00iEytAz_N_EpCENQ8RCXy7KQITczrYjBgVqWkQ,764
 autogluon/timeseries/utils/datetime/time_features.py,sha256=PAXbYbQ0z_5GFbkxSNi41zLY_2-U3x0Ynm1m_WhdtGc,2572
-autogluon.timeseries-1.1.2b20241021.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
-autogluon.timeseries-1.1.2b20241021.dist-info/METADATA,sha256=T4s0hQwSi_raT80B7FXIhWt_lzlcaM5Ws3KRKL5b8Aw,12355
-autogluon.timeseries-1.1.2b20241021.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
-autogluon.timeseries-1.1.2b20241021.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-autogluon.timeseries-1.1.2b20241021.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
-autogluon.timeseries-1.1.2b20241021.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
-autogluon.timeseries-1.1.2b20241021.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-autogluon.timeseries-1.1.2b20241021.dist-info/RECORD,,
+autogluon.timeseries-1.1.2b20241023.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
+autogluon.timeseries-1.1.2b20241023.dist-info/METADATA,sha256=J_J15yWFk4ShviPVpn-k4VsvGGvJJ1icrE6goKZ330M,12355
+autogluon.timeseries-1.1.2b20241023.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
+autogluon.timeseries-1.1.2b20241023.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+autogluon.timeseries-1.1.2b20241023.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
+autogluon.timeseries-1.1.2b20241023.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
+autogluon.timeseries-1.1.2b20241023.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+autogluon.timeseries-1.1.2b20241023.dist-info/RECORD,,

/autogluon.timeseries-1.1.2b20241021-py3.8-nspkg.pth → /autogluon.timeseries-1.1.2b20241023-py3.8-nspkg.pth RENAMED Viewed

File without changes

{autogluon.timeseries-1.1.2b20241021.dist-info → autogluon.timeseries-1.1.2b20241023.dist-info}/LICENSE RENAMED Viewed

File without changes

{autogluon.timeseries-1.1.2b20241021.dist-info → autogluon.timeseries-1.1.2b20241023.dist-info}/NOTICE RENAMED Viewed

File without changes

{autogluon.timeseries-1.1.2b20241021.dist-info → autogluon.timeseries-1.1.2b20241023.dist-info}/WHEEL RENAMED Viewed

File without changes

{autogluon.timeseries-1.1.2b20241021.dist-info → autogluon.timeseries-1.1.2b20241023.dist-info}/namespace_packages.txt RENAMED Viewed

File without changes

{autogluon.timeseries-1.1.2b20241021.dist-info → autogluon.timeseries-1.1.2b20241023.dist-info}/top_level.txt RENAMED Viewed

File without changes

{autogluon.timeseries-1.1.2b20241021.dist-info → autogluon.timeseries-1.1.2b20241023.dist-info}/zip-safe RENAMED Viewed

File without changes

autogluon.timeseries 1.1.2b20241021__py3-none-any.whl → 1.1.2b20241023__py3-none-any.whl

autogluon.timeseries 1.1.2b20241021__py3-none-any.whl → 1.1.2b20241023__py3-none-any.whl