autogluon.timeseries 1.1.2b20241021__py3-none-any.whl → 1.1.2b20241023__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -84,7 +84,7 @@ class TimeSeriesLearner(AbstractLearner):
84
84
  self._time_limit = time_limit
85
85
  time_start = time.time()
86
86
 
87
- train_data = self.feature_generator.fit_transform(train_data, data_frame_name="train_data")
87
+ train_data = self.feature_generator.fit_transform(train_data)
88
88
  if val_data is not None:
89
89
  val_data = self.feature_generator.transform(val_data, data_frame_name="tuning_data")
90
90
 
@@ -16,9 +16,7 @@ from .local import (
16
16
  AutoCESModel,
17
17
  AutoETSModel,
18
18
  AverageModel,
19
- CrostonClassicModel,
20
- CrostonOptimizedModel,
21
- CrostonSBAModel,
19
+ CrostonModel,
22
20
  DynamicOptimizedThetaModel,
23
21
  ETSModel,
24
22
  IMAPAModel,
@@ -37,9 +35,7 @@ __all__ = [
37
35
  "AutoCESModel",
38
36
  "AutoETSModel",
39
37
  "AverageModel",
40
- "CrostonClassicModel",
41
- "CrostonSBAModel",
42
- "CrostonOptimizedModel",
38
+ "CrostonModel",
43
39
  "DLinearModel",
44
40
  "DeepARModel",
45
41
  "DirectTabularModel",
@@ -8,9 +8,7 @@ from .statsforecast import (
8
8
  AutoARIMAModel,
9
9
  AutoCESModel,
10
10
  AutoETSModel,
11
- CrostonClassicModel,
12
- CrostonOptimizedModel,
13
- CrostonSBAModel,
11
+ CrostonModel,
14
12
  DynamicOptimizedThetaModel,
15
13
  ETSModel,
16
14
  IMAPAModel,
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from typing import Any, Dict, Type
2
+ from typing import Any, Dict, Optional, Type
3
3
 
4
4
  import numpy as np
5
5
  import pandas as pd
@@ -19,11 +19,13 @@ class AbstractStatsForecastModel(AbstractLocalModel):
19
19
  local_model_args["season_length"] = seasonal_period
20
20
  return local_model_args
21
21
 
22
- def _get_model_type(self) -> Type:
22
+ def _get_model_type(self, variant: Optional[str] = None) -> Type:
23
23
  raise NotImplementedError
24
24
 
25
25
  def _get_local_model(self, local_model_args: Dict):
26
- model_type = self._get_model_type()
26
+ local_model_args = local_model_args.copy()
27
+ variant = local_model_args.pop("variant", None)
28
+ model_type = self._get_model_type(variant)
27
29
  return model_type(**local_model_args)
28
30
 
29
31
  def _get_point_forecast(
@@ -154,7 +156,7 @@ class AutoARIMAModel(AbstractProbabilisticStatsForecastModel):
154
156
  local_model_args.setdefault("allowmean", True)
155
157
  return local_model_args
156
158
 
157
- def _get_model_type(self):
159
+ def _get_model_type(self, variant: Optional[str] = None):
158
160
  from statsforecast.models import AutoARIMA
159
161
 
160
162
  return AutoARIMA
@@ -222,7 +224,7 @@ class ARIMAModel(AbstractProbabilisticStatsForecastModel):
222
224
  local_model_args.setdefault("order", (1, 1, 1))
223
225
  return local_model_args
224
226
 
225
- def _get_model_type(self):
227
+ def _get_model_type(self, variant: Optional[str] = None):
226
228
  from statsforecast.models import ARIMA
227
229
 
228
230
  return ARIMA
@@ -265,7 +267,7 @@ class AutoETSModel(AbstractProbabilisticStatsForecastModel):
265
267
  "seasonal_period",
266
268
  ]
267
269
 
268
- def _get_model_type(self):
270
+ def _get_model_type(self, variant: Optional[str] = None):
269
271
  from statsforecast.models import AutoETS
270
272
 
271
273
  return AutoETS
@@ -365,7 +367,7 @@ class DynamicOptimizedThetaModel(AbstractProbabilisticStatsForecastModel):
365
367
  "seasonal_period",
366
368
  ]
367
369
 
368
- def _get_model_type(self):
370
+ def _get_model_type(self, variant: Optional[str] = None):
369
371
  from statsforecast.models import DynamicOptimizedTheta
370
372
 
371
373
  return DynamicOptimizedTheta
@@ -409,7 +411,7 @@ class ThetaModel(AbstractProbabilisticStatsForecastModel):
409
411
  "seasonal_period",
410
412
  ]
411
413
 
412
- def _get_model_type(self):
414
+ def _get_model_type(self, variant: Optional[str] = None):
413
415
  from statsforecast.models import Theta
414
416
 
415
417
  return Theta
@@ -529,7 +531,7 @@ class AutoCESModel(AbstractProbabilisticStatsForecastModel):
529
531
  "seasonal_period",
530
532
  ]
531
533
 
532
- def _get_model_type(self):
534
+ def _get_model_type(self, variant: Optional[str] = None):
533
535
  from statsforecast.models import AutoCES
534
536
 
535
537
  return AutoCES
@@ -591,58 +593,32 @@ class ADIDAModel(AbstractStatsForecastIntermittentDemandModel):
591
593
  This significantly speeds up fitting and usually leads to no change in accuracy.
592
594
  """
593
595
 
594
- def _get_model_type(self):
596
+ def _get_model_type(self, variant: Optional[str] = None):
595
597
  from statsforecast.models import ADIDA
596
598
 
597
599
  return ADIDA
598
600
 
599
601
 
600
- class CrostonSBAModel(AbstractStatsForecastIntermittentDemandModel):
601
- """Intermittent demand forecasting model using Croston's model with the Syntetos-Boylan
602
- bias correction approach [SyntetosBoylan2001]_.
603
-
604
- Based on `statsforecast.models.CrostonSBA <https://nixtla.mintlify.app/statsforecast/docs/models/crostonsba.html>`_.
605
-
602
+ class CrostonModel(AbstractStatsForecastIntermittentDemandModel):
603
+ """Intermittent demand forecasting model using Croston's model from [Croston1972]_ and [SyntetosBoylan2001]_.
606
604
 
607
605
  References
608
606
  ----------
607
+ .. [Croston1972] Croston, John D. "Forecasting and stock control for intermittent demands." Journal of
608
+ the Operational Research Society 23.3 (1972): 289-303.
609
609
  .. [SyntetosBoylan2001] Syntetos, Aris A., and John E. Boylan. "On the bias of intermittent
610
610
  demand estimates." International journal of production economics 71.1-3 (2001): 457-466.
611
611
 
612
612
 
613
613
  Other Parameters
614
614
  ----------------
615
- n_jobs : int or float, default = 0.5
616
- Number of CPU cores used to fit the models in parallel.
617
- When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
618
- When set to a positive integer, that many cores are used.
619
- When set to -1, all CPU cores are used.
620
- max_ts_length : int, default = 2500
621
- If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
622
- This significantly speeds up fitting and usually leads to no change in accuracy.
623
- """
624
-
625
- def _get_model_type(self):
626
- from statsforecast.models import CrostonSBA
615
+ variant : {"SBA", "classic", "optimized"}, default = "SBA"
616
+ Variant of the Croston model that is used. Available options:
627
617
 
628
- return CrostonSBA
618
+ - `"classic"` - variant of the Croston method where the smoothing parameter is fixed to 0.1 (based on `statsforecast.models.CrostonClassic <https://nixtla.mintlify.app/statsforecast/docs/models/crostonclassic.html>`_)
619
+ - `"SBA"` - variant of the Croston method based on Syntetos-Boylan Approximation (based on `statsforecast.models.CrostonSBA <https://nixtla.mintlify.app/statsforecast/docs/models/crostonsba.html>`_)
620
+ - `"optimized"` - variant of the Croston method where the smoothing parameter is optimized (based on `statsforecast.models.CrostonOptimized <https://nixtla.mintlify.app/statsforecast/docs/models/crostonoptimized.html>`_)
629
621
 
630
-
631
- class CrostonOptimizedModel(AbstractStatsForecastIntermittentDemandModel):
632
- """Intermittent demand forecasting model using Croston's model where the smoothing parameter
633
- is optimized [Croston1972]_.
634
-
635
- Based on `statsforecast.models.CrostonOptimized <https://nixtla.mintlify.app/statsforecast/docs/models/crostonoptimized.html>`_.
636
-
637
-
638
- References
639
- ----------
640
- .. [Croston1972] Croston, John D. "Forecasting and stock control for intermittent demands." Journal of
641
- the Operational Research Society 23.3 (1972): 289-303.
642
-
643
-
644
- Other Parameters
645
- ----------------
646
622
  n_jobs : int or float, default = 0.5
647
623
  Number of CPU cores used to fit the models in parallel.
648
624
  When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
@@ -653,41 +629,30 @@ class CrostonOptimizedModel(AbstractStatsForecastIntermittentDemandModel):
653
629
  This significantly speeds up fitting and usually leads to no change in accuracy.
654
630
  """
655
631
 
656
- def _get_model_type(self):
657
- from statsforecast.models import CrostonOptimized
658
-
659
- return CrostonOptimized
660
-
661
-
662
- class CrostonClassicModel(AbstractStatsForecastIntermittentDemandModel):
663
- """Intermittent demand forecasting model using Croston's model where the smoothing parameter
664
- is fixed to 0.1 [Croston1972]_.
665
-
666
- Based on `statsforecast.models.CrostonClassic <https://nixtla.mintlify.app/statsforecast/docs/models/crostonclassic.html>`_.
667
-
668
-
669
- References
670
- ----------
671
- .. [Croston1972] Croston, John D. "Forecasting and stock control for intermittent demands." Journal of
672
- the Operational Research Society 23.3 (1972): 289-303.
632
+ allowed_local_model_args = [
633
+ "variant",
634
+ ]
673
635
 
636
+ def _get_model_type(self, variant: Optional[str] = None):
637
+ from statsforecast.models import CrostonClassic, CrostonOptimized, CrostonSBA
674
638
 
675
- Other Parameters
676
- ----------------
677
- n_jobs : int or float, default = 0.5
678
- Number of CPU cores used to fit the models in parallel.
679
- When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
680
- When set to a positive integer, that many cores are used.
681
- When set to -1, all CPU cores are used.
682
- max_ts_length : int, default = 2500
683
- If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
684
- This significantly speeds up fitting and usually leads to no change in accuracy.
685
- """
639
+ model_variants = {
640
+ "classic": CrostonClassic,
641
+ "sba": CrostonSBA,
642
+ "optimized": CrostonOptimized,
643
+ }
686
644
 
687
- def _get_model_type(self):
688
- from statsforecast.models import CrostonClassic
645
+ if not isinstance(variant, str) or variant.lower() not in model_variants:
646
+ raise ValueError(
647
+ f"Invalid model variant '{variant}'. Available Croston model variants: {list(model_variants)}"
648
+ )
649
+ else:
650
+ return model_variants[variant.lower()]
689
651
 
690
- return CrostonClassic
652
+ def _update_local_model_args(self, local_model_args: dict) -> dict:
653
+ local_model_args = super()._update_local_model_args(local_model_args)
654
+ local_model_args.setdefault("variant", "SBA")
655
+ return local_model_args
691
656
 
692
657
 
693
658
  class IMAPAModel(AbstractStatsForecastIntermittentDemandModel):
@@ -716,7 +681,7 @@ class IMAPAModel(AbstractStatsForecastIntermittentDemandModel):
716
681
  This significantly speeds up fitting and usually leads to no change in accuracy.
717
682
  """
718
683
 
719
- def _get_model_type(self):
684
+ def _get_model_type(self, variant: Optional[str] = None):
720
685
  from statsforecast.models import IMAPA
721
686
 
722
687
  return IMAPA
@@ -738,7 +703,7 @@ class ZeroModel(AbstractStatsForecastIntermittentDemandModel):
738
703
  This significantly speeds up fitting and usually leads to no change in accuracy.
739
704
  """
740
705
 
741
- def _get_model_type(self):
706
+ def _get_model_type(self, variant: Optional[str] = None):
742
707
  # ZeroModel does not depend on a StatsForecast implementation
743
708
  raise NotImplementedError
744
709
 
@@ -16,7 +16,7 @@ from . import (
16
16
  AutoETSModel,
17
17
  AverageModel,
18
18
  ChronosModel,
19
- CrostonSBAModel,
19
+ CrostonModel,
20
20
  DeepARModel,
21
21
  DirectTabularModel,
22
22
  DLinearModel,
@@ -68,7 +68,8 @@ MODEL_TYPES = dict(
68
68
  ETS=ETSModel,
69
69
  ARIMA=ARIMAModel,
70
70
  ADIDA=ADIDAModel,
71
- CrostonSBA=CrostonSBAModel,
71
+ Croston=CrostonModel,
72
+ CrostonSBA=CrostonModel, # Alias for backward compatibility
72
73
  IMAPA=IMAPAModel,
73
74
  Chronos=ChronosModel,
74
75
  )
@@ -85,7 +86,8 @@ DEFAULT_MODEL_PRIORITY = dict(
85
86
  # All local models are grouped together to make sure that joblib parallel pool is reused
86
87
  NPTS=80,
87
88
  ETS=80,
88
- CrostonSBA=80,
89
+ CrostonSBA=80, # Alias for backward compatibility
90
+ Croston=80,
89
91
  Theta=75,
90
92
  DynamicOptimizedTheta=75,
91
93
  AutoETS=70,
@@ -141,7 +143,7 @@ def get_default_hps(key):
141
143
  },
142
144
  "default": {
143
145
  "SeasonalNaive": {},
144
- "CrostonSBA": {},
146
+ "Croston": {},
145
147
  "AutoETS": {},
146
148
  "AutoARIMA": {},
147
149
  "NPTS": {},
@@ -291,7 +291,9 @@ class TimeSeriesPredictor(TimeSeriesPredictorDeprecatedMixin):
291
291
  Preprocessed data in TimeSeriesDataFrame format.
292
292
  """
293
293
  df = self._to_data_frame(data, name=name)
294
- df = df.astype({self.target: "float64"})
294
+ if not pd.api.types.is_numeric_dtype(df[self.target]):
295
+ raise ValueError(f"Target column {name}['{self.target}'] has a non-numeric dtype {df[self.target].dtype}")
296
+ df[self.target] = df[self.target].astype("float64")
295
297
  # MultiIndex.is_monotonic_increasing checks if index is sorted by ["item_id", "timestamp"]
296
298
  if not df.index.is_monotonic_increasing:
297
299
  df = df.sort_index()
@@ -442,12 +444,15 @@ class TimeSeriesPredictor(TimeSeriesPredictorDeprecatedMixin):
442
444
  Columns of ``train_data`` except ``target`` and those listed in ``known_covariates_names`` will be
443
445
  interpreted as ``past_covariates`` - covariates that are known only in the past.
444
446
 
445
- If ``train_data`` has static features (i.e., ``train_data.static_features`` is a pandas DataFrame), the
446
- predictor will interpret columns with ``int`` and ``float`` dtypes as continuous (real-valued) features,
447
- columns with ``object`` and ``str`` dtypes as categorical features, and will ignore the rest of columns.
447
+ If ``train_data`` contains covariates or static features, they will be interpreted as follows:
448
448
 
449
- For example, to ensure that column "store_id" with dtype ``int`` is interpreted as a category,
450
- we need to change its type to ``category``::
449
+ * columns with ``int``, ``bool`` and ``float`` dtypes are interpreted as continuous (real-valued) features
450
+ * columns with ``object``, ``str`` and ``category`` dtypes are interpreted as categorical features
451
+ * columns with other dtypes are ignored
452
+
453
+ To ensure that the column type is interpreted correctly, please convert it to one of the above dtypes.
454
+ For example, to ensure that column "store_id" with dtype ``int`` is interpreted as a category, change
455
+ its dtype to ``category``::
451
456
 
452
457
  data.static_features["store_id"] = data.static_features["store_id"].astype("category")
453
458
 
@@ -497,7 +502,7 @@ class TimeSeriesPredictor(TimeSeriesPredictorDeprecatedMixin):
497
502
  and ``DirectTabular``. These models are fast to train but may not be very accurate.
498
503
  - ``"medium_quality"``: all models mentioned above + deep learning model ``TemporalFusionTransformer``. Default setting that produces good forecasts
499
504
  with reasonable training time.
500
- - ``"high_quality"``: All ML models available in AutoGluon + additional statistical models (``NPTS``, ``AutoETS``, ``AutoARIMA``, ``CrostonSBA``,
505
+ - ``"high_quality"``: All ML models available in AutoGluon + additional statistical models (``NPTS``, ``AutoETS``, ``AutoARIMA``, ``Croston``,
501
506
  ``DynamicOptimizedTheta``). Much more accurate than ``medium_quality``, but takes longer to train.
502
507
  - ``"best_quality"``: Same models as in ``"high_quality"``, but performs validation with multiple backtests. Usually better than ``high_quality``, but takes even longer to train.
503
508
 
@@ -1,5 +1,6 @@
1
1
  import logging
2
2
  import reprlib
3
+ import time
3
4
  from dataclasses import dataclass, field
4
5
  from typing import Any, List, Literal, Optional, Tuple
5
6
 
@@ -73,7 +74,7 @@ class ContinuousAndCategoricalFeatureGenerator(PipelineFeatureGenerator):
73
74
  Imputes missing categorical features with the most frequent value in the training set.
74
75
  """
75
76
 
76
- def __init__(self, verbosity: int = 0, minimum_cat_count=2, float_dtype: str = "float64", **kwargs):
77
+ def __init__(self, verbosity: int = 0, minimum_cat_count=2, **kwargs):
77
78
  generators = [
78
79
  CategoryFeatureGenerator(minimum_cat_count=minimum_cat_count, fillna="mode"),
79
80
  IdentityFeatureGenerator(infer_features_in_args={"valid_raw_types": [R_INT, R_FLOAT]}),
@@ -84,34 +85,28 @@ class ContinuousAndCategoricalFeatureGenerator(PipelineFeatureGenerator):
84
85
  pre_generators=[AsTypeFeatureGenerator(convert_bool=False)],
85
86
  pre_enforce_types=False,
86
87
  pre_drop_useless=False,
88
+ post_drop_duplicates=True,
89
+ reset_index=False,
87
90
  verbosity=verbosity,
88
91
  **kwargs,
89
92
  )
90
- self.float_dtype = float_dtype
91
-
92
- def _convert_numerical_columns_to_float(self, df: pd.DataFrame) -> pd.DataFrame:
93
- """Convert the dtype of all numerical (float or int) columns to the given float dtype."""
94
- numeric_columns = [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])]
95
- return df.astype({col: self.float_dtype for col in numeric_columns})
96
93
 
97
94
  def transform(self, X: pd.DataFrame, *args, **kwargs) -> pd.DataFrame:
98
- if isinstance(X, TimeSeriesDataFrame):
99
- X = pd.DataFrame(X)
100
- return self._convert_numerical_columns_to_float(super().transform(X, *args, **kwargs))
95
+ return super().transform(X, *args, **kwargs)
101
96
 
102
97
  def fit_transform(self, X: pd.DataFrame, *args, **kwargs) -> pd.DataFrame:
103
98
  # PipelineFeatureGenerator does not use transform() inside fit_transform(), so we need to override both methods
104
- if isinstance(X, TimeSeriesDataFrame):
105
- X = pd.DataFrame(X)
106
- transformed = self._convert_numerical_columns_to_float(super().fit_transform(X, *args, **kwargs))
99
+ transformed = super().fit_transform(X, *args, **kwargs)
107
100
  # Ignore the '__dummy__' feature generated by PipelineFeatureGenerator if none of the features are informative
108
- return transformed.drop(columns=["__dummy__"], errors="ignore")
101
+ if "__dummy__" in transformed.columns:
102
+ transformed.drop(columns=["__dummy__"], inplace=True)
103
+ return transformed
109
104
 
110
105
 
111
106
  class TimeSeriesFeatureGenerator:
112
107
  """Takes care of preprocessing for static_features and past/known covariates.
113
108
 
114
- All covariates & static features are converted into either float64 or categorical dtype.
109
+ All covariates & static features are converted into either float or categorical dtype.
115
110
 
116
111
  Missing values in the target column are left as-is but missing values in static features & covariates are imputed.
117
112
  Imputation logic is as follows:
@@ -119,20 +114,38 @@ class TimeSeriesFeatureGenerator:
119
114
  2. For real static features, we impute missing values with the median of the training set.
120
115
  3. For real covariates (past, known), we ffill + bfill within each time series. If for some time series all
121
116
  covariate values are missing, we fill them with the median of the training set.
117
+
118
+ Parameters
119
+ ----------
120
+ target : str
121
+ Name of the target column.
122
+ known_covariates_names : List[str]
123
+ Columns that contain covariates that are known into the future.
124
+ float_dtype : str, default = "float32"
125
+ Numpy float dtype to which all numeric columns (float, int, bool) will be converted both in static & dynamic dfs.
126
+ num_samples : int or None, default = 20_000
127
+ Number of rows sampled from the training dataset to speed up computation of the median (used later for imputation).
128
+ If set to `None`, median will be computed using all rows.
122
129
  """
123
130
 
124
- def __init__(self, target: str, known_covariates_names: List[str], float_dtype: str = "float64"):
131
+ def __init__(
132
+ self,
133
+ target: str,
134
+ known_covariates_names: List[str],
135
+ float_dtype: str = "float32",
136
+ num_samples: Optional[int] = 20_000,
137
+ ):
125
138
  self.target = target
126
139
  self.float_dtype = float_dtype
140
+ self.num_samples = num_samples
141
+
127
142
  self._is_fit = False
128
143
  self.known_covariates_names = list(known_covariates_names)
129
144
  self.past_covariates_names = []
130
- self.known_covariates_pipeline = ContinuousAndCategoricalFeatureGenerator(float_dtype=float_dtype)
131
- self.past_covariates_pipeline = ContinuousAndCategoricalFeatureGenerator(float_dtype=float_dtype)
145
+ self.known_covariates_pipeline = ContinuousAndCategoricalFeatureGenerator()
146
+ self.past_covariates_pipeline = ContinuousAndCategoricalFeatureGenerator()
132
147
  # Cat features with cat_count=1 are fine in static_features since they are repeated for all time steps in a TS
133
- self.static_feature_pipeline = ContinuousAndCategoricalFeatureGenerator(
134
- minimum_cat_count=1, float_dtype=float_dtype
135
- )
148
+ self.static_feature_pipeline = ContinuousAndCategoricalFeatureGenerator(minimum_cat_count=1)
136
149
  self.covariate_metadata: CovariateMetadata = None
137
150
  self._train_covariates_real_median: Optional[pd.Series] = None
138
151
  self._train_static_real_median: Optional[pd.Series] = None
@@ -142,8 +155,12 @@ class TimeSeriesFeatureGenerator:
142
155
  return [self.target] + list(self.known_covariates_names) + list(self.past_covariates_names)
143
156
 
144
157
  def fit(self, data: TimeSeriesDataFrame) -> None:
158
+ self.fit_transform(data)
159
+
160
+ def fit_transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
145
161
  assert not self._is_fit, f"{self.__class__.__name__} has already been fit"
146
162
 
163
+ start_time = time.monotonic()
147
164
  self.past_covariates_names = []
148
165
  for column in data.columns:
149
166
  if column != self.target and column not in self.known_covariates_names:
@@ -153,23 +170,33 @@ class TimeSeriesFeatureGenerator:
153
170
  data, required_column_names=self.required_column_names, data_frame_name="train_data"
154
171
  )
155
172
 
173
+ # Convert to a pd.DataFrame and remove index for faster processing
174
+ df = pd.DataFrame(data)
175
+ index = df.index
176
+ df.reset_index(drop=True, inplace=True)
177
+ df = self._convert_numeric_to_float_dtype(df)
178
+
179
+ dfs_to_concat = [df[[self.target]]]
180
+
156
181
  logger.info("\nProvided data contains following columns:")
157
182
  logger.info(f"\ttarget: '{self.target}'")
158
183
 
159
184
  if len(self.known_covariates_names) > 0:
160
- known_covariates_df = self.known_covariates_pipeline.fit_transform(data[self.known_covariates_names])
185
+ known_covariates_df = self.known_covariates_pipeline.fit_transform(df[self.known_covariates_names])
161
186
  logger.info("\tknown_covariates:")
162
187
  known_covariates_cat, known_covariates_real = self._detect_and_log_column_types(known_covariates_df)
163
188
  self.known_covariates_names = self.known_covariates_pipeline.features_in
189
+ dfs_to_concat.append(known_covariates_df)
164
190
  else:
165
191
  known_covariates_cat = []
166
192
  known_covariates_real = []
167
193
 
168
194
  if len(self.past_covariates_names) > 0:
169
- past_covariates_df = self.past_covariates_pipeline.fit_transform(data[self.past_covariates_names])
195
+ past_covariates_df = self.past_covariates_pipeline.fit_transform(df[self.past_covariates_names])
170
196
  logger.info("\tpast_covariates:")
171
197
  past_covariates_cat, past_covariates_real = self._detect_and_log_column_types(past_covariates_df)
172
198
  self.past_covariates_names = self.past_covariates_pipeline.features_in
199
+ dfs_to_concat.append(past_covariates_df)
173
200
  else:
174
201
  past_covariates_cat = []
175
202
  past_covariates_real = []
@@ -179,7 +206,9 @@ class TimeSeriesFeatureGenerator:
179
206
  )
180
207
 
181
208
  if data.static_features is not None:
182
- static_features_df = self.static_feature_pipeline.fit_transform(data.static_features)
209
+ static_features_df = self.static_feature_pipeline.fit_transform(
210
+ self._convert_numeric_to_float_dtype(data.static_features)
211
+ )
183
212
  logger.info("\tstatic_features:")
184
213
  static_features_cat, static_features_real = self._detect_and_log_column_types(static_features_df)
185
214
  ignored_static_features = data.static_features.columns.difference(self.static_feature_pipeline.features_in)
@@ -188,6 +217,7 @@ class TimeSeriesFeatureGenerator:
188
217
  static_features_cat = []
189
218
  static_features_real = []
190
219
  ignored_static_features = []
220
+ static_features_df = None
191
221
 
192
222
  if len(ignored_covariates) > 0 or len(ignored_static_features) > 0:
193
223
  logger.info("\nAutoGluon will ignore following non-numeric/non-informative columns:")
@@ -209,9 +239,47 @@ class TimeSeriesFeatureGenerator:
209
239
  static_features_cat=static_features_cat,
210
240
  static_features_real=static_features_real,
211
241
  )
212
- self._train_covariates_real_median = data[self.covariate_metadata.covariates_real].median()
242
+
243
+ # Median of real-valued covariates will be used for missing value imputation
244
+ if self.num_samples is not None and len(df) > self.num_samples:
245
+ df = df.sample(n=self.num_samples, replace=True)
246
+ self._train_covariates_real_median = df[self.covariate_metadata.covariates_real].median()
247
+
248
+ self.fit_time = time.monotonic() - start_time
213
249
  self._is_fit = True
214
250
 
251
+ df_out = self._concat_dfs(dfs_to_concat)
252
+ df_out.index = index
253
+ ts_df = TimeSeriesDataFrame(df_out, static_features=self._impute_static_features(static_features_df))
254
+ return self._impute_covariates(ts_df, column_names=self.covariate_metadata.covariates_real)
255
+
256
+ @staticmethod
257
+ def _concat_dfs(dfs_to_concat: List[pd.DataFrame]) -> pd.DataFrame:
258
+ if len(dfs_to_concat) == 1:
259
+ return dfs_to_concat[0]
260
+ else:
261
+ return pd.concat(dfs_to_concat, axis=1, copy=False)
262
+
263
+ def _impute_covariates(self, ts_df: TimeSeriesDataFrame, column_names: List[str]) -> TimeSeriesDataFrame:
264
+ """Impute missing values in selected columns with ffill, bfill, and median imputation."""
265
+ if len(column_names) > 0:
266
+ # ffill + bfill covariates that have at least some observed values
267
+ covariates_real = ts_df[column_names].fill_missing_values()
268
+ # If for some items covariates consist completely of NaNs, fill them with median of training data
269
+ if np.isnan(covariates_real.to_numpy()).any():
270
+ covariates_real.fillna(self._train_covariates_real_median, inplace=True)
271
+ ts_df[column_names] = covariates_real
272
+ return ts_df
273
+
274
+ def _impute_static_features(self, static_df: Optional[pd.DataFrame]) -> Optional[pd.DataFrame]:
275
+ """Impute missing values in static features using the median."""
276
+ static_real_names = self.covariate_metadata.static_features_real
277
+ if static_df is not None and static_real_names:
278
+ static_real = static_df[static_real_names]
279
+ if np.isnan(static_real.to_numpy()).any():
280
+ static_df[static_real_names] = static_real.fillna(self._train_static_real_median)
281
+ return static_df
282
+
215
283
  def transform(self, data: TimeSeriesDataFrame, data_frame_name: str = "data") -> TimeSeriesDataFrame:
216
284
  """Transform static features and past/known covariates.
217
285
 
@@ -224,38 +292,32 @@ class TimeSeriesFeatureGenerator:
224
292
  self._check_required_columns_are_present(
225
293
  data, required_column_names=self.required_column_names, data_frame_name=data_frame_name
226
294
  )
227
- dfs = [data[[self.target]]]
295
+ # Convert to a pd.DataFrame and remove index for faster processing
296
+ df = pd.DataFrame(data)
297
+ index = df.index
298
+ df.reset_index(drop=True, inplace=True)
299
+
300
+ dfs_to_concat = [df[[self.target]]]
228
301
 
229
302
  if len(self.known_covariates_names) > 0:
230
- dfs.append(self.known_covariates_pipeline.transform(data[self.known_covariates_names]))
303
+ known_covariates_df = self.known_covariates_pipeline.transform(df[self.known_covariates_names])
304
+ dfs_to_concat.append(known_covariates_df)
231
305
 
232
306
  if len(self.past_covariates_names) > 0:
233
- dfs.append(self.past_covariates_pipeline.transform(data[self.past_covariates_names]))
307
+ past_covariates_df = self.past_covariates_pipeline.transform(df[self.past_covariates_names])
308
+ dfs_to_concat.append(past_covariates_df)
234
309
 
235
310
  if self.static_feature_pipeline.is_fit():
236
311
  if data.static_features is None:
237
312
  raise ValueError(f"Provided {data_frame_name} must contain static_features")
238
- static_features = self.static_feature_pipeline.transform(data.static_features)
239
- static_real_names = self.covariate_metadata.static_features_real
240
- # Fill missing static_features_real with the median of the training set
241
- if static_real_names and static_features[static_real_names].isna().any(axis=None):
242
- static_features[static_real_names] = static_features[static_real_names].fillna(
243
- self._train_static_real_median
244
- )
313
+ static_features_df = self.static_feature_pipeline.transform(data.static_features)
245
314
  else:
246
- static_features = None
247
-
248
- ts_df = TimeSeriesDataFrame(pd.concat(dfs, axis=1), static_features=static_features)
249
-
250
- covariates_names = self.covariate_metadata.covariates
251
- if len(covariates_names) > 0:
252
- # ffill + bfill covariates that have at least some observed values
253
- ts_df[covariates_names] = ts_df[covariates_names].fill_missing_values()
254
- # If for some items covariates consist completely of NaNs, fill them with median of training data
255
- if ts_df[covariates_names].isna().any(axis=None):
256
- ts_df[covariates_names] = ts_df[covariates_names].fillna(self._train_covariates_real_median)
315
+ static_features_df = None
257
316
 
258
- return ts_df
317
+ df_out = self._concat_dfs(dfs_to_concat)
318
+ df_out.index = index
319
+ ts_df = TimeSeriesDataFrame(df_out, static_features=self._impute_static_features(static_features_df))
320
+ return self._impute_covariates(ts_df, column_names=self.covariate_metadata.covariates_real)
259
321
 
260
322
  def transform_future_known_covariates(
261
323
  self, known_covariates: Optional[TimeSeriesDataFrame]
@@ -266,20 +328,15 @@ class TimeSeriesFeatureGenerator:
266
328
  self._check_required_columns_are_present(
267
329
  known_covariates, required_column_names=self.known_covariates_names, data_frame_name="known_covariates"
268
330
  )
269
- known_covariates = TimeSeriesDataFrame(self.known_covariates_pipeline.transform(known_covariates))
270
- # ffill + bfill covariates that have at least some observed values
271
- known_covariates = known_covariates.fill_missing_values()
272
- # If for some items covariates consist completely of NaNs, fill them with median of training data
273
- if known_covariates.isna().any(axis=None):
274
- known_covariates = known_covariates.fillna(self._train_covariates_real_median)
275
- return known_covariates
331
+ known_covariates = TimeSeriesDataFrame(
332
+ self.known_covariates_pipeline.transform(pd.DataFrame(known_covariates))
333
+ )
334
+ return self._impute_covariates(
335
+ known_covariates, column_names=self.covariate_metadata.known_covariates_real
336
+ )
276
337
  else:
277
338
  return None
278
339
 
279
- def fit_transform(self, data: TimeSeriesDataFrame, data_frame_name: str = "data") -> TimeSeriesDataFrame:
280
- self.fit(data)
281
- return self.transform(data, data_frame_name=data_frame_name)
282
-
283
340
  @staticmethod
284
341
  def _detect_and_log_column_types(transformed_df: pd.DataFrame) -> Tuple[List[str], List[str]]:
285
342
  """Log & return names of categorical and real-valued columns in the DataFrame."""
@@ -305,6 +362,15 @@ class TimeSeriesFeatureGenerator:
305
362
  f"{len(missing_columns)} columns are missing from {data_frame_name}: {reprlib.repr(missing_columns.to_list())}"
306
363
  )
307
364
 
365
+ def _convert_numeric_to_float_dtype(self, df: pd.DataFrame) -> pd.DataFrame:
366
+ """Convert the dtype of all numeric (float, int or bool) columns to self.float_dtype."""
367
+ numeric_columns = [
368
+ col for col, dtype in df.dtypes.items() if pd.api.types.is_numeric_dtype(dtype) and col != self.target
369
+ ]
370
+ if len(numeric_columns) > 0:
371
+ df = df.astype({col: self.float_dtype for col in numeric_columns}, copy=False)
372
+ return df
373
+
308
374
 
309
375
  class AbstractFeatureImportanceTransform:
310
376
  """Abstract class for transforms that replace a given feature with dummy or shuffled values,
@@ -1,3 +1,3 @@
1
1
  """This is the autogluon version file."""
2
- __version__ = '1.1.2b20241021'
2
+ __version__ = '1.1.2b20241023'
3
3
  __lite__ = False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: autogluon.timeseries
3
- Version: 1.1.2b20241021
3
+ Version: 1.1.2b20241023
4
4
  Summary: Fast and Accurate ML in 3 Lines of Code
5
5
  Home-page: https://github.com/autogluon/autogluon
6
6
  Author: AutoGluon Community
@@ -52,9 +52,9 @@ Requires-Dist: fugue>=0.9.0
52
52
  Requires-Dist: tqdm<5,>=4.38
53
53
  Requires-Dist: orjson~=3.9
54
54
  Requires-Dist: tensorboard<3,>=2.9
55
- Requires-Dist: autogluon.core[raytune]==1.1.2b20241021
56
- Requires-Dist: autogluon.common==1.1.2b20241021
57
- Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost]==1.1.2b20241021
55
+ Requires-Dist: autogluon.core[raytune]==1.1.2b20241023
56
+ Requires-Dist: autogluon.common==1.1.2b20241023
57
+ Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost]==1.1.2b20241023
58
58
  Provides-Extra: all
59
59
  Requires-Dist: optimum[onnxruntime]<1.19,>=1.17; extra == "all"
60
60
  Provides-Extra: chronos-onnx
@@ -1,10 +1,10 @@
1
- autogluon.timeseries-1.1.2b20241021-py3.8-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
1
+ autogluon.timeseries-1.1.2b20241023-py3.8-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
2
2
  autogluon/timeseries/__init__.py,sha256=_CrLLc1fkjen7UzWoO0Os8WZoHOgvZbHKy46I8v_4k4,304
3
3
  autogluon/timeseries/evaluator.py,sha256=l642tYfTHsl8WVIq_vV6qhgAFVFr9UuZD7gLra3A_Kc,250
4
- autogluon/timeseries/learner.py,sha256=NXhftyqMD8Bl1QHIBN82UKP0UlCV_ACughZqkmMf4oY,14043
5
- autogluon/timeseries/predictor.py,sha256=BUjFX5I_tgim9oo43cRgLBAxKY1JfduKtHQxY8BPC-Y,84561
4
+ autogluon/timeseries/learner.py,sha256=3dUxI-U6TGfNtRQUzWTvBIo1GKeXYOhxIX_q7Fed9eA,14013
5
+ autogluon/timeseries/predictor.py,sha256=1U9ic89B_JEHyzgKSu2-TN4XY9GmA8F1C77_eUBpQlI,84911
6
6
  autogluon/timeseries/splitter.py,sha256=eghGwAAN2_cxGk5aJBILgjGWtLzjxJcytMy49gg_q18,3061
7
- autogluon/timeseries/version.py,sha256=ZopU-NYOc_JfGqp3WtLoNfYh3BkvThJwgZcwgCKXWk0,90
7
+ autogluon/timeseries/version.py,sha256=zNXzLcd2xHl1327Vj6HlIYZP0k8bxYq-NVGTSmrdkOc,90
8
8
  autogluon/timeseries/configs/__init__.py,sha256=BTtHIPCYeGjqgOcvqb8qPD4VNX-ICKOg6wnkew1cPOE,98
9
9
  autogluon/timeseries/configs/presets_configs.py,sha256=94-yL9teDHKs2irWjP3kpewI7FE1ChYCgEgz9XHJ6gc,1965
10
10
  autogluon/timeseries/dataset/__init__.py,sha256=UvnhAN5tjgxXTHoZMQDy64YMDj4Xxa68yY7NP4vAw0o,81
@@ -14,8 +14,8 @@ autogluon/timeseries/metrics/abstract.py,sha256=9xCFQ3NaR1C0hn01M7oBd72a_CiNV-w6
14
14
  autogluon/timeseries/metrics/point.py,sha256=xy8sKrBbuxZ7yTW21TDPayKnEj2FBj1AEseJxUdneqE,13399
15
15
  autogluon/timeseries/metrics/quantile.py,sha256=owMbOAJYwVyzdRkrJpuCGUXk937GU843QndCZyp5n9Y,3967
16
16
  autogluon/timeseries/metrics/utils.py,sha256=eJ63TCR-UwbeJ1c2Qm7B2q-8B3sFthPgiooEccrf2Kc,912
17
- autogluon/timeseries/models/__init__.py,sha256=WKV7DIpJkrwEj0cUfscESp67Ydap9hAqaNTYvgi2EIA,1303
18
- autogluon/timeseries/models/presets.py,sha256=7ORBU-7fCwwYlpXaWCXEfNx0pss3mvB6KGSsQ1kyw2k,11673
17
+ autogluon/timeseries/models/__init__.py,sha256=MYD9JJ-wUDE5B6jW6E6LU2eXQ6vflfQBvqQJkdzJa3A,1189
18
+ autogluon/timeseries/models/presets.py,sha256=ujNt_hft_5eNkh-Wj_Na9GBdBmI-JdnBnOEHq8X0qXc,11778
19
19
  autogluon/timeseries/models/abstract/__init__.py,sha256=wvDsQAZIV0N3AwBeMaGItoQ82trEfnT-nol2AAOIxBg,102
20
20
  autogluon/timeseries/models/abstract/abstract_timeseries_model.py,sha256=siy-OW4zflN61-pnuhvYawDvchm3zXb1ta8HUDLxhWY,24793
21
21
  autogluon/timeseries/models/abstract/model_trial.py,sha256=ENPg_7nsdxIvaNM0o0UShZ3x8jFlRmwRc5m0fGPC0TM,3720
@@ -34,11 +34,11 @@ autogluon/timeseries/models/gluonts/__init__.py,sha256=asC1PTj4j9xMbilvk1IT1juln
34
34
  autogluon/timeseries/models/gluonts/abstract_gluonts.py,sha256=QRGCLN9ZMw5zCgO5hNAOjHqp17zGn1-Uy0d7VEhYtlQ,34021
35
35
  autogluon/timeseries/models/gluonts/torch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
36
  autogluon/timeseries/models/gluonts/torch/models.py,sha256=85MWDXPwDncGwLijkm-K1tS-05LvGq4Xl-WbbIcYCO8,24906
37
- autogluon/timeseries/models/local/__init__.py,sha256=JyckWWgMG1BTIWJqFTW6e1O-eb0LPPOwtXwmb1ErohQ,756
37
+ autogluon/timeseries/models/local/__init__.py,sha256=e2UImoJhmj70E148IIObv90C_bHxgyLNk6YsS4p7pfs,701
38
38
  autogluon/timeseries/models/local/abstract_local_model.py,sha256=af3GFfUIGnVNzzZJ-WI61lw83lDFfgB0AfGxmkb-t_4,12226
39
39
  autogluon/timeseries/models/local/naive.py,sha256=iwRcFMFmJKPWPbD9TWaIUS51oav69F_VAp6-jb_5SUE,7249
40
40
  autogluon/timeseries/models/local/npts.py,sha256=Bp74doKnfpGE8ywP4FWOCI_RwRMsmgocYDfGtq764DA,4143
41
- autogluon/timeseries/models/local/statsforecast.py,sha256=79swW7g7bn1CmuGY79i7r0uj0QZr6WLIfH_x3p1FTDA,32742
41
+ autogluon/timeseries/models/local/statsforecast.py,sha256=C05waZQ4c2Ewm7FfARkVFWLRk_k0XvgYsQi74tHk_1U,32226
42
42
  autogluon/timeseries/models/multi_window/__init__.py,sha256=Bq7AT2Jxdd4WNqmjTdzeqgNiwn1NCyWp4tBIWaM-zfI,60
43
43
  autogluon/timeseries/models/multi_window/multi_window_model.py,sha256=EAXzoQo96zTPNz9BTYDmV1878OVKb9F6h39y386N3zU,11740
44
44
  autogluon/timeseries/trainer/__init__.py,sha256=lxiOT-Gc6BEnr_yWQqra85kEngeM_wtH2SCaRbmC_qE,170
@@ -47,7 +47,7 @@ autogluon/timeseries/trainer/auto_trainer.py,sha256=psJFZBwWWPlLjNwAgvO4OUJXsRW1
47
47
  autogluon/timeseries/transforms/__init__.py,sha256=lzDavxdgGIz5m_DmSpNa9ewNU9Evndam3YXfOEk6kwY,174
48
48
  autogluon/timeseries/transforms/scaler.py,sha256=30JrAnZwj58ntes-YP1H_XmeVLGtFepjWnRzPQQ-t4k,5352
49
49
  autogluon/timeseries/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
- autogluon/timeseries/utils/features.py,sha256=hEir-2lU8fvHjt5r_LG9tLZEk5wNdRdeLRE7qF5z3_Y,19585
50
+ autogluon/timeseries/utils/features.py,sha256=VvBQzaymSSzxI9khtcXbpir-qo1NWHe51O7F6ynyh_s,21943
51
51
  autogluon/timeseries/utils/forecast.py,sha256=p0WKM9Q0nLAwwmCgYZI1zi9mCOWXWJfllEt2lPRQl4M,1882
52
52
  autogluon/timeseries/utils/warning_filters.py,sha256=HMXNDo9jOUdf9wvyp-Db55xTq_Ctj6uso7qPhngoJPQ,1964
53
53
  autogluon/timeseries/utils/datetime/__init__.py,sha256=bTMR8jLh1LW55vHjbOr1zvWRMF_PqbvxpS-cUcNIDWI,173
@@ -55,11 +55,11 @@ autogluon/timeseries/utils/datetime/base.py,sha256=3NdsH3NDq4cVAOSoy3XpaNixyNlbj
55
55
  autogluon/timeseries/utils/datetime/lags.py,sha256=GoLtvcZ8oKb3QkoBJ9E59LSPLOP7Qjxrr2UmMSZgjyw,5909
56
56
  autogluon/timeseries/utils/datetime/seasonality.py,sha256=h_4w00iEytAz_N_EpCENQ8RCXy7KQITczrYjBgVqWkQ,764
57
57
  autogluon/timeseries/utils/datetime/time_features.py,sha256=PAXbYbQ0z_5GFbkxSNi41zLY_2-U3x0Ynm1m_WhdtGc,2572
58
- autogluon.timeseries-1.1.2b20241021.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
59
- autogluon.timeseries-1.1.2b20241021.dist-info/METADATA,sha256=T4s0hQwSi_raT80B7FXIhWt_lzlcaM5Ws3KRKL5b8Aw,12355
60
- autogluon.timeseries-1.1.2b20241021.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
61
- autogluon.timeseries-1.1.2b20241021.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
62
- autogluon.timeseries-1.1.2b20241021.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
63
- autogluon.timeseries-1.1.2b20241021.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
64
- autogluon.timeseries-1.1.2b20241021.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
65
- autogluon.timeseries-1.1.2b20241021.dist-info/RECORD,,
58
+ autogluon.timeseries-1.1.2b20241023.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
59
+ autogluon.timeseries-1.1.2b20241023.dist-info/METADATA,sha256=J_J15yWFk4ShviPVpn-k4VsvGGvJJ1icrE6goKZ330M,12355
60
+ autogluon.timeseries-1.1.2b20241023.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
61
+ autogluon.timeseries-1.1.2b20241023.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
62
+ autogluon.timeseries-1.1.2b20241023.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
63
+ autogluon.timeseries-1.1.2b20241023.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
64
+ autogluon.timeseries-1.1.2b20241023.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
65
+ autogluon.timeseries-1.1.2b20241023.dist-info/RECORD,,