autogluon.timeseries 1.2.1b20250224__py3-none-any.whl → 1.4.1b20251215__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of autogluon.timeseries has been flagged as potentially problematic; consult the package's registry page for details.

Files changed (108):
  1. autogluon/timeseries/configs/__init__.py +3 -2
  2. autogluon/timeseries/configs/hyperparameter_presets.py +62 -0
  3. autogluon/timeseries/configs/predictor_presets.py +106 -0
  4. autogluon/timeseries/dataset/ts_dataframe.py +256 -141
  5. autogluon/timeseries/learner.py +86 -52
  6. autogluon/timeseries/metrics/__init__.py +42 -8
  7. autogluon/timeseries/metrics/abstract.py +89 -19
  8. autogluon/timeseries/metrics/point.py +142 -53
  9. autogluon/timeseries/metrics/quantile.py +46 -21
  10. autogluon/timeseries/metrics/utils.py +4 -4
  11. autogluon/timeseries/models/__init__.py +8 -2
  12. autogluon/timeseries/models/abstract/__init__.py +2 -2
  13. autogluon/timeseries/models/abstract/abstract_timeseries_model.py +361 -592
  14. autogluon/timeseries/models/abstract/model_trial.py +2 -1
  15. autogluon/timeseries/models/abstract/tunable.py +189 -0
  16. autogluon/timeseries/models/autogluon_tabular/__init__.py +2 -0
  17. autogluon/timeseries/models/autogluon_tabular/mlforecast.py +282 -194
  18. autogluon/timeseries/models/autogluon_tabular/per_step.py +513 -0
  19. autogluon/timeseries/models/autogluon_tabular/transforms.py +25 -18
  20. autogluon/timeseries/models/chronos/__init__.py +2 -1
  21. autogluon/timeseries/models/chronos/chronos2.py +361 -0
  22. autogluon/timeseries/models/chronos/model.py +219 -138
  23. autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +81 -50
  24. autogluon/timeseries/models/ensemble/__init__.py +37 -2
  25. autogluon/timeseries/models/ensemble/abstract.py +107 -0
  26. autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
  27. autogluon/timeseries/models/ensemble/array_based/abstract.py +240 -0
  28. autogluon/timeseries/models/ensemble/array_based/models.py +185 -0
  29. autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
  30. autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
  31. autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +186 -0
  32. autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
  33. autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
  34. autogluon/timeseries/models/ensemble/ensemble_selection.py +167 -0
  35. autogluon/timeseries/models/ensemble/per_item_greedy.py +172 -0
  36. autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
  37. autogluon/timeseries/models/ensemble/weighted/abstract.py +45 -0
  38. autogluon/timeseries/models/ensemble/weighted/basic.py +91 -0
  39. autogluon/timeseries/models/ensemble/weighted/greedy.py +62 -0
  40. autogluon/timeseries/models/gluonts/__init__.py +1 -1
  41. autogluon/timeseries/models/gluonts/{abstract_gluonts.py → abstract.py} +148 -208
  42. autogluon/timeseries/models/gluonts/dataset.py +109 -0
  43. autogluon/timeseries/models/gluonts/{torch/models.py → models.py} +38 -22
  44. autogluon/timeseries/models/local/__init__.py +0 -7
  45. autogluon/timeseries/models/local/abstract_local_model.py +71 -74
  46. autogluon/timeseries/models/local/naive.py +13 -9
  47. autogluon/timeseries/models/local/npts.py +9 -2
  48. autogluon/timeseries/models/local/statsforecast.py +52 -36
  49. autogluon/timeseries/models/multi_window/multi_window_model.py +65 -45
  50. autogluon/timeseries/models/registry.py +64 -0
  51. autogluon/timeseries/models/toto/__init__.py +3 -0
  52. autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
  53. autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
  54. autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
  55. autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
  56. autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
  57. autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
  58. autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
  59. autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
  60. autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
  61. autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
  62. autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
  63. autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
  64. autogluon/timeseries/models/toto/dataloader.py +108 -0
  65. autogluon/timeseries/models/toto/hf_pretrained_model.py +200 -0
  66. autogluon/timeseries/models/toto/model.py +249 -0
  67. autogluon/timeseries/predictor.py +685 -297
  68. autogluon/timeseries/regressor.py +94 -44
  69. autogluon/timeseries/splitter.py +8 -32
  70. autogluon/timeseries/trainer/__init__.py +3 -0
  71. autogluon/timeseries/trainer/ensemble_composer.py +444 -0
  72. autogluon/timeseries/trainer/model_set_builder.py +256 -0
  73. autogluon/timeseries/trainer/prediction_cache.py +149 -0
  74. autogluon/timeseries/{trainer.py → trainer/trainer.py} +387 -390
  75. autogluon/timeseries/trainer/utils.py +17 -0
  76. autogluon/timeseries/transforms/__init__.py +2 -13
  77. autogluon/timeseries/transforms/covariate_scaler.py +34 -40
  78. autogluon/timeseries/transforms/target_scaler.py +37 -20
  79. autogluon/timeseries/utils/constants.py +10 -0
  80. autogluon/timeseries/utils/datetime/lags.py +3 -5
  81. autogluon/timeseries/utils/datetime/seasonality.py +1 -3
  82. autogluon/timeseries/utils/datetime/time_features.py +2 -2
  83. autogluon/timeseries/utils/features.py +70 -47
  84. autogluon/timeseries/utils/forecast.py +19 -14
  85. autogluon/timeseries/utils/timer.py +173 -0
  86. autogluon/timeseries/utils/warning_filters.py +4 -2
  87. autogluon/timeseries/version.py +1 -1
  88. autogluon.timeseries-1.4.1b20251215-py3.11-nspkg.pth +1 -0
  89. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/METADATA +49 -36
  90. autogluon_timeseries-1.4.1b20251215.dist-info/RECORD +103 -0
  91. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/WHEEL +1 -1
  92. autogluon/timeseries/configs/presets_configs.py +0 -79
  93. autogluon/timeseries/evaluator.py +0 -6
  94. autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -11
  95. autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
  96. autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -585
  97. autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -518
  98. autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -78
  99. autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -170
  100. autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
  101. autogluon/timeseries/models/presets.py +0 -360
  102. autogluon.timeseries-1.2.1b20250224-py3.9-nspkg.pth +0 -1
  103. autogluon.timeseries-1.2.1b20250224.dist-info/RECORD +0 -68
  104. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info/licenses}/LICENSE +0 -0
  105. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info/licenses}/NOTICE +0 -0
  106. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/namespace_packages.txt +0 -0
  107. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/top_level.txt +0 -0
  108. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/zip-safe +0 -0
@@ -1,16 +1,20 @@
1
+ import copy
1
2
  import logging
2
3
  import math
3
- import os
4
4
  import time
5
- from typing import Any, Dict, List, Optional, Tuple
5
+ import warnings
6
+ from typing import Any, Callable, Collection, Type
6
7
 
7
8
  import numpy as np
8
9
  import pandas as pd
9
10
  from sklearn.base import BaseEstimator
10
11
 
11
12
  import autogluon.core as ag
12
- from autogluon.tabular import TabularPredictor
13
- from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TIMESTAMP, TimeSeriesDataFrame
13
+ from autogluon.core.models import AbstractModel as AbstractTabularModel
14
+ from autogluon.features import AutoMLPipelineFeatureGenerator
15
+ from autogluon.tabular.registry import ag_model_registry
16
+ from autogluon.timeseries.dataset import TimeSeriesDataFrame
17
+ from autogluon.timeseries.metrics.abstract import TimeSeriesScorer
14
18
  from autogluon.timeseries.metrics.utils import in_sample_squared_seasonal_error
15
19
  from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
16
20
  from autogluon.timeseries.models.local import SeasonalNaiveModel
@@ -19,37 +23,38 @@ from autogluon.timeseries.utils.datetime import (
19
23
  get_seasonality,
20
24
  get_time_features_for_frequency,
21
25
  )
22
- from autogluon.timeseries.utils.warning_filters import warning_filter
26
+ from autogluon.timeseries.utils.warning_filters import set_loggers_level, warning_filter
23
27
 
24
28
  from .utils import MLF_ITEMID, MLF_TARGET, MLF_TIMESTAMP
25
29
 
26
30
  logger = logging.getLogger(__name__)
27
31
 
28
32
 
29
- class TabularEstimator(BaseEstimator):
30
- """Scikit-learn compatible interface for TabularPredictor."""
33
+ class TabularModel(BaseEstimator):
34
+ """A scikit-learn compatible wrapper for arbitrary autogluon.tabular models"""
31
35
 
32
- def __init__(self, predictor_init_kwargs: Optional[dict] = None, predictor_fit_kwargs: Optional[dict] = None):
33
- self.predictor_init_kwargs = predictor_init_kwargs if predictor_init_kwargs is not None else {}
34
- self.predictor_fit_kwargs = predictor_fit_kwargs if predictor_fit_kwargs is not None else {}
36
+ def __init__(self, model_class: Type[AbstractTabularModel], model_kwargs: dict | None = None):
37
+ self.model_class = model_class
38
+ self.model_kwargs = {} if model_kwargs is None else model_kwargs
39
+ self.feature_pipeline = AutoMLPipelineFeatureGenerator(verbosity=0)
35
40
 
36
- def get_params(self, deep: bool = True) -> dict:
37
- return {
38
- "predictor_init_kwargs": self.predictor_init_kwargs,
39
- "predictor_fit_kwargs": self.predictor_fit_kwargs,
40
- }
41
-
42
- def fit(self, X: pd.DataFrame, y: pd.Series) -> "TabularEstimator":
43
- assert isinstance(X, pd.DataFrame) and isinstance(y, pd.Series)
44
- df = pd.concat([X, y.rename(MLF_TARGET).to_frame()], axis=1)
45
- self.predictor = TabularPredictor(**self.predictor_init_kwargs)
46
- with warning_filter():
47
- self.predictor.fit(df, **self.predictor_fit_kwargs)
41
+ def fit(self, X: pd.DataFrame, y: pd.Series, X_val: pd.DataFrame, y_val: pd.Series, **kwargs):
42
+ self.model = self.model_class(**self.model_kwargs)
43
+ X = self.feature_pipeline.fit_transform(X=X)
44
+ X_val = self.feature_pipeline.transform(X=X_val)
45
+ self.model.fit(X=X, y=y, X_val=X_val, y_val=y_val, **kwargs)
48
46
  return self
49
47
 
50
- def predict(self, X: pd.DataFrame) -> np.ndarray:
51
- assert isinstance(X, pd.DataFrame)
52
- return self.predictor.predict(X).values
48
+ def predict(self, X: pd.DataFrame, **kwargs):
49
+ X = self.feature_pipeline.transform(X=X)
50
+ return self.model.predict(X=X, **kwargs)
51
+
52
+ def get_params(self, deep=True):
53
+ params = {"model_class": self.model_class, "model_kwargs": self.model_kwargs}
54
+ if deep:
55
+ return copy.deepcopy(params)
56
+ else:
57
+ return params
53
58
 
54
59
 
55
60
  class AbstractMLForecastModel(AbstractTimeSeriesModel):
@@ -58,13 +63,13 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
58
63
 
59
64
  def __init__(
60
65
  self,
61
- freq: Optional[str] = None,
66
+ freq: str | None = None,
62
67
  prediction_length: int = 1,
63
- path: Optional[str] = None,
64
- name: Optional[str] = None,
65
- eval_metric: str = None,
66
- hyperparameters: Dict[str, Any] = None,
67
- **kwargs, # noqa
68
+ path: str | None = None,
69
+ name: str | None = None,
70
+ eval_metric: str | TimeSeriesScorer | None = None,
71
+ hyperparameters: dict[str, Any] | None = None,
72
+ **kwargs,
68
73
  ):
69
74
  super().__init__(
70
75
  path=path,
@@ -79,46 +84,45 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
79
84
  from mlforecast.target_transforms import BaseTargetTransform
80
85
 
81
86
  self._sum_of_differences: int = 0 # number of time steps removed from each series by differencing
82
- self._max_ts_length: Optional[int] = None
83
- self._target_lags: Optional[List[int]] = None
84
- self._date_features: Optional[List[str]] = None
85
- self._mlf: Optional[MLForecast] = None
86
- self._scaler: Optional[BaseTargetTransform] = None
87
- self._residuals_std_per_item: Optional[pd.Series] = None
88
- self._train_target_median: Optional[float] = None
89
- self._non_boolean_real_covariates: List[str] = []
87
+ self._max_ts_length: int | None = None
88
+ self._target_lags: np.ndarray
89
+ self._date_features: list[Callable]
90
+ self._mlf: MLForecast
91
+ self._scaler: BaseTargetTransform | None = None
92
+ self._residuals_std_per_item: pd.Series
93
+ self._train_target_median: float | None = None
94
+ self._non_boolean_real_covariates: list[str] = []
95
+
96
+ def _initialize_transforms_and_regressor(self):
97
+ super()._initialize_transforms_and_regressor()
98
+ # Do not create a scaler in the model, scaler will be passed to MLForecast
99
+ self.target_scaler = None
90
100
 
91
101
  @property
92
- def tabular_predictor_path(self) -> str:
93
- return os.path.join(self.path, "tabular_predictor")
94
-
95
- def save(self, path: str = None, verbose: bool = True) -> str:
96
- assert "mean" in self._mlf.models_, "TabularPredictor must be trained before saving"
97
- tabular_predictor = self._mlf.models_["mean"].predictor
98
- self._mlf.models_["mean"].predictor = None
99
- save_path = super().save(path=path, verbose=verbose)
100
- self._mlf.models_["mean"].predictor = tabular_predictor
101
- return save_path
102
-
103
- @classmethod
104
- def load(
105
- cls, path: str, reset_paths: bool = True, load_oof: bool = False, verbose: bool = True
106
- ) -> "AbstractTimeSeriesModel":
107
- model = super().load(path=path, reset_paths=reset_paths, load_oof=load_oof, verbose=verbose)
108
- assert "mean" in model._mlf.models_, "Loaded model doesn't have a trained TabularPredictor"
109
- model._mlf.models_["mean"].predictor = TabularPredictor.load(model.tabular_predictor_path)
110
- return model
102
+ def allowed_hyperparameters(self) -> list[str]:
103
+ return super().allowed_hyperparameters + [
104
+ "lags",
105
+ "date_features",
106
+ "differences",
107
+ "model_name",
108
+ "model_hyperparameters",
109
+ "max_num_items",
110
+ "max_num_samples",
111
+ "lag_transforms",
112
+ ]
111
113
 
112
114
  def preprocess(
113
115
  self,
114
116
  data: TimeSeriesDataFrame,
115
- known_covariates: Optional[TimeSeriesDataFrame] = None,
117
+ known_covariates: TimeSeriesDataFrame | None = None,
116
118
  is_train: bool = False,
117
119
  **kwargs,
118
- ) -> Tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame]]:
120
+ ) -> tuple[TimeSeriesDataFrame, TimeSeriesDataFrame | None]:
119
121
  if is_train:
120
122
  # All-NaN series are removed; partially-NaN series in train_data are handled inside _generate_train_val_dfs
121
- all_nan_items = data.item_ids[data[self.target].isna().groupby(ITEMID, sort=False).all()]
123
+ all_nan_items = data.item_ids[
124
+ data[self.target].isna().groupby(TimeSeriesDataFrame.ITEMID, sort=False).all()
125
+ ]
122
126
  if len(all_nan_items):
123
127
  data = data.query("item_id not in @all_nan_items")
124
128
  else:
@@ -128,34 +132,42 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
128
132
  data[self.target] = data[self.target].fillna(value=self._train_target_median)
129
133
  return data, known_covariates
130
134
 
131
- def _get_extra_tabular_init_kwargs(self) -> dict:
132
- raise NotImplementedError
135
+ def _get_default_hyperparameters(self) -> dict[str, Any]:
136
+ return {
137
+ "max_num_items": 20_000,
138
+ "max_num_samples": 1_000_000,
139
+ "model_name": "GBM",
140
+ "model_hyperparameters": {},
141
+ }
133
142
 
134
- def _get_model_params(self) -> dict:
135
- model_params = super()._get_model_params().copy()
136
- model_params.setdefault("max_num_items", 20_000)
137
- model_params.setdefault("max_num_samples", 1_000_000)
138
- model_params.setdefault("tabular_hyperparameters", {"GBM": {}})
139
- model_params.setdefault("tabular_fit_kwargs", {})
140
- return model_params
143
+ def _create_tabular_model(self, model_name: str, model_hyperparameters: dict[str, Any]) -> TabularModel:
144
+ raise NotImplementedError
141
145
 
142
- def _get_mlforecast_init_args(self, train_data: TimeSeriesDataFrame, model_params: dict) -> dict:
146
+ def _get_mlforecast_init_args(
147
+ self, train_data: TimeSeriesDataFrame, model_params: dict[str, Any]
148
+ ) -> dict[str, Any]:
143
149
  from mlforecast.target_transforms import Differences
144
150
 
145
151
  from .transforms import MLForecastScaler
146
152
 
147
153
  lags = model_params.get("lags")
148
154
  if lags is None:
155
+ assert self.freq is not None
149
156
  lags = get_lags_for_frequency(self.freq)
150
157
  self._target_lags = np.array(sorted(set(lags)), dtype=np.int64)
151
158
 
152
159
  date_features = model_params.get("date_features")
153
160
  if date_features is None:
154
161
  date_features = get_time_features_for_frequency(self.freq)
155
- self._date_features = date_features
162
+ known_covariates = self.covariate_metadata.known_covariates
163
+ conflicting = [f.__name__ for f in date_features if f.__name__ in known_covariates]
164
+ if conflicting:
165
+ logger.info(f"\tRemoved automatic date_features {conflicting} since they clash with known_covariates")
166
+ self._date_features = [f for f in date_features if f.__name__ not in known_covariates]
156
167
 
157
168
  target_transforms = []
158
169
  differences = model_params.get("differences")
170
+ assert isinstance(differences, Collection)
159
171
 
160
172
  ts_lengths = train_data.num_timesteps_per_item()
161
173
  required_ts_length = sum(differences) + 1
@@ -172,6 +184,11 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
172
184
  target_transforms.append(Differences(differences))
173
185
  self._sum_of_differences = sum(differences)
174
186
 
187
+ if "target_scaler" in model_params and "scaler" in model_params:
188
+ warnings.warn(
189
+ f"Both 'target_scaler' and 'scaler' hyperparameters are provided to {self.__class__.__name__}. "
190
+ "Please only set the 'target_scaler' parameter."
191
+ )
175
192
  # Support "scaler" for backward compatibility
176
193
  scaler_type = model_params.get("target_scaler", model_params.get("scaler"))
177
194
  if scaler_type is not None:
@@ -182,6 +199,7 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
182
199
  "lags": self._target_lags.tolist(),
183
200
  "date_features": self._date_features,
184
201
  "target_transforms": target_transforms,
202
+ "lag_transforms": model_params.get("lag_transforms"),
185
203
  }
186
204
 
187
205
  def _mask_df(self, df: pd.DataFrame) -> pd.DataFrame:
@@ -193,13 +211,13 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
193
211
  return df
194
212
 
195
213
  @staticmethod
196
- def _shorten_all_series(mlforecast_df: pd.DataFrame, max_length: int):
214
+ def _shorten_all_series(mlforecast_df: pd.DataFrame, max_length: int) -> pd.DataFrame:
197
215
  logger.debug(f"Shortening all series to at most {max_length}")
198
216
  return mlforecast_df.groupby(MLF_ITEMID, as_index=False, sort=False).tail(max_length)
199
217
 
200
218
  def _generate_train_val_dfs(
201
- self, data: TimeSeriesDataFrame, max_num_items: Optional[int] = None, max_num_samples: Optional[int] = None
202
- ) -> Tuple[pd.DataFrame, pd.DataFrame]:
219
+ self, data: TimeSeriesDataFrame, max_num_items: int | None = None, max_num_samples: int | None = None
220
+ ) -> tuple[pd.DataFrame, pd.DataFrame]:
203
221
  # Exclude items that are too short for chosen differences - otherwise exception will be raised
204
222
  if self._sum_of_differences > 0:
205
223
  ts_lengths = data.num_timesteps_per_item()
@@ -228,7 +246,7 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
228
246
  # Unless we set static_features=[], MLForecast interprets all known covariates as static features
229
247
  df = self._mlf.preprocess(mlforecast_df, dropna=False, static_features=[])
230
248
  # df.query results in 2x memory saving compared to df.dropna(subset="y")
231
- df = df.query("y.notnull()")
249
+ df = df.query("y.notnull()") # type: ignore
232
250
 
233
251
  df = self._mask_df(df)
234
252
 
@@ -247,12 +265,12 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
247
265
  val_df = grouped_df.tail(val_rows_per_item)
248
266
  logger.debug(f"train_df shape: {train_df.shape}, val_df shape: {val_df.shape}")
249
267
 
250
- return train_df.drop(columns=[MLF_TIMESTAMP]), val_df.drop(columns=[MLF_TIMESTAMP])
268
+ return train_df.drop(columns=[MLF_TIMESTAMP]), val_df.drop(columns=[MLF_TIMESTAMP]) # type: ignore
251
269
 
252
270
  def _to_mlforecast_df(
253
271
  self,
254
272
  data: TimeSeriesDataFrame,
255
- static_features: pd.DataFrame,
273
+ static_features: pd.DataFrame | None,
256
274
  include_target: bool = True,
257
275
  ) -> pd.DataFrame:
258
276
  """Convert TimeSeriesDataFrame to a format expected by MLForecast methods `predict` and `preprocess`.
@@ -260,19 +278,29 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
260
278
  Each row contains unique_id, ds, y, and (optionally) known covariates & static features.
261
279
  """
262
280
  # TODO: Add support for past_covariates
263
- selected_columns = self.metadata.known_covariates.copy()
264
- column_name_mapping = {ITEMID: MLF_ITEMID, TIMESTAMP: MLF_TIMESTAMP}
281
+ selected_columns = self.covariate_metadata.known_covariates.copy()
282
+ column_name_mapping = {TimeSeriesDataFrame.ITEMID: MLF_ITEMID, TimeSeriesDataFrame.TIMESTAMP: MLF_TIMESTAMP}
265
283
  if include_target:
266
284
  selected_columns += [self.target]
267
285
  column_name_mapping[self.target] = MLF_TARGET
268
286
 
269
287
  df = pd.DataFrame(data)[selected_columns].reset_index()
270
288
  if static_features is not None:
271
- df = pd.merge(df, static_features, how="left", on=ITEMID, suffixes=(None, "_static_feat"))
289
+ df = pd.merge(
290
+ df, static_features, how="left", on=TimeSeriesDataFrame.ITEMID, suffixes=(None, "_static_feat")
291
+ )
272
292
 
273
293
  for col in self._non_boolean_real_covariates:
274
294
  # Normalize non-boolean features using mean_abs scaling
275
- df[f"__scaled_{col}"] = df[col] / df[col].abs().groupby(df[ITEMID]).mean().reindex(df[ITEMID]).values
295
+ df[f"__scaled_{col}"] = (
296
+ df[col]
297
+ / df[col]
298
+ .abs()
299
+ .groupby(df[TimeSeriesDataFrame.ITEMID])
300
+ .mean()
301
+ .reindex(df[TimeSeriesDataFrame.ITEMID])
302
+ .values
303
+ )
276
304
 
277
305
  # Convert float64 to float32 to reduce memory usage
278
306
  float64_cols = list(df.select_dtypes(include="float64"))
@@ -284,23 +312,26 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
284
312
  def _fit(
285
313
  self,
286
314
  train_data: TimeSeriesDataFrame,
287
- val_data: Optional[TimeSeriesDataFrame] = None,
288
- time_limit: Optional[int] = None,
315
+ val_data: TimeSeriesDataFrame | None = None,
316
+ time_limit: float | None = None,
317
+ num_cpus: int | None = None,
318
+ num_gpus: int | None = None,
289
319
  verbosity: int = 2,
290
320
  **kwargs,
291
321
  ) -> None:
292
322
  from mlforecast import MLForecast
293
323
 
294
324
  self._check_fit_params()
325
+ self._log_unused_hyperparameters()
295
326
  fit_start_time = time.time()
296
327
  self._train_target_median = train_data[self.target].median()
297
- for col in self.metadata.known_covariates_real:
328
+ for col in self.covariate_metadata.known_covariates_real:
298
329
  if not set(train_data[col].unique()) == set([0, 1]):
299
330
  self._non_boolean_real_covariates.append(col)
300
- # TabularEstimator is passed to MLForecast later to include tuning_data
301
- model_params = self._get_model_params()
331
+ model_params = self.get_hyperparameters()
302
332
 
303
333
  mlforecast_init_args = self._get_mlforecast_init_args(train_data, model_params)
334
+ assert self.freq is not None
304
335
  self._mlf = MLForecast(models={}, freq=self.freq, **mlforecast_init_args)
305
336
 
306
337
  # We generate train/val splits from train_data and ignore val_data to avoid overfitting
@@ -310,57 +341,65 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
310
341
  max_num_samples=model_params["max_num_samples"],
311
342
  )
312
343
 
313
- estimator = TabularEstimator(
314
- predictor_init_kwargs={
315
- "path": self.tabular_predictor_path,
316
- "verbosity": verbosity - 2,
317
- "label": MLF_TARGET,
318
- **self._get_extra_tabular_init_kwargs(),
319
- },
320
- predictor_fit_kwargs={
321
- "tuning_data": val_df.drop(columns=[MLF_ITEMID]),
322
- "time_limit": (None if time_limit is None else time_limit - (time.time() - fit_start_time)),
323
- "hyperparameters": model_params["tabular_hyperparameters"],
324
- **model_params["tabular_fit_kwargs"],
325
- },
326
- )
327
- self._mlf.models = {"mean": estimator}
344
+ with set_loggers_level(regex=r"^autogluon\.(tabular|features).*", level=logging.ERROR):
345
+ tabular_model = self._create_tabular_model(
346
+ model_name=model_params["model_name"], model_hyperparameters=model_params["model_hyperparameters"]
347
+ )
348
+ tabular_model.fit(
349
+ X=train_df.drop(columns=[MLF_TARGET, MLF_ITEMID]),
350
+ y=train_df[MLF_TARGET],
351
+ X_val=val_df.drop(columns=[MLF_TARGET, MLF_ITEMID]),
352
+ y_val=val_df[MLF_TARGET],
353
+ time_limit=(None if time_limit is None else time_limit - (time.time() - fit_start_time)),
354
+ verbosity=verbosity - 1,
355
+ )
328
356
 
329
- with warning_filter():
330
- self._mlf.fit_models(X=train_df.drop(columns=[MLF_TARGET, MLF_ITEMID]), y=train_df[MLF_TARGET])
357
+ # We directly insert the trained model into models_ since calling _mlf.fit_models does not support X_val, y_val
358
+ self._mlf.models_ = {"mean": tabular_model}
331
359
 
332
360
  self._save_residuals_std(val_df)
333
361
 
362
+ def get_tabular_model(self) -> TabularModel:
363
+ """Get the underlying tabular regression model."""
364
+ assert "mean" in self._mlf.models_, "Call `fit` before calling `get_tabular_model`"
365
+ mean_estimator = self._mlf.models_["mean"]
366
+ assert isinstance(mean_estimator, TabularModel)
367
+ return mean_estimator
368
+
334
369
  def _save_residuals_std(self, val_df: pd.DataFrame) -> None:
335
370
  """Compute standard deviation of residuals for each item using the validation set.
336
371
 
337
372
  Saves per-item residuals to `self.residuals_std_per_item`.
338
373
  """
339
374
  residuals_df = val_df[[MLF_ITEMID, MLF_TARGET]]
340
- residuals_df = residuals_df.assign(y_pred=self._mlf.models_["mean"].predict(val_df))
375
+ mean_estimator = self.get_tabular_model()
376
+
377
+ residuals_df = residuals_df.assign(y_pred=mean_estimator.predict(val_df))
341
378
  if self._scaler is not None:
342
379
  # Scaler expects to find column MLF_TIMESTAMP even though it's not used - fill with dummy
343
- residuals_df = residuals_df.assign(**{MLF_TIMESTAMP: 1})
380
+ residuals_df = residuals_df.assign(**{MLF_TIMESTAMP: np.datetime64("2010-01-01")})
344
381
  residuals_df = self._scaler.inverse_transform(residuals_df)
382
+
383
+ assert isinstance(residuals_df, pd.DataFrame)
345
384
  residuals = residuals_df[MLF_TARGET] - residuals_df["y_pred"]
346
385
  self._residuals_std_per_item = (
347
- residuals.pow(2.0).groupby(val_df[MLF_ITEMID].values, sort=False).mean().pow(0.5)
386
+ residuals.pow(2.0).groupby(val_df[MLF_ITEMID].values, sort=False).mean().pow(0.5) # type: ignore
348
387
  )
349
388
 
350
389
  def _remove_short_ts_and_generate_fallback_forecast(
351
390
  self,
352
391
  data: TimeSeriesDataFrame,
353
- known_covariates: Optional[TimeSeriesDataFrame] = None,
354
- ) -> Tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame], Optional[TimeSeriesDataFrame]]:
392
+ known_covariates: TimeSeriesDataFrame | None = None,
393
+ ) -> tuple[TimeSeriesDataFrame, TimeSeriesDataFrame, TimeSeriesDataFrame | None]:
355
394
  """Remove series that are too short for chosen differencing from data and generate naive forecast for them.
356
395
 
357
396
  Returns
358
397
  -------
359
- data_long : TimeSeriesDataFrame
398
+ data_long
360
399
  Data containing only time series that are long enough for the model to predict.
361
- known_covariates_long : TimeSeriesDataFrame or None
400
+ known_covariates_long
362
401
  Future known covariates containing only time series that are long enough for the model to predict.
363
- forecast_for_short_series : TimeSeriesDataFrame or None
402
+ forecast_for_short_series
364
403
  Seasonal naive forecast for short series, if there are any in the dataset.
365
404
  """
366
405
  ts_lengths = data.num_timesteps_per_item()
@@ -392,7 +431,9 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
392
431
  forecast_for_short_series = None
393
432
  return data_long, known_covariates_long, forecast_for_short_series
394
433
 
395
- def _add_gaussian_quantiles(self, predictions: pd.DataFrame, repeated_item_ids: pd.Series, past_target: pd.Series):
434
+ def _add_gaussian_quantiles(
435
+ self, predictions: pd.DataFrame, repeated_item_ids: pd.Series, past_target: pd.Series
436
+ ) -> pd.DataFrame:
396
437
  """
397
438
  Add quantile levels assuming that residuals follow normal distribution
398
439
  """
@@ -407,9 +448,9 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
407
448
  # Use in-sample seasonal error in for items not seen during fit
408
449
  items_not_seen_during_fit = residuals_std_per_timestep.index[residuals_std_per_timestep.isna()].unique()
409
450
  if len(items_not_seen_during_fit) > 0:
410
- scale_for_new_items: pd.Series = np.sqrt(
411
- in_sample_squared_seasonal_error(y_past=past_target.loc[items_not_seen_during_fit])
412
- )
451
+ scale_for_new_items: pd.Series = in_sample_squared_seasonal_error(
452
+ y_past=past_target.loc[items_not_seen_during_fit]
453
+ ).pow(0.5)
413
454
  residuals_std_per_timestep = residuals_std_per_timestep.fillna(scale_for_new_items)
414
455
 
415
456
  std_per_timestep = residuals_std_per_timestep * normal_scale_per_timestep
@@ -417,18 +458,14 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
417
458
  predictions[str(q)] = predictions["mean"] + norm.ppf(q) * std_per_timestep.to_numpy()
418
459
  return predictions
419
460
 
420
- def _more_tags(self) -> dict:
461
+ def _more_tags(self) -> dict[str, Any]:
421
462
  return {"allow_nan": True, "can_refit_full": True}
422
463
 
423
- def _create_target_scaler(self):
424
- # Do not create a scaler in the model, scaler will be passed to MLForecast
425
- return None
426
-
427
464
 
428
465
  class DirectTabularModel(AbstractMLForecastModel):
429
- """Predict all future time series values simultaneously using TabularPredictor from AutoGluon-Tabular.
466
+ """Predict all future time series values simultaneously using a regression model from AutoGluon-Tabular.
430
467
 
431
- A single TabularPredictor is used to forecast all future time series values using the following features:
468
+ A single tabular model is used to forecast all future time series values using the following features:
432
469
 
433
470
  - lag features (observed time series values) based on ``freq`` of the data
434
471
  - time features (e.g., day of the week) based on the timestamp of the measurement
@@ -437,8 +474,8 @@ class DirectTabularModel(AbstractMLForecastModel):
437
474
 
438
475
  Features not known during the forecast horizon (e.g., future target values) are replaced by NaNs.
439
476
 
440
- If ``eval_metric.needs_quantile``, the TabularPredictor will be trained with ``"quantile"`` problem type.
441
- Otherwise, TabularPredictor will be trained with ``"regression"`` problem type, and dummy quantiles will be
477
+ If ``eval_metric.needs_quantile``, the tabular regression model will be trained with ``"quantile"`` problem type.
478
+ Otherwise, the model will be trained with ``"regression"`` problem type, and dummy quantiles will be
442
479
  obtained by assuming that the residuals follow zero-mean normal distribution.
443
480
 
444
481
  Based on the `mlforecast <https://github.com/Nixtla/mlforecast>`_ library.
@@ -446,46 +483,55 @@ class DirectTabularModel(AbstractMLForecastModel):
446
483
 
447
484
  Other Parameters
448
485
  ----------------
449
- lags : List[int], default = None
486
+ lags : list[int], default = None
450
487
  Lags of the target that will be used as features for predictions. If None, will be determined automatically
451
488
  based on the frequency of the data.
452
- date_features : List[Union[str, Callable]], default = None
489
+ date_features : list[str | Callable], default = None
453
490
  Features computed from the dates. Can be pandas date attributes or functions that will take the dates as input.
454
491
  If None, will be determined automatically based on the frequency of the data.
455
- differences : List[int], default = []
492
+ differences : list[int], default = []
456
493
  Differences to take of the target before computing the features. These are restored at the forecasting step.
457
- If None, will be set to ``[seasonal_period]``, where seasonal_period is determined based on the data frequency.
458
494
  Defaults to no differencing.
459
495
  target_scaler : {"standard", "mean_abs", "min_max", "robust", None}, default = "mean_abs"
460
496
  Scaling applied to each time series. Scaling is applied after differencing.
461
- tabular_hyperparameters : Dict[Dict[str, Any]], optional
462
- Hyperparameters dictionary passed to ``TabularPredictor.fit``. Contains the names of models that should be fit.
463
- Defaults to ``{"GBM": {}}``.
464
- tabular_fit_kwargs : Dict[str, Any], optional
465
- Additional keyword arguments passed to ``TabularPredictor.fit``. Defaults to an empty dict.
497
+ model_name : str, default = "GBM"
498
+ Name of the tabular regression model. See ``autogluon.tabular.registry.ag_model_registry`` or
499
+ `the documentation <https://auto.gluon.ai/stable/api/autogluon.tabular.models.html>`_ for the list of available
500
+ tabular models.
501
+ model_hyperparameters : dict[str, Any], optional
502
+ Hyperparameters passed to the tabular regression model.
466
503
  max_num_items : int or None, default = 20_000
467
504
  If not None, the model will randomly select this many time series for training and validation.
468
505
  max_num_samples : int or None, default = 1_000_000
469
- If not None, training dataset passed to TabularPredictor will contain at most this many rows (starting from the
470
- end of each time series).
506
+ If not None, training dataset passed to the tabular regression model will contain at most this many rows
507
+ (starting from the end of each time series).
471
508
  """
472
509
 
510
+ ag_priority = 85
511
+
473
512
  @property
474
513
  def is_quantile_model(self) -> bool:
475
514
  return self.eval_metric.needs_quantile
476
515
 
477
- def _get_model_params(self) -> dict:
478
- model_params = super()._get_model_params()
479
- model_params.setdefault("target_scaler", "mean_abs")
516
+ def get_hyperparameters(self) -> dict[str, Any]:
517
+ model_params = super().get_hyperparameters()
518
+ # We don't set 'target_scaler' if user already provided 'scaler' to avoid overriding the user-provided value
519
+ if "scaler" not in model_params:
520
+ model_params.setdefault("target_scaler", "mean_abs")
480
521
  if "differences" not in model_params or model_params["differences"] is None:
481
522
  model_params["differences"] = []
523
+ if "lag_transforms" in model_params:
524
+ model_params.pop("lag_transforms")
525
+ logger.warning(f"{self.name} does not support the 'lag_transforms' hyperparameter.")
482
526
  return model_params
483
527
 
484
528
  def _mask_df(self, df: pd.DataFrame) -> pd.DataFrame:
485
529
  """Apply a mask that mimics the situation at prediction time when target/covariates are unknown during the
486
530
  forecast horizon.
487
531
  """
488
- num_hidden = np.random.randint(0, self.prediction_length, size=len(df))
532
+ # Fix seed to make the model deterministic
533
+ rng = np.random.default_rng(seed=123)
534
+ num_hidden = rng.integers(0, self.prediction_length, size=len(df))
489
535
  lag_cols = [f"lag{lag}" for lag in self._target_lags]
490
536
  mask = num_hidden[:, None] < self._target_lags[None] # shape [len(num_hidden), len(_target_lags)]
491
537
  # use df.loc[:, lag_cols] instead of df[lag_cols] to avoid SettingWithCopyWarning
@@ -502,7 +548,7 @@ class DirectTabularModel(AbstractMLForecastModel):
502
548
  def _predict(
503
549
  self,
504
550
  data: TimeSeriesDataFrame,
505
- known_covariates: Optional[TimeSeriesDataFrame] = None,
551
+ known_covariates: TimeSeriesDataFrame | None = None,
506
552
  **kwargs,
507
553
  ) -> TimeSeriesDataFrame:
508
554
  from .transforms import apply_inverse_transform
@@ -513,6 +559,7 @@ class DirectTabularModel(AbstractMLForecastModel):
513
559
  )
514
560
  if len(data) == 0:
515
561
  # All time series are too short for chosen differences
562
+ assert forecast_for_short_series is not None
516
563
  return forecast_for_short_series
517
564
 
518
565
  if known_covariates is not None:
@@ -523,15 +570,18 @@ class DirectTabularModel(AbstractMLForecastModel):
523
570
  # MLForecast raises exception of target contains NaN. We use inf as placeholder, replace them by NaN afterwards
524
571
  data_future[self.target] = float("inf")
525
572
  data_extended = pd.concat([data, data_future])
526
- mlforecast_df = self._to_mlforecast_df(data_extended, data.static_features)
573
+ mlforecast_df = self._to_mlforecast_df(data_extended, data.static_features) # type: ignore
527
574
  if self._max_ts_length is not None:
528
575
  # We appended `prediction_length` time steps to each series, so increase length
529
576
  mlforecast_df = self._shorten_all_series(mlforecast_df, self._max_ts_length + self.prediction_length)
530
577
  df = self._mlf.preprocess(mlforecast_df, dropna=False, static_features=[])
578
+ assert isinstance(df, pd.DataFrame)
579
+
531
580
  df = df.groupby(MLF_ITEMID, sort=False).tail(self.prediction_length)
532
581
  df = df.replace(float("inf"), float("nan"))
533
582
 
534
- raw_predictions = self._mlf.models_["mean"].predict(df)
583
+ mean_estimator = self.get_tabular_model()
584
+ raw_predictions = mean_estimator.predict(df)
535
585
  predictions = self._postprocess_predictions(raw_predictions, repeated_item_ids=df[MLF_ITEMID])
536
586
  # Paste columns one by one to preserve dtypes
537
587
  predictions[MLF_ITEMID] = df[MLF_ITEMID].values
@@ -543,6 +593,7 @@ class DirectTabularModel(AbstractMLForecastModel):
543
593
  if self._max_ts_length is not None:
544
594
  mlforecast_df_past = self._shorten_all_series(mlforecast_df_past, self._max_ts_length)
545
595
  self._mlf.preprocess(mlforecast_df_past, static_features=[], dropna=False)
596
+ assert self._mlf.ts.target_transforms is not None
546
597
  for tfm in self._mlf.ts.target_transforms[::-1]:
547
598
  predictions = apply_inverse_transform(predictions, transform=tfm)
548
599
 
@@ -550,49 +601,63 @@ class DirectTabularModel(AbstractMLForecastModel):
550
601
  predictions = self._add_gaussian_quantiles(
551
602
  predictions, repeated_item_ids=predictions[MLF_ITEMID], past_target=data[self.target]
552
603
  )
553
- predictions = TimeSeriesDataFrame(predictions.rename(columns={MLF_ITEMID: ITEMID, MLF_TIMESTAMP: TIMESTAMP}))
604
+ predictions_tsdf: TimeSeriesDataFrame = TimeSeriesDataFrame(
605
+ predictions.rename(
606
+ columns={MLF_ITEMID: TimeSeriesDataFrame.ITEMID, MLF_TIMESTAMP: TimeSeriesDataFrame.TIMESTAMP}
607
+ )
608
+ )
554
609
 
555
610
  if forecast_for_short_series is not None:
556
- predictions = pd.concat([predictions, forecast_for_short_series])
557
- predictions = predictions.reindex(original_item_id_order, level=ITEMID)
558
- return predictions
611
+ predictions_tsdf = pd.concat([predictions_tsdf, forecast_for_short_series]) # type: ignore
612
+ predictions_tsdf = predictions_tsdf.reindex(original_item_id_order, level=TimeSeriesDataFrame.ITEMID)
613
+
614
+ return predictions_tsdf
559
615
 
560
- def _postprocess_predictions(self, predictions: np.ndarray, repeated_item_ids: pd.Series) -> pd.DataFrame:
616
+ def _postprocess_predictions(
617
+ self, predictions: np.ndarray | pd.Series, repeated_item_ids: pd.Series
618
+ ) -> pd.DataFrame:
561
619
  if self.is_quantile_model:
562
- predictions = pd.DataFrame(predictions, columns=[str(q) for q in self.quantile_levels])
563
- predictions.values.sort(axis=1)
564
- predictions["mean"] = predictions["0.5"]
620
+ predictions_df = pd.DataFrame(predictions, columns=[str(q) for q in self.quantile_levels])
621
+ predictions_df.values.sort(axis=1)
622
+ predictions_df["mean"] = predictions_df["0.5"]
565
623
  else:
566
- predictions = pd.DataFrame(predictions, columns=["mean"])
624
+ predictions_df = pd.DataFrame(predictions, columns=["mean"])
567
625
 
568
- column_order = ["mean"] + [col for col in predictions.columns if col != "mean"]
569
- return predictions[column_order]
626
+ column_order = ["mean"] + [col for col in predictions_df.columns if col != "mean"]
627
+ return predictions_df[column_order]
570
628
 
571
- def _get_extra_tabular_init_kwargs(self) -> dict:
629
+ def _create_tabular_model(self, model_name: str, model_hyperparameters: dict[str, Any]) -> TabularModel:
630
+ model_class = ag_model_registry.key_to_cls(model_name)
572
631
  if self.is_quantile_model:
573
- return {
574
- "problem_type": ag.constants.QUANTILE,
575
- "quantile_levels": self.quantile_levels,
576
- "eval_metric": "pinball_loss",
577
- }
632
+ problem_type = ag.constants.QUANTILE
633
+ eval_metric = "pinball_loss"
634
+ model_hyperparameters["ag.quantile_levels"] = self.quantile_levels
578
635
  else:
579
- return {
580
- "problem_type": ag.constants.REGRESSION,
581
- "eval_metric": self.eval_metric.equivalent_tabular_regression_metric or "mean_absolute_error",
582
- }
636
+ problem_type = ag.constants.REGRESSION
637
+ eval_metric = self.eval_metric.equivalent_tabular_regression_metric or "mean_absolute_error"
638
+ return TabularModel(
639
+ model_class=model_class,
640
+ model_kwargs={
641
+ "path": "",
642
+ "name": model_class.__name__,
643
+ "hyperparameters": model_hyperparameters,
644
+ "problem_type": problem_type,
645
+ "eval_metric": eval_metric,
646
+ },
647
+ )
583
648
 
584
649
 
585
650
  class RecursiveTabularModel(AbstractMLForecastModel):
586
- """Predict future time series values one by one using TabularPredictor from AutoGluon-Tabular.
651
+ """Predict future time series values one by one using a regression model from AutoGluon-Tabular.
587
652
 
588
- A single TabularPredictor is used to forecast the future time series values using the following features:
653
+ A single tabular regression model is used to forecast the future time series values using the following features:
589
654
 
590
655
  - lag features (observed time series values) based on ``freq`` of the data
591
656
  - time features (e.g., day of the week) based on the timestamp of the measurement
592
657
  - known covariates (if available)
593
658
  - static features of each item (if available)
594
659
 
595
- TabularPredictor will always be trained with ``"regression"`` problem type, and dummy quantiles will be
660
+ The tabular model will always be trained with ``"regression"`` problem type, and dummy quantiles will be
596
661
  obtained by assuming that the residuals follow zero-mean normal distribution.
597
662
 
598
663
  Based on the `mlforecast <https://github.com/Nixtla/mlforecast>`_ library.
@@ -600,32 +665,40 @@ class RecursiveTabularModel(AbstractMLForecastModel):
600
665
 
601
666
  Other Parameters
602
667
  ----------------
603
- lags : List[int], default = None
668
+ lags : list[int], default = None
604
669
  Lags of the target that will be used as features for predictions. If None, will be determined automatically
605
670
  based on the frequency of the data.
606
- date_features : List[Union[str, Callable]], default = None
671
+ date_features : list[str | Callable], default = None
607
672
  Features computed from the dates. Can be pandas date attributes or functions that will take the dates as input.
608
673
  If None, will be determined automatically based on the frequency of the data.
609
- differences : List[int], default = None
674
+ differences : list[int], default = None
610
675
  Differences to take of the target before computing the features. These are restored at the forecasting step.
611
676
  If None, will be set to ``[seasonal_period]``, where seasonal_period is determined based on the data frequency.
612
677
  target_scaler : {"standard", "mean_abs", "min_max", "robust", None}, default = "standard"
613
678
  Scaling applied to each time series. Scaling is applied after differencing.
614
- tabular_hyperparameters : Dict[Dict[str, Any]], optional
615
- Hyperparameters dictionary passed to ``TabularPredictor.fit``. Contains the names of models that should be fit.
616
- Defaults to ``{"GBM": {}}``.
617
- tabular_fit_kwargs : Dict[str, Any], optional
618
- Additional keyword arguments passed to ``TabularPredictor.fit``. Defaults to an empty dict.
679
+ lag_transforms : dict[int, list[Callable]], default = None
680
+ Dictionary mapping lag periods to transformation functions applied to lagged target values (e.g., rolling mean).
681
+ See `MLForecast documentation <https://nixtlaverse.nixtla.io/mlforecast/lag_transforms.html>`_ for more details.
682
+ model_name : str, default = "GBM"
683
+ Name of the tabular regression model. See ``autogluon.tabular.registry.ag_model_registry`` or
684
+ `the documentation <https://auto.gluon.ai/stable/api/autogluon.tabular.models.html>`_ for the list of available
685
+ tabular models.
686
+ model_hyperparameters : dict[str, Any], optional
687
+ Hyperparameters passed to the tabular regression model.
619
688
  max_num_items : int or None, default = 20_000
620
689
  If not None, the model will randomly select this many time series for training and validation.
621
690
  max_num_samples : int or None, default = 1_000_000
622
- If not None, training dataset passed to TabularPredictor will contain at most this many rows (starting from the
623
- end of each time series).
691
+ If not None, training dataset passed to the tabular regression model will contain at most this many rows
692
+ (starting from the end of each time series).
624
693
  """
625
694
 
626
- def _get_model_params(self) -> dict:
627
- model_params = super()._get_model_params()
628
- model_params.setdefault("target_scaler", "standard")
695
+ ag_priority = 90
696
+
697
+ def get_hyperparameters(self) -> dict[str, Any]:
698
+ model_params = super().get_hyperparameters()
699
+ # We don't set 'target_scaler' if user already provided 'scaler' to avoid overriding the user-provided value
700
+ if "scaler" not in model_params:
701
+ model_params.setdefault("target_scaler", "standard")
629
702
  if "differences" not in model_params or model_params["differences"] is None:
630
703
  model_params["differences"] = [get_seasonality(self.freq)]
631
704
  return model_params
@@ -633,7 +706,7 @@ class RecursiveTabularModel(AbstractMLForecastModel):
633
706
  def _predict(
634
707
  self,
635
708
  data: TimeSeriesDataFrame,
636
- known_covariates: Optional[TimeSeriesDataFrame] = None,
709
+ known_covariates: TimeSeriesDataFrame | None = None,
637
710
  **kwargs,
638
711
  ) -> TimeSeriesDataFrame:
639
712
  original_item_id_order = data.item_ids
@@ -642,6 +715,7 @@ class RecursiveTabularModel(AbstractMLForecastModel):
642
715
  )
643
716
  if len(data) == 0:
644
717
  # All time series are too short for chosen differences
718
+ assert forecast_for_short_series is not None
645
719
  return forecast_for_short_series
646
720
 
647
721
  new_df = self._to_mlforecast_df(data, data.static_features)
@@ -649,7 +723,9 @@ class RecursiveTabularModel(AbstractMLForecastModel):
649
723
  new_df = self._shorten_all_series(new_df, self._max_ts_length)
650
724
  if known_covariates is None:
651
725
  future_index = self.get_forecast_horizon_index(data)
652
- known_covariates = pd.DataFrame(columns=[self.target], index=future_index, dtype="float32")
726
+ known_covariates = TimeSeriesDataFrame(
727
+ pd.DataFrame(columns=[self.target], index=future_index, dtype="float32")
728
+ )
653
729
  X_df = self._to_mlforecast_df(known_covariates, data.static_features, include_target=False)
654
730
  # If both covariates & static features are missing, set X_df = None to avoid exception from MLForecast
655
731
  if len(X_df.columns.difference([MLF_ITEMID, MLF_TIMESTAMP])) == 0:
@@ -660,19 +736,31 @@ class RecursiveTabularModel(AbstractMLForecastModel):
660
736
  new_df=new_df,
661
737
  X_df=X_df,
662
738
  )
663
- predictions = raw_predictions.rename(columns={MLF_ITEMID: ITEMID, MLF_TIMESTAMP: TIMESTAMP})
664
- predictions = TimeSeriesDataFrame(
739
+ assert isinstance(raw_predictions, pd.DataFrame)
740
+ raw_predictions = raw_predictions.rename(
741
+ columns={MLF_ITEMID: TimeSeriesDataFrame.ITEMID, MLF_TIMESTAMP: TimeSeriesDataFrame.TIMESTAMP}
742
+ )
743
+
744
+ predictions: TimeSeriesDataFrame = TimeSeriesDataFrame(
665
745
  self._add_gaussian_quantiles(
666
- predictions, repeated_item_ids=predictions[ITEMID], past_target=data[self.target]
746
+ raw_predictions,
747
+ repeated_item_ids=raw_predictions[TimeSeriesDataFrame.ITEMID],
748
+ past_target=data[self.target],
667
749
  )
668
750
  )
669
-
670
751
  if forecast_for_short_series is not None:
671
- predictions = pd.concat([predictions, forecast_for_short_series])
672
- return predictions.reindex(original_item_id_order, level=ITEMID)
673
-
674
- def _get_extra_tabular_init_kwargs(self) -> dict:
675
- return {
676
- "problem_type": ag.constants.REGRESSION,
677
- "eval_metric": self.eval_metric.equivalent_tabular_regression_metric or "mean_absolute_error",
678
- }
752
+ predictions = pd.concat([predictions, forecast_for_short_series]) # type: ignore
753
+ return predictions.reindex(original_item_id_order, level=TimeSeriesDataFrame.ITEMID)
754
+
755
+ def _create_tabular_model(self, model_name: str, model_hyperparameters: dict[str, Any]) -> TabularModel:
756
+ model_class = ag_model_registry.key_to_cls(model_name)
757
+ return TabularModel(
758
+ model_class=model_class,
759
+ model_kwargs={
760
+ "path": "",
761
+ "name": model_class.__name__,
762
+ "hyperparameters": model_hyperparameters,
763
+ "problem_type": ag.constants.REGRESSION,
764
+ "eval_metric": self.eval_metric.equivalent_tabular_regression_metric or "mean_absolute_error",
765
+ },
766
+ )