autogluon.timeseries 1.2.1b20250424__py3-none-any.whl → 1.2.1b20250426__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. autogluon/timeseries/dataset/ts_dataframe.py +9 -2
  2. autogluon/timeseries/learner.py +1 -4
  3. autogluon/timeseries/metrics/__init__.py +36 -8
  4. autogluon/timeseries/metrics/abstract.py +77 -7
  5. autogluon/timeseries/metrics/point.py +136 -47
  6. autogluon/timeseries/metrics/quantile.py +42 -17
  7. autogluon/timeseries/models/abstract/abstract_timeseries_model.py +7 -20
  8. autogluon/timeseries/models/autogluon_tabular/mlforecast.py +106 -66
  9. autogluon/timeseries/models/autogluon_tabular/transforms.py +15 -10
  10. autogluon/timeseries/models/ensemble/greedy.py +8 -7
  11. autogluon/timeseries/models/local/abstract_local_model.py +43 -36
  12. autogluon/timeseries/models/multi_window/multi_window_model.py +1 -1
  13. autogluon/timeseries/models/presets.py +0 -2
  14. autogluon/timeseries/predictor.py +37 -29
  15. autogluon/timeseries/trainer.py +23 -16
  16. autogluon/timeseries/version.py +1 -1
  17. {autogluon.timeseries-1.2.1b20250424.dist-info → autogluon.timeseries-1.2.1b20250426.dist-info}/METADATA +5 -5
  18. {autogluon.timeseries-1.2.1b20250424.dist-info → autogluon.timeseries-1.2.1b20250426.dist-info}/RECORD +25 -25
  19. /autogluon.timeseries-1.2.1b20250424-py3.9-nspkg.pth → /autogluon.timeseries-1.2.1b20250426-py3.9-nspkg.pth +0 -0
  20. {autogluon.timeseries-1.2.1b20250424.dist-info → autogluon.timeseries-1.2.1b20250426.dist-info}/LICENSE +0 -0
  21. {autogluon.timeseries-1.2.1b20250424.dist-info → autogluon.timeseries-1.2.1b20250426.dist-info}/NOTICE +0 -0
  22. {autogluon.timeseries-1.2.1b20250424.dist-info → autogluon.timeseries-1.2.1b20250426.dist-info}/WHEEL +0 -0
  23. {autogluon.timeseries-1.2.1b20250424.dist-info → autogluon.timeseries-1.2.1b20250426.dist-info}/namespace_packages.txt +0 -0
  24. {autogluon.timeseries-1.2.1b20250424.dist-info → autogluon.timeseries-1.2.1b20250426.dist-info}/top_level.txt +0 -0
  25. {autogluon.timeseries-1.2.1b20250424.dist-info → autogluon.timeseries-1.2.1b20250426.dist-info}/zip-safe +0 -0
@@ -1,4 +1,4 @@
1
- from typing import Optional
1
+ from typing import Optional, Sequence
2
2
 
3
3
  import numpy as np
4
4
  import pandas as pd
@@ -25,6 +25,7 @@ class WQL(TimeSeriesScorer):
25
25
  - scale-dependent (time series with large absolute value contribute more to the loss)
26
26
  - equivalent to WAPE if ``quantile_levels = [0.5]``
27
27
 
28
+ If `horizon_weight` is provided, both the errors and the target time series in the denominator will be re-weighted.
28
29
 
29
30
  References
30
31
  ----------
@@ -34,16 +35,25 @@ class WQL(TimeSeriesScorer):
34
35
  needs_quantile = True
35
36
 
36
37
  def compute_metric(
37
- self, data_future: TimeSeriesDataFrame, predictions: TimeSeriesDataFrame, target: str = "target", **kwargs
38
+ self,
39
+ data_future: TimeSeriesDataFrame,
40
+ predictions: TimeSeriesDataFrame,
41
+ target: str = "target",
42
+ **kwargs,
38
43
  ) -> float:
39
44
  y_true, q_pred, quantile_levels = self._get_quantile_forecast_score_inputs(data_future, predictions, target)
40
- values_true = y_true.values[:, None] # shape [N, 1]
41
- values_pred = q_pred.values # shape [N, len(quantile_levels)]
45
+ y_true = y_true.to_numpy()[:, None] # shape [N, 1]
46
+ q_pred = q_pred.to_numpy() # shape [N, len(quantile_levels)]
42
47
 
43
- return 2 * np.mean(
44
- np.nansum(np.abs((values_true - values_pred) * ((values_true <= values_pred) - quantile_levels)), axis=0)
45
- / np.nansum(np.abs(values_true))
48
+ errors = (
49
+ np.abs((q_pred - y_true) * ((y_true <= q_pred) - quantile_levels))
50
+ .mean(axis=1)
51
+ .reshape([-1, self.prediction_length])
46
52
  )
53
+ if self.horizon_weight is not None:
54
+ errors *= self.horizon_weight
55
+ y_true = y_true.reshape([-1, self.prediction_length]) * self.horizon_weight
56
+ return 2 * np.nansum(errors) / np.nansum(np.abs(y_true))
47
57
 
48
58
 
49
59
  class SQL(TimeSeriesScorer):
@@ -79,7 +89,15 @@ class SQL(TimeSeriesScorer):
79
89
 
80
90
  needs_quantile = True
81
91
 
82
- def __init__(self):
92
+ def __init__(
93
+ self,
94
+ prediction_length: int = 1,
95
+ seasonal_period: Optional[int] = None,
96
+ horizon_weight: Optional[Sequence[float]] = None,
97
+ ):
98
+ super().__init__(
99
+ prediction_length=prediction_length, seasonal_period=seasonal_period, horizon_weight=horizon_weight
100
+ )
83
101
  self._past_abs_seasonal_error: Optional[pd.Series] = None
84
102
 
85
103
  def save_past_metrics(
@@ -93,17 +111,24 @@ class SQL(TimeSeriesScorer):
93
111
  self._past_abs_seasonal_error = None
94
112
 
95
113
  def compute_metric(
96
- self, data_future: TimeSeriesDataFrame, predictions: TimeSeriesDataFrame, target: str = "target", **kwargs
114
+ self,
115
+ data_future: TimeSeriesDataFrame,
116
+ predictions: TimeSeriesDataFrame,
117
+ target: str = "target",
118
+ **kwargs,
97
119
  ) -> float:
98
120
  if self._past_abs_seasonal_error is None:
99
121
  raise AssertionError("Call `save_past_metrics` before `compute_metric`")
100
122
 
101
123
  y_true, q_pred, quantile_levels = self._get_quantile_forecast_score_inputs(data_future, predictions, target)
102
- q_pred = q_pred.values
103
- values_true = y_true.values[:, None] # shape [N, 1]
104
-
105
- ql = np.abs((q_pred - values_true) * ((values_true <= q_pred) - quantile_levels)).mean(axis=1)
106
- num_items = len(self._past_abs_seasonal_error)
107
- # Reshape quantile losses values into [num_items, prediction_length] to normalize per item without groupby
108
- quantile_losses = ql.reshape([num_items, -1])
109
- return 2 * self._safemean(quantile_losses / self._past_abs_seasonal_error.values[:, None])
124
+ q_pred = q_pred.to_numpy()
125
+ y_true = y_true.to_numpy()[:, None] # shape [N, 1]
126
+
127
+ errors = (
128
+ np.abs((q_pred - y_true) * ((y_true <= q_pred) - quantile_levels))
129
+ .mean(axis=1)
130
+ .reshape([-1, self.prediction_length])
131
+ )
132
+ if self.horizon_weight is not None:
133
+ errors *= self.horizon_weight
134
+ return 2 * self._safemean(errors / self._past_abs_seasonal_error.to_numpy()[:, None])
@@ -57,9 +57,6 @@ class TimeSeriesModelBase(ModelBase, ABC):
57
57
  Metric by which predictions will be ultimately evaluated on future test data. This only impacts
58
58
  ``model.score()``, as eval_metric is not used during training. Available metrics can be found in
59
59
  ``autogluon.timeseries.metrics``.
60
- eval_metric_seasonal_period : int, optional
61
- Seasonal period used to compute some evaluation metrics such as mean absolute scaled error (MASE). Defaults to
62
- ``None``, in which case the seasonal period is computed based on the data frequency.
63
60
  hyperparameters : dict, default = None
64
61
  Hyperparameters that will be used by the model (can be search spaces instead of fixed values).
65
62
  If None, model defaults are used. This is identical to passing an empty dictionary.
@@ -88,7 +85,6 @@ class TimeSeriesModelBase(ModelBase, ABC):
88
85
  target: str = "target",
89
86
  quantile_levels: Sequence[float] = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
90
87
  eval_metric: Union[str, TimeSeriesScorer, None] = None,
91
- eval_metric_seasonal_period: Optional[int] = None,
92
88
  ):
93
89
  self.name = name or re.sub(r"Model$", "", self.__class__.__name__)
94
90
 
@@ -103,8 +99,7 @@ class TimeSeriesModelBase(ModelBase, ABC):
103
99
 
104
100
  self.path = os.path.join(self.path_root, self.name)
105
101
 
106
- self.eval_metric: TimeSeriesScorer = check_get_evaluation_metric(eval_metric)
107
- self.eval_metric_seasonal_period = eval_metric_seasonal_period
102
+ self.eval_metric = check_get_evaluation_metric(eval_metric, prediction_length=prediction_length)
108
103
  self.target: str = target
109
104
  self.covariate_metadata = covariate_metadata or CovariateMetadata()
110
105
 
@@ -187,7 +182,7 @@ class TimeSeriesModelBase(ModelBase, ABC):
187
182
  )
188
183
  return hyperparameters, extra_ag_args
189
184
 
190
- def save(self, path: Optional[str] = None, verbose=True) -> str:
185
+ def save(self, path: Optional[str] = None, verbose: bool = True) -> str:
191
186
  if path is None:
192
187
  path = self.path
193
188
 
@@ -393,8 +388,8 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, ABC):
393
388
  target: str = "target",
394
389
  quantile_levels: Sequence[float] = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
395
390
  eval_metric: Union[str, TimeSeriesScorer, None] = None,
396
- eval_metric_seasonal_period: Optional[int] = None,
397
391
  ):
392
+ # TODO: make freq a required argument in AbstractTimeSeriesModel
398
393
  super().__init__(
399
394
  path=path,
400
395
  name=name,
@@ -405,12 +400,10 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, ABC):
405
400
  target=target,
406
401
  quantile_levels=quantile_levels,
407
402
  eval_metric=eval_metric,
408
- eval_metric_seasonal_period=eval_metric_seasonal_period,
409
403
  )
410
404
  self.target_scaler: Optional[TargetScaler]
411
405
  self.covariate_scaler: Optional[CovariateScaler]
412
406
  self.covariate_regressor: Optional[CovariateRegressor]
413
- self._initialize_transforms_and_regressor()
414
407
 
415
408
  def _initialize_transforms_and_regressor(self) -> None:
416
409
  self.target_scaler = get_target_scaler(self.get_hyperparameters().get("target_scaler"), target=self.target)
@@ -474,6 +467,7 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, ABC):
474
467
  The fitted model object
475
468
  """
476
469
  start_time = time.monotonic()
470
+ self._initialize_transforms_and_regressor()
477
471
 
478
472
  if self.target_scaler is not None:
479
473
  train_data = self.target_scaler.fit_transform(train_data)
@@ -699,19 +693,15 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, ABC):
699
693
  self,
700
694
  data: TimeSeriesDataFrame,
701
695
  predictions: TimeSeriesDataFrame,
702
- metric: Optional[str] = None,
703
696
  ) -> float:
704
697
  """Compute the score measuring how well the predictions align with the data."""
705
- eval_metric = self.eval_metric if metric is None else check_get_evaluation_metric(metric)
706
- return eval_metric.score(
698
+ return self.eval_metric.score(
707
699
  data=data,
708
700
  predictions=predictions,
709
- prediction_length=self.prediction_length,
710
701
  target=self.target,
711
- seasonal_period=self.eval_metric_seasonal_period,
712
702
  )
713
703
 
714
- def score(self, data: TimeSeriesDataFrame, metric: Optional[str] = None) -> float:
704
+ def score(self, data: TimeSeriesDataFrame) -> float:
715
705
  """Return the evaluation scores for given metric and dataset. The last
716
706
  `self.prediction_length` time steps of each time series in the input data set
717
707
  will be held out and used for computing the evaluation score. Time series
@@ -721,9 +711,6 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, ABC):
721
711
  ----------
722
712
  data: TimeSeriesDataFrame
723
713
  Dataset used for scoring.
724
- metric: str
725
- String identifier of evaluation metric to use, from one of
726
- `autogluon.timeseries.utils.metric_utils.AVAILABLE_METRICS`.
727
714
 
728
715
  Returns
729
716
  -------
@@ -735,7 +722,7 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, ABC):
735
722
  prediction_length=self.prediction_length, known_covariates_names=self.covariate_metadata.known_covariates
736
723
  )
737
724
  predictions = self.predict(past_data, known_covariates=known_covariates)
738
- return self._score_with_predictions(data=data, predictions=predictions, metric=metric)
725
+ return self._score_with_predictions(data=data, predictions=predictions)
739
726
 
740
727
  def score_and_cache_oof(
741
728
  self,
@@ -2,15 +2,17 @@ import logging
2
2
  import math
3
3
  import os
4
4
  import time
5
- from typing import Any, Dict, List, Optional, Tuple
5
+ from typing import Any, Callable, Collection, Dict, List, Optional, Tuple, Union
6
6
 
7
7
  import numpy as np
8
8
  import pandas as pd
9
9
  from sklearn.base import BaseEstimator
10
+ from typing_extensions import Self
10
11
 
11
12
  import autogluon.core as ag
12
13
  from autogluon.tabular import TabularPredictor
13
14
  from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TIMESTAMP, TimeSeriesDataFrame
15
+ from autogluon.timeseries.metrics.abstract import TimeSeriesScorer
14
16
  from autogluon.timeseries.metrics.utils import in_sample_squared_seasonal_error
15
17
  from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
16
18
  from autogluon.timeseries.models.local import SeasonalNaiveModel
@@ -29,17 +31,21 @@ logger = logging.getLogger(__name__)
29
31
  class TabularEstimator(BaseEstimator):
30
32
  """Scikit-learn compatible interface for TabularPredictor."""
31
33
 
32
- def __init__(self, predictor_init_kwargs: Optional[dict] = None, predictor_fit_kwargs: Optional[dict] = None):
34
+ def __init__(
35
+ self,
36
+ predictor_init_kwargs: Optional[Dict[str, Any]] = None,
37
+ predictor_fit_kwargs: Optional[Dict[str, Any]] = None,
38
+ ):
33
39
  self.predictor_init_kwargs = predictor_init_kwargs if predictor_init_kwargs is not None else {}
34
40
  self.predictor_fit_kwargs = predictor_fit_kwargs if predictor_fit_kwargs is not None else {}
35
41
 
36
- def get_params(self, deep: bool = True) -> dict:
42
+ def get_params(self, deep: bool = True) -> Dict[str, Any]:
37
43
  return {
38
44
  "predictor_init_kwargs": self.predictor_init_kwargs,
39
45
  "predictor_fit_kwargs": self.predictor_fit_kwargs,
40
46
  }
41
47
 
42
- def fit(self, X: pd.DataFrame, y: pd.Series) -> "TabularEstimator":
48
+ def fit(self, X: pd.DataFrame, y: pd.Series) -> Self:
43
49
  assert isinstance(X, pd.DataFrame) and isinstance(y, pd.Series)
44
50
  df = pd.concat([X, y.rename(MLF_TARGET).to_frame()], axis=1)
45
51
  self.predictor = TabularPredictor(**self.predictor_init_kwargs)
@@ -49,7 +55,7 @@ class TabularEstimator(BaseEstimator):
49
55
 
50
56
  def predict(self, X: pd.DataFrame) -> np.ndarray:
51
57
  assert isinstance(X, pd.DataFrame)
52
- return self.predictor.predict(X).values
58
+ return self.predictor.predict(X).values # type: ignore
53
59
 
54
60
 
55
61
  class AbstractMLForecastModel(AbstractTimeSeriesModel):
@@ -62,9 +68,9 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
62
68
  prediction_length: int = 1,
63
69
  path: Optional[str] = None,
64
70
  name: Optional[str] = None,
65
- eval_metric: str = None,
66
- hyperparameters: Dict[str, Any] = None,
67
- **kwargs, # noqa
71
+ eval_metric: Optional[Union[str, TimeSeriesScorer]] = None,
72
+ hyperparameters: Optional[Dict[str, Any]] = None,
73
+ **kwargs,
68
74
  ):
69
75
  super().__init__(
70
76
  path=path,
@@ -80,14 +86,16 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
80
86
 
81
87
  self._sum_of_differences: int = 0 # number of time steps removed from each series by differencing
82
88
  self._max_ts_length: Optional[int] = None
83
- self._target_lags: Optional[List[int]] = None
84
- self._date_features: Optional[List[str]] = None
85
- self._mlf: Optional[MLForecast] = None
89
+ self._target_lags: np.ndarray
90
+ self._date_features: List[Callable]
91
+ self._mlf: MLForecast
86
92
  self._scaler: Optional[BaseTargetTransform] = None
87
- self._residuals_std_per_item: Optional[pd.Series] = None
93
+ self._residuals_std_per_item: pd.Series
88
94
  self._train_target_median: Optional[float] = None
89
95
  self._non_boolean_real_covariates: List[str] = []
90
96
 
97
+ def _initialize_transforms_and_regressor(self):
98
+ super()._initialize_transforms_and_regressor()
91
99
  # Do not create a scaler in the model, scaler will be passed to MLForecast
92
100
  self.target_scaler = None
93
101
 
@@ -95,20 +103,23 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
95
103
  def tabular_predictor_path(self) -> str:
96
104
  return os.path.join(self.path, "tabular_predictor")
97
105
 
98
- def save(self, path: str = None, verbose: bool = True) -> str:
106
+ def save(self, path: Optional[str] = None, verbose: bool = True) -> str:
99
107
  assert "mean" in self._mlf.models_, "TabularPredictor must be trained before saving"
100
- tabular_predictor = self._mlf.models_["mean"].predictor
101
- self._mlf.models_["mean"].predictor = None
108
+
109
+ mean_estimator = self._mlf.models_["mean"]
110
+ assert isinstance(mean_estimator, TabularEstimator)
111
+
112
+ tabular_predictor = mean_estimator.predictor
113
+ mean_estimator.predictor = None # type: ignore
102
114
  save_path = super().save(path=path, verbose=verbose)
103
- self._mlf.models_["mean"].predictor = tabular_predictor
115
+ mean_estimator.predictor = tabular_predictor
104
116
  return save_path
105
117
 
106
118
  @classmethod
107
- def load(
108
- cls, path: str, reset_paths: bool = True, load_oof: bool = False, verbose: bool = True
109
- ) -> "AbstractTimeSeriesModel":
119
+ def load(cls, path: str, reset_paths: bool = True, load_oof: bool = False, verbose: bool = True) -> Self:
110
120
  model = super().load(path=path, reset_paths=reset_paths, load_oof=load_oof, verbose=verbose)
111
121
  assert "mean" in model._mlf.models_, "Loaded model doesn't have a trained TabularPredictor"
122
+ assert isinstance(model._mlf.models_["mean"], TabularEstimator)
112
123
  model._mlf.models_["mean"].predictor = TabularPredictor.load(model.tabular_predictor_path)
113
124
  return model
114
125
 
@@ -131,24 +142,27 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
131
142
  data[self.target] = data[self.target].fillna(value=self._train_target_median)
132
143
  return data, known_covariates
133
144
 
134
- def _get_extra_tabular_init_kwargs(self) -> dict:
145
+ def _get_extra_tabular_init_kwargs(self) -> Dict[str, Any]:
135
146
  raise NotImplementedError
136
147
 
137
- def get_hyperparameters(self) -> dict:
138
- model_params = super().get_hyperparameters().copy()
139
- model_params.setdefault("max_num_items", 20_000)
140
- model_params.setdefault("max_num_samples", 1_000_000)
141
- model_params.setdefault("tabular_hyperparameters", {"GBM": {}})
142
- model_params.setdefault("tabular_fit_kwargs", {})
143
- return model_params
148
+ def _get_default_hyperparameters(self) -> Dict[str, Any]:
149
+ return {
150
+ "max_num_items": 20_000,
151
+ "max_num_samples": 1_000_000,
152
+ "tabular_hyperparameters": {"GBM": {}},
153
+ "tabular_fit_kwargs": {},
154
+ }
144
155
 
145
- def _get_mlforecast_init_args(self, train_data: TimeSeriesDataFrame, model_params: dict) -> dict:
156
+ def _get_mlforecast_init_args(
157
+ self, train_data: TimeSeriesDataFrame, model_params: Dict[str, Any]
158
+ ) -> Dict[str, Any]:
146
159
  from mlforecast.target_transforms import Differences
147
160
 
148
161
  from .transforms import MLForecastScaler
149
162
 
150
163
  lags = model_params.get("lags")
151
164
  if lags is None:
165
+ assert self.freq is not None
152
166
  lags = get_lags_for_frequency(self.freq)
153
167
  self._target_lags = np.array(sorted(set(lags)), dtype=np.int64)
154
168
 
@@ -159,6 +173,7 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
159
173
 
160
174
  target_transforms = []
161
175
  differences = model_params.get("differences")
176
+ assert isinstance(differences, Collection)
162
177
 
163
178
  ts_lengths = train_data.num_timesteps_per_item()
164
179
  required_ts_length = sum(differences) + 1
@@ -196,7 +211,7 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
196
211
  return df
197
212
 
198
213
  @staticmethod
199
- def _shorten_all_series(mlforecast_df: pd.DataFrame, max_length: int):
214
+ def _shorten_all_series(mlforecast_df: pd.DataFrame, max_length: int) -> pd.DataFrame:
200
215
  logger.debug(f"Shortening all series to at most {max_length}")
201
216
  return mlforecast_df.groupby(MLF_ITEMID, as_index=False, sort=False).tail(max_length)
202
217
 
@@ -231,7 +246,7 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
231
246
  # Unless we set static_features=[], MLForecast interprets all known covariates as static features
232
247
  df = self._mlf.preprocess(mlforecast_df, dropna=False, static_features=[])
233
248
  # df.query results in 2x memory saving compared to df.dropna(subset="y")
234
- df = df.query("y.notnull()")
249
+ df = df.query("y.notnull()") # type: ignore
235
250
 
236
251
  df = self._mask_df(df)
237
252
 
@@ -250,12 +265,12 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
250
265
  val_df = grouped_df.tail(val_rows_per_item)
251
266
  logger.debug(f"train_df shape: {train_df.shape}, val_df shape: {val_df.shape}")
252
267
 
253
- return train_df.drop(columns=[MLF_TIMESTAMP]), val_df.drop(columns=[MLF_TIMESTAMP])
268
+ return train_df.drop(columns=[MLF_TIMESTAMP]), val_df.drop(columns=[MLF_TIMESTAMP]) # type: ignore
254
269
 
255
270
  def _to_mlforecast_df(
256
271
  self,
257
272
  data: TimeSeriesDataFrame,
258
- static_features: pd.DataFrame,
273
+ static_features: Optional[pd.DataFrame],
259
274
  include_target: bool = True,
260
275
  ) -> pd.DataFrame:
261
276
  """Convert TimeSeriesDataFrame to a format expected by MLForecast methods `predict` and `preprocess`.
@@ -288,7 +303,9 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
288
303
  self,
289
304
  train_data: TimeSeriesDataFrame,
290
305
  val_data: Optional[TimeSeriesDataFrame] = None,
291
- time_limit: Optional[int] = None,
306
+ time_limit: Optional[float] = None,
307
+ num_cpus: Optional[int] = None,
308
+ num_gpus: Optional[int] = None,
292
309
  verbosity: int = 2,
293
310
  **kwargs,
294
311
  ) -> None:
@@ -304,6 +321,7 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
304
321
  model_params = self.get_hyperparameters()
305
322
 
306
323
  mlforecast_init_args = self._get_mlforecast_init_args(train_data, model_params)
324
+ assert self.freq is not None
307
325
  self._mlf = MLForecast(models={}, freq=self.freq, **mlforecast_init_args)
308
326
 
309
327
  # We generate train/val splits from train_data and ignore val_data to avoid overfitting
@@ -327,10 +345,10 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
327
345
  **model_params["tabular_fit_kwargs"],
328
346
  },
329
347
  )
330
- self._mlf.models = {"mean": estimator}
348
+ self._mlf.models = {"mean": estimator} # type: ignore
331
349
 
332
350
  with warning_filter():
333
- self._mlf.fit_models(X=train_df.drop(columns=[MLF_TARGET, MLF_ITEMID]), y=train_df[MLF_TARGET])
351
+ self._mlf.fit_models(X=train_df.drop(columns=[MLF_TARGET, MLF_ITEMID]), y=train_df[MLF_TARGET]) # type: ignore
334
352
 
335
353
  self._save_residuals_std(val_df)
336
354
 
@@ -340,14 +358,19 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
340
358
  Saves the per-item standard deviation of the residuals to `self._residuals_std_per_item`.
341
359
  """
342
360
  residuals_df = val_df[[MLF_ITEMID, MLF_TARGET]]
343
- residuals_df = residuals_df.assign(y_pred=self._mlf.models_["mean"].predict(val_df))
361
+ mean_estimator = self._mlf.models_["mean"]
362
+ assert isinstance(mean_estimator, TabularEstimator)
363
+
364
+ residuals_df = residuals_df.assign(y_pred=mean_estimator.predict(val_df))
344
365
  if self._scaler is not None:
345
366
  # Scaler expects to find column MLF_TIMESTAMP even though it's not used - fill with dummy
346
- residuals_df = residuals_df.assign(**{MLF_TIMESTAMP: 1})
367
+ residuals_df = residuals_df.assign(**{MLF_TIMESTAMP: np.datetime64("2010-01-01")})
347
368
  residuals_df = self._scaler.inverse_transform(residuals_df)
369
+
370
+ assert isinstance(residuals_df, pd.DataFrame)
348
371
  residuals = residuals_df[MLF_TARGET] - residuals_df["y_pred"]
349
372
  self._residuals_std_per_item = (
350
- residuals.pow(2.0).groupby(val_df[MLF_ITEMID].values, sort=False).mean().pow(0.5)
373
+ residuals.pow(2.0).groupby(val_df[MLF_ITEMID].values, sort=False).mean().pow(0.5) # type: ignore
351
374
  )
352
375
 
353
376
  def _remove_short_ts_and_generate_fallback_forecast(
@@ -395,7 +418,9 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
395
418
  forecast_for_short_series = None
396
419
  return data_long, known_covariates_long, forecast_for_short_series
397
420
 
398
- def _add_gaussian_quantiles(self, predictions: pd.DataFrame, repeated_item_ids: pd.Series, past_target: pd.Series):
421
+ def _add_gaussian_quantiles(
422
+ self, predictions: pd.DataFrame, repeated_item_ids: pd.Series, past_target: pd.Series
423
+ ) -> pd.DataFrame:
399
424
  """
400
425
  Add quantile levels assuming that residuals follow normal distribution
401
426
  """
@@ -410,9 +435,9 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
410
435
  # Use in-sample seasonal error for items not seen during fit
411
436
  items_not_seen_during_fit = residuals_std_per_timestep.index[residuals_std_per_timestep.isna()].unique()
412
437
  if len(items_not_seen_during_fit) > 0:
413
- scale_for_new_items: pd.Series = np.sqrt(
414
- in_sample_squared_seasonal_error(y_past=past_target.loc[items_not_seen_during_fit])
415
- )
438
+ scale_for_new_items: pd.Series = in_sample_squared_seasonal_error(
439
+ y_past=past_target.loc[items_not_seen_during_fit]
440
+ ).pow(0.5)
416
441
  residuals_std_per_timestep = residuals_std_per_timestep.fillna(scale_for_new_items)
417
442
 
418
443
  std_per_timestep = residuals_std_per_timestep * normal_scale_per_timestep
@@ -420,7 +445,7 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
420
445
  predictions[str(q)] = predictions["mean"] + norm.ppf(q) * std_per_timestep.to_numpy()
421
446
  return predictions
422
447
 
423
- def _more_tags(self) -> dict:
448
+ def _more_tags(self) -> Dict[str, Any]:
424
449
  return {"allow_nan": True, "can_refit_full": True}
425
450
 
426
451
 
@@ -473,7 +498,7 @@ class DirectTabularModel(AbstractMLForecastModel):
473
498
  def is_quantile_model(self) -> bool:
474
499
  return self.eval_metric.needs_quantile
475
500
 
476
- def get_hyperparameters(self) -> dict:
501
+ def get_hyperparameters(self) -> Dict[str, Any]:
477
502
  model_params = super().get_hyperparameters()
478
503
  model_params.setdefault("target_scaler", "mean_abs")
479
504
  if "differences" not in model_params or model_params["differences"] is None:
@@ -512,6 +537,7 @@ class DirectTabularModel(AbstractMLForecastModel):
512
537
  )
513
538
  if len(data) == 0:
514
539
  # All time series are too short for chosen differences
540
+ assert forecast_for_short_series is not None
515
541
  return forecast_for_short_series
516
542
 
517
543
  if known_covariates is not None:
@@ -522,15 +548,19 @@ class DirectTabularModel(AbstractMLForecastModel):
522
548
  # MLForecast raises an exception if the target contains NaN. We use inf as a placeholder and replace it with NaN afterwards
523
549
  data_future[self.target] = float("inf")
524
550
  data_extended = pd.concat([data, data_future])
525
- mlforecast_df = self._to_mlforecast_df(data_extended, data.static_features)
551
+ mlforecast_df = self._to_mlforecast_df(data_extended, data.static_features) # type: ignore
526
552
  if self._max_ts_length is not None:
527
553
  # We appended `prediction_length` time steps to each series, so increase length
528
554
  mlforecast_df = self._shorten_all_series(mlforecast_df, self._max_ts_length + self.prediction_length)
529
555
  df = self._mlf.preprocess(mlforecast_df, dropna=False, static_features=[])
556
+ assert isinstance(df, pd.DataFrame)
557
+
530
558
  df = df.groupby(MLF_ITEMID, sort=False).tail(self.prediction_length)
531
559
  df = df.replace(float("inf"), float("nan"))
532
560
 
533
- raw_predictions = self._mlf.models_["mean"].predict(df)
561
+ mean_estimator = self._mlf.models_["mean"]
562
+ assert isinstance(mean_estimator, TabularEstimator)
563
+ raw_predictions = mean_estimator.predict(df)
534
564
  predictions = self._postprocess_predictions(raw_predictions, repeated_item_ids=df[MLF_ITEMID])
535
565
  # Paste columns one by one to preserve dtypes
536
566
  predictions[MLF_ITEMID] = df[MLF_ITEMID].values
@@ -542,6 +572,7 @@ class DirectTabularModel(AbstractMLForecastModel):
542
572
  if self._max_ts_length is not None:
543
573
  mlforecast_df_past = self._shorten_all_series(mlforecast_df_past, self._max_ts_length)
544
574
  self._mlf.preprocess(mlforecast_df_past, static_features=[], dropna=False)
575
+ assert self._mlf.ts.target_transforms is not None
545
576
  for tfm in self._mlf.ts.target_transforms[::-1]:
546
577
  predictions = apply_inverse_transform(predictions, transform=tfm)
547
578
 
@@ -549,25 +580,30 @@ class DirectTabularModel(AbstractMLForecastModel):
549
580
  predictions = self._add_gaussian_quantiles(
550
581
  predictions, repeated_item_ids=predictions[MLF_ITEMID], past_target=data[self.target]
551
582
  )
552
- predictions = TimeSeriesDataFrame(predictions.rename(columns={MLF_ITEMID: ITEMID, MLF_TIMESTAMP: TIMESTAMP}))
583
+ predictions_tsdf: TimeSeriesDataFrame = TimeSeriesDataFrame(
584
+ predictions.rename(columns={MLF_ITEMID: ITEMID, MLF_TIMESTAMP: TIMESTAMP})
585
+ )
553
586
 
554
587
  if forecast_for_short_series is not None:
555
- predictions = pd.concat([predictions, forecast_for_short_series])
556
- predictions = predictions.reindex(original_item_id_order, level=ITEMID)
557
- return predictions
588
+ predictions_tsdf = pd.concat([predictions_tsdf, forecast_for_short_series]) # type: ignore
589
+ predictions_tsdf = predictions_tsdf.reindex(original_item_id_order, level=ITEMID)
558
590
 
559
- def _postprocess_predictions(self, predictions: np.ndarray, repeated_item_ids: pd.Series) -> pd.DataFrame:
591
+ return predictions_tsdf
592
+
593
+ def _postprocess_predictions(
594
+ self, predictions: Union[np.ndarray, pd.Series], repeated_item_ids: pd.Series
595
+ ) -> pd.DataFrame:
560
596
  if self.is_quantile_model:
561
- predictions = pd.DataFrame(predictions, columns=[str(q) for q in self.quantile_levels])
562
- predictions.values.sort(axis=1)
563
- predictions["mean"] = predictions["0.5"]
597
+ predictions_df = pd.DataFrame(predictions, columns=[str(q) for q in self.quantile_levels])
598
+ predictions_df.values.sort(axis=1)
599
+ predictions_df["mean"] = predictions_df["0.5"]
564
600
  else:
565
- predictions = pd.DataFrame(predictions, columns=["mean"])
601
+ predictions_df = pd.DataFrame(predictions, columns=["mean"])
566
602
 
567
- column_order = ["mean"] + [col for col in predictions.columns if col != "mean"]
568
- return predictions[column_order]
603
+ column_order = ["mean"] + [col for col in predictions_df.columns if col != "mean"]
604
+ return predictions_df[column_order]
569
605
 
570
- def _get_extra_tabular_init_kwargs(self) -> dict:
606
+ def _get_extra_tabular_init_kwargs(self) -> Dict[str, Any]:
571
607
  if self.is_quantile_model:
572
608
  return {
573
609
  "problem_type": ag.constants.QUANTILE,
@@ -622,7 +658,7 @@ class RecursiveTabularModel(AbstractMLForecastModel):
622
658
  end of each time series).
623
659
  """
624
660
 
625
- def get_hyperparameters(self) -> dict:
661
+ def get_hyperparameters(self) -> Dict[str, Any]:
626
662
  model_params = super().get_hyperparameters()
627
663
  model_params.setdefault("target_scaler", "standard")
628
664
  if "differences" not in model_params or model_params["differences"] is None:
@@ -641,6 +677,7 @@ class RecursiveTabularModel(AbstractMLForecastModel):
641
677
  )
642
678
  if len(data) == 0:
643
679
  # All time series are too short for chosen differences
680
+ assert forecast_for_short_series is not None
644
681
  return forecast_for_short_series
645
682
 
646
683
  new_df = self._to_mlforecast_df(data, data.static_features)
@@ -648,7 +685,9 @@ class RecursiveTabularModel(AbstractMLForecastModel):
648
685
  new_df = self._shorten_all_series(new_df, self._max_ts_length)
649
686
  if known_covariates is None:
650
687
  future_index = self.get_forecast_horizon_index(data)
651
- known_covariates = pd.DataFrame(columns=[self.target], index=future_index, dtype="float32")
688
+ known_covariates = TimeSeriesDataFrame(
689
+ pd.DataFrame(columns=[self.target], index=future_index, dtype="float32")
690
+ )
652
691
  X_df = self._to_mlforecast_df(known_covariates, data.static_features, include_target=False)
653
692
  # If both covariates & static features are missing, set X_df = None to avoid exception from MLForecast
654
693
  if len(X_df.columns.difference([MLF_ITEMID, MLF_TIMESTAMP])) == 0:
@@ -659,18 +698,19 @@ class RecursiveTabularModel(AbstractMLForecastModel):
659
698
  new_df=new_df,
660
699
  X_df=X_df,
661
700
  )
662
- predictions = raw_predictions.rename(columns={MLF_ITEMID: ITEMID, MLF_TIMESTAMP: TIMESTAMP})
663
- predictions = TimeSeriesDataFrame(
701
+ assert isinstance(raw_predictions, pd.DataFrame)
702
+ raw_predictions = raw_predictions.rename(columns={MLF_ITEMID: ITEMID, MLF_TIMESTAMP: TIMESTAMP})
703
+
704
+ predictions: TimeSeriesDataFrame = TimeSeriesDataFrame(
664
705
  self._add_gaussian_quantiles(
665
- predictions, repeated_item_ids=predictions[ITEMID], past_target=data[self.target]
706
+ raw_predictions, repeated_item_ids=raw_predictions[ITEMID], past_target=data[self.target]
666
707
  )
667
708
  )
668
-
669
709
  if forecast_for_short_series is not None:
670
- predictions = pd.concat([predictions, forecast_for_short_series])
710
+ predictions = pd.concat([predictions, forecast_for_short_series]) # type: ignore
671
711
  return predictions.reindex(original_item_id_order, level=ITEMID)
672
712
 
673
- def _get_extra_tabular_init_kwargs(self) -> dict:
713
+ def _get_extra_tabular_init_kwargs(self) -> Dict[str, Any]:
674
714
  return {
675
715
  "problem_type": ag.constants.REGRESSION,
676
716
  "eval_metric": self.eval_metric.equivalent_tabular_regression_metric or "mean_absolute_error",