autogluon.timeseries 1.0.1b20240407__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)
  1. autogluon/timeseries/configs/presets_configs.py +2 -2
  2. autogluon/timeseries/dataset/ts_dataframe.py +9 -9
  3. autogluon/timeseries/learner.py +8 -3
  4. autogluon/timeseries/models/abstract/abstract_timeseries_model.py +3 -2
  5. autogluon/timeseries/models/autogluon_tabular/mlforecast.py +12 -3
  6. autogluon/timeseries/models/chronos/model.py +27 -44
  7. autogluon/timeseries/models/chronos/utils.py +66 -0
  8. autogluon/timeseries/models/gluonts/abstract_gluonts.py +22 -27
  9. autogluon/timeseries/models/gluonts/torch/models.py +0 -2
  10. autogluon/timeseries/models/local/abstract_local_model.py +14 -11
  11. autogluon/timeseries/models/multi_window/multi_window_model.py +1 -0
  12. autogluon/timeseries/models/presets.py +23 -13
  13. autogluon/timeseries/predictor.py +19 -7
  14. autogluon/timeseries/trainer/abstract_trainer.py +1 -1
  15. autogluon/timeseries/utils/datetime/base.py +38 -20
  16. autogluon/timeseries/utils/datetime/lags.py +13 -12
  17. autogluon/timeseries/utils/datetime/seasonality.py +11 -11
  18. autogluon/timeseries/utils/datetime/time_features.py +12 -11
  19. autogluon/timeseries/utils/features.py +12 -7
  20. autogluon/timeseries/utils/forecast.py +7 -2
  21. autogluon/timeseries/utils/warning_filters.py +1 -3
  22. autogluon/timeseries/version.py +1 -1
  23. {autogluon.timeseries-1.0.1b20240407.dist-info → autogluon.timeseries-1.1.0.dist-info}/METADATA +13 -14
  24. {autogluon.timeseries-1.0.1b20240407.dist-info → autogluon.timeseries-1.1.0.dist-info}/RECORD +31 -30
  25. /autogluon.timeseries-1.0.1b20240407-py3.8-nspkg.pth → /autogluon.timeseries-1.1.0-py3.8-nspkg.pth +0 -0
  26. {autogluon.timeseries-1.0.1b20240407.dist-info → autogluon.timeseries-1.1.0.dist-info}/LICENSE +0 -0
  27. {autogluon.timeseries-1.0.1b20240407.dist-info → autogluon.timeseries-1.1.0.dist-info}/NOTICE +0 -0
  28. {autogluon.timeseries-1.0.1b20240407.dist-info → autogluon.timeseries-1.1.0.dist-info}/WHEEL +0 -0
  29. {autogluon.timeseries-1.0.1b20240407.dist-info → autogluon.timeseries-1.1.0.dist-info}/namespace_packages.txt +0 -0
  30. {autogluon.timeseries-1.0.1b20240407.dist-info → autogluon.timeseries-1.1.0.dist-info}/top_level.txt +0 -0
  31. {autogluon.timeseries-1.0.1b20240407.dist-info → autogluon.timeseries-1.1.0.dist-info}/zip-safe +0 -0

autogluon/timeseries/configs/presets_configs.py
@@ -33,13 +33,13 @@ TIMESERIES_PRESETS_CONFIGS = dict(
     chronos_ensemble={
         "hyperparameters": {
             "Chronos": {"model_path": "small"},
-            **get_default_hps("default"),
+            **get_default_hps("light_inference"),
         }
     },
     chronos_large_ensemble={
         "hyperparameters": {
             "Chronos": {"model_path": "large", "batch_size": 8},
-            **get_default_hps("default"),
+            **get_default_hps("light_inference"),
         }
     },
 )

autogluon/timeseries/dataset/ts_dataframe.py
@@ -134,7 +134,7 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
     ----------
     freq : str
         A pandas-compatible string describing the frequency of the time series. For example ``"D"`` for daily data,
-        ``"H"`` for hourly data, etc. This attribute is determined automatically based on the timestamps. For the full
+        ``"h"`` for hourly data, etc. This attribute is determined automatically based on the timestamps. For the full
         list of possible values, see `pandas documentation <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_.
     num_items : int
         Number of items (time series) in the data set.
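
The "H" → "h" docstring change above follows pandas 2.2, which deprecated several uppercase offset aliases; the same migration drives the "Y" → "YE" doctest updates in convert_frequency further down. A minimal sketch of the new aliases, assuming pandas >= 2.2:

    import pandas as pd

    # Lowercase "h" is now the canonical hourly alias; "H" emits a FutureWarning.
    hourly = pd.date_range("2024-01-01", periods=3, freq="h")
    # Period-end anchored offsets gained an "E" suffix: "M" -> "ME", "Q" -> "QE", "Y" -> "YE".
    yearly = pd.date_range("2020-01-01", periods=2, freq="YE")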
@@ -759,12 +759,6 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
                 2019-02-07    4.0

         """
-        if self.freq is None:
-            raise ValueError(
-                "Please make sure that all time series have a regular index before calling `fill_missing_values`"
-                "(for example, using the `convert_frequency` method)."
-            )
-
         # Convert to pd.DataFrame for faster processing
         df = pd.DataFrame(self)

@@ -772,6 +766,12 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
         if not df.isna().any(axis=None):
             return self

+        if not self.index.is_monotonic_increasing:
+            logger.warning(
+                "Trying to fill missing values in an unsorted dataframe. "
+                "It is highly recommended to call `ts_df.sort_index()` before calling `ts_df.fill_missing_values()`"
+            )
+
         grouped_df = df.groupby(level=ITEMID, sort=False, group_keys=False)
         if method == "auto":
             filled_df = grouped_df.ffill()
@@ -961,12 +961,12 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
                 2021-06-30     6.0
                 2021-09-30     7.0
                 2021-12-31     8.0
-        >>> ts_df.convert_frequency("Y")
+        >>> ts_df.convert_frequency("YE")
                            target
        item_id timestamp
        0       2020-12-31     2.5
                2021-12-31     6.5
-        >>> ts_df.convert_frequency("Y", agg_numeric="sum")
+        >>> ts_df.convert_frequency("YE", agg_numeric="sum")
                            target
        item_id timestamp
        0       2020-12-31    10.0

autogluon/timeseries/learner.py
@@ -43,6 +43,7 @@ class TimeSeriesLearner(AbstractLearner):
         self.prediction_length = prediction_length
         self.quantile_levels = kwargs.get("quantile_levels", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
         self.cache_predictions = cache_predictions
+        self.freq: Optional[str] = None

         self.feature_generator = TimeSeriesFeatureGenerator(
             target=self.target, known_covariates_names=self.known_covariates_names
@@ -87,6 +88,8 @@ class TimeSeriesLearner(AbstractLearner):
         if val_data is not None:
             val_data = self.feature_generator.transform(val_data, data_frame_name="tuning_data")

+        self.freq = train_data.freq
+
         trainer_init_kwargs = kwargs.copy()
         trainer_init_kwargs.update(
             dict(
@@ -155,7 +158,9 @@ class TimeSeriesLearner(AbstractLearner):
                 f"known_covariates are missing information for the following item_ids: {reprlib.repr(missing_item_ids.to_list())}."
             )

-        forecast_index = get_forecast_horizon_index_ts_dataframe(data, prediction_length=self.prediction_length)
+        forecast_index = get_forecast_horizon_index_ts_dataframe(
+            data, prediction_length=self.prediction_length, freq=self.freq
+        )
         try:
             known_covariates = known_covariates.loc[forecast_index]
         except KeyError:
@@ -245,8 +250,8 @@ class TimeSeriesLearner(AbstractLearner):
                 raise ValueError(f"Feature {fn} not found in covariate metadata or the dataset.")

         if len(set(features)) < len(features):
-            logger.warning(
-                "Duplicate feature names provided to compute feature importance. This will lead to unexpected behavior. "
+            raise ValueError(
+                "Duplicate feature names provided to compute feature importance. "
                 "Please provide unique feature names across both static features and covariates."
             )


autogluon/timeseries/models/abstract/abstract_timeseries_model.py
@@ -31,7 +31,7 @@ class AbstractTimeSeriesModel(AbstractModel):
         If None, a new unique time-stamped directory is chosen.
     freq: str
         Frequency string (cf. gluonts frequency strings) describing the frequency
-        of the time series data. For example, "H" for hourly or "D" for daily data.
+        of the time series data. For example, "h" for hourly or "D" for daily data.
     prediction_length: int
         Length of the prediction horizon, i.e., the number of time steps the model
         is fit to forecast.
@@ -373,13 +373,14 @@ class AbstractTimeSeriesModel(AbstractModel):
         val_data: TimeSeriesDataFrame,
         store_val_score: bool = False,
         store_predict_time: bool = False,
+        **predict_kwargs,
     ) -> None:
         """Compute val_score, predict_time and cache out-of-fold (OOF) predictions."""
         past_data, known_covariates = val_data.get_model_inputs_for_scoring(
             prediction_length=self.prediction_length, known_covariates_names=self.metadata.known_covariates
         )
         predict_start_time = time.time()
-        oof_predictions = self.predict(past_data, known_covariates=known_covariates)
+        oof_predictions = self.predict(past_data, known_covariates=known_covariates, **predict_kwargs)
         self._oof_predictions = [oof_predictions]
         if store_predict_time:
             self.predict_time = time.time() - predict_start_time

autogluon/timeseries/models/autogluon_tabular/mlforecast.py
@@ -252,6 +252,15 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
         if static_features is not None:
             df = pd.merge(df, static_features, how="left", on=ITEMID, suffixes=(None, "_static_feat"))

+        for col in self.metadata.known_covariates_real:
+            # Normalize non-boolean features using mean_abs scaling
+            if not df[col].isin([0, 1]).all():
+                df[f"__scaled_{col}"] = df[col] / df[col].abs().groupby(df[ITEMID]).mean().reindex(df[ITEMID]).values
+
+        # Convert float64 to float32 to reduce memory usage
+        float64_cols = list(df.select_dtypes(include="float64"))
+        df[float64_cols] = df[float64_cols].astype("float32")
+
         # We assume that df is sorted by 'unique_id' inside `TimeSeriesPredictor._check_and_prepare_data_frame`
         return df.rename(columns=column_name_mapping)

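A worked example of the mean_abs covariate scaling added above, on toy data with hypothetical column names:

    import pandas as pd

    df = pd.DataFrame({"item_id": ["A", "A", "A"], "price": [2.0, -4.0, 6.0]})
    # Per-item mean absolute value: A -> (2 + 4 + 6) / 3 = 4.0
    mean_abs = df["price"].abs().groupby(df["item_id"]).mean()
    # Broadcast back to row level and divide: [0.5, -1.0, 1.5]
    df["__scaled_price"] = df["price"] / mean_abs.reindex(df["item_id"]).values

Columns that only contain 0/1 values are treated as boolean flags and left unscaled.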
@@ -332,7 +341,7 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
         Seasonal naive forecast for short series, if there are any in the dataset.
         """
         ts_lengths = data.num_timesteps_per_item()
-        short_series = ts_lengths.index[ts_lengths <= self._sum_of_differences]
+        short_series = ts_lengths.index[ts_lengths <= self._sum_of_differences + 1]
         if len(short_series) > 0:
             logger.warning(
                 f"Warning: {len(short_series)} time series ({len(short_series) / len(ts_lengths):.1%}) are shorter "
@@ -474,7 +483,7 @@ class DirectTabularModel(AbstractMLForecastModel):
         if known_covariates is not None:
             data_future = known_covariates.copy()
         else:
-            future_index = get_forecast_horizon_index_ts_dataframe(data, self.prediction_length)
+            future_index = get_forecast_horizon_index_ts_dataframe(data, self.prediction_length, freq=self.freq)
             data_future = pd.DataFrame(columns=[self.target], index=future_index, dtype="float32")
         # MLForecast raises exception of target contains NaN. We use inf as placeholder, replace them by NaN afterwards
         data_future[self.target] = float("inf")
@@ -606,7 +615,7 @@ class RecursiveTabularModel(AbstractMLForecastModel):
         if self._max_ts_length is not None:
             new_df = self._shorten_all_series(new_df, self._max_ts_length)
         if known_covariates is None:
-            future_index = get_forecast_horizon_index_ts_dataframe(data, self.prediction_length)
+            future_index = get_forecast_horizon_index_ts_dataframe(data, self.prediction_length, freq=self.freq)
             known_covariates = pd.DataFrame(columns=[self.target], index=future_index, dtype="float32")
         X_df = self._to_mlforecast_df(known_covariates, data.static_features, include_target=False)
         # If both covariates & static features are missing, set X_df = None to avoid exception from MLForecast

autogluon/timeseries/models/chronos/model.py
@@ -53,42 +53,6 @@ MODEL_ALIASES = {
 }


-class ChronosInferenceDataset:
-    """A container for time series datasets that implements the ``torch.utils.data.Dataset`` interface"""
-
-    def __init__(
-        self,
-        target_df: TimeSeriesDataFrame,
-        context_length: int,
-        target_column: str = "target",
-    ):
-        assert context_length > 0
-        self.context_length = context_length
-        self.target_array = target_df[target_column].to_numpy(dtype=np.float32)
-        self.freq = target_df.freq
-
-        # store pointer to start:end of each time series
-        cum_sizes = target_df.num_timesteps_per_item().values.cumsum()
-        self.indptr = np.append(0, cum_sizes).astype(np.int32)
-
-    def __len__(self):
-        return len(self.indptr) - 1  # noqa
-
-    def _get_context(self, a: np.ndarray, pad_value=np.nan):
-        a = a[-self.context_length :]
-        pad_size = self.context_length - len(a)
-        if pad_size > 0:
-            pad = np.full(shape=(pad_size,), fill_value=pad_value)
-            a = np.concatenate((pad, a))
-        return a
-
-    def __getitem__(self, idx) -> np.ndarray:
-        start_idx = self.indptr[idx]
-        end_idx = self.indptr[idx + 1]
-
-        return self._get_context(self.target_array[start_idx:end_idx])
-
-
 class ChronosModel(AbstractTimeSeriesModel):
     """Chronos pretrained time series forecasting models, based on the original
     `ChronosModel <https://github.com/amazon-science/chronos-forecasting>`_ implementation.
@@ -196,6 +160,7 @@ class ChronosModel(AbstractTimeSeriesModel):
         )

         self.model_pipeline: Optional[Any] = None  # of type OptimizedChronosPipeline
+        self.time_limit: Optional[float] = None

     def save(self, path: str = None, verbose: bool = True) -> str:
         pipeline = self.model_pipeline
@@ -288,14 +253,16 @@ class ChronosModel(AbstractTimeSeriesModel):
         **kwargs,
     ) -> None:
         self._check_fit_params()
+        self.time_limit = time_limit

     def _get_inference_data_loader(
         self,
         data: TimeSeriesDataFrame,
         context_length: int,
         num_workers: int = 0,
+        time_limit: Optional[float] = None,
     ):
-        import torch
+        from .utils import ChronosInferenceDataLoader, ChronosInferenceDataset, timeout_callback

         chronos_dataset = ChronosInferenceDataset(
             target_df=data,
@@ -303,11 +270,12 @@ class ChronosModel(AbstractTimeSeriesModel):
             context_length=context_length,
         )

-        return torch.utils.data.DataLoader(
+        return ChronosInferenceDataLoader(
             chronos_dataset,
             batch_size=self.batch_size,
             shuffle=False,
             num_workers=num_workers,
+            on_batch=timeout_callback(seconds=time_limit),
         )

     def _predict(
@@ -333,6 +301,12 @@ class ChronosModel(AbstractTimeSeriesModel):
         # load model pipeline to device memory
         self.load_model_pipeline(context_length=context_length)

+        inference_data_loader = self._get_inference_data_loader(
+            data=data,
+            num_workers=self.data_loader_num_workers,
+            context_length=context_length,
+            time_limit=kwargs.get("time_limit"),
+        )
         self.model_pipeline.model.eval()
         with torch.inference_mode():
             prediction_samples = [
@@ -345,11 +319,7 @@ class ChronosModel(AbstractTimeSeriesModel):
                 .detach()
                 .cpu()
                 .numpy()
-                for batch in self._get_inference_data_loader(
-                    data=data,
-                    num_workers=self.data_loader_num_workers,
-                    context_length=context_length,
-                )
+                for batch in inference_data_loader
             ]

         samples = np.concatenate(prediction_samples, axis=0).swapaxes(1, 2).reshape(-1, self.num_samples)
@@ -360,10 +330,23 @@ class ChronosModel(AbstractTimeSeriesModel):
         df = pd.DataFrame(
             np.concatenate([mean, quantiles], axis=1),
             columns=["mean"] + [str(q) for q in self.quantile_levels],
-            index=get_forecast_horizon_index_ts_dataframe(data, self.prediction_length),
+            index=get_forecast_horizon_index_ts_dataframe(data, self.prediction_length, freq=self.freq),
         )

         return TimeSeriesDataFrame(df)

     def _more_tags(self) -> Dict:
         return {"allow_nan": True}
+
+    def score_and_cache_oof(
+        self,
+        val_data: TimeSeriesDataFrame,
+        store_val_score: bool = False,
+        store_predict_time: bool = False,
+        **predict_kwargs,
+    ) -> None:
+        # All computation happens during inference, so we provide the time_limit at prediction time
+        # TODO: Once custom predict_kwargs is allowed, make sure that `time_limit` is not among the keys
+        super().score_and_cache_oof(
+            val_data, store_val_score, store_predict_time, time_limit=self.time_limit, **predict_kwargs
+        )

autogluon/timeseries/models/chronos/utils.py (new file)
@@ -0,0 +1,66 @@
+import time
+from typing import Callable, Optional
+
+import numpy as np
+import torch
+
+from autogluon.core.utils.exceptions import TimeLimitExceeded
+from autogluon.timeseries.dataset.ts_dataframe import TimeSeriesDataFrame
+
+
+class ChronosInferenceDataset:
+    """A container for time series datasets that implements the ``torch.utils.data.Dataset`` interface"""
+
+    def __init__(
+        self,
+        target_df: TimeSeriesDataFrame,
+        context_length: int,
+        target_column: str = "target",
+    ):
+        assert context_length > 0
+        self.context_length = context_length
+        self.target_array = target_df[target_column].to_numpy(dtype=np.float32)
+        self.freq = target_df.freq
+
+        # store pointer to start:end of each time series
+        cum_sizes = target_df.num_timesteps_per_item().values.cumsum()
+        self.indptr = np.append(0, cum_sizes).astype(np.int32)
+
+    def __len__(self):
+        return len(self.indptr) - 1  # noqa
+
+    def _get_context(self, a: np.ndarray, pad_value=np.nan):
+        a = a[-self.context_length :]
+        pad_size = self.context_length - len(a)
+        if pad_size > 0:
+            pad = np.full(shape=(pad_size,), fill_value=pad_value)
+            a = np.concatenate((pad, a))
+        return a
+
+    def __getitem__(self, idx) -> np.ndarray:
+        start_idx = self.indptr[idx]
+        end_idx = self.indptr[idx + 1]
+
+        return self._get_context(self.target_array[start_idx:end_idx])
+
+
+class ChronosInferenceDataLoader(torch.utils.data.DataLoader):
+    def __init__(self, *args, **kwargs):
+        self.callback: Callable = kwargs.pop("on_batch", lambda: None)
+        super().__init__(*args, **kwargs)
+
+    def __iter__(self):
+        for item in super().__iter__():
+            yield item
+            self.callback()
+
+
+def timeout_callback(seconds: Optional[float]) -> Callable:
+    """Return a callback object that raises an exception if time limit is exceeded."""
+    start_time = time.time()
+
+    def callback() -> None:
+        if seconds is not None and time.time() - start_time > seconds:
+            raise TimeLimitExceeded
+
+    return callback
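
A hedged usage sketch for the new helpers (train_df stands in for an assumed TimeSeriesDataFrame; batch size and budget are illustrative):

    dataset = ChronosInferenceDataset(target_df=train_df, context_length=512)
    loader = ChronosInferenceDataLoader(
        dataset,
        batch_size=16,
        shuffle=False,
        on_batch=timeout_callback(seconds=60.0),  # seconds=None disables the limit
    )
    for batch in loader:  # float32 tensors of shape (batch_size, context_length)
        ...  # run the Chronos pipeline on the batch

Because the callback fires after each yielded batch, the limit is enforced at batch granularity: a single slow batch can overshoot the budget before TimeLimitExceeded is raised.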

autogluon/timeseries/models/gluonts/abstract_gluonts.py
@@ -15,7 +15,6 @@ from gluonts.dataset.field_names import FieldName
 from gluonts.model.estimator import Estimator as GluonTSEstimator
 from gluonts.model.forecast import Forecast, QuantileForecast, SampleForecast
 from gluonts.model.predictor import Predictor as GluonTSPredictor
-from pandas.tseries.frequencies import to_offset
 from sklearn.compose import ColumnTransformer
 from sklearn.preprocessing import QuantileTransformer, StandardScaler

@@ -37,15 +36,13 @@ logger = logging.getLogger(__name__)
 gts_logger = logging.getLogger(gluonts.__name__)


-GLUONTS_SUPPORTED_OFFSETS = ["Y", "Q", "M", "W", "D", "B", "H", "T", "min", "S"]
-
-
 class SimpleGluonTSDataset(GluonTSDataset):
     """Wrapper for TimeSeriesDataFrame that is compatible with the GluonTS Dataset API."""

     def __init__(
         self,
         target_df: TimeSeriesDataFrame,
+        freq: str,
         target_column: str = "target",
         feat_static_cat: Optional[np.ndarray] = None,
         feat_static_real: Optional[np.ndarray] = None,
@@ -57,7 +54,6 @@ class SimpleGluonTSDataset(GluonTSDataset):
         prediction_length: int = None,
     ):
         assert target_df is not None
-        assert target_df.freq, "Initializing GluonTS data sets without freq is not allowed"
         # Convert TimeSeriesDataFrame to pd.Series for faster processing
         self.target_array = target_df[target_column].to_numpy(np.float32)
         self.feat_static_cat = self._astype(feat_static_cat, dtype=np.int64)
@@ -66,7 +62,7 @@ class SimpleGluonTSDataset(GluonTSDataset):
         self.feat_dynamic_real = self._astype(feat_dynamic_real, dtype=np.float32)
         self.past_feat_dynamic_cat = self._astype(past_feat_dynamic_cat, dtype=np.int64)
         self.past_feat_dynamic_real = self._astype(past_feat_dynamic_real, dtype=np.float32)
-        self.freq = self._to_gluonts_freq(target_df.freq)
+        self.freq = self._get_freq_for_period(freq)

         # Necessary to compute indptr for known_covariates at prediction time
         self.includes_future = includes_future
@@ -89,19 +85,22 @@ class SimpleGluonTSDataset(GluonTSDataset):
         return array.astype(dtype)

     @staticmethod
-    def _to_gluonts_freq(freq: str) -> str:
-        # FIXME: GluonTS expects a frequency string, but only supports a limited number of such strings
-        # for feature generation. If the frequency string doesn't match or is not provided, it raises an exception.
-        # Here we bypass this by issuing a default "yearly" frequency, tricking it into not producing
-        # any lags or features.
-        pd_offset = to_offset(freq)
-
-        # normalize freq str to handle peculiarities such as W-SUN
-        offset_base_alias = norm_freq_str(pd_offset)
-        if offset_base_alias not in GLUONTS_SUPPORTED_OFFSETS:
-            return "A"
+    def _get_freq_for_period(freq: str) -> str:
+        """Convert freq to format compatible with pd.Period.
+
+        For example, ME freq must be converted to M when creating a pd.Period.
+        """
+        offset = pd.tseries.frequencies.to_offset(freq)
+        freq_name = norm_freq_str(offset)
+        if freq_name == "SME":
+            # Replace unsupported frequency "SME" with "2W"
+            return "2W"
+        elif freq_name == "bh":
+            # Replace unsupported frequency "bh" with dummy value "Y"
+            return "Y"
         else:
-            return f"{pd_offset.n}{offset_base_alias}"
+            freq_name_for_period = {"YE": "Y", "QE": "Q", "ME": "M"}.get(freq_name, freq_name)
+            return f"{offset.n}{freq_name_for_period}"

     def __len__(self):
         return len(self.indptr) - 1  # noqa
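
The mapping in _get_freq_for_period exists because pd.Period kept the pre-2.2 aliases even after pd.date_range switched to the new ones. A quick illustration, assuming pandas 2.2:

    import pandas as pd

    pd.Period("2024-01", freq="M")                     # valid: Period expects the old alias
    pd.date_range("2024-01-01", periods=2, freq="ME")  # valid: date_range expects "ME"
    # pd.Period("2024-01", freq="ME") raises, hence the {"YE": "Y", "QE": "Q", "ME": "M"} mapping.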
@@ -161,6 +160,8 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
     """

     gluonts_model_path = "gluon_ts"
+    # we pass dummy freq compatible with pandas 2.1 & 2.2 to GluonTS models
+    _dummy_gluonts_freq = "D"
     # default number of samples for prediction
     default_num_samples: int = 250
     supports_cat_covariates: bool = False
@@ -234,13 +235,6 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):

     def _deferred_init_params_aux(self, dataset: TimeSeriesDataFrame) -> None:
         """Update GluonTS specific parameters with information available only at training time."""
-        self.freq = dataset.freq or self.freq
-        if not self.freq:
-            raise ValueError(
-                "Dataset frequency not provided in the dataset, fit arguments or "
-                "during initialization. Please provide a `freq` string to `fit`."
-            )
-
         model_params = self._get_model_params()
         disable_static_features = model_params.get("disable_static_features", False)
         if not disable_static_features:
@@ -371,7 +365,7 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
         init_args.setdefault("early_stopping_patience", 20)
         init_args.update(
             dict(
-                freq=self.freq,
+                freq=self._dummy_gluonts_freq,
                 prediction_length=self.prediction_length,
                 quantiles=self.quantile_levels,
                 callbacks=self.callbacks,
@@ -502,6 +496,7 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):

         return SimpleGluonTSDataset(
             target_df=time_series_df[[self.target]],
+            freq=self.freq,
             target_column=self.target,
             feat_static_cat=feat_static_cat,
             feat_static_real=feat_static_real,
@@ -592,7 +587,7 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
         predicted_targets = self._predict_gluonts_forecasts(data, known_covariates=known_covariates, **kwargs)
         df = self._gluonts_forecasts_to_data_frame(
             predicted_targets,
-            forecast_index=get_forecast_horizon_index_ts_dataframe(data, self.prediction_length),
+            forecast_index=get_forecast_horizon_index_ts_dataframe(data, self.prediction_length, freq=self.freq),
         )
         return df

autogluon/timeseries/models/gluonts/torch/models.py
@@ -423,6 +423,4 @@ class WaveNetModel(AbstractGluonTSModel):
         init_kwargs.setdefault("seasonality", get_seasonality(self.freq))
         init_kwargs.setdefault("time_features", get_time_features_for_frequency(self.freq))
         init_kwargs.setdefault("num_parallel_samples", self.default_num_samples)
-        # WaveNet model fails if an unsupported frequency such as "SM" is provided. We provide a dummy freq instead
-        init_kwargs["freq"] = "H"
         return init_kwargs

autogluon/timeseries/models/local/abstract_local_model.py
@@ -144,9 +144,10 @@ class AbstractLocalModel(AbstractTimeSeriesModel):

         # timeout ensures that no individual job takes longer than time_limit
         # TODO: a job started late may still exceed time_limit - how to prevent that?
-        timeout = None if self.n_jobs == 1 else self.time_limit
+        time_limit = kwargs.get("time_limit")
+        timeout = None if self.n_jobs == 1 else time_limit
         # end_time ensures that no new jobs are started after time_limit is exceeded
-        end_time = None if self.time_limit is None else time.time() + self.time_limit
+        end_time = None if time_limit is None else time.time() + time_limit
         executor = Parallel(self.n_jobs, timeout=timeout)

         try:
@@ -165,23 +166,28 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
                 f"({fraction_failed_models:.1%}). Fallback model SeasonalNaive was used for these time series."
             )
         predictions_df = pd.concat([pred for pred, _ in predictions_with_flags])
-        predictions_df.index = get_forecast_horizon_index_ts_dataframe(data, self.prediction_length)
+        predictions_df.index = get_forecast_horizon_index_ts_dataframe(data, self.prediction_length, freq=self.freq)
         return TimeSeriesDataFrame(predictions_df)

     def score_and_cache_oof(
-        self, val_data: TimeSeriesDataFrame, store_val_score: bool = False, store_predict_time: bool = False
+        self,
+        val_data: TimeSeriesDataFrame,
+        store_val_score: bool = False,
+        store_predict_time: bool = False,
+        **predict_kwargs,
     ) -> None:
-        super().score_and_cache_oof(val_data, store_val_score, store_predict_time)
-        # Remove time_limit for future predictions
-        self.time_limit = None
+        # All computation happens during inference, so we provide the time_limit at prediction time
+        super().score_and_cache_oof(
+            val_data, store_val_score, store_predict_time, time_limit=self.time_limit, **predict_kwargs
+        )

     def _predict_wrapper(self, time_series: pd.Series, end_time: Optional[float] = None) -> Tuple[pd.DataFrame, bool]:
         if end_time is not None and time.time() >= end_time:
             raise TimeLimitExceeded

+        model_failed = False
         if time_series.isna().all():
             result = self._dummy_forecast.copy()
-            model_failed = True
         else:
             try:
                 result = self._predict_with_local_model(
@@ -190,7 +196,6 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
                 )
                 if not np.isfinite(result.values).all():
                     raise RuntimeError("Forecast contains NaN or Inf values.")
-                model_failed = False
             except Exception:
                 if self.use_fallback_model:
                     result = seasonal_naive_forecast(
@@ -225,8 +230,6 @@ def seasonal_naive_forecast(
         return arr[np.maximum.accumulate(idx)]

     forecast = {}
-    # Convert to float64 since std computation can be unstable in float32
-    target = target.astype(np.float64)
     # At least seasonal_period + 2 values are required to compute sigma for seasonal naive
     if len(target) > seasonal_period + 1 and seasonal_period > 1:
         if np.isnan(target[-(seasonal_period + 2) :]).any():

autogluon/timeseries/models/multi_window/multi_window_model.py
@@ -189,6 +189,7 @@ class MultiWindowBacktestingModel(AbstractTimeSeriesModel):
         val_data: TimeSeriesDataFrame,
         store_val_score: bool = False,
         store_predict_time: bool = False,
+        **predict_kwargs,
     ) -> None:
         # self.val_score, self.predict_time, self._oof_predictions already saved during _fit()
         assert self._oof_predictions is not None

autogluon/timeseries/models/presets.py
@@ -79,21 +79,23 @@ DEFAULT_MODEL_PRIORITY = dict(
     Average=100,
     SeasonalAverage=100,
     Zero=100,
-    NPTS=90,
-    ETS=90,
-    CrostonSBA=90,
-    Theta=80,
-    DynamicOptimizedTheta=80,
-    AutoETS=80,
-    AutoARIMA=70,
-    RecursiveTabular=60,
-    Chronos=50,
-    DirectTabular=50,
+    RecursiveTabular=90,
+    DirectTabular=85,
+    # All local models are grouped together to make sure that joblib parallel pool is reused
+    NPTS=80,
+    ETS=80,
+    CrostonSBA=80,
+    Theta=75,
+    DynamicOptimizedTheta=75,
+    AutoETS=70,
+    AutoARIMA=60,
+    Chronos=55,
+    # Models that can early stop are trained at the end
+    TemporalFusionTransformer=45,
     DeepAR=40,
-    TemporalFusionTransformer=30,
-    WaveNet=25,
-    PatchTST=20,
+    PatchTST=30,
     # Models below are not included in any presets
+    WaveNet=25,
     AutoCES=10,
     ARIMA=10,
     ADIDA=10,
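
The reordering above groups all local models into one contiguous priority band (so the joblib worker pool is spun up once and reused) and defers the early-stopping neural models to the end, where they can absorb whatever time budget remains. A hypothetical sketch of how such a priority table translates into a training order, assuming the trainer simply sorts by priority:

    models = ["DeepAR", "SeasonalNaive", "ETS", "RecursiveTabular"]
    # Higher priority trains first:
    # SeasonalNaive (100) -> RecursiveTabular (90) -> ETS (80) -> DeepAR (40)
    training_order = sorted(models, key=DEFAULT_MODEL_PRIORITY.get, reverse=True)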
@@ -128,6 +130,13 @@ def get_default_hps(key):
             "DirectTabular": {},
             "TemporalFusionTransformer": {},
         },
+        "light_inference": {
+            "SeasonalNaive": {},
+            "DirectTabular": {},
+            "RecursiveTabular": {},
+            "TemporalFusionTransformer": {},
+            "PatchTST": {},
+        },
         "default": {
             "SeasonalNaive": {},
             "CrostonSBA": {},
@@ -143,6 +152,7 @@ def get_default_hps(key):
             "TemporalFusionTransformer": {},
             "PatchTST": {},
             "DeepAR": {},
+            "Chronos": {"model_path": "base"},
         },
     }
     return default_model_hps[key]
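
For reference, get_default_hps resolves a preset key to a plain {model_name: hyperparameters} dict; a sketch of what the new preset returns:

    hps = get_default_hps("light_inference")
    # {"SeasonalNaive": {}, "DirectTabular": {}, "RecursiveTabular": {},
    #  "TemporalFusionTransformer": {}, "PatchTST": {}}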