autogluon.timeseries 1.0.1b20240304__py3-none-any.whl → 1.4.1b20251210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (108)
  1. autogluon/timeseries/configs/__init__.py +3 -2
  2. autogluon/timeseries/configs/hyperparameter_presets.py +62 -0
  3. autogluon/timeseries/configs/predictor_presets.py +84 -0
  4. autogluon/timeseries/dataset/ts_dataframe.py +339 -186
  5. autogluon/timeseries/learner.py +192 -60
  6. autogluon/timeseries/metrics/__init__.py +55 -11
  7. autogluon/timeseries/metrics/abstract.py +96 -25
  8. autogluon/timeseries/metrics/point.py +186 -39
  9. autogluon/timeseries/metrics/quantile.py +47 -20
  10. autogluon/timeseries/metrics/utils.py +6 -6
  11. autogluon/timeseries/models/__init__.py +13 -7
  12. autogluon/timeseries/models/abstract/__init__.py +2 -2
  13. autogluon/timeseries/models/abstract/abstract_timeseries_model.py +533 -273
  14. autogluon/timeseries/models/abstract/model_trial.py +10 -10
  15. autogluon/timeseries/models/abstract/tunable.py +189 -0
  16. autogluon/timeseries/models/autogluon_tabular/__init__.py +2 -0
  17. autogluon/timeseries/models/autogluon_tabular/mlforecast.py +369 -215
  18. autogluon/timeseries/models/autogluon_tabular/per_step.py +513 -0
  19. autogluon/timeseries/models/autogluon_tabular/transforms.py +67 -0
  20. autogluon/timeseries/models/autogluon_tabular/utils.py +3 -51
  21. autogluon/timeseries/models/chronos/__init__.py +4 -0
  22. autogluon/timeseries/models/chronos/chronos2.py +361 -0
  23. autogluon/timeseries/models/chronos/model.py +738 -0
  24. autogluon/timeseries/models/chronos/utils.py +369 -0
  25. autogluon/timeseries/models/ensemble/__init__.py +35 -2
  26. autogluon/timeseries/models/ensemble/{abstract_timeseries_ensemble.py → abstract.py} +50 -26
  27. autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
  28. autogluon/timeseries/models/ensemble/array_based/abstract.py +236 -0
  29. autogluon/timeseries/models/ensemble/array_based/models.py +73 -0
  30. autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
  31. autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
  32. autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +167 -0
  33. autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
  34. autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
  35. autogluon/timeseries/models/ensemble/ensemble_selection.py +167 -0
  36. autogluon/timeseries/models/ensemble/per_item_greedy.py +162 -0
  37. autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
  38. autogluon/timeseries/models/ensemble/weighted/abstract.py +40 -0
  39. autogluon/timeseries/models/ensemble/weighted/basic.py +78 -0
  40. autogluon/timeseries/models/ensemble/weighted/greedy.py +57 -0
  41. autogluon/timeseries/models/gluonts/__init__.py +3 -1
  42. autogluon/timeseries/models/gluonts/abstract.py +583 -0
  43. autogluon/timeseries/models/gluonts/dataset.py +109 -0
  44. autogluon/timeseries/models/gluonts/{torch/models.py → models.py} +185 -44
  45. autogluon/timeseries/models/local/__init__.py +1 -10
  46. autogluon/timeseries/models/local/abstract_local_model.py +150 -97
  47. autogluon/timeseries/models/local/naive.py +31 -23
  48. autogluon/timeseries/models/local/npts.py +6 -2
  49. autogluon/timeseries/models/local/statsforecast.py +99 -112
  50. autogluon/timeseries/models/multi_window/multi_window_model.py +99 -40
  51. autogluon/timeseries/models/registry.py +64 -0
  52. autogluon/timeseries/models/toto/__init__.py +3 -0
  53. autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
  54. autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
  55. autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
  56. autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
  57. autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
  58. autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
  59. autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
  60. autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
  61. autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
  62. autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
  63. autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
  64. autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
  65. autogluon/timeseries/models/toto/dataloader.py +108 -0
  66. autogluon/timeseries/models/toto/hf_pretrained_model.py +118 -0
  67. autogluon/timeseries/models/toto/model.py +236 -0
  68. autogluon/timeseries/predictor.py +826 -305
  69. autogluon/timeseries/regressor.py +253 -0
  70. autogluon/timeseries/splitter.py +10 -31
  71. autogluon/timeseries/trainer/__init__.py +2 -3
  72. autogluon/timeseries/trainer/ensemble_composer.py +439 -0
  73. autogluon/timeseries/trainer/model_set_builder.py +256 -0
  74. autogluon/timeseries/trainer/prediction_cache.py +149 -0
  75. autogluon/timeseries/trainer/trainer.py +1298 -0
  76. autogluon/timeseries/trainer/utils.py +17 -0
  77. autogluon/timeseries/transforms/__init__.py +2 -0
  78. autogluon/timeseries/transforms/covariate_scaler.py +164 -0
  79. autogluon/timeseries/transforms/target_scaler.py +149 -0
  80. autogluon/timeseries/utils/constants.py +10 -0
  81. autogluon/timeseries/utils/datetime/base.py +38 -20
  82. autogluon/timeseries/utils/datetime/lags.py +18 -16
  83. autogluon/timeseries/utils/datetime/seasonality.py +14 -14
  84. autogluon/timeseries/utils/datetime/time_features.py +17 -14
  85. autogluon/timeseries/utils/features.py +317 -53
  86. autogluon/timeseries/utils/forecast.py +31 -17
  87. autogluon/timeseries/utils/timer.py +173 -0
  88. autogluon/timeseries/utils/warning_filters.py +44 -6
  89. autogluon/timeseries/version.py +2 -1
  90. autogluon.timeseries-1.4.1b20251210-py3.11-nspkg.pth +1 -0
  91. {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/METADATA +71 -47
  92. autogluon_timeseries-1.4.1b20251210.dist-info/RECORD +103 -0
  93. {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/WHEEL +1 -1
  94. autogluon/timeseries/configs/presets_configs.py +0 -11
  95. autogluon/timeseries/evaluator.py +0 -6
  96. autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -170
  97. autogluon/timeseries/models/gluonts/abstract_gluonts.py +0 -550
  98. autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
  99. autogluon/timeseries/models/presets.py +0 -325
  100. autogluon/timeseries/trainer/abstract_trainer.py +0 -1144
  101. autogluon/timeseries/trainer/auto_trainer.py +0 -74
  102. autogluon.timeseries-1.0.1b20240304-py3.8-nspkg.pth +0 -1
  103. autogluon.timeseries-1.0.1b20240304.dist-info/RECORD +0 -58
  104. {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/LICENSE +0 -0
  105. {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/NOTICE +0 -0
  106. {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/namespace_packages.txt +0 -0
  107. {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/top_level.txt +0 -0
  108. {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/zip-safe +0 -0
@@ -1,110 +1,110 @@
+ import copy
  import logging
  import os
  import re
  import time
- from contextlib import nullcontext
- from typing import Any, Dict, List, Optional, Union
+ from abc import ABC, abstractmethod
+ from typing import Any, Sequence
+
+ import pandas as pd
+ from typing_extensions import Self

  from autogluon.common import space
  from autogluon.common.loaders import load_pkl
  from autogluon.common.savers import save_pkl
- from autogluon.core.hpo.exceptions import EmptySearchSpace
- from autogluon.core.hpo.executors import HpoExecutor, RayHpoExecutor
- from autogluon.core.models import AbstractModel
+ from autogluon.common.utils.resource_utils import get_resource_manager
+ from autogluon.common.utils.utils import setup_outputdir
+ from autogluon.core.constants import AG_ARGS_FIT, REFIT_FULL_SUFFIX
+ from autogluon.core.models import ModelBase
+ from autogluon.core.utils.exceptions import TimeLimitExceeded
  from autogluon.timeseries.dataset import TimeSeriesDataFrame
  from autogluon.timeseries.metrics import TimeSeriesScorer, check_get_evaluation_metric
+ from autogluon.timeseries.models.registry import ModelRegistry
+ from autogluon.timeseries.regressor import CovariateRegressor, get_covariate_regressor
+ from autogluon.timeseries.transforms import CovariateScaler, TargetScaler, get_covariate_scaler, get_target_scaler
  from autogluon.timeseries.utils.features import CovariateMetadata
- from autogluon.timeseries.utils.warning_filters import disable_stdout, warning_filter
+ from autogluon.timeseries.utils.forecast import make_future_data_frame

- from .model_trial import model_trial, skip_hpo
+ from .tunable import TimeSeriesTunable

  logger = logging.getLogger(__name__)


- class AbstractTimeSeriesModel(AbstractModel):
-     """Abstract class for all `Model` objects in autogluon.timeseries.
+ class TimeSeriesModelBase(ModelBase, ABC):
+     """Abstract base class for all `Model` objects in autogluon.timeseries, including both
+     forecasting models and forecast combination/ensemble models.

      Parameters
      ----------
-     path : str, default = None
+     path
          Directory location to store all outputs.
          If None, a new unique time-stamped directory is chosen.
-     freq: str
+     freq
          Frequency string (cf. gluonts frequency strings) describing the frequency
-         of the time series data. For example, "H" for hourly or "D" for daily data.
-     prediction_length: int
+         of the time series data. For example, "h" for hourly or "D" for daily data.
+     prediction_length
          Length of the prediction horizon, i.e., the number of time steps the model
          is fit to forecast.
-     name : str, default = None
+     name
          Name of the subdirectory inside path where model will be saved.
          The final model directory will be os.path.join(path, name)
          If None, defaults to the model's class name: self.__class__.__name__
-     metadata: CovariateMetadata
+     covariate_metadata
          A mapping of different covariate types known to autogluon.timeseries to column names
          in the data set.
-     eval_metric : Union[str, TimeSeriesScorer], default = "WQL"
+     eval_metric
          Metric by which predictions will be ultimately evaluated on future test data. This only impacts
          ``model.score()``, as eval_metric is not used during training. Available metrics can be found in
          ``autogluon.timeseries.metrics``.
-     eval_metric_seasonal_period : int, optional
-         Seasonal period used to compute some evaluation metrics such as mean absolute scaled error (MASE). Defaults to
-         ``None``, in which case the seasonal period is computed based on the data frequency.
-     hyperparameters : dict, default = None
+     hyperparameters
          Hyperparameters that will be used by the model (can be search spaces instead of fixed values).
          If None, model defaults are used. This is identical to passing an empty dictionary.
      """

+     model_file_name = "model.pkl"
+     model_info_name = "info.pkl"
      _oof_filename = "oof.pkl"

-     # TODO: refactor "pruned" methods after AbstractModel is refactored
-     predict_proba = None
-     score_with_y_pred_proba = None
-     disk_usage = None  # disk / memory size
-     estimate_memory_usage = None
-     reduce_memory_size = None
-     compute_feature_importance = None  # feature processing and importance
-     get_features = None
-     _apply_conformalization = None
-     _apply_temperature_scaling = None
-     _predict_proba = None
-     _convert_proba_to_unified_form = None
-     _compute_permutation_importance = None
-     _estimate_memory_usage = None
-     _preprocess = None
-     _preprocess_nonadaptive = None
-     _preprocess_set_features = None
+     # TODO: For which models should we override this parameter?
+     _covariate_regressor_fit_time_fraction: float = 0.5
+     default_max_time_limit_ratio: float = 0.9
+
+     _supports_known_covariates: bool = False
+     _supports_past_covariates: bool = False
+     _supports_static_features: bool = False

      def __init__(
          self,
-         freq: Optional[str] = None,
+         path: str | None = None,
+         name: str | None = None,
+         hyperparameters: dict[str, Any] | None = None,
+         freq: str | None = None,
          prediction_length: int = 1,
-         path: Optional[str] = None,
-         name: Optional[str] = None,
-         metadata: Optional[CovariateMetadata] = None,
-         eval_metric: Union[str, TimeSeriesScorer, None] = None,
-         eval_metric_seasonal_period: Optional[int] = None,
-         hyperparameters: Dict[str, Union[int, float, str, space.Space]] = None,
-         **kwargs,
+         covariate_metadata: CovariateMetadata | None = None,
+         target: str = "target",
+         quantile_levels: Sequence[float] = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
+         eval_metric: str | TimeSeriesScorer | None = None,
      ):
-         name = name or re.sub(r"Model$", "", self.__class__.__name__)
-         super().__init__(
-             path=path,
-             name=name,
-             problem_type=None,
-             eval_metric=None,
-             hyperparameters=hyperparameters,
-         )
-         self.eval_metric: TimeSeriesScorer = check_get_evaluation_metric(eval_metric)
-         self.eval_metric_seasonal_period = eval_metric_seasonal_period
-         self.stopping_metric = None
-         self.problem_type = "timeseries"
-         self.conformalize = False
-         self.target: str = kwargs.get("target", "target")
-         self.metadata = metadata or CovariateMetadata()
-
-         self.freq: str = freq
+         self.name = name or re.sub(r"Model$", "", self.__class__.__name__)
+
+         self.path_root = path
+         if self.path_root is None:
+             path_suffix = self.name
+             # TODO: Would be ideal to not create dir, but still track that it is unique. However, this isn't possible
+             # to do without a global list of used dirs or using UUID.
+             path_cur = setup_outputdir(path=None, create_dir=True, path_suffix=path_suffix)
+             self.path_root = path_cur.rsplit(self.name, 1)[0]
+             logger.log(20, f"Warning: No path was specified for model, defaulting to: {self.path_root}")
+
+         self.path = os.path.join(self.path_root, self.name)
+
+         self.eval_metric = check_get_evaluation_metric(eval_metric, prediction_length=prediction_length)
+         self.target: str = target
+         self.covariate_metadata = covariate_metadata or CovariateMetadata()
+
+         self.freq: str | None = freq
          self.prediction_length: int = prediction_length
-         self.quantile_levels = kwargs.get("quantile_levels", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
+         self.quantile_levels: list[float] = list(quantile_levels)

          if not all(0 < q < 1 for q in self.quantile_levels):
              raise ValueError("Invalid quantile_levels specified. Quantiles must be between 0 and 1 (exclusive).")
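
Note: the constructor above replaces the old `**kwargs`-based handling of `target` and `quantile_levels` with explicit arguments. A minimal usage sketch, assuming `NaiveModel` (a concrete subclass shipped in `autogluon/timeseries/models/local/naive.py`) keeps a constructor compatible with this base class:

    from autogluon.timeseries.models import NaiveModel

    model = NaiveModel(
        freq="D",              # pandas frequency string of the training data
        prediction_length=24,  # number of future time steps to forecast
        target="target",       # name of the target column
        quantile_levels=(0.1, 0.5, 0.9),
        eval_metric="WQL",
    )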
@@ -117,76 +117,151 @@ class AbstractTimeSeriesModel(AbstractModel):
          else:
              self.must_drop_median = False

-         self._oof_predictions: Optional[List[TimeSeriesDataFrame]] = None
+         self._oof_predictions: list[TimeSeriesDataFrame] | None = None
+
+         # user provided hyperparameters and extra arguments that are used during model training
+         self._hyperparameters, self._extra_ag_args = self._check_and_split_hyperparameters(hyperparameters)
+
+         # Time taken to fit in seconds (Training data)
+         self.fit_time: float | None = None
+         # Time taken to predict in seconds, for a single prediction horizon on validation data
+         self.predict_time: float | None = None
+         # Time taken to predict 1 row of data in seconds (with batch size `predict_1_batch_size`)
+         self.predict_1_time: float | None = None
+         # Useful for ensembles, additional prediction time excluding base models. None for base models.
+         self.predict_time_marginal: float | None = None
+         # Score with eval_metric on validation data
+         self.val_score: float | None = None

      def __repr__(self) -> str:
          return self.name

-     def save(self, path: str = None, verbose=True) -> str:
+     def rename(self, name: str) -> None:
+         if self.name is not None and len(self.name) > 0:
+             self.path = os.path.join(os.path.dirname(self.path), name)
+         else:
+             self.path = os.path.join(self.path, name)
+         self.name = name
+
+     def set_contexts(self, path_context):
+         self.path = path_context
+         self.path_root = self.path.rsplit(self.name, 1)[0]
+
+     def cache_oof_predictions(self, predictions: TimeSeriesDataFrame | list[TimeSeriesDataFrame]) -> None:
+         if isinstance(predictions, TimeSeriesDataFrame):
+             predictions = [predictions]
+         self._oof_predictions = predictions
+
+     @classmethod
+     def _check_and_split_hyperparameters(
+         cls, hyperparameters: dict[str, Any] | None = None
+     ) -> tuple[dict[str, Any], dict[str, Any]]:
+         """Given the user-specified hyperparameters, split into `hyperparameters` and `extra_ag_args`, intended
+         to be used during model initialization.
+
+         Parameters
+         ----------
+         hyperparameters
+             The model hyperparameters dictionary provided to the model constructor.
+
+         Returns
+         -------
+         hyperparameters
+             Native model hyperparameters that are passed into the "inner model" AutoGluon wraps
+         extra_ag_args
+             Special auxiliary parameters that modify the model training process used by AutoGluon
+         """
+         hyperparameters = copy.deepcopy(hyperparameters) if hyperparameters is not None else dict()
+         assert isinstance(hyperparameters, dict), (
+             f"Invalid dtype for hyperparameters. Expected dict, but got {type(hyperparameters)}"
+         )
+         for k in hyperparameters.keys():
+             if not isinstance(k, str):
+                 logger.warning(
+                     f"Warning: Specified hyperparameter key is not of type str: {k} (type={type(k)}). "
+                     f"There might be a bug in your configuration."
+                 )
+
+         extra_ag_args = hyperparameters.pop(AG_ARGS_FIT, {})
+         if not isinstance(extra_ag_args, dict):
+             raise ValueError(
+                 f"Invalid hyperparameter type for `{AG_ARGS_FIT}`. Expected dict, but got {type(extra_ag_args)}"
+             )
+         return hyperparameters, extra_ag_args
+
+     def save(self, path: str | None = None, verbose: bool = True) -> str:
+         if path is None:
+             path = self.path
+
          # Save self._oof_predictions as a separate file, not model attribute
          if self._oof_predictions is not None:
              save_pkl.save(
-                 path=os.path.join(self.path, "utils", self._oof_filename),
+                 path=os.path.join(path, "utils", self._oof_filename),
                  object=self._oof_predictions,
                  verbose=verbose,
              )
          oof_predictions = self._oof_predictions
          self._oof_predictions = None
-         save_path = super().save(path=path, verbose=verbose)
+
+         file_path = os.path.join(path, self.model_file_name)
+         save_pkl.save(path=file_path, object=self, verbose=verbose)
+
          self._oof_predictions = oof_predictions
-         return save_path
+         return path

      @classmethod
-     def load(
-         cls, path: str, reset_paths: bool = True, load_oof: bool = False, verbose: bool = True
-     ) -> "AbstractTimeSeriesModel":
-         model = super().load(path=path, reset_paths=reset_paths, verbose=verbose)
+     def load(cls, path: str, reset_paths: bool = True, load_oof: bool = False, verbose: bool = True) -> Self:
+         file_path = os.path.join(path, cls.model_file_name)
+         model = load_pkl.load(path=file_path, verbose=verbose)
+         if reset_paths:
+             model.set_contexts(path)
          if load_oof and model._oof_predictions is None:
              model._oof_predictions = cls.load_oof_predictions(path=path, verbose=verbose)
          return model

      @classmethod
-     def load_oof_predictions(cls, path: str, verbose: bool = True) -> List[TimeSeriesDataFrame]:
+     def load_oof_predictions(cls, path: str, verbose: bool = True) -> list[TimeSeriesDataFrame]:
          """Load the cached OOF predictions from disk."""
          return load_pkl.load(path=os.path.join(path, "utils", cls._oof_filename), verbose=verbose)

+     @property
+     def supports_known_covariates(self) -> bool:
+         return (
+             self.get_hyperparameters().get("covariate_regressor") is not None
+             or self.__class__._supports_known_covariates
+         )
+
+     @property
+     def supports_past_covariates(self) -> bool:
+         return self.__class__._supports_past_covariates
+
+     @property
+     def supports_static_features(self) -> bool:
+         return (
+             self.get_hyperparameters().get("covariate_regressor") is not None
+             or self.__class__._supports_static_features
+         )
+
      def get_oof_predictions(self):
          if self._oof_predictions is None:
              self._oof_predictions = self.load_oof_predictions(self.path)
          return self._oof_predictions

-     def _initialize(self, **kwargs) -> None:
-         self._init_params_aux()
-         self._init_params()
+     def _get_default_hyperparameters(self) -> dict:
+         return {}

-     def _compute_fit_metadata(self, val_data: TimeSeriesDataFrame = None, **kwargs):
-         fit_metadata = dict(
-             val_in_fit=val_data is not None,
-         )
-         return fit_metadata
+     def get_hyperparameters(self) -> dict:
+         """Get dictionary of hyperparameters that will be passed to the "inner model" that AutoGluon wraps."""
+         return {**self._get_default_hyperparameters(), **self._hyperparameters}

-     def _validate_fit_memory_usage(self, **kwargs):
-         # memory usage handling not implemented for timeseries models
-         pass
-
-     def get_params(self) -> dict:
-         params = super().get_params()
-         params.update(
-             dict(
-                 freq=self.freq,
-                 prediction_length=self.prediction_length,
-                 quantile_levels=self.quantile_levels,
-                 metadata=self.metadata,
-                 target=self.target,
-             )
-         )
-         return params
+     def get_hyperparameter(self, key: str) -> Any:
+         """Get a single hyperparameter value for the "inner model"."""
+         return self.get_hyperparameters()[key]

      def get_info(self) -> dict:
          """
          Returns a dictionary of numerous fields describing the model.
          """
-         # TODO: Include self.metadata
          info = {
              "name": self.name,
              "model_type": type(self).__name__,
@@ -197,53 +272,278 @@ class AbstractTimeSeriesModel(AbstractModel):
              "prediction_length": self.prediction_length,
              "quantile_levels": self.quantile_levels,
              "val_score": self.val_score,
-             "hyperparameters": self.params,
+             "hyperparameters": self.get_hyperparameters(),
+             "covariate_metadata": self.covariate_metadata.to_dict(),
          }
          return info

-     def fit(self, **kwargs) -> "AbstractTimeSeriesModel":
+     @classmethod
+     def load_info(cls, path: str, load_model_if_required: bool = True) -> dict:
+         # TODO: remove?
+         load_path = os.path.join(path, cls.model_info_name)
+         try:
+             return load_pkl.load(path=load_path)
+         except:
+             if load_model_if_required:
+                 model = cls.load(path=path, reset_paths=True)
+                 return model.get_info()
+             else:
+                 raise
+
+     def _is_gpu_available(self) -> bool:
+         return False
+
+     @staticmethod
+     def _get_system_resources() -> dict[str, Any]:
+         resource_manager = get_resource_manager()
+         system_num_cpus = resource_manager.get_cpu_count()
+         system_num_gpus = resource_manager.get_gpu_count()
+         return {
+             "num_cpus": system_num_cpus,
+             "num_gpus": system_num_gpus,
+         }
+
+     def _get_model_base(self) -> Self:
+         return self
+
+     def persist(self) -> Self:
+         """Ask the model to persist its assets in memory, i.e., to predict with low latency. In practice
+         this is used for pretrained models that have to lazy-load model parameters to device memory at
+         prediction time.
+         """
+         return self
+
+     def _more_tags(self) -> dict:
+         """Encode model properties using tags, similar to sklearn & autogluon.tabular.
+
+         For more details, see `autogluon.core.models.abstract.AbstractModel._get_tags()` and
+         https://scikit-learn.org/stable/_sources/developers/develop.rst.txt.
+
+         List of currently supported tags:
+         - allow_nan: Can the model handle data with missing values represented by np.nan?
+         - can_refit_full: Does it make sense to retrain the model without validation data?
+           See `autogluon.core.models.abstract._tags._DEFAULT_TAGS` for more details.
+         - can_use_train_data: Can the model use train_data if it's provided to model.fit()?
+         - can_use_val_data: Can the model use val_data if it's provided to model.fit()?
+         """
+         return {
+             "allow_nan": False,
+             "can_refit_full": False,
+             "can_use_train_data": True,
+             "can_use_val_data": False,
+         }
+
+     def get_params(self) -> dict:
+         """Get the constructor parameters required for cloning this model object"""
+         # We only use the user-provided hyperparameters for cloning. We cannot use the output of get_hyperparameters()
+         # since it may contain search spaces that won't be converted to concrete values during HPO
+         hyperparameters = self._hyperparameters.copy()
+         if self._extra_ag_args:
+             hyperparameters[AG_ARGS_FIT] = self._extra_ag_args.copy()
+
+         return dict(
+             path=self.path_root,
+             name=self.name,
+             eval_metric=self.eval_metric,
+             hyperparameters=hyperparameters,
+             freq=self.freq,
+             prediction_length=self.prediction_length,
+             quantile_levels=self.quantile_levels,
+             covariate_metadata=self.covariate_metadata,
+             target=self.target,
+         )
+
+     def convert_to_refit_full_via_copy(self) -> Self:
+         # save the model as a new model on disk
+         previous_name = self.name
+         self.rename(self.name + REFIT_FULL_SUFFIX)
+         refit_model_path = self.path
+         self.save(path=self.path, verbose=False)
+
+         self.rename(previous_name)
+
+         refit_model = self.load(path=refit_model_path, verbose=False)
+         refit_model.val_score = None
+         refit_model.predict_time = None
+
+         return refit_model
+
+     def convert_to_refit_full_template(self) -> Self:
+         """After calling this function, returned model should be able to be fit without `val_data`."""
+         params = copy.deepcopy(self.get_params())
+
+         # Remove 0.5 from quantile_levels so that the cloned model sets its must_drop_median correctly
+         if self.must_drop_median:
+             params["quantile_levels"].remove(0.5)
+
+         if "hyperparameters" not in params:
+             params["hyperparameters"] = dict()
+
+         if AG_ARGS_FIT not in params["hyperparameters"]:
+             params["hyperparameters"][AG_ARGS_FIT] = dict()
+
+         params["name"] = params["name"] + REFIT_FULL_SUFFIX
+         template = self.__class__(**params)
+
+         return template
+
+
+ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, metaclass=ModelRegistry):
+     """Abstract base class for all time series models that take historical data as input and
+     make predictions for the forecast horizon.
+     """
+
+     ag_priority: int = 0
+
+     def __init__(
+         self,
+         path: str | None = None,
+         name: str | None = None,
+         hyperparameters: dict[str, Any] | None = None,
+         freq: str | None = None,
+         prediction_length: int = 1,
+         covariate_metadata: CovariateMetadata | None = None,
+         target: str = "target",
+         quantile_levels: Sequence[float] = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
+         eval_metric: str | TimeSeriesScorer | None = None,
+     ):
+         # TODO: make freq a required argument in AbstractTimeSeriesModel
+         super().__init__(
+             path=path,
+             name=name,
+             hyperparameters=hyperparameters,
+             freq=freq,
+             prediction_length=prediction_length,
+             covariate_metadata=covariate_metadata,
+             target=target,
+             quantile_levels=quantile_levels,
+             eval_metric=eval_metric,
+         )
+         self.target_scaler: TargetScaler | None
+         self.covariate_scaler: CovariateScaler | None
+         self.covariate_regressor: CovariateRegressor | None
+
+     def _initialize_transforms_and_regressor(self) -> None:
+         self.target_scaler = get_target_scaler(self.get_hyperparameters().get("target_scaler"), target=self.target)
+         self.covariate_scaler = get_covariate_scaler(
+             self.get_hyperparameters().get("covariate_scaler"),
+             covariate_metadata=self.covariate_metadata,
+             use_static_features=self.supports_static_features,
+             use_known_covariates=self.supports_known_covariates,
+             use_past_covariates=self.supports_past_covariates,
+         )
+         self.covariate_regressor = get_covariate_regressor(
+             self.get_hyperparameters().get("covariate_regressor"),
+             target=self.target,
+             covariate_metadata=self.covariate_metadata,
+         )
+
+     @property
+     def allowed_hyperparameters(self) -> list[str]:
+         """List of hyperparameters allowed by the model."""
+         return ["target_scaler", "covariate_regressor", "covariate_scaler"]
+
+     def fit(
+         self,
+         train_data: TimeSeriesDataFrame,
+         val_data: TimeSeriesDataFrame | None = None,
+         time_limit: float | None = None,
+         verbosity: int = 2,
+         **kwargs,
+     ) -> Self:
          """Fit timeseries model.

          Models should not override the `fit` method, but instead override the `_fit` method which
          has the same arguments.

-         Other Parameters
-         ----------------
-         train_data : TimeSeriesDataFrame
+         Parameters
+         ----------
+         train_data
              The training data provided in the library's `autogluon.timeseries.dataset.TimeSeriesDataFrame`
              format.
-         val_data : TimeSeriesDataFrame, optional
+         val_data
              The validation data set in the same format as training data.
-         time_limit : float, default = None
+         time_limit
              Time limit in seconds to adhere to when fitting model.
              Ideally, model should early stop during fit to avoid going over the time limit if specified.
-         num_cpus : int, default = 'auto'
+         num_cpus
              How many CPUs to use during fit.
              This is counted in virtual cores, not in physical cores.
              If 'auto', model decides.
-         num_gpus : int, default = 'auto'
+         num_gpus
              How many GPUs to use during fit.
              If 'auto', model decides.
-         verbosity : int, default = 2
+         verbosity
              Verbosity levels range from 0 to 4 and control how much information is printed.
              Higher levels correspond to more detailed print statements (you can set verbosity = 0 to suppress warnings).
-         **kwargs :
+         **kwargs
              Any additional fit arguments a model supports.

          Returns
          -------
-         model: AbstractTimeSeriesModel
+         model
              The fitted model object
          """
-         return super().fit(**kwargs)
+         start_time = time.monotonic()
+         self._initialize_transforms_and_regressor()
+
+         if self.target_scaler is not None:
+             train_data = self.target_scaler.fit_transform(train_data)
+
+         if self.covariate_scaler is not None:
+             train_data = self.covariate_scaler.fit_transform(train_data)
+
+         if self.covariate_regressor is not None:
+             covariate_regressor_time_limit = (
+                 self._covariate_regressor_fit_time_fraction * time_limit if time_limit is not None else None
+             )
+             self.covariate_regressor.fit(
+                 train_data,
+                 time_limit=covariate_regressor_time_limit,
+                 verbosity=verbosity - 1,
+             )

+         if self._get_tags()["can_use_train_data"]:
+             if self.covariate_regressor is not None:
+                 train_data = self.covariate_regressor.transform(train_data)
+             train_data, _ = self.preprocess(train_data, is_train=True)
+
+         if self._get_tags()["can_use_val_data"] and val_data is not None:
+             if self.target_scaler is not None:
+                 val_data = self.target_scaler.transform(val_data)
+             if self.covariate_scaler is not None:
+                 val_data = self.covariate_scaler.transform(val_data)
+             if self.covariate_regressor is not None:
+                 val_data = self.covariate_regressor.transform(val_data)
+             val_data, _ = self.preprocess(val_data, is_train=False)
+
+         if time_limit is not None:
+             time_limit = time_limit - (time.monotonic() - start_time)
+             time_limit = self._preprocess_time_limit(time_limit=time_limit)
+             if time_limit <= 0:
+                 logger.warning(
+                     f"\tWarning: Model has no time left to train, skipping model... (Time Left = {time_limit:.1f}s)"
+                 )
+                 raise TimeLimitExceeded
+
+         self._fit(
+             train_data=train_data,
+             val_data=val_data,
+             time_limit=time_limit,
+             verbosity=verbosity,
+             **(self._get_system_resources() | kwargs),
+         )
+
+         return self
+
+     @abstractmethod
      def _fit(
          self,
          train_data: TimeSeriesDataFrame,
-         val_data: Optional[TimeSeriesDataFrame] = None,
-         time_limit: Optional[int] = None,
-         num_cpus: Optional[int] = None,
-         num_gpus: Optional[int] = None,
+         val_data: TimeSeriesDataFrame | None = None,
+         time_limit: float | None = None,
+         num_cpus: int | None = None,
+         num_gpus: int | None = None,
          verbosity: int = 2,
          **kwargs,
      ) -> None:
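
Note: `fit` is now a template method; it fits and applies the target scaler, covariate scaler, and covariate regressor, deducts the elapsed time from the budget, and only then dispatches to `_fit`. A minimal, hypothetical subclass illustrating the two abstract hooks (`MeanForecastModel` is not part of the package; it is a sketch only):

    import pandas as pd

    from autogluon.timeseries import TimeSeriesDataFrame
    from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel

    class MeanForecastModel(AbstractTimeSeriesModel):
        def _fit(self, train_data, val_data=None, time_limit=None, **kwargs) -> None:
            # "training" just memorizes the global mean of the target column
            self._target_mean = float(train_data[self.target].mean())

        def _predict(self, data, known_covariates=None, **kwargs) -> TimeSeriesDataFrame:
            # emit a flat forecast for the mean and every quantile column
            columns = ["mean"] + [str(q) for q in self.quantile_levels]
            forecast = pd.DataFrame(
                self._target_mean,
                index=self.get_forecast_horizon_index(data),
                columns=columns,
            )
            return TimeSeriesDataFrame(forecast)

Scaling, covariate regression, column reordering, and median handling are applied around these hooks by the base class, so the subclass stays free of that bookkeeping.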
@@ -251,21 +551,36 @@ class AbstractTimeSeriesModel(AbstractModel):
          the model training logic, `fit` additionally implements other logic such as keeping
          track of the time limit, etc.
          """
-         # TODO: Make the models respect `num_cpus` and `num_gpus` parameters
-         raise NotImplementedError
+         pass

+     # TODO: this check cannot be moved inside fit because of the complex way in which
+     # MultiWindowBacktestingModel handles hyperparameter spaces during initialization.
+     # Move inside fit() after refactoring MultiWindowBacktestingModel
      def _check_fit_params(self):
          # gracefully handle hyperparameter specifications if they are provided to fit instead
-         if any(isinstance(v, space.Space) for v in self.params.values()):
+         if any(isinstance(v, space.Space) for v in self.get_hyperparameters().values()):
              raise ValueError(
                  "Hyperparameter spaces provided to `fit`. Please provide concrete values "
                  "as hyperparameters when initializing or use `hyperparameter_tune` instead."
              )

+     def _log_unused_hyperparameters(self, extra_allowed_hyperparameters: list[str] | None = None) -> None:
+         """Log a warning if unused hyperparameters were provided to the model."""
+         allowed_hyperparameters = self.allowed_hyperparameters
+         if extra_allowed_hyperparameters is not None:
+             allowed_hyperparameters = allowed_hyperparameters + extra_allowed_hyperparameters
+
+         unused_hyperparameters = [key for key in self.get_hyperparameters() if key not in allowed_hyperparameters]
+         if len(unused_hyperparameters) > 0:
+             logger.warning(
+                 f"{self.name} ignores following hyperparameters: {unused_hyperparameters}. "
+                 f"See the documentation for {self.name} for the list of supported hyperparameters."
+             )
+
      def predict(
          self,
-         data: Union[TimeSeriesDataFrame, Dict[str, TimeSeriesDataFrame]],
-         known_covariates: Optional[TimeSeriesDataFrame] = None,
+         data: TimeSeriesDataFrame,
+         known_covariates: TimeSeriesDataFrame | None = None,
          **kwargs,
      ) -> TimeSeriesDataFrame:
          """Given a dataset, predict the next `self.prediction_length` time steps.
@@ -277,55 +592,123 @@ class AbstractTimeSeriesModel(AbstractModel):

          Parameters
          ----------
-         data: Union[TimeSeriesDataFrame, Dict[str, TimeSeriesDataFrame]]
+         data
              The dataset where each time series is the "context" for predictions. For ensemble models that depend on
              the predictions of other models, this method may accept a dictionary of previous models' predictions.
-         known_covariates : Optional[TimeSeriesDataFrame]
+         known_covariates
              A TimeSeriesDataFrame containing the values of the known covariates during the forecast horizon.

          Returns
          -------
-         predictions: TimeSeriesDataFrame
-             pandas data frames with a timestamp index, where each input item from the input
+         predictions
+             pandas dataframes with a timestamp index, where each input item from the input
              data is given as a separate forecast item in the dictionary, keyed by the `item_id`s
              of input items.
          """
+         if self.target_scaler is not None:
+             data = self.target_scaler.fit_transform(data)
+         if self.covariate_scaler is not None:
+             data = self.covariate_scaler.fit_transform(data)
+             known_covariates = self.covariate_scaler.transform_known_covariates(known_covariates)
+         if self.covariate_regressor is not None:
+             data = self.covariate_regressor.fit_transform(data)
+
+         data, known_covariates = self.preprocess(data, known_covariates, is_train=False)
+
+         # FIXME: Set self.covariate_regressor=None so to avoid copying it across processes during _predict
+         # FIXME: The clean solution is to convert all methods executed in parallel to @classmethod
+         covariate_regressor = self.covariate_regressor
+         self.covariate_regressor = None
          predictions = self._predict(data=data, known_covariates=known_covariates, **kwargs)
-         logger.debug(f"Predicting with model {self.name}")
-         # "0.5" might be missing from the quantiles if self is a wrapper (MultiWindowBacktestingModel or ensemble)
+         self.covariate_regressor = covariate_regressor
+
+         # Ensure that 'mean' is the leading column. Trailing columns might not match quantile_levels if self is
+         # a MultiWindowBacktestingModel and base_model.must_drop_median=True
+         column_order = pd.Index(["mean"] + [col for col in predictions.columns if col != "mean"])
+         if not predictions.columns.equals(column_order):
+             predictions = predictions.reindex(columns=column_order)
+
+         # "0.5" might be missing from the quantiles if self is a MultiWindowBacktestingModel
          if "0.5" in predictions.columns:
              if self.eval_metric.optimized_by_median:
                  predictions["mean"] = predictions["0.5"]
              if self.must_drop_median:
                  predictions = predictions.drop("0.5", axis=1)
+
+         if self.covariate_regressor is not None:
+             if known_covariates is None:
+                 known_covariates = TimeSeriesDataFrame.from_data_frame(
+                     pd.DataFrame(index=self.get_forecast_horizon_index(data), dtype="float32")
+                 )
+
+             predictions = self.covariate_regressor.inverse_transform(
+                 predictions,
+                 known_covariates=known_covariates,
+                 static_features=data.static_features,
+             )
+
+         if self.target_scaler is not None:
+             predictions = self.target_scaler.inverse_transform(predictions)
          return predictions

+     def get_forecast_horizon_index(self, data: TimeSeriesDataFrame) -> pd.MultiIndex:
+         """For each item in the dataframe, get timestamps for the next `prediction_length` time steps into the future."""
+         return pd.MultiIndex.from_frame(
+             make_future_data_frame(data, prediction_length=self.prediction_length, freq=self.freq)
+         )
+
+     @abstractmethod
      def _predict(
          self,
-         data: Union[TimeSeriesDataFrame, Dict[str, TimeSeriesDataFrame]],
-         known_covariates: Optional[TimeSeriesDataFrame] = None,
+         data: TimeSeriesDataFrame,
+         known_covariates: TimeSeriesDataFrame | None = None,
          **kwargs,
      ) -> TimeSeriesDataFrame:
          """Private method for `predict`. See `predict` for documentation of arguments."""
-         raise NotImplementedError
+         pass
+
+     def _preprocess_time_limit(self, time_limit: float) -> float:
+         original_time_limit = time_limit
+         max_time_limit_ratio = self._extra_ag_args.get("max_time_limit_ratio", self.default_max_time_limit_ratio)
+         max_time_limit = self._extra_ag_args.get("max_time_limit")
+
+         time_limit *= max_time_limit_ratio
+
+         if max_time_limit is not None:
+             time_limit = min(time_limit, max_time_limit)
+
+         if original_time_limit != time_limit:
+             time_limit_og_str = f"{original_time_limit:.2f}s" if original_time_limit is not None else "None"
+             time_limit_str = f"{time_limit:.2f}s" if time_limit is not None else "None"
+             logger.debug(
+                 f"\tTime limit adjusted due to model hyperparameters: "
+                 f"{time_limit_og_str} -> {time_limit_str} "
+                 f"(ag.max_time_limit={max_time_limit}, "
+                 f"ag.max_time_limit_ratio={max_time_limit_ratio}"
+             )
+
+         return time_limit
+
+     def _get_search_space(self):
+         """Sets up default search space for HPO. Each hyperparameter which user did not specify is converted from
+         default fixed value to default search space.
+         """
+         params = self._hyperparameters.copy()
+         return params

      def _score_with_predictions(
          self,
          data: TimeSeriesDataFrame,
          predictions: TimeSeriesDataFrame,
-         metric: Optional[str] = None,
      ) -> float:
          """Compute the score measuring how well the predictions align with the data."""
-         eval_metric = self.eval_metric if metric is None else check_get_evaluation_metric(metric)
-         return eval_metric.score(
+         return self.eval_metric.score(
              data=data,
              predictions=predictions,
-             prediction_length=self.prediction_length,
              target=self.target,
-             seasonal_period=self.eval_metric_seasonal_period,
          )

-     def score(self, data: TimeSeriesDataFrame, metric: Optional[str] = None) -> float:
+     def score(self, data: TimeSeriesDataFrame) -> float:
          """Return the evaluation scores for given metric and dataset. The last
          `self.prediction_length` time steps of each time series in the input data set
          will be held out and used for computing the evaluation score. Time series
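
Note: a worked example of the `_preprocess_time_limit` arithmetic above. With the class default `default_max_time_limit_ratio = 0.9` and user-supplied `ag_args_fit = {"max_time_limit": 60}`, a 100-second budget shrinks as follows:

    time_limit = 100.0
    time_limit *= 0.9                  # apply max_time_limit_ratio -> 90.0
    time_limit = min(time_limit, 60)   # apply the max_time_limit cap -> 60.0
    assert time_limit == 60.0

Without `max_time_limit`, the same call would return 90.0 seconds.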
@@ -333,169 +716,46 @@ class AbstractTimeSeriesModel(AbstractModel):

          Parameters
          ----------
-         data: TimeSeriesDataFrame
+         data
              Dataset used for scoring.
-         metric: str
-             String identifier of evaluation metric to use, from one of
-             `autogluon.timeseries.utils.metric_utils.AVAILABLE_METRICS`.
-
-         Other Parameters
-         ----------------
-         num_samples: int
-             Number of samples to use for making evaluation predictions if the probabilistic
-             forecasts are generated by forward sampling from the fitted model.

          Returns
          -------
-         score: float
+         score
              The computed forecast evaluation score on the last `self.prediction_length`
              time steps of each time series.
          """
          past_data, known_covariates = data.get_model_inputs_for_scoring(
-             prediction_length=self.prediction_length, known_covariates_names=self.metadata.known_covariates_real
+             prediction_length=self.prediction_length, known_covariates_names=self.covariate_metadata.known_covariates
          )
          predictions = self.predict(past_data, known_covariates=known_covariates)
-         return self._score_with_predictions(data=data, predictions=predictions, metric=metric)
+         return self._score_with_predictions(data=data, predictions=predictions)

      def score_and_cache_oof(
          self,
          val_data: TimeSeriesDataFrame,
          store_val_score: bool = False,
          store_predict_time: bool = False,
+         **predict_kwargs,
      ) -> None:
          """Compute val_score, predict_time and cache out-of-fold (OOF) predictions."""
          past_data, known_covariates = val_data.get_model_inputs_for_scoring(
-             prediction_length=self.prediction_length, known_covariates_names=self.metadata.known_covariates_real
+             prediction_length=self.prediction_length, known_covariates_names=self.covariate_metadata.known_covariates
          )
          predict_start_time = time.time()
-         oof_predictions = self.predict(past_data, known_covariates=known_covariates)
-         self._oof_predictions = [oof_predictions]
+         oof_predictions = self.predict(past_data, known_covariates=known_covariates, **predict_kwargs)
+         self.cache_oof_predictions(oof_predictions)
          if store_predict_time:
              self.predict_time = time.time() - predict_start_time
          if store_val_score:
              self.val_score = self._score_with_predictions(val_data, oof_predictions)

-     def _get_hpo_train_fn_kwargs(self, **train_fn_kwargs) -> dict:
-         """Update kwargs passed to model_trial depending on the model configuration.
-
-         These kwargs need to be updated, for example, by MultiWindowBacktestingModel.
-         """
-         return train_fn_kwargs
-
-     def _is_gpu_available(self) -> bool:
-         return False
-
-     def hyperparameter_tune(
-         self, hyperparameter_tune_kwargs="auto", hpo_executor: HpoExecutor = None, time_limit: float = None, **kwargs
-     ):
-         if hpo_executor is None:
-             hpo_executor = self._get_default_hpo_executor()
-         default_num_trials = kwargs.pop("default_num_trials", None)
-         hpo_executor.initialize(
-             hyperparameter_tune_kwargs, default_num_trials=default_num_trials, time_limit=time_limit
-         )
-
-         kwargs = self.initialize(time_limit=time_limit, **kwargs)
-
-         self._register_fit_metadata(**kwargs)
-         self._validate_fit_memory_usage(**kwargs)
-
-         kwargs = self._preprocess_fit_resources(
-             parallel_hpo=hpo_executor.executor_type == "ray", silent=True, **kwargs
-         )
-         self.validate_fit_resources(**kwargs)
-
-         # autogluon.core runs a complicated logic to determine the final number of gpus
-         # used in trials, which results in unintended setting of num_gpus=0. We override this
-         # logic here, and set to minimum num_gpus to 1 if it is set to 0 when GPUs are available
-         kwargs["num_gpus"] = 0 if not self._is_gpu_available() else max(kwargs.get("num_gpus", 1), 1)
-
-         # we use k_fold=1 to circumvent autogluon.core logic to manage resources during parallelization
-         # of different folds
-         hpo_executor.register_resources(self, k_fold=1, **kwargs)
-         return self._hyperparameter_tune(hpo_executor=hpo_executor, **kwargs)
-
-     def _hyperparameter_tune(
+     def preprocess(
          self,
-         train_data: TimeSeriesDataFrame,
-         val_data: TimeSeriesDataFrame,
-         hpo_executor: HpoExecutor,
+         data: TimeSeriesDataFrame,
+         known_covariates: TimeSeriesDataFrame | None = None,
+         is_train: bool = False,
          **kwargs,
-     ):
-         time_start = time.time()
-         logger.debug(f"\tStarting AbstractTimeSeriesModel hyperparameter tuning for {self.name}")
-         search_space = self._get_search_space()
-
-         try:
-             hpo_executor.validate_search_space(search_space, self.name)
-         except EmptySearchSpace:
-             return skip_hpo(self, train_data, val_data, time_limit=hpo_executor.time_limit)
-
-         self.set_contexts(os.path.abspath(self.path))
-         directory = self.path
-         dataset_train_filename = "dataset_train.pkl"
-         train_path = os.path.join(self.path, dataset_train_filename)
-         save_pkl.save(path=train_path, object=train_data)
-
-         dataset_val_filename = "dataset_val.pkl"
-         val_path = os.path.join(self.path, dataset_val_filename)
-         save_pkl.save(path=val_path, object=val_data)
-
-         fit_kwargs = dict(
-             val_splitter=kwargs.get("val_splitter"),
-             refit_every_n_windows=kwargs.get("refit_every_n_windows", 1),
-         )
-         train_fn_kwargs = self._get_hpo_train_fn_kwargs(
-             model_cls=self.__class__,
-             init_params=self.get_params(),
-             time_start=time_start,
-             time_limit=hpo_executor.time_limit,
-             fit_kwargs=fit_kwargs,
-             train_path=train_path,
-             val_path=val_path,
-             hpo_executor=hpo_executor,
-         )
-
-         model_estimate_memory_usage = None
-         if self.estimate_memory_usage is not None:
-             model_estimate_memory_usage = self.estimate_memory_usage(**kwargs)
-
-         minimum_resources = self.get_minimum_resources(is_gpu_available=self._is_gpu_available())
-         hpo_context = disable_stdout if isinstance(hpo_executor, RayHpoExecutor) else nullcontext
-         with hpo_context(), warning_filter():  # prevent Ray from outputting its results to stdout with print
-             hpo_executor.execute(
-                 model_trial=model_trial,
-                 train_fn_kwargs=train_fn_kwargs,
-                 directory=directory,
-                 minimum_cpu_per_trial=minimum_resources.get("num_cpus", 1),
-                 minimum_gpu_per_trial=minimum_resources.get("num_gpus", 0),
-                 model_estimate_memory_usage=model_estimate_memory_usage,
-                 adapter_type="timeseries",
-             )
-
-         hpo_models, analysis = hpo_executor.get_hpo_results(
-             model_name=self.name,
-             model_path_root=self.path_root,
-             time_start=time_start,
-         )
-
-         return hpo_models, analysis
-
-     def preprocess(self, data: Any, **kwargs) -> Any:
-         return data
-
-     def get_memory_size(self, **kwargs) -> Optional[int]:
-         return None
-
-     def convert_to_refit_full_via_copy(self) -> "AbstractTimeSeriesModel":
-         refit_model = super().convert_to_refit_full_via_copy()
-         refit_model.val_score = None
-         refit_model.predict_time = None
-         return refit_model
-
-     def get_user_params(self) -> dict:
-         """Used to access user-specified parameters for the model before initialization."""
-         if self._user_params is None:
-             return {}
-         else:
-             return self._user_params.copy()
+     ) -> tuple[TimeSeriesDataFrame, TimeSeriesDataFrame | None]:
+         """Method that implements model-specific preprocessing logic."""
+         return data, known_covariates
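
Note: the `preprocess` seam added above replaces the old catch-all `preprocess(self, data, **kwargs)` and is invoked by both `fit` and `predict`. A hypothetical override, reusing the `MeanForecastModel` sketch from earlier (`fill_missing_values` is an existing `TimeSeriesDataFrame` method):

    class FillMissingMeanForecastModel(MeanForecastModel):
        def preprocess(self, data, known_covariates=None, is_train=False, **kwargs):
            # impute gaps in the target before the inner model sees the data
            return data.fill_missing_values(), known_covariates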