autogluon.timeseries 1.2.1b20250224__py3-none-any.whl → 1.4.1b20251215__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of autogluon.timeseries might be problematic. Click here for more details.
- autogluon/timeseries/configs/__init__.py +3 -2
- autogluon/timeseries/configs/hyperparameter_presets.py +62 -0
- autogluon/timeseries/configs/predictor_presets.py +106 -0
- autogluon/timeseries/dataset/ts_dataframe.py +256 -141
- autogluon/timeseries/learner.py +86 -52
- autogluon/timeseries/metrics/__init__.py +42 -8
- autogluon/timeseries/metrics/abstract.py +89 -19
- autogluon/timeseries/metrics/point.py +142 -53
- autogluon/timeseries/metrics/quantile.py +46 -21
- autogluon/timeseries/metrics/utils.py +4 -4
- autogluon/timeseries/models/__init__.py +8 -2
- autogluon/timeseries/models/abstract/__init__.py +2 -2
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +361 -592
- autogluon/timeseries/models/abstract/model_trial.py +2 -1
- autogluon/timeseries/models/abstract/tunable.py +189 -0
- autogluon/timeseries/models/autogluon_tabular/__init__.py +2 -0
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +282 -194
- autogluon/timeseries/models/autogluon_tabular/per_step.py +513 -0
- autogluon/timeseries/models/autogluon_tabular/transforms.py +25 -18
- autogluon/timeseries/models/chronos/__init__.py +2 -1
- autogluon/timeseries/models/chronos/chronos2.py +361 -0
- autogluon/timeseries/models/chronos/model.py +219 -138
- autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +81 -50
- autogluon/timeseries/models/ensemble/__init__.py +37 -2
- autogluon/timeseries/models/ensemble/abstract.py +107 -0
- autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
- autogluon/timeseries/models/ensemble/array_based/abstract.py +240 -0
- autogluon/timeseries/models/ensemble/array_based/models.py +185 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +186 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
- autogluon/timeseries/models/ensemble/ensemble_selection.py +167 -0
- autogluon/timeseries/models/ensemble/per_item_greedy.py +172 -0
- autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +45 -0
- autogluon/timeseries/models/ensemble/weighted/basic.py +91 -0
- autogluon/timeseries/models/ensemble/weighted/greedy.py +62 -0
- autogluon/timeseries/models/gluonts/__init__.py +1 -1
- autogluon/timeseries/models/gluonts/{abstract_gluonts.py → abstract.py} +148 -208
- autogluon/timeseries/models/gluonts/dataset.py +109 -0
- autogluon/timeseries/models/gluonts/{torch/models.py → models.py} +38 -22
- autogluon/timeseries/models/local/__init__.py +0 -7
- autogluon/timeseries/models/local/abstract_local_model.py +71 -74
- autogluon/timeseries/models/local/naive.py +13 -9
- autogluon/timeseries/models/local/npts.py +9 -2
- autogluon/timeseries/models/local/statsforecast.py +52 -36
- autogluon/timeseries/models/multi_window/multi_window_model.py +65 -45
- autogluon/timeseries/models/registry.py +64 -0
- autogluon/timeseries/models/toto/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
- autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
- autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
- autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
- autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
- autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
- autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
- autogluon/timeseries/models/toto/dataloader.py +108 -0
- autogluon/timeseries/models/toto/hf_pretrained_model.py +200 -0
- autogluon/timeseries/models/toto/model.py +249 -0
- autogluon/timeseries/predictor.py +685 -297
- autogluon/timeseries/regressor.py +94 -44
- autogluon/timeseries/splitter.py +8 -32
- autogluon/timeseries/trainer/__init__.py +3 -0
- autogluon/timeseries/trainer/ensemble_composer.py +444 -0
- autogluon/timeseries/trainer/model_set_builder.py +256 -0
- autogluon/timeseries/trainer/prediction_cache.py +149 -0
- autogluon/timeseries/{trainer.py → trainer/trainer.py} +387 -390
- autogluon/timeseries/trainer/utils.py +17 -0
- autogluon/timeseries/transforms/__init__.py +2 -13
- autogluon/timeseries/transforms/covariate_scaler.py +34 -40
- autogluon/timeseries/transforms/target_scaler.py +37 -20
- autogluon/timeseries/utils/constants.py +10 -0
- autogluon/timeseries/utils/datetime/lags.py +3 -5
- autogluon/timeseries/utils/datetime/seasonality.py +1 -3
- autogluon/timeseries/utils/datetime/time_features.py +2 -2
- autogluon/timeseries/utils/features.py +70 -47
- autogluon/timeseries/utils/forecast.py +19 -14
- autogluon/timeseries/utils/timer.py +173 -0
- autogluon/timeseries/utils/warning_filters.py +4 -2
- autogluon/timeseries/version.py +1 -1
- autogluon.timeseries-1.4.1b20251215-py3.11-nspkg.pth +1 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/METADATA +49 -36
- autogluon_timeseries-1.4.1b20251215.dist-info/RECORD +103 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/WHEEL +1 -1
- autogluon/timeseries/configs/presets_configs.py +0 -79
- autogluon/timeseries/evaluator.py +0 -6
- autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -11
- autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
- autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -585
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -518
- autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -78
- autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -170
- autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
- autogluon/timeseries/models/presets.py +0 -360
- autogluon.timeseries-1.2.1b20250224-py3.9-nspkg.pth +0 -1
- autogluon.timeseries-1.2.1b20250224.dist-info/RECORD +0 -68
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info/licenses}/LICENSE +0 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info/licenses}/NOTICE +0 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/zip-safe +0 -0
autogluon/timeseries/learner.py
CHANGED
|
@@ -1,18 +1,17 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import reprlib
|
|
3
3
|
import time
|
|
4
|
-
from typing import Any,
|
|
4
|
+
from typing import Any, Literal, Type
|
|
5
5
|
|
|
6
6
|
import pandas as pd
|
|
7
7
|
|
|
8
8
|
from autogluon.core.learner import AbstractLearner
|
|
9
|
-
from autogluon.timeseries.dataset
|
|
9
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
|
10
10
|
from autogluon.timeseries.metrics import TimeSeriesScorer, check_get_evaluation_metric
|
|
11
11
|
from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
|
|
12
|
-
from autogluon.timeseries.splitter import AbstractWindowSplitter
|
|
13
12
|
from autogluon.timeseries.trainer import TimeSeriesTrainer
|
|
14
13
|
from autogluon.timeseries.utils.features import TimeSeriesFeatureGenerator
|
|
15
|
-
from autogluon.timeseries.utils.forecast import
|
|
14
|
+
from autogluon.timeseries.utils.forecast import make_future_data_frame
|
|
16
15
|
|
|
17
16
|
logger = logging.getLogger(__name__)
|
|
18
17
|
|
|
@@ -26,25 +25,23 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
26
25
|
self,
|
|
27
26
|
path_context: str,
|
|
28
27
|
target: str = "target",
|
|
29
|
-
known_covariates_names:
|
|
28
|
+
known_covariates_names: list[str] | None = None,
|
|
30
29
|
trainer_type: Type[TimeSeriesTrainer] = TimeSeriesTrainer,
|
|
31
|
-
eval_metric:
|
|
32
|
-
eval_metric_seasonal_period: Optional[int] = None,
|
|
30
|
+
eval_metric: str | TimeSeriesScorer | None = None,
|
|
33
31
|
prediction_length: int = 1,
|
|
34
32
|
cache_predictions: bool = True,
|
|
35
|
-
ensemble_model_type:
|
|
33
|
+
ensemble_model_type: Type | None = None,
|
|
36
34
|
**kwargs,
|
|
37
35
|
):
|
|
38
36
|
super().__init__(path_context=path_context)
|
|
39
|
-
self.eval_metric
|
|
40
|
-
self.eval_metric_seasonal_period = eval_metric_seasonal_period
|
|
37
|
+
self.eval_metric = check_get_evaluation_metric(eval_metric, prediction_length=prediction_length)
|
|
41
38
|
self.trainer_type = trainer_type
|
|
42
39
|
self.target = target
|
|
43
40
|
self.known_covariates_names = [] if known_covariates_names is None else known_covariates_names
|
|
44
41
|
self.prediction_length = prediction_length
|
|
45
42
|
self.quantile_levels = kwargs.get("quantile_levels", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
|
|
46
43
|
self.cache_predictions = cache_predictions
|
|
47
|
-
self.freq:
|
|
44
|
+
self.freq: str | None = None
|
|
48
45
|
self.ensemble_model_type = ensemble_model_type
|
|
49
46
|
|
|
50
47
|
self.feature_generator = TimeSeriesFeatureGenerator(
|
|
@@ -58,13 +55,15 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
58
55
|
def fit(
|
|
59
56
|
self,
|
|
60
57
|
train_data: TimeSeriesDataFrame,
|
|
61
|
-
hyperparameters:
|
|
62
|
-
val_data:
|
|
63
|
-
hyperparameter_tune_kwargs:
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
58
|
+
hyperparameters: str | dict,
|
|
59
|
+
val_data: TimeSeriesDataFrame | None = None,
|
|
60
|
+
hyperparameter_tune_kwargs: str | dict | None = None,
|
|
61
|
+
ensemble_hyperparameters: dict[str, Any] | list[dict[str, Any]] | None = None,
|
|
62
|
+
time_limit: float | None = None,
|
|
63
|
+
num_val_windows: tuple[int, ...] = (1,),
|
|
64
|
+
val_step_size: int | None = None,
|
|
65
|
+
refit_every_n_windows: int | None = 1,
|
|
66
|
+
random_seed: int | None = None,
|
|
68
67
|
**kwargs,
|
|
69
68
|
) -> None:
|
|
70
69
|
self._time_limit = time_limit
|
|
@@ -82,14 +81,14 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
82
81
|
path=self.model_context,
|
|
83
82
|
prediction_length=self.prediction_length,
|
|
84
83
|
eval_metric=self.eval_metric,
|
|
85
|
-
eval_metric_seasonal_period=self.eval_metric_seasonal_period,
|
|
86
84
|
target=self.target,
|
|
87
85
|
quantile_levels=self.quantile_levels,
|
|
88
86
|
verbosity=kwargs.get("verbosity", 2),
|
|
89
87
|
skip_model_selection=kwargs.get("skip_model_selection", False),
|
|
90
88
|
enable_ensemble=kwargs.get("enable_ensemble", True),
|
|
91
|
-
|
|
92
|
-
|
|
89
|
+
covariate_metadata=self.feature_generator.covariate_metadata,
|
|
90
|
+
num_val_windows=num_val_windows,
|
|
91
|
+
val_step_size=val_step_size,
|
|
93
92
|
refit_every_n_windows=refit_every_n_windows,
|
|
94
93
|
cache_predictions=self.cache_predictions,
|
|
95
94
|
ensemble_model_type=self.ensemble_model_type,
|
|
@@ -97,7 +96,7 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
97
96
|
)
|
|
98
97
|
|
|
99
98
|
assert issubclass(self.trainer_type, TimeSeriesTrainer)
|
|
100
|
-
self.trainer:
|
|
99
|
+
self.trainer: TimeSeriesTrainer | None = self.trainer_type(**trainer_init_kwargs)
|
|
101
100
|
self.trainer_path = self.trainer.path
|
|
102
101
|
self.save()
|
|
103
102
|
|
|
@@ -114,6 +113,7 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
114
113
|
val_data=val_data,
|
|
115
114
|
hyperparameters=hyperparameters,
|
|
116
115
|
hyperparameter_tune_kwargs=hyperparameter_tune_kwargs,
|
|
116
|
+
ensemble_hyperparameters=ensemble_hyperparameters,
|
|
117
117
|
excluded_model_types=kwargs.get("excluded_model_types"),
|
|
118
118
|
time_limit=time_limit,
|
|
119
119
|
random_seed=random_seed,
|
|
@@ -124,9 +124,9 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
124
124
|
|
|
125
125
|
def _align_covariates_with_forecast_index(
|
|
126
126
|
self,
|
|
127
|
-
known_covariates:
|
|
127
|
+
known_covariates: TimeSeriesDataFrame | None,
|
|
128
128
|
data: TimeSeriesDataFrame,
|
|
129
|
-
) ->
|
|
129
|
+
) -> TimeSeriesDataFrame | None:
|
|
130
130
|
"""Select the relevant item_ids and timestamps from the known_covariates dataframe.
|
|
131
131
|
|
|
132
132
|
If some of the item_ids or timestamps are missing, an exception is raised.
|
|
@@ -148,25 +148,27 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
148
148
|
f"known_covariates are missing information for the following item_ids: {reprlib.repr(missing_item_ids.to_list())}."
|
|
149
149
|
)
|
|
150
150
|
|
|
151
|
-
forecast_index =
|
|
152
|
-
data, prediction_length=self.prediction_length, freq=self.freq
|
|
151
|
+
forecast_index = pd.MultiIndex.from_frame(
|
|
152
|
+
make_future_data_frame(data, prediction_length=self.prediction_length, freq=self.freq)
|
|
153
153
|
)
|
|
154
154
|
try:
|
|
155
155
|
known_covariates = known_covariates.loc[forecast_index] # type: ignore
|
|
156
156
|
except KeyError:
|
|
157
157
|
raise ValueError(
|
|
158
|
-
|
|
159
|
-
"
|
|
158
|
+
"`known_covariates` should include the `item_id` and `timestamp` values covering the forecast horizon "
|
|
159
|
+
"(i.e., the next `prediction_length` time steps following the end of each time series in the input "
|
|
160
|
+
"data). Use `TimeSeriesPredictor.make_future_data_frame` to generate the required `item_id` and "
|
|
161
|
+
"`timestamp` combinations for the `known_covariates`."
|
|
160
162
|
)
|
|
161
163
|
return known_covariates
|
|
162
164
|
|
|
163
165
|
def predict(
|
|
164
166
|
self,
|
|
165
167
|
data: TimeSeriesDataFrame,
|
|
166
|
-
known_covariates:
|
|
167
|
-
model:
|
|
168
|
+
known_covariates: TimeSeriesDataFrame | None = None,
|
|
169
|
+
model: str | AbstractTimeSeriesModel | None = None,
|
|
168
170
|
use_cache: bool = True,
|
|
169
|
-
random_seed:
|
|
171
|
+
random_seed: int | None = None,
|
|
170
172
|
**kwargs,
|
|
171
173
|
) -> TimeSeriesDataFrame:
|
|
172
174
|
data = self.feature_generator.transform(data)
|
|
@@ -184,8 +186,8 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
184
186
|
def score(
|
|
185
187
|
self,
|
|
186
188
|
data: TimeSeriesDataFrame,
|
|
187
|
-
model:
|
|
188
|
-
metric:
|
|
189
|
+
model: str | AbstractTimeSeriesModel | None = None,
|
|
190
|
+
metric: str | TimeSeriesScorer | None = None,
|
|
189
191
|
use_cache: bool = True,
|
|
190
192
|
) -> float:
|
|
191
193
|
data = self.feature_generator.transform(data)
|
|
@@ -194,24 +196,24 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
194
196
|
def evaluate(
|
|
195
197
|
self,
|
|
196
198
|
data: TimeSeriesDataFrame,
|
|
197
|
-
model:
|
|
198
|
-
metrics:
|
|
199
|
+
model: str | None = None,
|
|
200
|
+
metrics: str | TimeSeriesScorer | list[str | TimeSeriesScorer] | None = None,
|
|
199
201
|
use_cache: bool = True,
|
|
200
|
-
) ->
|
|
202
|
+
) -> dict[str, float]:
|
|
201
203
|
data = self.feature_generator.transform(data)
|
|
202
204
|
return self.load_trainer().evaluate(data=data, model=model, metrics=metrics, use_cache=use_cache)
|
|
203
205
|
|
|
204
206
|
def get_feature_importance(
|
|
205
207
|
self,
|
|
206
|
-
data:
|
|
207
|
-
model:
|
|
208
|
-
metric:
|
|
209
|
-
features:
|
|
210
|
-
time_limit:
|
|
208
|
+
data: TimeSeriesDataFrame | None = None,
|
|
209
|
+
model: str | None = None,
|
|
210
|
+
metric: str | TimeSeriesScorer | None = None,
|
|
211
|
+
features: list[str] | None = None,
|
|
212
|
+
time_limit: float | None = None,
|
|
211
213
|
method: Literal["naive", "permutation"] = "permutation",
|
|
212
214
|
subsample_size: int = 50,
|
|
213
|
-
num_iterations:
|
|
214
|
-
random_seed:
|
|
215
|
+
num_iterations: int | None = None,
|
|
216
|
+
random_seed: int | None = None,
|
|
215
217
|
relative_scores: bool = False,
|
|
216
218
|
include_confidence_band: bool = True,
|
|
217
219
|
confidence_level: float = 0.99,
|
|
@@ -272,9 +274,9 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
272
274
|
|
|
273
275
|
def leaderboard(
|
|
274
276
|
self,
|
|
275
|
-
data:
|
|
277
|
+
data: TimeSeriesDataFrame | None = None,
|
|
276
278
|
extra_info: bool = False,
|
|
277
|
-
extra_metrics:
|
|
279
|
+
extra_metrics: list[str | TimeSeriesScorer] | None = None,
|
|
278
280
|
use_cache: bool = True,
|
|
279
281
|
) -> pd.DataFrame:
|
|
280
282
|
if data is not None:
|
|
@@ -283,7 +285,7 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
283
285
|
data, extra_info=extra_info, extra_metrics=extra_metrics, use_cache=use_cache
|
|
284
286
|
)
|
|
285
287
|
|
|
286
|
-
def get_info(self, include_model_info: bool = False, **kwargs) ->
|
|
288
|
+
def get_info(self, include_model_info: bool = False, **kwargs) -> dict[str, Any]:
|
|
287
289
|
learner_info = super().get_info(include_model_info=include_model_info)
|
|
288
290
|
trainer = self.load_trainer()
|
|
289
291
|
trainer_info = trainer.get_info(include_model_info=include_model_info)
|
|
@@ -301,31 +303,63 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
301
303
|
return learner_info
|
|
302
304
|
|
|
303
305
|
def persist_trainer(
|
|
304
|
-
self, models:
|
|
305
|
-
) ->
|
|
306
|
+
self, models: Literal["all", "best"] | list[str] = "all", with_ancestors: bool = False
|
|
307
|
+
) -> list[str]:
|
|
306
308
|
"""Loads models and trainer in memory so that they don't have to be
|
|
307
309
|
loaded during predictions
|
|
308
310
|
|
|
309
311
|
Returns
|
|
310
312
|
-------
|
|
311
|
-
list_of_models
|
|
313
|
+
list_of_models
|
|
312
314
|
List of models persisted in memory
|
|
313
315
|
"""
|
|
314
316
|
self.trainer = self.load_trainer()
|
|
315
317
|
return self.trainer.persist(models, with_ancestors=with_ancestors)
|
|
316
318
|
|
|
317
|
-
def unpersist_trainer(self) ->
|
|
319
|
+
def unpersist_trainer(self) -> list[str]:
|
|
318
320
|
"""Unloads models and trainer from memory. Models will have to be reloaded from disk
|
|
319
321
|
when predicting.
|
|
320
322
|
|
|
321
323
|
Returns
|
|
322
324
|
-------
|
|
323
|
-
list_of_models
|
|
325
|
+
list_of_models
|
|
324
326
|
List of models removed from memory
|
|
325
327
|
"""
|
|
326
328
|
unpersisted_models = self.load_trainer().unpersist()
|
|
327
329
|
self.trainer = None # type: ignore
|
|
328
330
|
return unpersisted_models
|
|
329
331
|
|
|
330
|
-
def refit_full(self, model: str = "all") ->
|
|
332
|
+
def refit_full(self, model: str = "all") -> dict[str, str]:
|
|
331
333
|
return self.load_trainer().refit_full(model=model)
|
|
334
|
+
|
|
335
|
+
def backtest_predictions(
|
|
336
|
+
self,
|
|
337
|
+
data: TimeSeriesDataFrame | None,
|
|
338
|
+
model_names: list[str],
|
|
339
|
+
num_val_windows: int | None = None,
|
|
340
|
+
val_step_size: int | None = None,
|
|
341
|
+
use_cache: bool = True,
|
|
342
|
+
) -> dict[str, list[TimeSeriesDataFrame]]:
|
|
343
|
+
if data is not None:
|
|
344
|
+
data = self.feature_generator.transform(data)
|
|
345
|
+
return self.load_trainer().backtest_predictions(
|
|
346
|
+
model_names=model_names,
|
|
347
|
+
data=data,
|
|
348
|
+
num_val_windows=num_val_windows,
|
|
349
|
+
val_step_size=val_step_size,
|
|
350
|
+
use_cache=use_cache,
|
|
351
|
+
)
|
|
352
|
+
|
|
353
|
+
def backtest_targets(
|
|
354
|
+
self,
|
|
355
|
+
data: TimeSeriesDataFrame | None,
|
|
356
|
+
num_val_windows: int | None = None,
|
|
357
|
+
val_step_size: int | None = None,
|
|
358
|
+
) -> list[TimeSeriesDataFrame]:
|
|
359
|
+
if data is not None:
|
|
360
|
+
data = self.feature_generator.transform(data)
|
|
361
|
+
return self.load_trainer().backtest_targets(
|
|
362
|
+
data=data,
|
|
363
|
+
num_val_windows=num_val_windows,
|
|
364
|
+
val_step_size=val_step_size,
|
|
365
|
+
)
|
|
@@ -1,11 +1,17 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from pprint import pformat
|
|
2
|
-
from typing import
|
|
4
|
+
from typing import Any, Sequence, Type
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
3
7
|
|
|
4
8
|
from .abstract import TimeSeriesScorer
|
|
5
9
|
from .point import MAE, MAPE, MASE, MSE, RMSE, RMSLE, RMSSE, SMAPE, WAPE, WCD
|
|
6
10
|
from .quantile import SQL, WQL
|
|
7
11
|
|
|
8
12
|
__all__ = [
|
|
13
|
+
"TimeSeriesScorer",
|
|
14
|
+
"check_get_evaluation_metric",
|
|
9
15
|
"MAE",
|
|
10
16
|
"MAPE",
|
|
11
17
|
"MASE",
|
|
@@ -22,7 +28,7 @@ __all__ = [
|
|
|
22
28
|
|
|
23
29
|
DEFAULT_METRIC_NAME = "WQL"
|
|
24
30
|
|
|
25
|
-
AVAILABLE_METRICS = {
|
|
31
|
+
AVAILABLE_METRICS: dict[str, Type[TimeSeriesScorer]] = {
|
|
26
32
|
"MASE": MASE,
|
|
27
33
|
"MAPE": MAPE,
|
|
28
34
|
"SMAPE": SMAPE,
|
|
@@ -42,33 +48,61 @@ DEPRECATED_METRICS = {
|
|
|
42
48
|
}
|
|
43
49
|
|
|
44
50
|
# Experimental metrics that are not yet user facing
|
|
45
|
-
EXPERIMENTAL_METRICS = {
|
|
51
|
+
EXPERIMENTAL_METRICS: dict[str, Type[TimeSeriesScorer]] = {
|
|
46
52
|
"WCD": WCD,
|
|
47
53
|
}
|
|
48
54
|
|
|
49
55
|
|
|
50
56
|
def check_get_evaluation_metric(
|
|
51
|
-
eval_metric:
|
|
57
|
+
eval_metric: str | TimeSeriesScorer | Type[TimeSeriesScorer] | None,
|
|
58
|
+
prediction_length: int,
|
|
59
|
+
seasonal_period: int | None = None,
|
|
60
|
+
horizon_weight: Sequence[float] | np.ndarray | None = None,
|
|
52
61
|
) -> TimeSeriesScorer:
|
|
62
|
+
"""Factory method for TimeSeriesScorer objects.
|
|
63
|
+
|
|
64
|
+
Returns
|
|
65
|
+
-------
|
|
66
|
+
scorer
|
|
67
|
+
A `TimeSeriesScorer` object based on the provided `eval_metric`.
|
|
68
|
+
|
|
69
|
+
`scorer.prediction_length` is always set to the `prediction_length` provided to this method.
|
|
70
|
+
|
|
71
|
+
If `seasonal_period` is not `None`, then `scorer.seasonal_period` is set to this value. Otherwise the original
|
|
72
|
+
value of `seasonal_period` is kept.
|
|
73
|
+
|
|
74
|
+
If `horizon_weight` is not `None`, then `scorer.horizon_weight` is set to this value. Otherwise the original
|
|
75
|
+
value of `horizon_weight` is kept.
|
|
76
|
+
"""
|
|
53
77
|
scorer: TimeSeriesScorer
|
|
78
|
+
metric_kwargs: dict[str, Any] = dict(
|
|
79
|
+
prediction_length=prediction_length, seasonal_period=seasonal_period, horizon_weight=horizon_weight
|
|
80
|
+
)
|
|
54
81
|
if isinstance(eval_metric, TimeSeriesScorer):
|
|
55
82
|
scorer = eval_metric
|
|
83
|
+
scorer.prediction_length = prediction_length
|
|
84
|
+
if seasonal_period is not None:
|
|
85
|
+
scorer.seasonal_period = seasonal_period
|
|
86
|
+
if horizon_weight is not None:
|
|
87
|
+
scorer.horizon_weight = scorer.check_get_horizon_weight(
|
|
88
|
+
horizon_weight, prediction_length=prediction_length
|
|
89
|
+
)
|
|
56
90
|
elif isinstance(eval_metric, type) and issubclass(eval_metric, TimeSeriesScorer):
|
|
57
91
|
# e.g., user passed `eval_metric=CustomMetric` instead of `eval_metric=CustomMetric()`
|
|
58
|
-
scorer = eval_metric()
|
|
92
|
+
scorer = eval_metric(**metric_kwargs)
|
|
59
93
|
elif isinstance(eval_metric, str):
|
|
60
94
|
metric_name = DEPRECATED_METRICS.get(eval_metric, eval_metric).upper()
|
|
61
95
|
if metric_name in AVAILABLE_METRICS:
|
|
62
|
-
scorer = AVAILABLE_METRICS[metric_name]()
|
|
96
|
+
scorer = AVAILABLE_METRICS[metric_name](**metric_kwargs)
|
|
63
97
|
elif metric_name in EXPERIMENTAL_METRICS:
|
|
64
|
-
scorer = EXPERIMENTAL_METRICS[metric_name]()
|
|
98
|
+
scorer = EXPERIMENTAL_METRICS[metric_name](**metric_kwargs)
|
|
65
99
|
else:
|
|
66
100
|
raise ValueError(
|
|
67
101
|
f"Time series metric {eval_metric} not supported. Available metrics are:\n"
|
|
68
102
|
f"{pformat(sorted(AVAILABLE_METRICS.keys()))}"
|
|
69
103
|
)
|
|
70
104
|
elif eval_metric is None:
|
|
71
|
-
scorer = AVAILABLE_METRICS[DEFAULT_METRIC_NAME]()
|
|
105
|
+
scorer = AVAILABLE_METRICS[DEFAULT_METRIC_NAME](**metric_kwargs)
|
|
72
106
|
else:
|
|
73
107
|
raise ValueError(
|
|
74
108
|
f"eval_metric must be of type str, TimeSeriesScorer or None "
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
|
|
1
|
+
import warnings
|
|
2
|
+
from typing import Sequence, overload
|
|
2
3
|
|
|
3
4
|
import numpy as np
|
|
4
5
|
import pandas as pd
|
|
@@ -15,6 +16,18 @@ class TimeSeriesScorer:
|
|
|
15
16
|
|
|
16
17
|
Follows the design of ``autogluon.core.metrics.Scorer``.
|
|
17
18
|
|
|
19
|
+
Parameters
|
|
20
|
+
----------
|
|
21
|
+
prediction_length : int, default = 1
|
|
22
|
+
The length of the forecast horizon. The predictions provided to the ``TimeSeriesScorer`` are expected to contain
|
|
23
|
+
a forecast for this many time steps for each time series.
|
|
24
|
+
seasonal_period : int or None, default = None
|
|
25
|
+
Seasonal period used to compute some evaluation metrics such as mean absolute scaled error (MASE). Defaults to
|
|
26
|
+
``None``, in which case the seasonal period is computed based on the data frequency.
|
|
27
|
+
horizon_weight : Sequence[float], np.ndarray or None, default = None
|
|
28
|
+
Weight assigned to each time step in the forecast horizon when computing the metric. If provided, the
|
|
29
|
+
``horizon_weight`` will be stored as a numpy array of shape ``[1, prediction_length]``.
|
|
30
|
+
|
|
18
31
|
Attributes
|
|
19
32
|
----------
|
|
20
33
|
greater_is_better_internal : bool, default = False
|
|
@@ -30,15 +43,28 @@ class TimeSeriesScorer:
|
|
|
30
43
|
Whether the given metric uses the quantile predictions. Some models will modify the training procedure if they
|
|
31
44
|
are trained to optimize a quantile metric.
|
|
32
45
|
equivalent_tabular_regression_metric : str
|
|
33
|
-
Name of an equivalent metric used by AutoGluon-Tabular with ``problem_type="regression"``. Used by
|
|
34
|
-
train
|
|
46
|
+
Name of an equivalent metric used by AutoGluon-Tabular with ``problem_type="regression"``. Used by forecasting
|
|
47
|
+
models that train tabular regression models under the hood. This attribute should only be specified by point
|
|
48
|
+
forecast metrics.
|
|
35
49
|
"""
|
|
36
50
|
|
|
37
51
|
greater_is_better_internal: bool = False
|
|
38
52
|
optimum: float = 0.0
|
|
39
53
|
optimized_by_median: bool = False
|
|
40
54
|
needs_quantile: bool = False
|
|
41
|
-
equivalent_tabular_regression_metric:
|
|
55
|
+
equivalent_tabular_regression_metric: str | None = None
|
|
56
|
+
|
|
57
|
+
def __init__(
|
|
58
|
+
self,
|
|
59
|
+
prediction_length: int = 1,
|
|
60
|
+
seasonal_period: int | None = None,
|
|
61
|
+
horizon_weight: Sequence[float] | None = None,
|
|
62
|
+
):
|
|
63
|
+
self.prediction_length = int(prediction_length)
|
|
64
|
+
if self.prediction_length < 1:
|
|
65
|
+
raise ValueError(f"prediction_length must be >= 1 (received {prediction_length})")
|
|
66
|
+
self.seasonal_period = seasonal_period
|
|
67
|
+
self.horizon_weight = self.check_get_horizon_weight(horizon_weight, prediction_length=prediction_length)
|
|
42
68
|
|
|
43
69
|
@property
|
|
44
70
|
def sign(self) -> int:
|
|
@@ -66,18 +92,25 @@ class TimeSeriesScorer:
|
|
|
66
92
|
self,
|
|
67
93
|
data: TimeSeriesDataFrame,
|
|
68
94
|
predictions: TimeSeriesDataFrame,
|
|
69
|
-
prediction_length: int = 1,
|
|
70
95
|
target: str = "target",
|
|
71
|
-
seasonal_period: Optional[int] = None,
|
|
72
96
|
**kwargs,
|
|
73
97
|
) -> float:
|
|
74
|
-
seasonal_period = get_seasonality(data.freq) if seasonal_period is None else seasonal_period
|
|
98
|
+
seasonal_period = get_seasonality(data.freq) if self.seasonal_period is None else self.seasonal_period
|
|
75
99
|
|
|
76
|
-
|
|
77
|
-
|
|
100
|
+
if "prediction_length" in kwargs:
|
|
101
|
+
warnings.warn(
|
|
102
|
+
"Passing `prediction_length` to `TimeSeriesScorer.__call__` is deprecated and will be removed in v2.0. "
|
|
103
|
+
"Please set the `eval_metric.prediction_length` attribute instead.",
|
|
104
|
+
category=FutureWarning,
|
|
105
|
+
)
|
|
106
|
+
self.prediction_length = kwargs["prediction_length"]
|
|
107
|
+
self.horizon_weight = self.check_get_horizon_weight(self.horizon_weight, self.prediction_length)
|
|
108
|
+
|
|
109
|
+
data_past = data.slice_by_timestep(None, -self.prediction_length)
|
|
110
|
+
data_future = data.slice_by_timestep(-self.prediction_length, None)
|
|
78
111
|
|
|
79
112
|
assert not predictions.isna().any().any(), "Predictions contain NaN values."
|
|
80
|
-
assert (predictions.num_timesteps_per_item() == prediction_length).all()
|
|
113
|
+
assert (predictions.num_timesteps_per_item() == self.prediction_length).all()
|
|
81
114
|
assert data_future.index.equals(predictions.index), "Prediction and data indices do not match."
|
|
82
115
|
|
|
83
116
|
try:
|
|
@@ -140,7 +173,7 @@ class TimeSeriesScorer:
|
|
|
140
173
|
) -> None:
|
|
141
174
|
"""Compute auxiliary metrics on past data (before forecast horizon), if the chosen metric requires it.
|
|
142
175
|
|
|
143
|
-
This method should only be implemented by metrics that rely on
|
|
176
|
+
This method should only be implemented by metrics that rely on historical (in-sample) data, such as Mean Absolute
|
|
144
177
|
Scaled Error (MASE) https://en.wikipedia.org/wiki/Mean_absolute_scaled_error.
|
|
145
178
|
|
|
146
179
|
We keep this method separate from :meth:`compute_metric` to avoid redundant computations when fitting ensemble.
|
|
@@ -159,21 +192,21 @@ class TimeSeriesScorer:
|
|
|
159
192
|
return self.optimum - self.score(*args, **kwargs)
|
|
160
193
|
|
|
161
194
|
@staticmethod
|
|
162
|
-
def _safemean(array:
|
|
195
|
+
def _safemean(array: np.ndarray | pd.Series) -> float:
|
|
163
196
|
"""Compute mean of a numpy array-like object, ignoring inf, -inf and nan values."""
|
|
164
197
|
return float(np.mean(array[np.isfinite(array)]))
|
|
165
198
|
|
|
166
199
|
@staticmethod
|
|
167
200
|
def _get_point_forecast_score_inputs(
|
|
168
201
|
data_future: TimeSeriesDataFrame, predictions: TimeSeriesDataFrame, target: str = "target"
|
|
169
|
-
) ->
|
|
202
|
+
) -> tuple[pd.Series, pd.Series]:
|
|
170
203
|
"""Get inputs necessary to compute point forecast metrics.
|
|
171
204
|
|
|
172
205
|
Returns
|
|
173
206
|
-------
|
|
174
|
-
y_true
|
|
207
|
+
y_true
|
|
175
208
|
Target time series values during the forecast horizon.
|
|
176
|
-
y_pred
|
|
209
|
+
y_pred
|
|
177
210
|
Predicted time series values during the forecast horizon.
|
|
178
211
|
"""
|
|
179
212
|
y_true = data_future[target]
|
|
@@ -183,16 +216,16 @@ class TimeSeriesScorer:
|
|
|
183
216
|
@staticmethod
|
|
184
217
|
def _get_quantile_forecast_score_inputs(
|
|
185
218
|
data_future: TimeSeriesDataFrame, predictions: TimeSeriesDataFrame, target: str = "target"
|
|
186
|
-
) ->
|
|
219
|
+
) -> tuple[pd.Series, pd.DataFrame, np.ndarray]:
|
|
187
220
|
"""Get inputs necessary to compute quantile forecast metrics.
|
|
188
221
|
|
|
189
222
|
Returns
|
|
190
223
|
-------
|
|
191
|
-
y_true
|
|
224
|
+
y_true
|
|
192
225
|
Target time series values during the forecast horizon.
|
|
193
|
-
q_pred
|
|
226
|
+
q_pred
|
|
194
227
|
Quantile forecast for each predicted quantile level. Column order corresponds to ``quantile_levels``.
|
|
195
|
-
quantile_levels
|
|
228
|
+
quantile_levels
|
|
196
229
|
Quantile levels for which the forecasts are generated (as floats).
|
|
197
230
|
"""
|
|
198
231
|
quantile_columns = [col for col in predictions.columns if col != "mean"]
|
|
@@ -200,3 +233,40 @@ class TimeSeriesScorer:
|
|
|
200
233
|
q_pred = pd.DataFrame(predictions[quantile_columns])
|
|
201
234
|
quantile_levels = np.array(quantile_columns, dtype=float)
|
|
202
235
|
return y_true, q_pred, quantile_levels
|
|
236
|
+
|
|
237
|
+
@overload
|
|
238
|
+
@staticmethod
|
|
239
|
+
def check_get_horizon_weight(horizon_weight: None, prediction_length: int) -> None: ...
|
|
240
|
+
@overload
|
|
241
|
+
@staticmethod
|
|
242
|
+
def check_get_horizon_weight(
|
|
243
|
+
horizon_weight: Sequence[float] | np.ndarray, prediction_length: int
|
|
244
|
+
) -> np.ndarray: ...
|
|
245
|
+
|
|
246
|
+
@staticmethod
|
|
247
|
+
def check_get_horizon_weight(
|
|
248
|
+
horizon_weight: Sequence[float] | np.ndarray | None, prediction_length: int
|
|
249
|
+
) -> np.ndarray | None:
|
|
250
|
+
"""Convert horizon_weight to a non-negative numpy array that sums up to prediction_length.
|
|
251
|
+
Raises an exception if horizon_weight has an invalid shape or contains invalid values.
|
|
252
|
+
|
|
253
|
+
Returns
|
|
254
|
+
-------
|
|
255
|
+
horizon_weight
|
|
256
|
+
None if the input is None, otherwise a numpy array of shape [1, prediction_length].
|
|
257
|
+
"""
|
|
258
|
+
if horizon_weight is None:
|
|
259
|
+
return None
|
|
260
|
+
horizon_weight_np = np.ravel(horizon_weight).astype(np.float64)
|
|
261
|
+
if horizon_weight_np.shape != (prediction_length,):
|
|
262
|
+
raise ValueError(
|
|
263
|
+
f"horizon_weight must have length equal to {prediction_length=} (got {len(horizon_weight)=})"
|
|
264
|
+
)
|
|
265
|
+
if not (horizon_weight_np >= 0).all():
|
|
266
|
+
raise ValueError(f"All values in horizon_weight must be >= 0 (got {horizon_weight})")
|
|
267
|
+
if not horizon_weight_np.sum() > 0:
|
|
268
|
+
raise ValueError(f"At least some values in horizon_weight must be > 0 (got {horizon_weight})")
|
|
269
|
+
if not np.isfinite(horizon_weight_np).all():
|
|
270
|
+
raise ValueError(f"All horizon_weight values must be finite (got {horizon_weight})")
|
|
271
|
+
horizon_weight_np = horizon_weight_np * prediction_length / horizon_weight_np.sum()
|
|
272
|
+
return horizon_weight_np.reshape([1, prediction_length])
|